Index: projects/clang1000-import/.cirrus.yml =================================================================== --- projects/clang1000-import/.cirrus.yml (revision 356919) +++ projects/clang1000-import/.cirrus.yml (revision 356920) @@ -1,18 +1,20 @@ # $FreeBSD$ freebsd_instance: - image: freebsd-12-0-release-amd64 + image: freebsd-12-1-release-amd64 cpu: 8 memory: 24G env: CIRRUS_CLONE_DEPTH: 1 task: timeout_in: 90m install_script: - pkg install -y qemu-devel uefi-edk2-qemu-x86_64 script: - make -j$(sysctl -n hw.ncpu) WITHOUT_TOOLCHAIN=yes buildworld buildkernel + package_script: + - make WITHOUT_TOOLCHAIN=yes PKG_FORMAT=tar packages test_script: - sh tools/boot/ci-qemu-test.sh Index: projects/clang1000-import/Makefile.inc1 =================================================================== --- projects/clang1000-import/Makefile.inc1 (revision 356919) +++ projects/clang1000-import/Makefile.inc1 (revision 356920) @@ -1,3458 +1,3467 @@ # # $FreeBSD$ # # Make command line options: # -DNO_CLEANDIR run ${MAKE} clean, instead of ${MAKE} cleandir # -DNO_CLEAN do not clean at all # -DDB_FROM_SRC use the user/group databases in src/etc instead of # the system database when installing. # -DNO_SHARE do not go into share subdir # -DKERNFAST define NO_KERNEL{CONFIG,CLEAN,OBJ} # -DNO_KERNELCONFIG do not run config in ${MAKE} buildkernel # -DNO_KERNELCLEAN do not run ${MAKE} clean in ${MAKE} buildkernel # -DNO_KERNELOBJ do not run ${MAKE} obj in ${MAKE} buildkernel # -DNO_PORTSUPDATE do not update ports in ${MAKE} update # -DNO_ROOT install without using root privilege # -DNO_DOCUPDATE do not update doc in ${MAKE} update # -DWITHOUT_CTF do not run the DTrace CTF conversion tools on built objects # LOCAL_DIRS="list of dirs" to add additional dirs to the SUBDIR list # LOCAL_ITOOLS="list of tools" to add additional tools to the ITOOLS list # LOCAL_LIB_DIRS="list of dirs" to add additional dirs to libraries target # LOCAL_MTREE="list of mtree files" to process to allow local directories # to be created before files are installed # LOCAL_TOOL_DIRS="list of dirs" to add additional dirs to the build-tools # list # LOCAL_XTOOL_DIRS="list of dirs" to add additional dirs to the # cross-tools target # METALOG="path to metadata log" to write permission and ownership # when NO_ROOT is set. (default: ${DESTDIR}/METALOG) # TARGET="machine" to crossbuild world for a different machine type # TARGET_ARCH= may be required when a TARGET supports multiple endians # BUILDENV_SHELL= shell to launch for the buildenv target (def:${SHELL}) # WORLD_FLAGS= additional flags to pass to make(1) during buildworld # KERNEL_FLAGS= additional flags to pass to make(1) during buildkernel # SUBDIR_OVERRIDE="list of dirs" to build rather than everything. # All libraries and includes, and some build tools will still build. # # The intended user-driven targets are: # buildworld - rebuild *everything*, including glue to help do upgrades # installworld- install everything built by "buildworld" # checkworld - run test suite on installed world # doxygen - build API documentation of the kernel # update - convenient way to update your source tree (eg: svn/svnup) # # Standard targets (not defined here) are documented in the makefiles in # /usr/share/mk. These include: # obj depend all install clean cleandepend cleanobj .if !defined(TARGET) || !defined(TARGET_ARCH) .error "Both TARGET and TARGET_ARCH must be defined." .endif .if make(showconfig) || make(test-system-*) _MKSHOWCONFIG= t .endif SRCDIR?= ${.CURDIR} LOCALBASE?= /usr/local # Cross toolchain changes must be in effect before bsd.compiler.mk # so that gets the right CC, and pass CROSS_TOOLCHAIN to submakes. .if defined(CROSS_TOOLCHAIN) .if exists(${LOCALBASE}/share/toolchains/${CROSS_TOOLCHAIN}.mk) .include "${LOCALBASE}/share/toolchains/${CROSS_TOOLCHAIN}.mk" .elif exists(/usr/share/toolchains/${CROSS_TOOLCHAIN}.mk) .include "/usr/share/toolchains/${CROSS_TOOLCHAIN}.mk" .elif exists(${CROSS_TOOLCHAIN}) .include "${CROSS_TOOLCHAIN}" .else .error CROSS_TOOLCHAIN ${CROSS_TOOLCHAIN} not found .endif CROSSENV+=CROSS_TOOLCHAIN="${CROSS_TOOLCHAIN}" .endif .if defined(CROSS_TOOLCHAIN_PREFIX) CROSS_COMPILER_PREFIX?=${CROSS_TOOLCHAIN_PREFIX} .endif XCOMPILERS= CC CXX CPP .for COMPILER in ${XCOMPILERS} .if defined(CROSS_COMPILER_PREFIX) X${COMPILER}?= ${CROSS_COMPILER_PREFIX}${${COMPILER}} .else X${COMPILER}?= ${${COMPILER}} .endif .endfor # If a full path to an external cross compiler is given, don't build # a cross compiler. .if ${XCC:N${CCACHE_BIN}:M/*} MK_CLANG_BOOTSTRAP= no MK_GCC_BOOTSTRAP= no .endif # Pull in compiler metadata from buildworld/toolchain if possible to avoid # running CC from bsd.compiler.mk. .if make(installworld) || make(install) || make(distributeworld) || \ make(stageworld) .-include "${OBJTOP}/toolchain-metadata.mk" .if !defined(_LOADED_TOOLCHAIN_METADATA) .error A build is required first. You may have the wrong MAKEOBJDIRPREFIX set. .endif .endif # Pull in COMPILER_TYPE and COMPILER_FREEBSD_VERSION early. Pull it from the # tree to be friendlier to foreign OS builds. It's safe to do so unconditionally # here since we will always have the right make, unlike in src/Makefile # Don't include bsd.linker.mk yet until XBINUTILS is handled (after src.opts.mk) _NO_INCLUDE_LINKERMK= t # We also want the X_COMPILER* variables if we are using an external toolchain. _WANT_TOOLCHAIN_CROSS_VARS= t .include "share/mk/bsd.compiler.mk" .undef _NO_INCLUDE_LINKERMK .undef _WANT_TOOLCHAIN_CROSS_VARS # src.opts.mk depends on COMPILER_FEATURES .include "share/mk/src.opts.mk" .if ${TARGET} == ${MACHINE} TARGET_CPUTYPE?=${CPUTYPE} .else TARGET_CPUTYPE?= .endif .if !empty(TARGET_CPUTYPE) _TARGET_CPUTYPE=${TARGET_CPUTYPE} .else _TARGET_CPUTYPE=dummy .endif .if ${TARGET} == "arm" .if ${TARGET_ARCH:Marmv[67]*} != "" && ${TARGET_CPUTYPE:M*soft*} == "" TARGET_ABI= gnueabihf .else TARGET_ABI= gnueabi .endif .endif MACHINE_ABI?= unknown MACHINE_TRIPLE?=${MACHINE_ARCH:S/amd64/x86_64/:C/[hs]f$//:S/mipsn32/mips64/}-${MACHINE_ABI}-freebsd13.0 TARGET_ABI?= unknown TARGET_TRIPLE?= ${TARGET_ARCH:S/amd64/x86_64/:C/[hs]f$//:S/mipsn32/mips64/}-${TARGET_ABI}-freebsd13.0 KNOWN_ARCHES?= aarch64/arm64 \ amd64 \ armv6/arm \ armv7/arm \ i386 \ mips \ mipsel/mips \ mips64el/mips \ mipsn32el/mips \ mips64/mips \ mipsn32/mips \ mipshf/mips \ mipselhf/mips \ mips64elhf/mips \ mips64hf/mips \ powerpc \ powerpc64/powerpc \ powerpcspe/powerpc \ riscv64/riscv \ riscv64sf/riscv \ sparc64 .if ${TARGET} == ${TARGET_ARCH} _t= ${TARGET} .else _t= ${TARGET_ARCH}/${TARGET} .endif .for _t in ${_t} .if empty(KNOWN_ARCHES:M${_t}) .error Unknown target ${TARGET_ARCH}:${TARGET}. .endif .endfor # If all targets are disabled for system llvm then don't expect it to work # for cross-builds. .if !defined(TOOLS_PREFIX) && ${MK_LLVM_TARGET_ALL} == "no" && \ ${MACHINE} != ${TARGET} && ${MACHINE_ARCH} != ${TARGET_ARCH} && \ !make(showconfig) MK_SYSTEM_COMPILER= no MK_SYSTEM_LINKER= no .endif # Handle external binutils. .if defined(CROSS_TOOLCHAIN_PREFIX) CROSS_BINUTILS_PREFIX?=${CROSS_TOOLCHAIN_PREFIX} .endif # If we do not have a bootstrap binutils (because the in-tree one does not # support the target architecture), provide a default cross-binutils prefix. # This allows riscv64 builds, for example, to automatically use the # riscv64-binutils port or package. .if !make(showconfig) && !defined(_NO_INCLUDE_COMPILERMK) .if !empty(BROKEN_OPTIONS:MBINUTILS_BOOTSTRAP) && \ ${MK_LLD_BOOTSTRAP} == "no" && \ !defined(CROSS_BINUTILS_PREFIX) CROSS_BINUTILS_PREFIX=/usr/local/${TARGET_TRIPLE}/bin/ .if !exists(${CROSS_BINUTILS_PREFIX}) .if !empty(BROKEN_OPTIONS:MGCC_BOOTSTRAP) && ${MK_CLANG_BOOTSTRAP} == "no" .error In-tree toolchain does not support the ${TARGET_ARCH} architecture. Install the ${TARGET_ARCH}-xtoolchain-gcc port or package or set CROSS_TOOLCHAIN_PREFIX. .else .error In-tree binutils does not support the ${TARGET_ARCH} architecture. Install the ${TARGET_ARCH}-binutils port or package or set CROSS_BINUTILS_PREFIX. .endif .endif .endif .endif XBINUTILS= AS AR LD NM OBJCOPY RANLIB SIZE STRINGS .for BINUTIL in ${XBINUTILS} .if defined(CROSS_BINUTILS_PREFIX) && \ exists(${CROSS_BINUTILS_PREFIX}/${${BINUTIL}}) X${BINUTIL}?= ${CROSS_BINUTILS_PREFIX:C,/*$,,}/${${BINUTIL}} .else X${BINUTIL}?= ${${BINUTIL}} .endif .endfor # If a full path to an external linker is given, don't build lld. .if ${XLD:M/*} MK_LLD_BOOTSTRAP= no .endif # We also want the X_LINKER* variables if we are using an external toolchain. _WANT_TOOLCHAIN_CROSS_VARS= t .include "share/mk/bsd.linker.mk" .undef _WANT_TOOLCHAIN_CROSS_VARS # Begin WITH_SYSTEM_COMPILER / WITH_SYSTEM_LD # WITH_SYSTEM_COMPILER - Pull in needed values and make a decision. # Check if there is a local compiler that can satisfy as an external compiler. # Which compiler is expected to be used? .if ${MK_CLANG_BOOTSTRAP} == "yes" WANT_COMPILER_TYPE= clang .elif ${MK_GCC_BOOTSTRAP} == "yes" WANT_COMPILER_TYPE= gcc .else WANT_COMPILER_TYPE= .endif .if !defined(WANT_COMPILER_FREEBSD_VERSION) && !make(showconfig) && \ !make(test-system-linker) .if ${WANT_COMPILER_TYPE} == "clang" WANT_COMPILER_FREEBSD_VERSION_FILE= lib/clang/freebsd_cc_version.h WANT_COMPILER_FREEBSD_VERSION!= \ awk '$$2 == "FREEBSD_CC_VERSION" {printf("%d\n", $$3)}' \ ${SRCDIR}/${WANT_COMPILER_FREEBSD_VERSION_FILE} || echo unknown WANT_COMPILER_VERSION_FILE= lib/clang/include/clang/Basic/Version.inc WANT_COMPILER_VERSION!= \ awk '$$2 == "CLANG_VERSION" {split($$3, a, "."); print a[1] * 10000 + a[2] * 100 + a[3]}' \ ${SRCDIR}/${WANT_COMPILER_VERSION_FILE} || echo unknown .elif ${WANT_COMPILER_TYPE} == "gcc" WANT_COMPILER_FREEBSD_VERSION_FILE= gnu/usr.bin/cc/cc_tools/freebsd-native.h WANT_COMPILER_FREEBSD_VERSION!= \ awk '$$2 == "FBSD_CC_VER" {printf("%d\n", $$3)}' \ ${SRCDIR}/${WANT_COMPILER_FREEBSD_VERSION_FILE} || echo unknown WANT_COMPILER_VERSION_FILE= contrib/gcc/BASE-VER WANT_COMPILER_VERSION!= \ awk -F. '{print $$1 * 10000 + $$2 * 100 + $$3}' \ ${SRCDIR}/${WANT_COMPILER_VERSION_FILE} || echo unknown .endif .export WANT_COMPILER_FREEBSD_VERSION WANT_COMPILER_VERSION .endif # !defined(WANT_COMPILER_FREEBSD_VERSION) # It needs to be the same revision as we would build for the bootstrap. # If the expected vs CC is different then we can't skip. # GCC cannot be used for cross-arch yet. For clang we pass -target later if # TARGET_ARCH!=MACHINE_ARCH. .if ${MK_SYSTEM_COMPILER} == "yes" && \ defined(WANT_COMPILER_FREEBSD_VERSION) && \ (${MK_CLANG_BOOTSTRAP} == "yes" || ${MK_GCC_BOOTSTRAP} == "yes") && \ !make(xdev*) && \ ${X_COMPILER_TYPE} == ${WANT_COMPILER_TYPE} && \ (${X_COMPILER_TYPE} == "clang" || ${TARGET_ARCH} == ${MACHINE_ARCH}) && \ ${X_COMPILER_VERSION} == ${WANT_COMPILER_VERSION} && \ ${X_COMPILER_FREEBSD_VERSION} == ${WANT_COMPILER_FREEBSD_VERSION} # Everything matches, disable the bootstrap compiler. MK_CLANG_BOOTSTRAP= no MK_GCC_BOOTSTRAP= no USING_SYSTEM_COMPILER= yes .endif # ${WANT_COMPILER_TYPE} == ${COMPILER_TYPE} # WITH_SYSTEM_LD - Pull in needed values and make a decision. # Check if there is a local linker that can satisfy as an external linker. # Which linker is expected to be used? .if ${MK_LLD_BOOTSTRAP} == "yes" WANT_LINKER_TYPE= lld .elif ${MK_BINUTILS_BOOTSTRAP} == "yes" # Note that there's no support for bfd in WITH_SYSTEM_LINKER. WANT_LINKER_TYPE= bfd .else WANT_LINKER_TYPE= .endif .if !defined(WANT_LINKER_FREEBSD_VERSION) && !make(showconfig) && \ !make(test-system-compiler) .if ${WANT_LINKER_TYPE} == "lld" WANT_LINKER_FREEBSD_VERSION_FILE= lib/clang/include/VCSVersion.inc WANT_LINKER_FREEBSD_VERSION!= \ awk '$$2 == "LLD_REVISION" {gsub(/"/, "", $$3); print $$3}' \ ${SRCDIR}/${WANT_LINKER_FREEBSD_VERSION_FILE} || echo unknown WANT_LINKER_VERSION_FILE= lib/clang/include/lld/Common/Version.inc WANT_LINKER_VERSION!= \ awk '$$2 == "LLD_VERSION" {split($$3, a, "."); print a[1] * 10000 + a[2] * 100 + a[3]}' \ ${SRCDIR}/${WANT_LINKER_VERSION_FILE} || echo unknown .else WANT_LINKER_FREEBSD_VERSION_FILE= WANT_LINKER_FREEBSD_VERSION= .endif .export WANT_LINKER_FREEBSD_VERSION WANT_LINKER_VERSION .endif # !defined(WANT_LINKER_FREEBSD_VERSION) .if ${MK_SYSTEM_LINKER} == "yes" && \ defined(WANT_LINKER_FREEBSD_VERSION) && \ (${MK_LLD_BOOTSTRAP} == "yes") && \ !make(xdev*) && \ ${X_LINKER_TYPE} == ${WANT_LINKER_TYPE} && \ ${X_LINKER_VERSION} == ${WANT_LINKER_VERSION} && \ ${X_LINKER_FREEBSD_VERSION} == ${WANT_LINKER_FREEBSD_VERSION} # Everything matches, disable the bootstrap linker. MK_LLD_BOOTSTRAP= no USING_SYSTEM_LINKER= yes .endif # ${WANT_LINKER_TYPE} == ${LINKER_TYPE} # WITH_SYSTEM_COMPILER / WITH_SYSTEM_LINKER - Handle defaults and debug. USING_SYSTEM_COMPILER?= no USING_SYSTEM_LINKER?= no TEST_SYSTEM_COMPILER_VARS= \ USING_SYSTEM_COMPILER MK_SYSTEM_COMPILER \ MK_CROSS_COMPILER MK_CLANG_BOOTSTRAP MK_GCC_BOOTSTRAP \ WANT_COMPILER_TYPE WANT_COMPILER_VERSION WANT_COMPILER_VERSION_FILE \ WANT_COMPILER_FREEBSD_VERSION WANT_COMPILER_FREEBSD_VERSION_FILE \ CC COMPILER_TYPE COMPILER_FEATURES COMPILER_VERSION \ COMPILER_FREEBSD_VERSION \ XCC X_COMPILER_TYPE X_COMPILER_FEATURES X_COMPILER_VERSION \ X_COMPILER_FREEBSD_VERSION TEST_SYSTEM_LINKER_VARS= \ USING_SYSTEM_LINKER MK_SYSTEM_LINKER \ MK_LLD_BOOTSTRAP MK_BINUTILS_BOOTSTRAP \ WANT_LINKER_TYPE WANT_LINKER_VERSION WANT_LINKER_VERSION_FILE \ WANT_LINKER_FREEBSD_VERSION WANT_LINKER_FREEBSD_VERSION_FILE \ LD LINKER_TYPE LINKER_FEATURES LINKER_VERSION \ LINKER_FREEBSD_VERSION \ XLD X_LINKER_TYPE X_LINKER_FEATURES X_LINKER_VERSION \ X_LINKER_FREEBSD_VERSION .for _t in compiler linker test-system-${_t}: .PHONY .for v in ${TEST_SYSTEM_${_t:tu}_VARS} ${_+_}@printf "%-35s= %s\n" "${v}" "${${v}}" .endfor .endfor .if (make(buildworld) || make(buildkernel) || make(kernel-toolchain) || \ make(toolchain) || make(_cross-tools)) .if ${USING_SYSTEM_COMPILER} == "yes" .info SYSTEM_COMPILER: Determined that CC=${CC} matches the source tree. Not bootstrapping a cross-compiler. .elif ${MK_CLANG_BOOTSTRAP} == "yes" .info SYSTEM_COMPILER: libclang will be built for bootstrapping a cross-compiler. .endif .if ${USING_SYSTEM_LINKER} == "yes" .info SYSTEM_LINKER: Determined that LD=${LD} matches the source tree. Not bootstrapping a cross-linker. .elif ${MK_LLD_BOOTSTRAP} == "yes" .info SYSTEM_LINKER: libclang will be built for bootstrapping a cross-linker. .endif .endif # End WITH_SYSTEM_COMPILER / WITH_SYSTEM_LD # Store some compiler metadata for use in installworld where we don't # want to invoke CC at all. _TOOLCHAIN_METADATA_VARS= COMPILER_VERSION \ COMPILER_TYPE \ COMPILER_FEATURES \ COMPILER_FREEBSD_VERSION \ LINKER_VERSION \ LINKER_FEATURES \ LINKER_TYPE \ LINKER_FREEBSD_VERSION toolchain-metadata.mk: .PHONY .META @: > ${.TARGET} @echo ".info Using cached toolchain metadata from build at $$(hostname) on $$(date)" \ > ${.TARGET} @echo "_LOADED_TOOLCHAIN_METADATA=t" >> ${.TARGET} .for v in ${_TOOLCHAIN_METADATA_VARS} @echo "${v}=${${v}}" >> ${.TARGET} @echo "X_${v}=${X_${v}}" >> ${.TARGET} .endfor @echo ".export ${_TOOLCHAIN_METADATA_VARS}" >> ${.TARGET} @echo ".export ${_TOOLCHAIN_METADATA_VARS:C,^,X_,}" >> ${.TARGET} # We must do lib/ and libexec/ before bin/ in case of a mid-install error to # keep the users system reasonably usable. For static->dynamic root upgrades, # we don't want to install a dynamic binary without rtld and the needed # libraries. More commonly, for dynamic root, we don't want to install a # binary that requires a newer library version that hasn't been installed yet. # This ordering is not a guarantee though. The only guarantee of a working # system here would require fine-grained ordering of all components based # on their dependencies. .if !empty(SUBDIR_OVERRIDE) SUBDIR= ${SUBDIR_OVERRIDE} .else SUBDIR= lib libexec # Add LOCAL_LIB_DIRS, but only if they will not be picked up as a SUBDIR # of a LOCAL_DIRS directory. This allows LOCAL_DIRS=foo and # LOCAL_LIB_DIRS=foo/lib to behave as expected. .for _DIR in ${LOCAL_DIRS:M*/} ${LOCAL_DIRS:N*/:S|$|/|} _REDUNDANT_LIB_DIRS+= ${LOCAL_LIB_DIRS:M${_DIR}*} .endfor .for _DIR in ${LOCAL_LIB_DIRS} .if ${_DIR} == ".WAIT" || (empty(_REDUNDANT_LIB_DIRS:M${_DIR}) && exists(${.CURDIR}/${_DIR}/Makefile)) SUBDIR+= ${_DIR} .endif .endfor .if !defined(NO_ROOT) && (make(installworld) || make(install)) # Ensure libraries are installed before progressing. SUBDIR+=.WAIT .endif SUBDIR+=bin .if ${MK_CDDL} != "no" SUBDIR+=cddl .endif SUBDIR+=gnu include .if ${MK_KERBEROS} != "no" SUBDIR+=kerberos5 .endif .if ${MK_RESCUE} != "no" SUBDIR+=rescue .endif SUBDIR+=sbin .if ${MK_CRYPT} != "no" SUBDIR+=secure .endif .if !defined(NO_SHARE) SUBDIR+=share .endif .if ${MK_BOOT} != "no" SUBDIR+=stand .endif SUBDIR+=sys usr.bin usr.sbin .if ${MK_TESTS} != "no" SUBDIR+= tests .endif # Local directories are built in parallel with the base system directories. # Users may insert a .WAIT directive at the beginning or elsewhere within # the LOCAL_DIRS and LOCAL_LIB_DIRS lists as needed. .for _DIR in ${LOCAL_DIRS} .if ${_DIR} == ".WAIT" || exists(${.CURDIR}/${_DIR}/Makefile) SUBDIR+= ${_DIR} .endif .endfor # We must do etc/ last as it hooks into building the man whatis file # by calling 'makedb' in share/man. This is only relevant for # install/distribute so they build the whatis file after every manpage is # installed. .if make(installworld) || make(install) SUBDIR+=.WAIT .endif SUBDIR+=etc .endif # !empty(SUBDIR_OVERRIDE) .if defined(NOCLEAN) .warning NOCLEAN option is deprecated. Use NO_CLEAN instead. NO_CLEAN= ${NOCLEAN} .endif .if defined(NO_CLEANDIR) CLEANDIR= clean cleandepend .else CLEANDIR= cleandir .endif .if defined(WORLDFAST) NO_CLEAN= t NO_OBJWALK= t .endif .if ${MK_META_MODE} == "yes" # If filemon is used then we can rely on the build being incremental-safe. # The .meta files will also track the build command and rebuild should # it change. .if empty(.MAKE.MODE:Mnofilemon) NO_CLEAN= t .endif .endif .if defined(NO_OBJWALK) || ${MK_AUTO_OBJ} == "yes" NO_OBJWALK= t NO_KERNELOBJ= t .endif .if !defined(NO_OBJWALK) _obj= obj .endif LOCAL_TOOL_DIRS?= PACKAGEDIR?= ${DESTDIR}/${DISTDIR} .if empty(SHELL:M*csh*) BUILDENV_SHELL?=${SHELL} .else BUILDENV_SHELL?=/bin/sh .endif .if !defined(_MKSHOWCONFIG) .if !defined(SVN_CMD) || empty(SVN_CMD) . for _P in /usr/bin /usr/local/bin . for _S in svn svnlite . if exists(${_P}/${_S}) SVN_CMD= ${_P}/${_S} . endif . endfor . endfor .export SVN_CMD .endif SVNFLAGS?= -r HEAD .if !defined(VCS_REVISION) || empty(VCS_REVISION) .if !defined(SVNVERSION_CMD) || empty(SVNVERSION_CMD) . for _D in ${PATH:S,:, ,g} . if exists(${_D}/svnversion) SVNVERSION_CMD?=${_D}/svnversion . endif . if exists(${_D}/svnliteversion) SVNVERSION_CMD?=${_D}/svnliteversion . endif . endfor .endif _VCS_REVISION?= $$(eval ${SVNVERSION_CMD} ${SRCDIR}) . if !empty(_VCS_REVISION) VCS_REVISION= $$(echo r${_VCS_REVISION}) . endif .export VCS_REVISION .endif .if !defined(OSRELDATE) .if exists(/usr/include/osreldate.h) OSRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ /usr/include/osreldate.h .else OSRELDATE= 0 .endif .export OSRELDATE .endif # Set VERSION for CTFMERGE to use via the default CTFFLAGS=-L VERSION. .if !defined(_REVISION) _REVISION!= ${MAKE} -C ${SRCDIR}/release MK_AUTO_OBJ=no -V REVISION .export _REVISION .endif .if !defined(_BRANCH) _BRANCH!= ${MAKE} -C ${SRCDIR}/release MK_AUTO_OBJ=no -V BRANCH .export _BRANCH .endif .if !defined(SRCRELDATE) SRCRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ ${SRCDIR}/sys/sys/param.h .export SRCRELDATE .endif .if !defined(VERSION) VERSION= FreeBSD ${_REVISION}-${_BRANCH:C/-p[0-9]+$//} ${TARGET_ARCH} ${SRCRELDATE} .export VERSION .endif .if !defined(PKG_VERSION) .if ${_BRANCH:MSTABLE*} || ${_BRANCH:MCURRENT*} TIMENOW= %Y%m%d%H%M%S EXTRA_REVISION= .s${TIMENOW:gmtime} .elif ${_BRANCH:MALPHA*} EXTRA_REVISION= _${_BRANCH:C/-ALPHA/.a/} .elif ${_BRANCH:MBETA*} EXTRA_REVISION= _${_BRANCH:C/-BETA/.b/} .elif ${_BRANCH:MRC*} EXTRA_REVISION= _${_BRANCH:C/-RC/.r/} .elif ${_BRANCH:MPRERELEASE*} EXTRA_REVISION= _${_BRANCH:C/-PRERELEASE/.p/} .elif ${_BRANCH:M*-p*} EXTRA_REVISION= _${_BRANCH:C/.*-p([0-9]+$)/\1/} .endif PKG_VERSION= ${_REVISION}${EXTRA_REVISION} .endif .endif # !defined(PKG_VERSION) .if !defined(_MKSHOWCONFIG) _CPUTYPE!= MAKEFLAGS= CPUTYPE=${_TARGET_CPUTYPE} ${MAKE} -f /dev/null \ -m ${.CURDIR}/share/mk MK_AUTO_OBJ=no -V CPUTYPE .if ${_CPUTYPE} != ${_TARGET_CPUTYPE} .error CPUTYPE global should be set with ?=. .endif .endif .if make(buildworld) BUILD_ARCH!= uname -p .if ${MACHINE_ARCH} != ${BUILD_ARCH} .error To cross-build, set TARGET_ARCH. .endif .endif WORLDTMP?= ${OBJTOP}/tmp BPATH= ${CCACHE_WRAPPER_PATH_PFX}${WORLDTMP}/legacy/usr/sbin:${WORLDTMP}/legacy/usr/bin:${WORLDTMP}/legacy/bin:${WORLDTMP}/legacy/usr/libexec XPATH= ${WORLDTMP}/usr/sbin:${WORLDTMP}/usr/bin # When building we want to find the cross tools before the host tools in ${BPATH}. # We also need to add UNIVERSE_TOOLCHAIN_PATH so that we can find the shared # toolchain files (clang, lld, etc.) during make universe/tinderbox STRICTTMPPATH= ${XPATH}:${BPATH}:${UNIVERSE_TOOLCHAIN_PATH} # We should not be using tools from /usr/bin accidentally since this could cause # the build to break on other systems that don't have that tool. For now we # still allow using the old behaviour (inheriting $PATH) if # BUILD_WITH_STRICT_TMPPATH is set to 0 but this will eventually be removed. # Currently strict $PATH can cause build failures and does not work yet with # USING_SYSTEM_LINKER/USING_SYSTEM_COMPILER. Once these issues have been # resolved it will be turned on by default. BUILD_WITH_STRICT_TMPPATH?=0 .if ${BUILD_WITH_STRICT_TMPPATH} != 0 TMPPATH= ${STRICTTMPPATH} .else TMPPATH= ${STRICTTMPPATH}:${PATH} .endif # # Avoid running mktemp(1) unless actually needed. # It may not be functional, e.g., due to new ABI # when in the middle of installing over this system. # .if make(distributeworld) || make(installworld) || make(stageworld) .if ${BUILD_WITH_STRICT_TMPPATH} != 0 MKTEMP=${WORLDTMP}/legacy/usr/bin/mktemp .if !exists(${MKTEMP}) .error "mktemp binary doesn't exist in expected location: ${MKTEMP}" .endif .else MKTEMP=mktemp .endif INSTALLTMP!= ${MKTEMP} -d -u -t install .endif .if make(stagekernel) || make(distributekernel) TAGS+= kernel PACKAGE= kernel .endif # # Building a world goes through the following stages # # 1. legacy stage [BMAKE] # This stage is responsible for creating compatibility # shims that are needed by the bootstrap-tools, # build-tools and cross-tools stages. These are generally # APIs that tools from one of those three stages need to # build that aren't present on the host. # 1. bootstrap-tools stage [BMAKE] # This stage is responsible for creating programs that # are needed for backward compatibility reasons. They # are not built as cross-tools. # 2. build-tools stage [TMAKE] # This stage is responsible for creating the object # tree and building any tools that are needed during # the build process. Some programs are listed during # this phase because they build binaries to generate # files needed to build these programs. This stage also # builds the 'build-tools' target rather than 'all'. # 3. cross-tools stage [XMAKE] # This stage is responsible for creating any tools that # are needed for building the system. A cross-compiler is one # of them. This differs from build tools in two ways: # 1. the 'all' target is built rather than 'build-tools' # 2. these tools are installed into TMPPATH for stage 4. # 4. world stage [WMAKE] # This stage actually builds the world. # 5. install stage (optional) [IMAKE] # This stage installs a previously built world. # BOOTSTRAPPING?= 0 # Keep these in sync MINIMUM_SUPPORTED_OSREL?= 1002501 MINIMUM_SUPPORTED_REL?= 10.3 # Common environment for world related stages CROSSENV+= \ MACHINE_ARCH=${TARGET_ARCH} \ MACHINE=${TARGET} \ CPUTYPE=${TARGET_CPUTYPE} .if ${MK_META_MODE} != "no" # Don't rebuild build-tools targets during normal build. CROSSENV+= BUILD_TOOLS_META=.NOMETA .endif .if defined(TARGET_CFLAGS) CROSSENV+= ${TARGET_CFLAGS} .endif .if (${TARGET} != ${MACHINE} && !defined(WITH_LOCAL_MODULES)) || \ defined(WITHOUT_LOCAL_MODULES) CROSSENV+= LOCAL_MODULES= .endif BOOTSTRAPPING_OSRELDATE?=${OSRELDATE} # bootstrap-tools stage BMAKEENV= INSTALL="sh ${.CURDIR}/tools/install.sh" \ TOOLS_PREFIX=${TOOLS_PREFIX_UNDEF:U${WORLDTMP}} \ PATH=${BPATH}:${PATH} \ WORLDTMP=${WORLDTMP} \ MAKEFLAGS="-m ${.CURDIR}/tools/build/mk ${.MAKEFLAGS}" # need to keep this in sync with targets/pseudo/bootstrap-tools/Makefile BSARGS= DESTDIR= \ OBJTOP='${WORLDTMP}/obj-tools' \ OBJROOT='$${OBJTOP}/' \ MAKEOBJDIRPREFIX= \ BOOTSTRAPPING=${BOOTSTRAPPING_OSRELDATE} \ BWPHASE=${.TARGET:C,^_,,} \ SSP_CFLAGS= \ MK_HTML=no NO_LINT=yes MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_RETPOLINE=no MK_TESTS=no \ MK_INCLUDES=yes BMAKE= \ ${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ ${BSARGS} .if empty(.MAKEOVERRIDES:MMK_LLVM_TARGET_ALL) BMAKE+= MK_LLVM_TARGET_ALL=no .endif # build-tools stage TMAKE= \ ${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ DESTDIR= \ BOOTSTRAPPING=${BOOTSTRAPPING_OSRELDATE} \ BWPHASE=${.TARGET:C,^_,,} \ SSP_CFLAGS= \ -DNO_LINT \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_RETPOLINE=no MK_TESTS=no # cross-tools stage # TOOLS_PREFIX set in BMAKE XMAKE= ${BMAKE} \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ MK_CLANG_IS_CC=${MK_CLANG_BOOTSTRAP} \ MK_GDB=no MK_TESTS=no # kernel-tools stage KTMAKEENV= INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${BPATH}:${PATH} \ WORLDTMP=${WORLDTMP} KTMAKE= \ TOOLS_PREFIX=${TOOLS_PREFIX_UNDEF:U${WORLDTMP}} \ ${KTMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ DESTDIR= \ OBJTOP='${WORLDTMP}/obj-kernel-tools' \ OBJROOT='$${OBJTOP}/' \ MAKEOBJDIRPREFIX= \ BOOTSTRAPPING=${BOOTSTRAPPING_OSRELDATE} \ SSP_CFLAGS= \ MK_HTML=no -DNO_LINT MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_RETPOLINE=no MK_WARNS=no MK_CTF=no # world stage WMAKEENV= ${CROSSENV} \ INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${TMPPATH} \ SYSROOT=${WORLDTMP} # make hierarchy HMAKE= PATH=${TMPPATH} ${MAKE} LOCAL_MTREE=${LOCAL_MTREE:Q} .if defined(NO_ROOT) HMAKE+= PATH=${TMPPATH} METALOG=${METALOG} -DNO_ROOT .endif CROSSENV+= CC="${XCC} ${XCFLAGS}" CXX="${XCXX} ${XCXXFLAGS} ${XCFLAGS}" \ CPP="${XCPP} ${XCFLAGS}" \ AS="${XAS}" AR="${XAR}" LD="${XLD}" LLVM_LINK="${XLLVM_LINK}" \ NM=${XNM} OBJCOPY="${XOBJCOPY}" \ RANLIB=${XRANLIB} STRINGS=${XSTRINGS} \ SIZE="${XSIZE}" .if defined(CROSS_BINUTILS_PREFIX) && exists(${CROSS_BINUTILS_PREFIX}) # In the case of xdev-build tools, CROSS_BINUTILS_PREFIX won't be a # directory, but the compiler will look in the right place for its # tools so we don't need to tell it where to look. BFLAGS+= -B${CROSS_BINUTILS_PREFIX} .endif # The internal bootstrap compiler has a default sysroot set by TOOLS_PREFIX # and target set by TARGET/TARGET_ARCH. However, there are several needs to # always pass an explicit --sysroot and -target. # - External compiler needs sysroot and target flags. # - External ld needs sysroot. # - To be clear about the use of a sysroot when using the internal compiler. # - Easier debugging. # - Allowing WITH_SYSTEM_COMPILER+WITH_META_MODE to work together due to # the flip-flopping build command when sometimes using external and # sometimes using internal. # - Allow using lld which has no support for default paths. .if !defined(CROSS_BINUTILS_PREFIX) || !exists(${CROSS_BINUTILS_PREFIX}) BFLAGS+= -B${WORLDTMP}/usr/bin .endif .if ${WANT_COMPILER_TYPE} == gcc || \ (defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == gcc) .elif ${WANT_COMPILER_TYPE} == clang || \ (defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == clang) XCFLAGS+= -target ${TARGET_TRIPLE} .endif XCFLAGS+= --sysroot=${WORLDTMP} .if !empty(BFLAGS) XCFLAGS+= ${BFLAGS} .endif .if ${MK_LIB32} == "yes" _LIBCOMPAT= 32 .include "Makefile.libcompat" .elif ${MK_LIBSOFT} == "yes" _LIBCOMPAT= SOFT .include "Makefile.libcompat" .endif # META_MODE normally ignores host file changes since every build updates # timestamps (see NO_META_IGNORE_HOST in sys.mk). There are known times # when the ABI breaks though that we want to force rebuilding WORLDTMP # to get updated host tools. .if ${MK_META_MODE} == "yes" && defined(NO_CLEAN) && \ !defined(NO_META_IGNORE_HOST) && !defined(NO_META_IGNORE_HOST_HEADERS) && \ !defined(_MKSHOWCONFIG) # r318736 - ino64 major ABI breakage META_MODE_BAD_ABI_VERS+= 1200031 .if !defined(OBJDIR_HOST_OSRELDATE) .if exists(${OBJTOP}/host-osreldate.h) OBJDIR_HOST_OSRELDATE!= \ awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ ${OBJTOP}/host-osreldate.h .elif exists(${WORLDTMP}/usr/include/osreldate.h) OBJDIR_HOST_OSRELDATE= 0 .endif .export OBJDIR_HOST_OSRELDATE .endif # Note that this logic is the opposite of normal BOOTSTRAP handling. We want # to compare the WORLDTMP's OSRELDATE to the host's OSRELDATE. If the WORLDTMP # is older than the ABI-breakage OSRELDATE of the HOST then we rebuild. .if defined(OBJDIR_HOST_OSRELDATE) .for _ver in ${META_MODE_BAD_ABI_VERS} .if ${OSRELDATE} >= ${_ver} && ${OBJDIR_HOST_OSRELDATE} < ${_ver} _meta_mode_need_rebuild= ${_ver} .endif .endfor .if defined(_meta_mode_need_rebuild) .info META_MODE: Rebuilding host tools due to ABI breakage in __FreeBSD_version ${_meta_mode_need_rebuild}. NO_META_IGNORE_HOST_HEADERS= 1 .export NO_META_IGNORE_HOST_HEADERS .endif # defined(_meta_mode_need_rebuild) .endif # defined(OBJDIR_HOST_OSRELDATE) .endif # ${MK_META_MODE} == "yes" && defined(NO_CLEAN) ... # This is only used for META_MODE+filemon to track what the oldest # __FreeBSD_version is in WORLDTMP. This purposely does NOT have # a make dependency on /usr/include/osreldate.h as the file should # only be copied when it is missing or meta mode determines it has changed. # Since host files are normally ignored without NO_META_IGNORE_HOST # the file will never be updated unless that flag is specified. This # allows tracking the oldest osreldate to force rebuilds via # META_MODE_BADABI_REVS above. host-osreldate.h: # DO NOT ADD /usr/include/osreldate.h here @cp -f /usr/include/osreldate.h ${.TARGET} WMAKE= ${WMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ BWPHASE=${.TARGET:C,^_,,} \ DESTDIR=${WORLDTMP} IMAKEENV= ${CROSSENV} IMAKE= ${IMAKEENV} ${MAKE} -f Makefile.inc1 \ ${IMAKE_INSTALL} ${IMAKE_MTREE} .if empty(.MAKEFLAGS:M-n) IMAKEENV+= PATH=${STRICTTMPPATH}:${INSTALLTMP} \ LD_LIBRARY_PATH=${INSTALLTMP} \ PATH_LOCALE=${INSTALLTMP}/locale IMAKE+= __MAKE_SHELL=${INSTALLTMP}/sh .else IMAKEENV+= PATH=${TMPPATH}:${INSTALLTMP} .endif # When generating install media, do not allow user and group information from # the build host to affect the contents of the distribution. .if make(distributeworld) || make(distrib-dirs) || make(distribution) DB_FROM_SRC= yes .endif .if defined(DB_FROM_SRC) INSTALLFLAGS+= -N ${.CURDIR}/etc MTREEFLAGS+= -N ${.CURDIR}/etc .endif _INSTALL_DDIR= ${DESTDIR}/${DISTDIR} INSTALL_DDIR= ${_INSTALL_DDIR:S://:/:g:C:/$::} .if defined(NO_ROOT) METALOG?= ${DESTDIR}/${DISTDIR}/METALOG METALOG:= ${METALOG:C,//+,/,g} IMAKE+= -DNO_ROOT METALOG=${METALOG} INSTALLFLAGS+= -U -M ${METALOG} -D ${INSTALL_DDIR} MTREEFLAGS+= -W .endif .if defined(BUILD_PKGS) INSTALLFLAGS+= -h sha256 .endif .if defined(DB_FROM_SRC) || defined(NO_ROOT) IMAKE_INSTALL= INSTALL="install ${INSTALLFLAGS}" IMAKE_MTREE= MTREE_CMD="mtree ${MTREEFLAGS}" .endif DESTDIR_MTREEFLAGS= -deU # When creating worldtmp we don't need to set the directories as owned by root # so we also pass -W WORLDTMP_MTREEFLAGS= -deUW .if defined(NO_ROOT) # When building with -DNO_ROOT we shouldn't be changing the directories # that are created by mtree to be owned by root/wheel. DESTDIR_MTREEFLAGS+= -W .endif MTREE?= mtree .if ${BUILD_WITH_STRICT_TMPPATH} != 0 MTREE= ${WORLDTMP}/legacy/usr/sbin/mtree .endif WORLDTMP_MTREE= ${MTREE} ${WORLDTMP_MTREEFLAGS} DESTDIR_MTREE= ${MTREE} ${DESTDIR_MTREEFLAGS} # kernel stage KMAKEENV= ${WMAKEENV:NSYSROOT=*} KMAKE= ${KMAKEENV} ${MAKE} ${.MAKEFLAGS} ${KERNEL_FLAGS} KERNEL=${INSTKERNNAME} # # buildworld # # Attempt to rebuild the entire system, with reasonable chance of # success, regardless of how old your existing system is. # _sanity_check: .PHONY .MAKE .if ${.CURDIR:C/[^,]//g} != "" # The m4 build of sendmail files doesn't like it if ',' is used # anywhere in the path of it's files. @echo @echo "*** Error: path to source tree contains a comma ','" @echo @false .elif ${.CURDIR:M*\:*} != "" # Using ':' leaks into PATH and breaks finding cross-tools. @echo @echo "*** Error: path to source tree contains a colon ':'" @echo @false .endif # Our current approach to dependency tracking cannot cope with certain source # tree changes, particularly with respect to removing source files and # replacing generated files. Handle these cases here in an ad-hoc fashion. _cleanobj_fast_depend_hack: .PHONY # Syscall stubs rewritten in C and obsolete MD assembly implementations # Date SVN Rev Syscalls/Changes # 20191009 r353340 removal of opensolaris_atomic.S (also r353381) .if ${MACHINE} != i386 .for f in opensolaris_atomic @if [ -e "${OBJTOP}/cddl/lib/libzpool/.depend.${f}.o" ] && \ egrep -qw 'opensolaris_atomic\.S' ${OBJTOP}/cddl/lib/libzpool/.depend.${f}.o; then \ echo "Removing stale dependencies for opensolaris_atomic"; \ rm -f ${OBJTOP}/cddl/lib/libzpool/.depend.${f}.* \ ${LIBCOMPAT:D${LIBCOMPAT_OBJTOP}/cddl/lib/libzpool/.depend.${f}.*}; \ fi .endfor .endif # 20190925 r352689 removal of obsolete i386 memchr.S .for f in memchr @if [ -e "${OBJTOP}/lib/libc/.depend.${f}.o" ] && \ egrep -qw 'i386/string/memchr\.S' ${OBJTOP}/lib/libc/.depend.${f}.o; then \ echo "Removing stale dependencies for memchr"; \ rm -f ${OBJTOP}/lib/libc/.depend.${f}.*; \ fi .if defined(_LIBCOMPAT) @if [ -e "${LIBCOMPAT_OBJTOP}/lib/libc/.depend.${f}.o" ] && \ egrep -qw 'i386/string/memchr\.S' ${LIBCOMPAT_OBJTOP}/lib/libc/.depend.${f}.o; then \ echo "Removing stale dependencies for memchr"; \ rm -f ${LIBCOMPAT_OBJTOP}/lib/libc/.depend.${f}.*; \ fi .endif .endfor # 20180604 r334626 brk sbrk # 20190916 r352703 shm_open .for f in brk sbrk shm_open @if [ -e "${OBJTOP}/lib/libc/.depend.${f}.o" ] && \ egrep -qw '${f}\.[sS]' ${OBJTOP}/lib/libc/.depend.${f}.o; then \ echo "Removing stale dependencies for ${f} syscall wrappers"; \ rm -f ${OBJTOP}/lib/libc/.depend.${f}.* \ ${_LIBCOMPAT:D${LIBCOMPAT_OBJTOP}/lib/libc/.depend.${f}.*}; \ fi .endfor # 20181013 r339348 bcopy reimplemented as .c .for f in bcopy memcpy memmove @if [ -e "${OBJTOP}/lib/libc/.depend.${f}.o" ] && \ egrep -qw 'bcopy\.[sS]' ${OBJTOP}/lib/libc/.depend.${f}.o; then \ echo "Removing stale dependencies for bcopy"; \ rm -f ${OBJTOP}/lib/libc/.depend.${f}.* \ ${_LIBCOMPAT:D${LIBCOMPAT_OBJTOP}/lib/libc/.depend.${f}.*}; \ fi .endfor # 20181115 r340463 bzero reimplemented as .c @if [ -e "${OBJTOP}/lib/libc/.depend.bzero.o" ] && \ egrep -qw 'bzero\.[sS]' ${OBJTOP}/lib/libc/.depend.bzero.o; then \ echo "Removing stale dependencies for bzero"; \ rm -f ${OBJTOP}/lib/libc/.depend.bzero.* \ ${_LIBCOMPAT:D${LIBCOMPAT_OBJTOP}/lib/libc/.depend.bzero.*}; \ fi # 20181009 track migration from ntp's embedded libevent to updated one @if [ -e "${OBJTOP}/usr.sbin/ntp/libntpevent/.depend.bufferevent_openssl.o" ] && \ egrep -q 'contrib/ntp/sntp/libevent/bufferevent_openssl.c' \ ${OBJTOP}/usr.sbin/ntp/libntpevent/.depend.bufferevent_openssl.o ; then \ echo "Removing stale libevent dependencies"; \ rm -f ${OBJTOP}/usr.sbin/ntp/libntpevent/.depend.*; \ fi # 20181209 r341759 track migration across wpa update @if [ -e "${OBJTOP}/usr.sbin/wpa/wpa_supplicant/.depend.rrm.o" ] && \ egrep -q 'src/ap/rrm.c' \ ${OBJTOP}/usr.sbin/wpa/wpa_supplicant/.depend.rrm.o; then \ echo "Removing stale wpa dependencies"; \ rm -f ${OBJTOP}/usr.sbin/wpa/*/.depend*; \ fi _worldtmp: .PHONY @echo @echo "--------------------------------------------------------------" @echo ">>> Rebuilding the temporary build tree" @echo "--------------------------------------------------------------" .if !defined(NO_CLEAN) rm -rf ${WORLDTMP} .else # Note: for delete-old we need to set $PATH to also include the host $PATH # since otherwise a partial build with missing symlinks in ${WORLDTMP}/legacy/ # will fail to run due to missing binaries. $WMAKE sets PATH to only ${TMPPATH} # so we remove that assingnment from $WMAKE and prepend the new $PATH ${_+_}@if [ -e "${WORLDTMP}" ]; then \ echo ">>> Deleting stale files in build tree..."; \ cd ${.CURDIR}; env PATH=${TMPPATH}:${PATH} ${WMAKE:NPATH=*} \ _NO_INCLUDE_COMPILERMK=t -DBATCH_DELETE_OLD_FILES delete-old \ delete-old-libs >/dev/null; \ fi rm -rf ${WORLDTMP}/legacy/usr/include .if ${USING_SYSTEM_COMPILER} == "yes" .for cc in cc c++ if [ -x ${WORLDTMP}/usr/bin/${cc} ]; then \ inum=$$(stat -f %i ${WORLDTMP}/usr/bin/${cc}); \ find ${WORLDTMP}/usr/bin -inum $${inum} -delete; \ fi .endfor .endif # ${USING_SYSTEM_COMPILER} == "yes" .if ${USING_SYSTEM_LINKER} == "yes" @rm -f ${WORLDTMP}/usr/bin/ld ${WORLDTMP}/usr/bin/ld.lld .endif # ${USING_SYSTEM_LINKER} == "yes" .endif # !defined(NO_CLEAN) @mkdir -p ${WORLDTMP} @touch ${WORLDTMP}/${.TARGET} # We can't use mtree to create the worldtmp directories since it may not be # available on the target system (this happens e.g. when building on non-FreeBSD) cd ${.CURDIR}/tools/build; \ ${MAKE} DIRPRFX=tools/build/ DESTDIR=${WORLDTMP}/legacy installdirs # In order to build without inheriting $PATH we need to add symlinks to the host # tools in $WORLDTMP for the tools that we don't build during bootstrap-tools cd ${.CURDIR}/tools/build; \ ${MAKE} DIRPRFX=tools/build/ DESTDIR=${WORLDTMP}/legacy host-symlinks _legacy: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1.1: legacy release compatibility shims" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${BMAKE} legacy _bootstrap-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1.2: bootstrap tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${BMAKE} bootstrap-tools mkdir -p ${WORLDTMP}/usr ${WORLDTMP}/lib/casper ${WORLDTMP}/lib/geom ${WORLDTMP_MTREE} -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${WORLDTMP}/usr >/dev/null ${WORLDTMP_MTREE} -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${WORLDTMP}/usr/include >/dev/null ln -sf ${.CURDIR}/sys ${WORLDTMP} .if ${MK_DEBUG_FILES} != "no" ${WORLDTMP_MTREE} -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${WORLDTMP}/usr/lib >/dev/null .endif .for _mtree in ${LOCAL_MTREE} ${WORLDTMP_MTREE} -f ${.CURDIR}/${_mtree} -p ${WORLDTMP} > /dev/null .endfor _cleanobj: .if !defined(NO_CLEAN) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.1: cleaning up the object tree" @echo "--------------------------------------------------------------" # Avoid including bsd.compiler.mk in clean and obj with _NO_INCLUDE_COMPILERMK # since the restricted $PATH might not contain a valid cc binary ${_+_}cd ${.CURDIR}; ${WMAKE} _NO_INCLUDE_COMPILERMK=t ${CLEANDIR} .if defined(_LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${LIBCOMPATWMAKE} _NO_INCLUDE_COMPILERMK=t -f Makefile.inc1 ${CLEANDIR} .endif .else ${_+_}cd ${.CURDIR}; ${WMAKE} _NO_INCLUDE_COMPILERMK=t _cleanobj_fast_depend_hack .endif # !defined(NO_CLEAN) _obj: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.2: rebuilding the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} _NO_INCLUDE_COMPILERMK=t obj _build-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.3: build tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${TMAKE} build-tools _cross-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3: cross tools" @echo "--------------------------------------------------------------" @rm -f ${OBJTOP}/toolchain-metadata.mk ${_+_}cd ${.CURDIR}; ${XMAKE} cross-tools ${_+_}cd ${.CURDIR}; ${XMAKE} kernel-tools _build-metadata: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3.1: recording build metadata" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} toolchain-metadata.mk ${_+_}cd ${.CURDIR}; ${WMAKE} host-osreldate.h _includes: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.1: building includes" @echo "--------------------------------------------------------------" # Special handling for SUBDIR_OVERRIDE in buildworld as they most likely need # headers from default SUBDIR. Do SUBDIR_OVERRIDE includes last. ${_+_}cd ${.CURDIR}; ${WMAKE} SUBDIR_OVERRIDE= SHARED=symlinks \ MK_INCLUDES=yes includes .if !empty(SUBDIR_OVERRIDE) && make(buildworld) ${_+_}cd ${.CURDIR}; ${WMAKE} MK_INCLUDES=yes SHARED=symlinks includes .endif _libraries: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.2: building libraries" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; \ ${WMAKE} -DNO_FSCHG MK_HTML=no -DNO_LINT MK_MAN=no \ MK_PROFILE=no MK_TESTS=no MK_TESTS_SUPPORT=${MK_TESTS} libraries everything: .PHONY @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.4: building everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; _PARALLEL_SUBDIR_OK=1 ${WMAKE} all WMAKE_TGTS= .if !defined(WORLDFAST) WMAKE_TGTS+= _sanity_check _worldtmp _legacy .if empty(SUBDIR_OVERRIDE) WMAKE_TGTS+= _bootstrap-tools .endif WMAKE_TGTS+= _cleanobj .if !defined(NO_OBJWALK) WMAKE_TGTS+= _obj .endif WMAKE_TGTS+= _build-tools _cross-tools WMAKE_TGTS+= _build-metadata WMAKE_TGTS+= _includes .endif .if !defined(NO_LIBS) WMAKE_TGTS+= _libraries .endif .if defined(_LIBCOMPAT) && empty(SUBDIR_OVERRIDE) WMAKE_TGTS+= build${libcompat} .endif WMAKE_TGTS+= everything # record buildworld time in seconds .if make(buildworld) _BUILDWORLD_START!= date '+%s' .export _BUILDWORLD_START .endif buildworld: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue .PHONY .ORDER: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue buildworld_prologue: .PHONY @echo "--------------------------------------------------------------" @echo ">>> World build started on `LC_ALL=C date`" @echo "--------------------------------------------------------------" buildworld_epilogue: .PHONY @echo @echo "--------------------------------------------------------------" @echo ">>> World build completed on `LC_ALL=C date`" @seconds=$$(($$(date '+%s') - ${_BUILDWORLD_START})); \ echo -n ">>> World built in $$seconds seconds, "; \ echo "ncpu: $$(sysctl -n hw.ncpu)${.MAKE.JOBS:S/^/, make -j/}" @echo "--------------------------------------------------------------" # # We need to have this as a target because the indirection between Makefile # and Makefile.inc1 causes the correct PATH to be used, rather than a # modification of the current environment's PATH. In addition, we need # to quote multiword values. # buildenvvars: .PHONY @echo ${WMAKEENV:Q} ${.MAKE.EXPORTED:@v@$v=\"${$v}\"@} .if ${.TARGETS:Mbuildenv} .if ${.MAKEFLAGS:M-j} .error The buildenv target is incompatible with -j .endif .endif BUILDENV_DIR?= ${.CURDIR} # # Note: make will report any errors the shell reports. This can # be odd if the last command in an interactive shell generates an # error or is terminated by SIGINT. These reported errors look bad, # but are harmless. Allowing them also allows BUIDLENV_SHELL to # be a complex command whose status will be returned to the caller. # Some scripts in tools rely on this behavior to report build errors. # buildenv: .PHONY @echo Entering world for ${TARGET_ARCH}:${TARGET} .if ${BUILDENV_SHELL:M*zsh*} @echo For ZSH you must run: export CPUTYPE=${TARGET_CPUTYPE} .endif @cd ${BUILDENV_DIR} && env ${WMAKEENV} BUILDENV=1 ${BUILDENV_SHELL} TOOLCHAIN_TGTS= ${WMAKE_TGTS:Neverything:Nbuild${libcompat}} toolchain: ${TOOLCHAIN_TGTS} .PHONY KERNEL_TOOLCHAIN_TGTS= ${TOOLCHAIN_TGTS:N_obj:N_cleanobj:N_includes:N_libraries} .if make(kernel-toolchain) .ORDER: ${KERNEL_TOOLCHAIN_TGTS} .endif kernel-toolchain: ${KERNEL_TOOLCHAIN_TGTS} .PHONY # # installcheck # # Checks to be sure system is ready for installworld/installkernel. # installcheck: _installcheck_world _installcheck_kernel .PHONY _installcheck_world: .PHONY @echo "--------------------------------------------------------------" @echo ">>> Install check world" @echo "--------------------------------------------------------------" _installcheck_kernel: .PHONY @echo "--------------------------------------------------------------" @echo ">>> Install check kernel" @echo "--------------------------------------------------------------" # # Require DESTDIR to be set if installing for a different architecture or # using the user/group database in the source tree. # .if ${TARGET_ARCH} != ${MACHINE_ARCH} || ${TARGET} != ${MACHINE} || \ defined(DB_FROM_SRC) .if !make(distributeworld) _installcheck_world: __installcheck_DESTDIR _installcheck_kernel: __installcheck_DESTDIR __installcheck_DESTDIR: .PHONY .if !defined(DESTDIR) || empty(DESTDIR) @echo "ERROR: Please set DESTDIR!"; \ false .endif .endif .endif .if !defined(DB_FROM_SRC) # # Check for missing UIDs/GIDs. # CHECK_UIDS= auditdistd CHECK_GIDS= audit CHECK_UIDS+= ntpd CHECK_GIDS+= ntpd CHECK_UIDS+= proxy CHECK_GIDS+= proxy authpf CHECK_UIDS+= smmsp CHECK_GIDS+= smmsp CHECK_UIDS+= unbound CHECK_GIDS+= unbound _installcheck_world: __installcheck_UGID __installcheck_UGID: .PHONY .for uid in ${CHECK_UIDS} @if ! `id -u ${uid} >/dev/null 2>&1`; then \ echo "ERROR: Required ${uid} user is missing, see /usr/src/UPDATING."; \ false; \ fi .endfor .for gid in ${CHECK_GIDS} @if ! `find / -prune -group ${gid} >/dev/null 2>&1`; then \ echo "ERROR: Required ${gid} group is missing, see /usr/src/UPDATING."; \ false; \ fi .endfor .endif # # If installing over the running system (DESTDIR is / or unset) and the install # includes rescue, try running rescue from the objdir as a sanity check. If # rescue is not functional (e.g., because it depends on a system call not # supported by the currently running kernel), abort the installation. # .if !make(distributeworld) && ${MK_RESCUE} != "no" && \ (empty(DESTDIR) || ${DESTDIR} == "/") && empty(BYPASS_INSTALLCHECK_SH) _installcheck_world: __installcheck_sh_check __installcheck_sh_check: .PHONY @if [ "`${OBJTOP}/rescue/rescue/rescue sh -c 'echo OK'`" != \ OK ]; then \ echo "rescue/sh check failed, installation aborted" >&2; \ false; \ fi .endif # # Required install tools to be saved in a scratch dir for safety. # .if ${MK_ZONEINFO} != "no" _zoneinfo= zic tzsetup .endif ITOOLS= [ awk cap_mkdb cat chflags chmod chown cmp cp \ date echo egrep find grep id install ${_install-info} \ ln make mkdir mtree mv pwd_mkdb \ rm sed services_mkdb sh sort strip sysctl test true uname wc ${_zoneinfo} \ ${LOCAL_ITOOLS} # Needed for share/man .if ${MK_MAN_UTILS} != "no" ITOOLS+=makewhatis .endif # # distributeworld # # Distributes everything compiled by a `buildworld'. # # installworld # # Installs everything compiled by a 'buildworld'. # # Non-base distributions produced by the base system EXTRA_DISTRIBUTIONS= .if defined(_LIBCOMPAT) EXTRA_DISTRIBUTIONS+= lib${libcompat} .endif .if ${MK_TESTS} != "no" EXTRA_DISTRIBUTIONS+= tests .endif DEBUG_DISTRIBUTIONS= .if ${MK_DEBUG_FILES} != "no" DEBUG_DISTRIBUTIONS+= base ${EXTRA_DISTRIBUTIONS:S,tests,,} .endif MTREE_MAGIC?= mtree 2.0 distributeworld installworld stageworld: _installcheck_world .PHONY mkdir -p ${INSTALLTMP} progs=$$(for prog in ${ITOOLS}; do \ if progpath=`which $$prog`; then \ echo $$progpath; \ else \ echo "Required tool $$prog not found in PATH." >&2; \ exit 1; \ fi; \ done); \ libs=$$(ldd -f "%o %p\n" -f "%o %p\n" $$progs 2>/dev/null | sort -u | \ while read line; do \ set -- $$line; \ if [ "$$2 $$3" != "not found" ]; then \ echo $$2; \ else \ echo "Required library $$1 not found." >&2; \ exit 1; \ fi; \ done); \ cp $$libs $$progs ${INSTALLTMP} cp -R $${PATH_LOCALE:-"/usr/share/locale"} ${INSTALLTMP}/locale .if defined(NO_ROOT) -mkdir -p ${METALOG:H} echo "#${MTREE_MAGIC}" > ${METALOG} .endif .if make(distributeworld) .for dist in ${EXTRA_DISTRIBUTIONS} -mkdir ${DESTDIR}/${DISTDIR}/${dist} ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.root.dist \ -p ${DESTDIR}/${DISTDIR}/${dist} >/dev/null ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/include >/dev/null .if ${MK_DEBUG_FILES} != "no" ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib >/dev/null .endif .if defined(_LIBCOMPAT) ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null .if ${MK_DEBUG_FILES} != "no" ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/usr >/dev/null .endif .endif .if ${MK_TESTS} != "no" && ${dist} == "tests" -mkdir -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} >/dev/null .if ${MK_DEBUG_FILES} != "no" ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/${TESTSBASE} >/dev/null .endif .endif .if defined(NO_ROOT) ${IMAKEENV} ${MTREE} -C -f ${.CURDIR}/etc/mtree/BSD.root.dist | \ sed -e 's#^\./#./${dist}/#' >> ${METALOG} ${IMAKEENV} ${MTREE} -C -f ${.CURDIR}/etc/mtree/BSD.usr.dist | \ sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG} ${IMAKEENV} ${MTREE} -C -f ${.CURDIR}/etc/mtree/BSD.include.dist | \ sed -e 's#^\./#./${dist}/usr/include/#' >> ${METALOG} .if defined(_LIBCOMPAT) ${IMAKEENV} ${MTREE} -C -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist | \ sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG} .endif .endif .endfor -mkdir ${DESTDIR}/${DISTDIR}/base ${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \ METALOG=${METALOG} ${IMAKE_INSTALL} ${IMAKE_MTREE} \ DISTBASE=/base DESTDIR=${DESTDIR}/${DISTDIR}/base \ LOCAL_MTREE=${LOCAL_MTREE:Q} distrib-dirs ${INSTALL_SYMLINK} ${INSTALLFLAGS} usr/src/sys ${INSTALL_DDIR}/base/sys .endif ${_+_}cd ${.CURDIR}; ${IMAKE} re${.TARGET:S/world$//}; \ ${IMAKEENV} rm -rf ${INSTALLTMP} .if make(distributeworld) .for dist in ${EXTRA_DISTRIBUTIONS} find ${DESTDIR}/${DISTDIR}/${dist} -mindepth 1 -type d -empty -delete .endfor .if defined(NO_ROOT) .for dist in base ${EXTRA_DISTRIBUTIONS} @# For each file that exists in this dist, print the corresponding @# line from the METALOG. This relies on the fact that @# a line containing only the filename will sort immediately before @# the relevant mtree line. cd ${DESTDIR}/${DISTDIR}; \ find ./${dist} | sort -u ${METALOG} - | \ awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \ ${DESTDIR}/${DISTDIR}/${dist}.meta .endfor .for dist in ${DEBUG_DISTRIBUTIONS} @# For each file that exists in this dist, print the corresponding @# line from the METALOG. This relies on the fact that @# a line containing only the filename will sort immediately before @# the relevant mtree line. cd ${DESTDIR}/${DISTDIR}; \ find ./${dist}/usr/lib/debug | sort -u ${METALOG} - | \ awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \ ${DESTDIR}/${DISTDIR}/${dist}.debug.meta .endfor .endif .endif packageworld: .PHONY .for dist in base ${EXTRA_DISTRIBUTIONS} .if defined(NO_ROOT) ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - --exclude usr/lib/debug \ @${DESTDIR}/${DISTDIR}/${dist}.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz .else ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - --exclude usr/lib/debug . | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz .endif .endfor .for dist in ${DEBUG_DISTRIBUTIONS} . if defined(NO_ROOT) ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - @${DESTDIR}/${DISTDIR}/${dist}.debug.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz . else ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvLf - usr/lib/debug | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz . endif .endfor makeman: .PHONY ${_+_}cd ${.CURDIR}/tools/build/options; sh makeman > \ ${.CURDIR}/share/man/man5/src.conf.5 # We can't assume here that ${TMPPATH} will include ${PATH} or /usr/libexec # because we may be building with a STRICTTMPPATH, so we explicitly include # /usr/libexec here for flua. ${TMPPATH} still usefully includes anything else # we may need to function. _sysent_PATH= ${TMPPATH}:/usr/libexec _sysent_dirs= sys/kern _sysent_dirs+= sys/compat/freebsd32 _sysent_dirs+= sys/compat/cloudabi32 \ sys/compat/cloudabi64 _sysent_dirs+= sys/amd64/linux \ sys/amd64/linux32 \ sys/arm/linux \ sys/arm64/linux \ sys/i386/linux sysent: .PHONY .for _dir in ${_sysent_dirs} @echo "${MAKE} -C ${.CURDIR}/${_dir} sysent" ${_+_}@env PATH=${_sysent_PATH} ${MAKE} -C ${.CURDIR}/${_dir} sysent .endfor # # reinstall # # If you have a build server, you can NFS mount the source and obj directories # and do a 'make reinstall' on the *client* to install new binaries from the # most recent server build. # restage reinstall: .MAKE .PHONY @echo "--------------------------------------------------------------" @echo ">>> Making hierarchy" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 \ LOCAL_MTREE=${LOCAL_MTREE:Q} hierarchy .if make(restage) @echo "--------------------------------------------------------------" @echo ">>> Making distribution" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 \ LOCAL_MTREE=${LOCAL_MTREE:Q} distribution .endif @echo @echo "--------------------------------------------------------------" @echo ">>> Installing everything started on `LC_ALL=C date`" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install .if defined(_LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install${libcompat} .endif @echo "--------------------------------------------------------------" @echo ">>> Installing everything completed on `LC_ALL=C date`" @echo "--------------------------------------------------------------" redistribute: .MAKE .PHONY @echo "--------------------------------------------------------------" @echo ">>> Distributing everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute .if defined(_LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute${libcompat} \ DISTRIBUTION=lib${libcompat} .endif distrib-dirs distribution: .MAKE .PHONY ${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \ ${IMAKE_INSTALL} ${IMAKE_MTREE} METALOG=${METALOG} ${.TARGET} .if make(distribution) ${_+_}cd ${.CURDIR}; ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} -f Makefile.inc1 ${IMAKE_INSTALL} \ METALOG=${METALOG} MK_TESTS=no installconfig .endif # # buildkernel and installkernel # # Which kernels to build and/or install is specified by setting # KERNCONF. If not defined a GENERIC kernel is built/installed. # Only the existing (depending TARGET) config files are used # for building kernels and only the first of these is designated # as the one being installed. # # Note that we have to use TARGET instead of TARGET_ARCH when # we're in kernel-land. Since only TARGET_ARCH is (expected) to # be set to cross-build, we have to make sure TARGET is set # properly. .if defined(KERNFAST) NO_KERNELCLEAN= t NO_KERNELCONFIG= t NO_KERNELOBJ= t # Shortcut for KERNCONF=Blah -DKERNFAST is now KERNFAST=Blah .if !defined(KERNCONF) && ${KERNFAST} != "1" KERNCONF=${KERNFAST} .endif .endif .if ${TARGET_ARCH} == "powerpc64" KERNCONF?= GENERIC64 .else KERNCONF?= GENERIC .endif INSTKERNNAME?= kernel KERNSRCDIR?= ${.CURDIR}/sys KRNLCONFDIR= ${KERNSRCDIR}/${TARGET}/conf KRNLOBJDIR= ${OBJTOP}${KERNSRCDIR:C,^${.CURDIR},,} KERNCONFDIR?= ${KRNLCONFDIR} BUILDKERNELS= INSTALLKERNEL= .if defined(NO_INSTALLKERNEL) # All of the BUILDKERNELS loops start at index 1. BUILDKERNELS+= dummy .endif .for _kernel in ${KERNCONF} .if !defined(_MKSHOWCONFIG) && exists(${KERNCONFDIR}/${_kernel}) BUILDKERNELS+= ${_kernel} .if empty(INSTALLKERNEL) && !defined(NO_INSTALLKERNEL) INSTALLKERNEL= ${_kernel} .endif .else .if make(buildkernel) .error Missing KERNCONF ${KERNCONFDIR}/${_kernel} .endif .endif .endfor _cleankernobj_fast_depend_hack: .PHONY # 20180320 remove stale generated assym.s after renaming to .inc in r331254 @if [ -e "${OBJTOP}/sys/${KERNCONF}/assym.s" ]; then \ echo "Removing stale generated assym files"; \ rm -f ${OBJTOP}/sys/${KERNCONF}/assym.* \ ${OBJTOP}/sys/${KERNCONF}/.depend.assym.*; \ fi # 20191009 r353340 removal of opensolaris_atomic.S (also r353381) .if ${MACHINE} != i386 .for f in opensolaris_atomic .for m in opensolaris zfs @if [ -e "${KRNLOBJDIR}/${KERNCONF}/modules${SRCTOP}/sys/modules/${m}/.depend.${f}.o" ] && \ grep -q ${f}.S "${KRNLOBJDIR}/${KERNCONF}/modules${SRCTOP}/sys/modules/${m}/.depend.${f}.o"; then \ echo "Removing stale dependencies for opensolaris_atomic"; \ rm -f ${KRNLOBJDIR}/${KERNCONF}/modules${SRCTOP}/sys/modules/${m}/.depend.${f}.*; \ fi .endfor .endfor .endif ${WMAKE_TGTS:N_worldtmp:Nbuild${libcompat}} ${.ALLTARGETS:M_*:N_worldtmp}: .MAKE .PHONY # record kernel(s) build time in seconds .if make(buildkernel) _BUILDKERNEL_START!= date '+%s' .endif # # buildkernel # # Builds all kernels defined by BUILDKERNELS. # buildkernel: .MAKE .PHONY .if empty(BUILDKERNELS:Ndummy) @echo "ERROR: Missing kernel configuration file(s) (${KERNCONF})."; \ false .endif @echo .for _kernel in ${BUILDKERNELS:Ndummy} @echo "--------------------------------------------------------------" @echo ">>> Kernel build for ${_kernel} started on `LC_ALL=C date`" @echo "--------------------------------------------------------------" @echo "===> ${_kernel}" mkdir -p ${KRNLOBJDIR} .if !defined(NO_KERNELCONFIG) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1: configuring the kernel" @echo "--------------------------------------------------------------" cd ${KRNLCONFDIR}; \ PATH=${TMPPATH} \ config ${CONFIGARGS} -d ${KRNLOBJDIR}/${_kernel} \ -I '${KERNCONFDIR}' -I '${KRNLCONFDIR}' \ '${KERNCONFDIR}/${_kernel}' .endif .if !defined(NO_CLEAN) && !defined(NO_KERNELCLEAN) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.1: cleaning up the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} ${CLEANDIR} .else ${_+_}cd ${.CURDIR}; ${WMAKE} _cleankernobj_fast_depend_hack .endif .if !defined(NO_KERNELOBJ) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.2: rebuilding the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} obj .endif @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.3: build tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${KTMAKE} kernel-tools @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3.1: building everything" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} all -DNO_MODULES_OBJ @echo "--------------------------------------------------------------" @echo ">>> Kernel build for ${_kernel} completed on `LC_ALL=C date`" @echo "--------------------------------------------------------------" .endfor @seconds=$$(($$(date '+%s') - ${_BUILDKERNEL_START})); \ echo -n ">>> Kernel(s) ${BUILDKERNELS} built in $$seconds seconds, "; \ echo "ncpu: $$(sysctl -n hw.ncpu)${.MAKE.JOBS:S/^/, make -j/}" @echo "--------------------------------------------------------------" NO_INSTALLEXTRAKERNELS?= yes # # installkernel, etc. # # Install the kernel defined by INSTALLKERNEL # installkernel installkernel.debug \ reinstallkernel reinstallkernel.debug: _installcheck_kernel .PHONY .if !defined(NO_INSTALLKERNEL) .if empty(INSTALLKERNEL) @echo "ERROR: No kernel \"${KERNCONF}\" to install."; \ false .endif @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${INSTALLKERNEL} on $$(LC_ALL=C date)" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \ ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME} ${.TARGET:S/kernel//} @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${INSTALLKERNEL} completed on $$(LC_ALL=C date)" @echo "--------------------------------------------------------------" .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${_kernel} $$(LC_ALL=C date)" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; \ ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME}.${_kernel} ${.TARGET:S/kernel//} @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${_kernel} completed on $$(LC_ALL=C date)" @echo "--------------------------------------------------------------" .endfor .endif distributekernel distributekernel.debug: .PHONY .if !defined(NO_INSTALLKERNEL) .if empty(INSTALLKERNEL) @echo "ERROR: No kernel \"${KERNCONF}\" to install."; \ false .endif mkdir -p ${DESTDIR}/${DISTDIR} .if defined(NO_ROOT) @echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.premeta .endif ${_+_}cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \ ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.premeta/} \ ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} KERNEL=${INSTKERNNAME} \ DESTDIR=${INSTALL_DDIR}/kernel \ ${.TARGET:S/distributekernel/install/} .if defined(NO_ROOT) @sed -e 's|^./kernel|.|' ${DESTDIR}/${DISTDIR}/kernel.premeta > \ ${DESTDIR}/${DISTDIR}/kernel.meta .endif .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} .if defined(NO_ROOT) @echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta .endif ${_+_}cd ${KRNLOBJDIR}/${_kernel}; \ ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.${_kernel}.premeta/} \ ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} \ KERNEL=${INSTKERNNAME}.${_kernel} \ DESTDIR=${INSTALL_DDIR}/kernel.${_kernel} \ ${.TARGET:S/distributekernel/install/} .if defined(NO_ROOT) @sed -e "s|^./kernel.${_kernel}|.|" \ ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta > \ ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta .endif .endfor .endif packagekernel: .PHONY .if defined(NO_ROOT) .if !defined(NO_INSTALLKERNEL) cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --exclude '*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz .endif .if ${MK_DEBUG_FILES} != "no" cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --include '*/*/*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.meta | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --exclude '*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz .if ${MK_DEBUG_FILES} != "no" cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --include '*/*/*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz .endif .endfor .endif .else .if !defined(NO_INSTALLKERNEL) cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --exclude '*.debug' . | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz .endif .if ${MK_DEBUG_FILES} != "no" cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --include '*/*/*.debug' $$(eval find .) | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --exclude '*.debug' . | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz .if ${MK_DEBUG_FILES} != "no" cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --include '*/*/*.debug' $$(eval find .) | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz .endif .endfor .endif .endif stagekernel: .PHONY ${_+_}${MAKE} -C ${.CURDIR} ${.MAKEFLAGS} distributekernel PORTSDIR?= /usr/ports WSTAGEDIR?= ${OBJTOP}/worldstage KSTAGEDIR?= ${OBJTOP}/kernelstage REPODIR?= ${OBJROOT}repo +PKG_FORMAT?= txz PKGSIGNKEY?= # empty .ORDER: stage-packages create-packages .ORDER: create-packages create-world-packages .ORDER: create-packages create-kernel-packages .ORDER: create-packages sign-packages _pkgbootstrap: .PHONY .if make(*package*) && !exists(${LOCALBASE}/sbin/pkg) @env ASSUME_ALWAYS_YES=YES pkg bootstrap .endif packages: .PHONY ${_+_}${MAKE} -C ${.CURDIR} PKG_VERSION=${PKG_VERSION} real-packages package-pkg: .PHONY rm -rf /tmp/ports.${TARGET} || : env ${WMAKEENV:Q} SRCDIR=${.CURDIR} PORTSDIR=${PORTSDIR} REVISION=${_REVISION} \ PKG_CMD=${PKG_CMD} PKG_VERSION=${PKG_VERSION} REPODIR=${REPODIR} \ WSTAGEDIR=${WSTAGEDIR} \ sh ${.CURDIR}/release/scripts/make-pkg-package.sh real-packages: stage-packages create-packages sign-packages .PHONY stage-packages-world: .PHONY @mkdir -p ${WSTAGEDIR} ${_+_}@cd ${.CURDIR}; \ ${MAKE} DESTDIR=${WSTAGEDIR} -DNO_ROOT stageworld stage-packages-kernel: .PHONY @mkdir -p ${KSTAGEDIR} ${_+_}@cd ${.CURDIR}; \ ${MAKE} DESTDIR=${KSTAGEDIR} -DNO_ROOT stagekernel stage-packages: .PHONY stage-packages-world stage-packages-kernel _repodir: .PHONY @mkdir -p ${REPODIR} create-packages-world: _pkgbootstrap _repodir .PHONY ${_+_}@cd ${.CURDIR}; \ ${MAKE} -f Makefile.inc1 \ DESTDIR=${WSTAGEDIR} \ PKG_VERSION=${PKG_VERSION} create-world-packages create-packages-kernel: _pkgbootstrap _repodir .PHONY ${_+_}@cd ${.CURDIR}; \ ${MAKE} -f Makefile.inc1 \ DESTDIR=${KSTAGEDIR} \ PKG_VERSION=${PKG_VERSION} DISTDIR=kernel \ create-kernel-packages create-packages: .PHONY create-packages-world create-packages-kernel create-world-packages: _pkgbootstrap .PHONY @rm -f ${WSTAGEDIR}/*.plist 2>/dev/null || : @cd ${WSTAGEDIR} ; \ env -i LC_COLLATE=C sort ${WSTAGEDIR}/${DISTDIR}/METALOG | \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk @for plist in ${WSTAGEDIR}/*.plist; do \ plist=$${plist##*/} ; \ pkgname=$${plist%.plist} ; \ echo "_PKGS+= $${pkgname}" ; \ done > ${WSTAGEDIR}/packages.mk ${_+_}@cd ${.CURDIR}; \ ${MAKE} -f Makefile.inc1 create-world-packages-jobs \ .MAKE.JOB.PREFIX= .if make(create-world-packages-jobs) .include "${WSTAGEDIR}/packages.mk" .endif +.if make(create-world-packages-jobs) || make(create-kernel-packages*) +PKG_ABI!=${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI +.endif + create-world-packages-jobs: .PHONY .for pkgname in ${_PKGS} create-world-packages-jobs: create-world-package-${pkgname} create-world-package-${pkgname}: .PHONY @sh ${SRCDIR}/release/packages/generate-ucl.sh -o ${pkgname} \ -s ${SRCDIR} -u ${WSTAGEDIR}/${pkgname}.ucl @awk -F\" ' \ /^name/ { printf("===> Creating %s-", $$2); next } \ /^version/ { print $$2; next } \ ' ${WSTAGEDIR}/${pkgname}.ucl @if [ "${pkgname}" == "runtime" ]; then \ sed -i '' -e "s/%VCS_REVISION%/${VCS_REVISION}/" ${WSTAGEDIR}/${pkgname}.ucl ; \ fi ${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname -o ALLOW_BASE_SHLIBS=yes \ - create -M ${WSTAGEDIR}/${pkgname}.ucl \ + create -f ${PKG_FORMAT} -M ${WSTAGEDIR}/${pkgname}.ucl \ -p ${WSTAGEDIR}/${pkgname}.plist \ -r ${WSTAGEDIR} \ - -o ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI)/${PKG_VERSION} + -o ${REPODIR}/${PKG_ABI}/${PKG_VERSION} .endfor _default_flavor= -default .if make(*package*) && exists(${KSTAGEDIR}/kernel.meta) . if ${MK_DEBUG_FILES} != "no" _debug=-debug . endif create-kernel-packages: .PHONY . for flavor in "" ${_debug} create-kernel-packages: create-kernel-packages-flavor${flavor:C,^""$,${_default_flavor},} create-kernel-packages-flavor${flavor:C,^""$,${_default_flavor},}: _pkgbootstrap .PHONY @cd ${KSTAGEDIR}/${DISTDIR} ; \ env -i LC_COLLATE=C sort ${KSTAGEDIR}/kernel.meta | \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ -v kernel=yes -v _kernconf=${INSTALLKERNEL} ; \ sed -e "s/%VERSION%/${PKG_VERSION}/" \ -e "s/%PKGNAME%/kernel-${INSTALLKERNEL:tl}${flavor}/" \ -e "s/%KERNELDIR%/kernel/" \ -e "s/%COMMENT%/FreeBSD ${INSTALLKERNEL} kernel ${flavor}/" \ -e "s/%DESC%/FreeBSD ${INSTALLKERNEL} kernel ${flavor}/" \ -e "s/ %VCS_REVISION%/${VCS_REVISION}/" \ ${SRCDIR}/release/packages/kernel.ucl \ > ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl ; \ awk -F\" ' \ /name/ { printf("===> Creating %s-", $$2); next } \ /version/ {print $$2; next } ' \ ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl ; \ ${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname -o ALLOW_BASE_SHLIBS=yes \ - create -M ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl \ + create -f ${PKG_FORMAT} \ + -M ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl \ -p ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.plist \ -r ${KSTAGEDIR}/${DISTDIR} \ - -o ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI)/${PKG_VERSION} + -o ${REPODIR}/${PKG_ABI}/${PKG_VERSION} . endfor .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" . for _kernel in ${BUILDKERNELS:[2..-1]} . if exists(${KSTAGEDIR}/kernel.${_kernel}.meta) . if ${MK_DEBUG_FILES} != "no" _debug=-debug . endif . for flavor in "" ${_debug} create-kernel-packages: create-kernel-packages-extra-flavor${flavor:C,^""$,${_default_flavor},}-${_kernel} create-kernel-packages-extra-flavor${flavor:C,^""$,${_default_flavor},}-${_kernel}: _pkgbootstrap .PHONY @cd ${KSTAGEDIR}/kernel.${_kernel} ; \ env -i LC_COLLATE=C sort ${KSTAGEDIR}/kernel.${_kernel}.meta | \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ -v kernel=yes -v _kernconf=${_kernel} ; \ sed -e "s/%VERSION%/${PKG_VERSION}/" \ -e "s/%PKGNAME%/kernel-${_kernel:tl}${flavor}/" \ -e "s/%KERNELDIR%/kernel.${_kernel}/" \ -e "s/%COMMENT%/FreeBSD ${_kernel} kernel ${flavor}/" \ -e "s/%DESC%/FreeBSD ${_kernel} kernel ${flavor}/" \ -e "s/ %VCS_REVISION%/${VCS_REVISION}/" \ ${SRCDIR}/release/packages/kernel.ucl \ > ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl ; \ awk -F\" ' \ /name/ { printf("===> Creating %s-", $$2); next } \ /version/ {print $$2; next } ' \ ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl ; \ ${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname -o ALLOW_BASE_SHLIBS=yes \ - create -M ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl \ + create -f ${PKG_FORMAT} \ + -M ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl \ -p ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.plist \ -r ${KSTAGEDIR}/kernel.${_kernel} \ - -o ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI)/${PKG_VERSION} + -o ${REPODIR}/${PKG_ABI}/${PKG_VERSION} . endfor . endif . endfor .endif sign-packages: _pkgbootstrap .PHONY + printf "version = 2;\npacking_format = \"${PKG_FORMAT}\";\n" > ${WSTAGEDIR}/meta @[ -L "${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI)/latest" ] && \ unlink ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI)/latest ; \ ${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname repo \ + -m ${WSTAGEDIR}/meta \ -o ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI)/${PKG_VERSION} \ ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI)/${PKG_VERSION} \ ${PKGSIGNKEY} ; \ cd ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/usr/bin/uname config ABI); \ ln -s ${PKG_VERSION} latest # # # checkworld # # Run test suite on installed world. # checkworld: .PHONY @if [ ! -x "${LOCALBASE}/bin/kyua" ]; then \ echo "You need kyua (devel/kyua) to run the test suite." | /usr/bin/fmt; \ exit 1; \ fi ${_+_}PATH="$$PATH:${LOCALBASE}/bin" kyua test -k ${TESTSBASE}/Kyuafile # # # doxygen # # Build the API documentation with doxygen # doxygen: .PHONY @if [ ! -x "${LOCALBASE}/bin/doxygen" ]; then \ echo "You need doxygen (devel/doxygen) to generate the API documentation of the kernel." | /usr/bin/fmt; \ exit 1; \ fi ${_+_}cd ${.CURDIR}/tools/kerneldoc/subsys; ${MAKE} obj all # # update # # Update the source tree(s), by running svn/svnup to update to the # latest copy. # update: .PHONY .if defined(SVN_UPDATE) @echo "--------------------------------------------------------------" @echo ">>> Updating ${.CURDIR} using Subversion" @echo "--------------------------------------------------------------" @(cd ${.CURDIR}; ${SVN_CMD} update ${SVNFLAGS}) .endif # # ------------------------------------------------------------------------ # # From here onwards are utility targets used by the 'make world' and # related targets. If your 'world' breaks, you may like to try to fix # the problem and manually run the following targets to attempt to # complete the build. Beware, this is *not* guaranteed to work, you # need to have a pretty good grip on the current state of the system # to attempt to manually finish it. If in doubt, 'make world' again. # # # legacy: Build compatibility shims for the next three targets. This is a # minimal set of tools and shims necessary to compensate for older systems # which don't have the APIs required by the targets built in bootstrap-tools, # build-tools or cross-tools. # # libnv and libl are both requirements for config(8), which is an unconditional # bootstrap-tool. _config_deps= lib/libnv usr.bin/lex/lib legacy: .PHONY .if ${BOOTSTRAPPING} < ${MINIMUM_SUPPORTED_OSREL} && ${BOOTSTRAPPING} != 0 @echo "ERROR: Source upgrades from versions prior to ${MINIMUM_SUPPORTED_REL} are not supported."; \ false .endif .for _tool in tools/build ${_config_deps} ${_+_}@${ECHODIR} "===> ${_tool} (obj,includes,all,install)"; \ cd ${.CURDIR}/${_tool}; \ if [ -z "${NO_OBJWALK}" ]; then ${MAKE} DIRPRFX=${_tool}/ obj; fi; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${WORLDTMP}/legacy includes; \ ${MAKE} DIRPRFX=${_tool}/ MK_INCLUDES=no all; \ ${MAKE} DIRPRFX=${_tool}/ MK_INCLUDES=no \ DESTDIR=${WORLDTMP}/legacy install .endfor # # bootstrap-tools: Build tools needed for compatibility. These are binaries that # are built to build other binaries in the system. However, the focus of these # binaries is usually quite narrow. Bootstrap tools use the host's compiler and # libraries, augmented by -legacy, in addition to the libraries built during # bootstrap-tools. # _bt= _bootstrap-tools # We want to run the build with only ${WORLDTMP} in $PATH to ensure we don't # accidentally run tools that are incompatible but happen to be in $PATH. # This is especially important when building on Linux/MacOS where many of the # programs used during the build accept different flags or generate different # output. On those platforms we only symlink the tools known to be compatible # (e.g. basic utilities such as mkdir) into ${WORLDTMP} and build all others # from the FreeBSD sources during the bootstrap-tools stage. # We want to build without the user's $PATH starting in the bootstrap-tools # phase so the tools used in that phase (ln, cp, etc) must have already been # linked to $WORLDTMP. The tools are listed in the _host_tools_to_symlink # variable in tools/build/Makefile and are linked during the legacy phase. # Since they could be Linux or MacOS binaries, too we must only use flags that # are portable across operating systems. # If BOOTSTRAP_ALL_TOOLS is set we will build all the required tools from the # current source tree. Otherwise we create a symlink to the version found in # $PATH during the bootstrap-tools stage. .if defined(BOOTSTRAP_ALL_TOOLS) # BOOTSTRAPPING will be set on the command line so we can't override it here. # Instead set BOOTSTRAPPING_OSRELDATE so that the value 0 is set ${BSARGS} BOOTSTRAPPING_OSRELDATE:= 0 .endif .if ${MK_GAMES} != "no" _strfile= usr.bin/fortune/strfile .endif .if ${MK_GCC} != "no" && ${MK_CXX} != "no" _gperf= gnu/usr.bin/gperf .endif .if ${MK_VT} != "no" _vtfontcvt= usr.bin/vtfontcvt .endif # If we are not building the bootstrap because BOOTSTRAPPING is sufficient # we symlink the host version to $WORLDTMP instead. By doing this we can also # detect when a bootstrap tool is being used without the required MK_FOO. # If you add a new bootstrap tool where we could also use the host version, # please ensure that you also add a .else case where you add the tool to the # _bootstrap_tools_links variable. .if ${BOOTSTRAPPING} < 1000033 _m4= usr.bin/m4 _lex= usr.bin/lex # Note: lex needs m4 to build but m4 also depends on lex. However, lex can be # bootstrapped so we build lex first. ${_bt}-usr.bin/m4: ${_bt}-lib/libopenbsd ${_bt}-usr.bin/yacc ${_bt}-${_lex} _bt_m4_depend=${_bt}-${_m4} _bt_lex_depend=${_bt}-${_lex} ${_bt_m4_depend} .else _bootstrap_tools_links+=m4 lex .endif # ELF Tool Chain libraries are needed for ELF tools and dtrace tools. # r296685 fix cross-endian objcopy # r310724 fixed PR 215350, a crash in libdwarf with objects built by GCC 6.2. # r334881 added libdwarf constants used by ctfconvert. # r338478 fixed a crash in objcopy for mips64el objects # r339083 libelf: correct mips64el test to use ELF header # r348347 Add missing powerpc64 relocation support to libdwarf .if ${BOOTSTRAPPING} < 1300030 _elftoolchain_libs= lib/libelf lib/libdwarf lib/libzstd ${_bt}-lib/libelf: ${_bt_m4_depend} ${_bt}-lib/libdwarf: ${_bt_m4_depend} .endif # flua is required to regenerate syscall files. It first appeared during the # 13.0-CURRENT cycle, thus needs to be built on -older releases and stable # branches. .if ${BOOTSTRAPPING} < 1300059 _flua= libexec/flua .endif # r245440 mtree -N support added # r313404 requires sha384.h for libnetbsd, added to libmd in r292782 .if ${BOOTSTRAPPING} < 1100093 _nmtree= lib/libmd \ lib/libnetbsd \ usr.sbin/nmtree ${_bt}-lib/libnetbsd: ${_bt}-lib/libmd ${_bt}-usr.sbin/nmtree: ${_bt}-lib/libnetbsd .else _bootstrap_tools_links+=mtree .endif # r246097: log addition login.conf.db, passwd, pwd.db, and spwd.db with cat -l .if ${BOOTSTRAPPING} < 1000027 _cat= bin/cat .else _bootstrap_tools_links+=cat .endif # r277259 crunchide: Correct 64-bit section header offset # r281674 crunchide: always include both 32- and 64-bit ELF support .if ${BOOTSTRAPPING} < 1100078 _crunchide= usr.sbin/crunch/crunchide .else _bootstrap_tools_links+=crunchide .endif # r285986 crunchen: use STRIPBIN rather than STRIP # 1100113: Support MK_AUTO_OBJ # 1200006: META_MODE fixes .if ${BOOTSTRAPPING} < 1100078 || \ (${MK_AUTO_OBJ} == "yes" && ${BOOTSTRAPPING} < 1100114) || \ (${MK_META_MODE} == "yes" && ${BOOTSTRAPPING} < 1200006) _crunchgen= usr.sbin/crunch/crunchgen .else _bootstrap_tools_links+=crunchgen .endif # r296926 -P keymap search path, MFC to stable/10 in r298297 .if ${BOOTSTRAPPING} < 1003501 || \ (${BOOTSTRAPPING} >= 1100000 && ${BOOTSTRAPPING} < 1100103) _kbdcontrol= usr.sbin/kbdcontrol .else _bootstrap_tools_links+=kbdcontrol .endif _yacc= lib/liby \ usr.bin/yacc ${_bt}-usr.bin/yacc: ${_bt}-lib/liby .if ${MK_BSNMP} != "no" _gensnmptree= usr.sbin/bsnmpd/gensnmptree .endif .if ${MK_LOCALES} != "no" _localedef= usr.bin/localedef .endif # We need to build tblgen when we're building clang or lld, either as # bootstrap tools, or as the part of the normal build. .if ${MK_CLANG_BOOTSTRAP} != "no" || ${MK_CLANG} != "no" || \ ${MK_LLD_BOOTSTRAP} != "no" || ${MK_LLD} != "no" _clang_tblgen= \ lib/clang/libllvmminimal \ usr.bin/clang/llvm-tblgen \ usr.bin/clang/clang-tblgen \ usr.bin/clang/lldb-tblgen # XXX: lldb-tblgen is not needed, if top-level MK_LLDB=no ${_bt}-usr.bin/clang/clang-tblgen: ${_bt}-lib/clang/libllvmminimal ${_bt}-usr.bin/clang/llvm-tblgen: ${_bt}-lib/clang/libllvmminimal ${_bt}-usr.bin/clang/lldb-tblgen: ${_bt}-lib/clang/libllvmminimal .endif # Build BSDL or GPL DTC depending on GPL_DTC option. _dtc= usr.bin/dtc .if ${MK_GPL_DTC} != "no" _dtc= gnu/usr.bin/dtc .endif .if ${MK_LOCALES} != "no" _localedef= usr.bin/localedef .endif .if ${MK_KERBEROS} != "no" _kerberos5_bootstrap_tools= \ kerberos5/tools/make-roken \ kerberos5/lib/libroken \ kerberos5/lib/libvers \ kerberos5/tools/asn1_compile \ kerberos5/tools/slc \ usr.bin/compile_et .ORDER: ${_kerberos5_bootstrap_tools:C/^/${_bt}-/g} .for _tool in ${_kerberos5_bootstrap_tools} ${_bt}-${_tool}: ${_bt}-usr.bin/yacc ${_bt_lex_depend} .endfor .endif ${_bt}-usr.bin/mandoc: ${_bt}-lib/libopenbsd # The tools listed in _basic_bootstrap_tools will generally not be # bootstrapped unless BOOTSTRAP_ALL_TOOL is set. However, when building on a # Linux or MacOS host the host versions are incompatible so we need to build # them from the source tree. Usually the link name will be the same as the subdir, # but some directories such as grep or test install multiple binaries. In that # case we use the _basic_bootstrap_tools_multilink variable which is a list of # subdirectory and comma-separated list of files. _basic_bootstrap_tools_multilink=usr.bin/grep grep,egrep,fgrep _basic_bootstrap_tools_multilink+=bin/test test,[ # bootstrap tools needed by buildworld: _basic_bootstrap_tools=usr.bin/awk usr.bin/cut bin/expr usr.bin/gencat \ usr.bin/join usr.bin/mktemp bin/rmdir usr.bin/sed usr.bin/sort \ usr.bin/truncate usr.bin/tsort # elf2aout is required for sparc64 build _basic_bootstrap_tools+=usr.bin/elf2aout # file2c is required for building usr.sbin/config: _basic_bootstrap_tools+=usr.bin/file2c # uuencode/uudecode required for share/tabset _basic_bootstrap_tools+=usr.bin/uuencode usr.bin/uudecode # xargs is required by mkioctls _basic_bootstrap_tools+=usr.bin/xargs # cap_mkdb is required for share/termcap: _basic_bootstrap_tools+=usr.bin/cap_mkdb # ldd is required for installcheck (TODO: just always use /usr/bin/ldd instead?) _basic_bootstrap_tools+=usr.bin/ldd # services_mkdb/pwd_mkdb are required for installworld: _basic_bootstrap_tools+=usr.sbin/services_mkdb usr.sbin/pwd_mkdb # sysctl/chflags are required for installkernel: _basic_bootstrap_tools+=sbin/sysctl bin/chflags # mkfifo is used by sys/conf/newvers.sh _basic_bootstrap_tools+=usr.bin/mkfifo .if ${MK_AMD} != "no" # unifdef is only used by usr.sbin/amd/libamu/Makefile _basic_bootstrap_tools+=usr.bin/unifdef .endif .if ${MK_BOOT} != "no" _basic_bootstrap_tools+=bin/dd # xz/unxz is used by EFI _basic_bootstrap_tools_multilink+=usr.bin/xz xz,unxz # md5 is used by boot/beri (and possibly others) _basic_bootstrap_tools+=sbin/md5 .if defined(BOOTSTRAP_ALL_TOOLS) ${_bt}-sbin/md5: ${_bt}-lib/libmd .endif .endif .if ${MK_ZONEINFO} != "no" _basic_bootstrap_tools+=usr.sbin/zic usr.sbin/tzsetup .endif .if defined(BOOTSTRAP_ALL_TOOLS) _other_bootstrap_tools+=${_basic_bootstrap_tools} .for _subdir _links in ${_basic_bootstrap_tools_multilink} _other_bootstrap_tools+=${_subdir} .endfor ${_bt}-usr.bin/awk: ${_bt_lex_depend} ${_bt}-usr.bin/yacc ${_bt}-bin/expr: ${_bt_lex_depend} ${_bt}-usr.bin/yacc # If we are bootstrapping file2c, we have to build it before config: ${_bt}-usr.sbin/config: ${_bt}-usr.bin/file2c ${_bt_lex_depend} # Note: no symlink to make/bmake in the !BOOTSTRAP_ALL_TOOLS case here since # the links to make/bmake make links will have already have been created in the # `make legacy` step. Not adding a link to make is important on non-FreeBSD # since "make" will usually point to GNU make there. _other_bootstrap_tools+=usr.bin/bmake .else # All tools in _basic_bootstrap_tools have the same name as the subdirectory # so we can use :T to get the name of the symlinks that we need to create. _bootstrap_tools_links+=${_basic_bootstrap_tools:T} .for _subdir _links in ${_basic_bootstrap_tools_multilink} _bootstrap_tools_links+=${_links:S/,/ /g} .endfor .endif # defined(BOOTSTRAP_ALL_TOOLS) # Link the tools that we need for building but don't need to bootstrap because # the host version is known to be compatible into ${WORLDTMP}/legacy # We do this before building any of the bootstrap tools in case they depend on # the presence of any of the links (e.g. as m4/lex/awk) ${_bt}-links: .PHONY .for _tool in ${_bootstrap_tools_links} ${_bt}-link-${_tool}: .PHONY .MAKE @if [ ! -e "${WORLDTMP}/legacy/bin/${_tool}" ]; then \ source_path=`which ${_tool}`; \ if [ ! -e "$${source_path}" ] ; then \ echo "Cannot find host tool '${_tool}'"; false; \ fi; \ ln -sfnv "$${source_path}" "${WORLDTMP}/legacy/bin/${_tool}"; \ fi ${_bt}-links: ${_bt}-link-${_tool} .endfor bootstrap-tools: ${_bt}-links .PHONY # Please document (add comment) why something is in 'bootstrap-tools'. # Try to bound the building of the bootstrap-tool to just the # FreeBSD versions that need the tool built at this stage of the build. .for _tool in \ ${_clang_tblgen} \ ${_kerberos5_bootstrap_tools} \ ${_strfile} \ ${_gperf} \ ${_dtc} \ ${_cat} \ ${_kbdcontrol} \ ${_elftoolchain_libs} \ usr.bin/lorder \ lib/libopenbsd \ usr.bin/mandoc \ usr.bin/rpcgen \ ${_yacc} \ ${_m4} \ ${_lex} \ ${_other_bootstrap_tools} \ usr.bin/xinstall \ ${_gensnmptree} \ usr.sbin/config \ ${_flua} \ ${_crunchide} \ ${_crunchgen} \ ${_nmtree} \ ${_vtfontcvt} \ ${_localedef} ${_bt}-${_tool}: ${_bt}-links .PHONY .MAKE ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ if [ -z "${NO_OBJWALK}" ]; then ${MAKE} DIRPRFX=${_tool}/ obj; fi; \ if [ "${_tool}" = "usr.bin/lex" ]; then \ ${MAKE} DIRPRFX=${_tool}/ bootstrap; \ fi; \ ${MAKE} DIRPRFX=${_tool}/ all; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${WORLDTMP}/legacy install bootstrap-tools: ${_bt}-${_tool} .endfor # # build-tools: Build special purpose build tools # .if !defined(NO_SHARE) && ${MK_SYSCONS} != "no" _share= share/syscons/scrnmaps .endif .if ${MK_GCC} != "no" _gcc_tools= gnu/usr.bin/cc/cc_tools .endif .if ${MK_RESCUE} != "no" # rescue includes programs that have build-tools targets _rescue=rescue/rescue .endif .if ${MK_TCSH} != "no" _tcsh=bin/csh .endif .if ${MK_FILE} != "no" _libmagic=lib/libmagic .endif .if ${MK_PMC} != "no" && \ (${TARGET_ARCH} == "aarch64" || ${TARGET_ARCH} == "amd64" || \ ${TARGET_ARCH} == "i386") _jevents=lib/libpmc/pmu-events .endif # kernel-toolchain skips _cleanobj, so handle cleaning up previous # build-tools directories if needed. .if !defined(NO_CLEAN) && make(kernel-toolchain) _bt_clean= ${CLEANDIR} .endif .for _tool in \ ${_tcsh} \ bin/sh \ ${LOCAL_TOOL_DIRS} \ ${_jevents} \ lib/ncurses/ncurses \ lib/ncurses/ncursesw \ ${_rescue} \ ${_share} \ usr.bin/awk \ ${_libmagic} \ usr.bin/mkesdb_static \ usr.bin/mkcsmapper_static \ usr.bin/vi/catalog \ ${_gcc_tools} build-tools_${_tool}: .PHONY ${_+_}@${ECHODIR} "===> ${_tool} (${_bt_clean:D${_bt_clean},}obj,build-tools)"; \ cd ${.CURDIR}/${_tool}; \ if [ -n "${_bt_clean}" ]; then ${MAKE} DIRPRFX=${_tool}/ ${_bt_clean}; fi; \ if [ -z "${NO_OBJWALK}" ]; then ${MAKE} DIRPRFX=${_tool}/ obj; fi; \ ${MAKE} DIRPRFX=${_tool}/ build-tools build-tools: build-tools_${_tool} .endfor # # kernel-tools: Build kernel-building tools # kernel-tools: .PHONY mkdir -p ${WORLDTMP}/usr ${WORLDTMP_MTREE} -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${WORLDTMP}/usr >/dev/null # # cross-tools: All the tools needed to build the rest of the system after # we get done with the earlier stages. It is the last set of tools needed # to begin building the target binaries. # .if ${TARGET_ARCH} != ${MACHINE_ARCH} || ${BUILD_WITH_STRICT_TMPPATH} != 0 .if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "i386" _btxld= usr.sbin/btxld .endif .endif # Rebuild ctfconvert and ctfmerge to avoid difficult-to-diagnose failures # resulting from missing bug fixes or ELF Toolchain updates. .if ${MK_CDDL} != "no" _dtrace_tools= cddl/lib/libctf cddl/usr.bin/ctfconvert \ cddl/usr.bin/ctfmerge .endif # If we're given an XAS, don't build binutils. .if ${XAS:M/*} == "" .if ${MK_BINUTILS_BOOTSTRAP} != "no" _binutils= gnu/usr.bin/binutils .endif .if ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no" _elftctools= lib/libelftc \ lib/libpe \ usr.bin/objcopy \ usr.bin/nm \ usr.bin/size \ usr.bin/strings # These are not required by the build, but can be useful for developers who # cross-build on a FreeBSD 10 host: _elftctools+= usr.bin/addr2line .endif .elif ${TARGET_ARCH} != ${MACHINE_ARCH} && ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no" # If cross-building with an external binutils we still need to build strip for # the target (for at least crunchide). _elftctools= lib/libelftc \ lib/libpe \ usr.bin/objcopy .endif .if ${MK_CLANG_BOOTSTRAP} != "no" _clang= usr.bin/clang .endif .if ${MK_LLD_BOOTSTRAP} != "no" _lld= usr.bin/clang/lld .endif .if ${MK_CLANG_BOOTSTRAP} != "no" || ${MK_LLD_BOOTSTRAP} != "no" _clang_libs= lib/clang .endif .if ${MK_GCC_BOOTSTRAP} != "no" _gcc= gnu/usr.bin/cc .endif .if ${MK_USB} != "no" _usb_tools= stand/usb/tools .endif .if ${BUILD_WITH_STRICT_TMPPATH} != 0 || defined(BOOTSTRAP_ALL_TOOLS) _ar=usr.bin/ar .endif cross-tools: .MAKE .PHONY .for _tool in \ ${LOCAL_XTOOL_DIRS} \ ${_ar} \ ${_clang_libs} \ ${_clang} \ ${_lld} \ ${_binutils} \ ${_elftctools} \ ${_dtrace_tools} \ ${_gcc} \ ${_btxld} \ ${_usb_tools} ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ if [ -z "${NO_OBJWALK}" ]; then ${MAKE} DIRPRFX=${_tool}/ obj; fi; \ ${MAKE} DIRPRFX=${_tool}/ all; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${WORLDTMP} install .endfor # # native-xtools is the current target for qemu-user cross builds of ports # via poudriere and the imgact_binmisc kernel module. # This target merely builds a toolchan/sysroot, then builds the tools it wants # with the options it wants in a special MAKEOBJDIRPREFIX, using the toolchain # already built. It then installs the static tools to NXBDESTDIR for Poudriere # to pickup. # NXBOBJROOT= ${OBJROOT}${MACHINE}.${MACHINE_ARCH}/nxb/ NXBOBJTOP= ${NXBOBJROOT}${NXB_TARGET}.${NXB_TARGET_ARCH} NXTP?= /nxb-bin .if ${NXTP:N/*} .error NXTP variable should be an absolute path .endif NXBDESTDIR?= ${DESTDIR}${NXTP} # This is the list of tools to be built/installed as static and where # appropriate to build for the given TARGET.TARGET_ARCH. NXBDIRS+= \ bin/cat \ bin/chmod \ bin/cp \ ${_tcsh} \ bin/echo \ bin/expr \ bin/hostname \ bin/ln \ bin/ls \ bin/mkdir \ bin/mv \ bin/ps \ bin/realpath \ bin/rm \ bin/rmdir \ bin/sh \ bin/sleep \ sbin/md5 \ sbin/sysctl \ usr.bin/addr2line \ usr.bin/ar \ usr.bin/awk \ usr.bin/basename \ usr.bin/bmake \ usr.bin/bzip2 \ usr.bin/cmp \ usr.bin/diff \ usr.bin/dirname \ usr.bin/objcopy \ usr.bin/env \ usr.bin/fetch \ usr.bin/find \ usr.bin/grep \ usr.bin/gzip \ usr.bin/head \ usr.bin/id \ usr.bin/lex \ usr.bin/limits \ usr.bin/lorder \ usr.bin/mandoc \ usr.bin/mktemp \ usr.bin/mt \ usr.bin/nm \ usr.bin/patch \ usr.bin/readelf \ usr.bin/sed \ usr.bin/size \ usr.bin/sort \ usr.bin/strings \ usr.bin/tar \ usr.bin/touch \ usr.bin/tr \ usr.bin/true \ usr.bin/uniq \ usr.bin/unzip \ usr.bin/wc \ usr.bin/xargs \ usr.bin/xinstall \ usr.bin/xz \ usr.bin/yacc \ usr.sbin/chown SUBDIR_DEPEND_usr.bin/clang= lib/clang .if ${MK_CLANG} != "no" NXBDIRS+= lib/clang NXBDIRS+= usr.bin/clang .endif .if ${MK_GCC} != "no" NXBDIRS+= gnu/usr.bin/cc .endif .if ${MK_BINUTILS} != "no" NXBDIRS+= gnu/usr.bin/binutils .endif # XXX: native-xtools passes along ${NXBDIRS} in SUBDIR_OVERRIDE that needs # to be evaluated after NXBDIRS is set. .if make(install) && !empty(SUBDIR_OVERRIDE) SUBDIR= ${SUBDIR_OVERRIDE} .endif NXBMAKEARGS+= \ OBJTOP=${NXBOBJTOP:Q} \ OBJROOT=${NXBOBJROOT:Q} \ MAKEOBJDIRPREFIX= \ -DNO_SHARED \ -DNO_CPU_CFLAGS \ -DNO_PIC \ SSP_CFLAGS= \ MK_CASPER=no \ MK_CLANG_EXTRAS=no \ MK_CLANG_FULL=no \ MK_CTF=no \ MK_DEBUG_FILES=no \ MK_GDB=no \ MK_HTML=no \ MK_LLDB=no \ MK_MAN=no \ MK_MAN_UTILS=yes \ MK_OFED=no \ MK_OPENSSH=no \ MK_PROFILE=no \ MK_RETPOLINE=no \ MK_SENDMAIL=no \ MK_SVNLITE=no \ MK_TESTS=no \ MK_WARNS=no \ MK_ZFS=no .if make(native-xtools*) && \ (!defined(NXB_TARGET) || !defined(NXB_TARGET_ARCH)) .error Missing NXB_TARGET / NXB_TARGET_ARCH .endif # For 'toolchain' we want to produce native binaries that themselves generate # native binaries. NXBTMAKE= ${NXBMAKEENV} ${MAKE} ${NXBMAKEARGS:N-DNO_PIC:N-DNO_SHARED} \ TARGET=${MACHINE} TARGET_ARCH=${MACHINE_ARCH} # For 'everything' we want to produce native binaries (hence -target to # be MACHINE) that themselves generate TARGET.TARGET_ARCH binaries. # TARGET/TARGET_ARCH are still passed along from user. # # Use the toolchain we create as an external toolchain. .if ${USING_SYSTEM_COMPILER} == "yes" || ${XCC:N${CCACHE_BIN}:M/*} NXBMAKE+= XCC="${XCC}" \ XCXX="${XCXX}" \ XCPP="${XCPP}" .else NXBMAKE+= XCC="${NXBOBJTOP}/tmp/usr/bin/cc" \ XCXX="${NXBOBJTOP}/tmp/usr/bin/c++" \ XCPP="${NXBOBJTOP}/tmp/usr/bin/cpp" .endif NXBMAKE+= ${NXBMAKEENV} ${MAKE} -f Makefile.inc1 ${NXBMAKEARGS} \ TARGET=${NXB_TARGET} TARGET_ARCH=${NXB_TARGET_ARCH} \ TARGET_TRIPLE=${MACHINE_TRIPLE:Q} # NXBDIRS is improperly based on MACHINE rather than NXB_TARGET. Need to # invoke a sub-make to reevaluate MK_GCC, etc, for NXBDIRS. NXBMAKE+= SUBDIR_OVERRIDE='$${NXBDIRS:M*}' # Need to avoid the -isystem logic when using clang as an external toolchain # even if the TARGET being built for wants GCC. NXBMAKE+= WANT_COMPILER_TYPE='$${X_COMPILER_TYPE}' native-xtools: .PHONY ${_+_}cd ${.CURDIR}; ${NXBTMAKE} _cleanobj MK_GCC=yes # Build the bootstrap/host/cross tools that produce native binaries # Pass along MK_GCC=yes to ensure GCC-needed build tools are built. # We don't quite know what the NXB_TARGET wants so just build it. ${_+_}cd ${.CURDIR}; ${NXBTMAKE} kernel-toolchain MK_GCC=yes # Populate includes/libraries sysroot that produce native binaries. # This is split out from 'toolchain' above mostly so that target LLVM # libraries have a proper LLVM_DEFAULT_TARGET_TRIPLE without # polluting the cross-compiler build. The LLVM/GCC libs are skipped # here to avoid the problem but are kept in 'toolchain' so that # needed build tools are built. ${_+_}cd ${.CURDIR}; ${NXBTMAKE} _includes MK_CLANG=no MK_GCC=no ${_+_}cd ${.CURDIR}; ${NXBTMAKE} _libraries MK_CLANG=no MK_GCC=no # Clean out improper TARGET=MACHINE files ${_+_}cd ${.CURDIR}/gnu/usr.bin/cc/cc_tools; ${NXBTMAKE} cleandir .if !defined(NO_OBJWALK) ${_+_}cd ${.CURDIR}; ${NXBMAKE} _obj .endif ${_+_}cd ${.CURDIR}; ${NXBMAKE} everything @echo ">> native-xtools done. Use 'make native-xtools-install' to install to a given DESTDIR" native-xtools-install: .PHONY mkdir -p ${NXBDESTDIR}/bin ${NXBDESTDIR}/sbin ${NXBDESTDIR}/usr ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${NXBDESTDIR}/usr >/dev/null ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${NXBDESTDIR}/usr/include >/dev/null ${_+_}cd ${.CURDIR}; ${NXBMAKE} \ DESTDIR=${NXBDESTDIR} \ -DNO_ROOT \ install # # hierarchy - ensure that all the needed directories are present # hierarchy hier: .MAKE .PHONY ${_+_}cd ${.CURDIR}/etc; ${HMAKE} distrib-dirs # # libraries - build all libraries, and install them under ${DESTDIR}. # # The list of libraries with dependents (${_prebuild_libs}) and their # interdependencies (__L) are built automatically by the # ${.CURDIR}/tools/make_libdeps.sh script. # libraries: .MAKE .PHONY ${_+_}cd ${.CURDIR}; \ ${MAKE} -f Makefile.inc1 _prereq_libs; \ ${MAKE} -f Makefile.inc1 _startup_libs; \ ${MAKE} -f Makefile.inc1 _prebuild_libs; \ ${MAKE} -f Makefile.inc1 _generic_libs # # static libgcc.a prerequisite for shared libc # _prereq_libs= lib/libcompiler_rt .if ${MK_SSP} != "no" _prereq_libs+= lib/libssp_nonshared .endif # These dependencies are not automatically generated: # # gnu/lib/csu, gnu/lib/libgcc, lib/csu and lib/libc must be built before # all shared libraries for ELF. # _startup_libs= lib/csu .if ${MK_BSD_CRTBEGIN} == "no" _startup_libs+= gnu/lib/csu .endif _startup_libs+= lib/libc _startup_libs+= lib/libc_nonshared .if ${MK_LIBCPLUSPLUS} != "no" _startup_libs+= lib/libcxxrt .endif .if ${MK_LLVM_LIBUNWIND} != "no" _prereq_libs+= lib/libgcc_eh lib/libgcc_s _startup_libs+= lib/libgcc_eh lib/libgcc_s lib/libgcc_s__L: lib/libc__L lib/libgcc_s__L: lib/libc_nonshared__L .if ${MK_LIBCPLUSPLUS} != "no" lib/libcxxrt__L: lib/libgcc_s__L .endif .endif _prebuild_libs= ${_kerberos5_lib_libasn1} \ ${_kerberos5_lib_libhdb} \ ${_kerberos5_lib_libheimbase} \ ${_kerberos5_lib_libheimntlm} \ ${_libsqlite3} \ ${_kerberos5_lib_libheimipcc} \ ${_kerberos5_lib_libhx509} ${_kerberos5_lib_libkrb5} \ ${_kerberos5_lib_libroken} \ ${_kerberos5_lib_libwind} \ lib/libbz2 ${_libcom_err} lib/libcrypt \ lib/libelf lib/libexpat \ lib/libfigpar \ ${_lib_libgssapi} \ lib/libkiconv lib/libkvm lib/liblzma lib/libmd lib/libnv \ lib/libzstd \ ${_lib_casper} \ lib/ncurses/ncurses lib/ncurses/ncursesw \ lib/libopie lib/libpam/libpam ${_lib_libthr} \ ${_lib_libradius} lib/libsbuf lib/libtacplus \ lib/libgeom \ ${_cddl_lib_libumem} ${_cddl_lib_libnvpair} \ ${_cddl_lib_libuutil} \ ${_cddl_lib_libavl} \ ${_cddl_lib_libzfs_core} ${_cddl_lib_libzfs} \ ${_cddl_lib_libctf} \ lib/libufs \ lib/libutil lib/libpjdlog ${_lib_libypclnt} lib/libz lib/msun \ ${_secure_lib_libcrypto} ${_secure_lib_libssl} \ ${_lib_libldns} ${_secure_lib_libssh} .if ${MK_GNUCXX} != "no" _prebuild_libs+= gnu/lib/libstdc++ gnu/lib/libsupc++ gnu/lib/libstdc++__L: lib/msun__L gnu/lib/libsupc++__L: gnu/lib/libstdc++__L .endif .if ${MK_DIALOG} != "no" _prebuild_libs+= gnu/lib/libdialog gnu/lib/libdialog__L: lib/msun__L lib/ncurses/ncursesw__L .endif .if ${MK_LIBCPLUSPLUS} != "no" _prebuild_libs+= lib/libc++ .endif lib/libgeom__L: lib/libexpat__L lib/libkvm__L: lib/libelf__L .if ${MK_LIBTHR} != "no" _lib_libthr= lib/libthr .endif .if ${MK_RADIUS_SUPPORT} != "no" _lib_libradius= lib/libradius .endif .if ${MK_OFED} != "no" _prebuild_libs+= \ lib/ofed/libibverbs \ lib/ofed/libibmad \ lib/ofed/libibumad \ lib/ofed/complib \ lib/ofed/libmlx5 lib/ofed/libibmad__L: lib/ofed/libibumad__L lib/ofed/complib__L: lib/libthr__L lib/ofed/libmlx5__L: lib/ofed/libibverbs__L lib/libthr__L .endif .if ${MK_CASPER} != "no" _lib_casper= lib/libcasper .endif lib/libpjdlog__L: lib/libutil__L lib/libcasper__L: lib/libnv__L lib/liblzma__L: lib/libthr__L lib/libzstd__L: lib/libthr__L _generic_libs= ${_cddl_lib} gnu/lib ${_kerberos5_lib} lib ${_secure_lib} usr.bin/lex/lib .if ${MK_IPFILTER} != "no" _generic_libs+= sbin/ipf/libipf .endif .for _DIR in ${LOCAL_LIB_DIRS} .if ${_DIR} == ".WAIT" || (empty(_generic_libs:M${_DIR}) && exists(${.CURDIR}/${_DIR}/Makefile)) _generic_libs+= ${_DIR} .endif .endfor lib/libopie__L lib/libtacplus__L: lib/libmd__L .if ${MK_CDDL} != "no" _cddl_lib_libumem= cddl/lib/libumem _cddl_lib_libnvpair= cddl/lib/libnvpair _cddl_lib_libavl= cddl/lib/libavl _cddl_lib_libuutil= cddl/lib/libuutil .if ${MK_ZFS} != "no" _cddl_lib_libzfs_core= cddl/lib/libzfs_core _cddl_lib_libzfs= cddl/lib/libzfs cddl/lib/libzfs_core__L: cddl/lib/libnvpair__L cddl/lib/libzfs__L: cddl/lib/libzfs_core__L lib/msun__L lib/libutil__L cddl/lib/libzfs__L: lib/libthr__L lib/libmd__L lib/libz__L cddl/lib/libumem__L cddl/lib/libzfs__L: cddl/lib/libuutil__L cddl/lib/libavl__L lib/libgeom__L lib/libbe__L: cddl/lib/libzfs__L .endif _cddl_lib_libctf= cddl/lib/libctf _cddl_lib= cddl/lib cddl/lib/libctf__L: lib/libz__L .endif # cddl/lib/libdtrace requires lib/libproc and lib/librtld_db; it's only built # on select architectures though (see cddl/lib/Makefile) .if ${MACHINE_CPUARCH} != "sparc64" _prebuild_libs+= lib/libprocstat lib/libproc lib/librtld_db lib/libprocstat__L: lib/libelf__L lib/libkvm__L lib/libutil__L lib/libproc__L: lib/libprocstat__L lib/librtld_db__L: lib/libprocstat__L .endif .if ${MK_CRYPT} != "no" .if ${MK_OPENSSL} != "no" _secure_lib_libcrypto= secure/lib/libcrypto _secure_lib_libssl= secure/lib/libssl lib/libradius__L secure/lib/libssl__L: secure/lib/libcrypto__L secure/lib/libcrypto__L: lib/libthr__L .if ${MK_LDNS} != "no" _lib_libldns= lib/libldns lib/libldns__L: secure/lib/libssl__L .endif .if ${MK_OPENSSH} != "no" _secure_lib_libssh= secure/lib/libssh secure/lib/libssh__L: lib/libz__L secure/lib/libcrypto__L lib/libcrypt__L .if ${MK_LDNS} != "no" secure/lib/libssh__L: lib/libldns__L .endif .if ${MK_GSSAPI} != "no" && ${MK_KERBEROS_SUPPORT} != "no" secure/lib/libssh__L: lib/libgssapi__L kerberos5/lib/libkrb5__L \ kerberos5/lib/libhx509__L kerberos5/lib/libasn1__L lib/libcom_err__L \ lib/libmd__L kerberos5/lib/libroken__L .endif .endif .endif _secure_lib= secure/lib .endif .if ${MK_KERBEROS} != "no" kerberos5/lib/libasn1__L: lib/libcom_err__L kerberos5/lib/libroken__L kerberos5/lib/libhdb__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ kerberos5/lib/libkrb5__L kerberos5/lib/libroken__L \ kerberos5/lib/libwind__L lib/libsqlite3__L kerberos5/lib/libheimntlm__L: secure/lib/libcrypto__L kerberos5/lib/libkrb5__L \ kerberos5/lib/libroken__L lib/libcom_err__L kerberos5/lib/libhx509__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ secure/lib/libcrypto__L kerberos5/lib/libroken__L kerberos5/lib/libwind__L kerberos5/lib/libkrb5__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ lib/libcrypt__L secure/lib/libcrypto__L kerberos5/lib/libhx509__L \ kerberos5/lib/libroken__L kerberos5/lib/libwind__L \ kerberos5/lib/libheimbase__L kerberos5/lib/libheimipcc__L kerberos5/lib/libroken__L: lib/libcrypt__L kerberos5/lib/libwind__L: kerberos5/lib/libroken__L lib/libcom_err__L kerberos5/lib/libheimbase__L: lib/libthr__L kerberos5/lib/libheimipcc__L: kerberos5/lib/libroken__L kerberos5/lib/libheimbase__L lib/libthr__L .endif lib/libsqlite3__L: lib/libthr__L .if ${MK_GSSAPI} != "no" _lib_libgssapi= lib/libgssapi .endif .if ${MK_KERBEROS} != "no" _kerberos5_lib= kerberos5/lib _kerberos5_lib_libasn1= kerberos5/lib/libasn1 _kerberos5_lib_libhdb= kerberos5/lib/libhdb _kerberos5_lib_libheimbase= kerberos5/lib/libheimbase _kerberos5_lib_libkrb5= kerberos5/lib/libkrb5 _kerberos5_lib_libhx509= kerberos5/lib/libhx509 _kerberos5_lib_libroken= kerberos5/lib/libroken _kerberos5_lib_libheimntlm= kerberos5/lib/libheimntlm _libsqlite3= lib/libsqlite3 _kerberos5_lib_libheimipcc= kerberos5/lib/libheimipcc _kerberos5_lib_libwind= kerberos5/lib/libwind _libcom_err= lib/libcom_err .endif .if ${MK_NIS} != "no" _lib_libypclnt= lib/libypclnt .endif .if ${MK_OPENSSL} == "no" lib/libradius__L: lib/libmd__L .endif lib/libproc__L: \ ${_cddl_lib_libctf:D${_cddl_lib_libctf}__L} lib/libelf__L lib/librtld_db__L lib/libutil__L .if ${MK_CXX} != "no" .if ${MK_LIBCPLUSPLUS} != "no" lib/libproc__L: lib/libcxxrt__L .else # This implies MK_GNUCXX != "no"; see lib/libproc lib/libproc__L: gnu/lib/libsupc++__L .endif .endif .for _lib in ${_prereq_libs} ${_lib}__PL: .PHONY .MAKE .if !defined(_MKSHOWCONFIG) && exists(${.CURDIR}/${_lib}) ${_+_}@${ECHODIR} "===> ${_lib} (obj,all,install)"; \ cd ${.CURDIR}/${_lib}; \ if [ -z "${NO_OBJWALK}" ]; then ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; fi; \ ${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \ DIRPRFX=${_lib}/ all; \ ${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \ DIRPRFX=${_lib}/ install .endif .endfor .for _lib in ${_startup_libs} ${_prebuild_libs} ${_generic_libs} ${_lib}__L: .PHONY .MAKE .if !defined(_MKSHOWCONFIG) && exists(${.CURDIR}/${_lib}) ${_+_}@${ECHODIR} "===> ${_lib} (obj,all,install)"; \ cd ${.CURDIR}/${_lib}; \ if [ -z "${NO_OBJWALK}" ]; then ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; fi; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ all; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ install .endif .endfor _prereq_libs: ${_prereq_libs:S/$/__PL/} _startup_libs: ${_startup_libs:S/$/__L/} _prebuild_libs: ${_prebuild_libs:S/$/__L/} _generic_libs: ${_generic_libs:S/$/__L/} # Enable SUBDIR_PARALLEL when not calling 'make all', unless called from # 'everything' with _PARALLEL_SUBDIR_OK set. This is because it is unlikely # that running 'make all' from the top-level, especially with a SUBDIR_OVERRIDE # or LOCAL_DIRS set, will have a reliable build if SUBDIRs are built in # parallel. This is safe for the world stage of buildworld though since it has # already built libraries in a proper order and installed includes into # WORLDTMP. Special handling is done for SUBDIR ordering for 'install*' to # avoid trashing a system if it crashes mid-install. .if !make(all) || defined(_PARALLEL_SUBDIR_OK) SUBDIR_PARALLEL= .endif .include .if make(check-old) || make(check-old-dirs) || \ make(check-old-files) || make(check-old-libs) || \ make(delete-old) || make(delete-old-dirs) || \ make(delete-old-files) || make(delete-old-libs) # # check for / delete old files section # .include "ObsoleteFiles.inc" OLD_LIBS_MESSAGE="Please be sure no application still uses those libraries, \ else you can not start such an application. Consult UPDATING for more \ information regarding how to cope with the removal/revision bump of a \ specific library." .if !defined(BATCH_DELETE_OLD_FILES) RM_I=-i .else RM_I=-v .endif delete-old-files: .PHONY @echo ">>> Removing old files (only deletes safe to delete libs)" # Ask for every old file if the user really wants to remove it. # It's annoying, but better safe than sorry. # NB: We cannot pass the list of OLD_FILES as a parameter because the # argument list will get too long. Using .for/.endfor make "loops" will make # the Makefile parser segfault. @exec 3<&0; \ cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | sort | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \ rm ${RM_I} "${DESTDIR}/$${file}" <&3; \ fi; \ for ext in debug symbols; do \ if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \ "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \ <&3; \ fi; \ done; \ done # Remove catpages without corresponding manpages. @exec 3<&0; \ find ${DESTDIR}/usr/share/man/cat* ! -type d 2>/dev/null | sort | \ sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \ while read catpage; do \ read manpage; \ if [ ! -e "$${manpage}" ]; then \ rm ${RM_I} $${catpage} <&3; \ fi; \ done @echo ">>> Old files removed" check-old-files: .PHONY @echo ">>> Checking for old files" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ echo "${DESTDIR}/$${file}"; \ fi; \ for ext in debug symbols; do \ if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \ fi; \ done; \ done | sort # Check for catpages without corresponding manpages. @find ${DESTDIR}/usr/share/man/cat* ! -type d 2>/dev/null | \ sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \ while read catpage; do \ read manpage; \ if [ ! -e "$${manpage}" ]; then \ echo $${catpage}; \ fi; \ done | sort delete-old-libs: .PHONY @echo ">>> Removing old libraries" @echo "${OLD_LIBS_MESSAGE}" | fmt @exec 3<&0; \ cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_LIBS | xargs -n1 | sort | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \ rm ${RM_I} "${DESTDIR}/$${file}" <&3; \ fi; \ for ext in debug symbols; do \ if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \ "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \ <&3; \ fi; \ done; \ done @echo ">>> Old libraries removed" check-old-libs: .PHONY @echo ">>> Checking for old libraries" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_LIBS | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ echo "${DESTDIR}/$${file}"; \ fi; \ for ext in debug symbols; do \ if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \ fi; \ done; \ done | sort delete-old-dirs: .PHONY @echo ">>> Removing old directories" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_DIRS | xargs -n1 | sort -r | \ while read dir; do \ if [ -d "${DESTDIR}/$${dir}" ]; then \ rmdir -v "${DESTDIR}/$${dir}" || true; \ elif [ -L "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ if [ -d "${DESTDIR}${DEBUGDIR}/$${dir}" ]; then \ rmdir -v "${DESTDIR}${DEBUGDIR}/$${dir}" || true; \ elif [ -L "${DESTDIR}${DEBUGDIR}/$${dir}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ done @echo ">>> Old directories removed" check-old-dirs: .PHONY @echo ">>> Checking for old directories" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_DIRS | xargs -n1 | sort -r | \ while read dir; do \ if [ -d "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir}"; \ elif [ -L "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ if [ -d "${DESTDIR}${DEBUGDIR}/$${dir}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${dir}"; \ elif [ -L "${DESTDIR}${DEBUGDIR}/$${dir}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ done delete-old: delete-old-files delete-old-dirs .PHONY @echo "To remove old libraries run '${MAKE_CMD} delete-old-libs'." check-old: check-old-files check-old-libs check-old-dirs .PHONY @echo "To remove old files and directories run '${MAKE_CMD} delete-old'." @echo "To remove old libraries run '${MAKE_CMD} delete-old-libs'." .endif # # showconfig - show build configuration. # showconfig: .PHONY @(${MAKE} -n -f ${.CURDIR}/sys/conf/kern.opts.mk -V dummy -dg1 UPDATE_DEPENDFILE=no NO_OBJ=yes; \ ${MAKE} -n -f ${.CURDIR}/share/mk/src.opts.mk -V dummy -dg1 UPDATE_DEPENDFILE=no NO_OBJ=yes) 2>&1 | grep ^MK_ | sort -u .if !empty(KRNLOBJDIR) && !empty(KERNCONF) DTBOUTPUTPATH= ${KRNLOBJDIR}/${KERNCONF}/ .if !defined(FDT_DTS_FILE) || empty(FDT_DTS_FILE) .if !defined(_MKSHOWCONFIG) && exists(${KERNCONFDIR}/${KERNCONF}) FDT_DTS_FILE!= awk 'BEGIN {FS="="} /^makeoptions[[:space:]]+FDT_DTS_FILE/ {print $$2}' \ '${KERNCONFDIR}/${KERNCONF}' ; echo .endif .endif .endif .if !defined(DTBOUTPUTPATH) || !exists(${DTBOUTPUTPATH}) DTBOUTPUTPATH= ${.CURDIR} .endif # # Build 'standalone' Device Tree Blob # builddtb: .PHONY @PATH=${TMPPATH} MACHINE=${TARGET} \ ${.CURDIR}/sys/tools/fdt/make_dtb.sh ${.CURDIR}/sys \ "${FDT_DTS_FILE}" ${DTBOUTPUTPATH} ############### # cleanworld # In the following, the first 'rm' in a series will usually remove all # files and directories. If it does not, then there are probably some # files with file flags set, so this unsets them and tries the 'rm' a # second time. There are situations where this target will be cleaning # some directories via more than one method, but that duplication is # needed to correctly handle all the possible situations. Removing all # files without file flags set in the first 'rm' instance saves time, # because 'chflags' will need to operate on fewer files afterwards. # # It is expected that BW_CANONICALOBJDIR == the CANONICALOBJDIR as would be # created by bsd.obj.mk, except that we don't want to .include that file # in this makefile. We don't do a cleandir walk if MK_AUTO_OBJ is yes # since it is not possible for files to land in the wrong place. # .if make(cleanworld) BW_CANONICALOBJDIR:=${OBJTOP}/ .elif make(cleanuniverse) BW_CANONICALOBJDIR:=${OBJROOT} .if ${MK_UNIFIED_OBJDIR} == "no" .error ${.TARGETS} only supported with WITH_UNIFIED_OBJDIR enabled. .endif .endif cleanworld cleanuniverse: .PHONY .if !empty(BW_CANONICALOBJDIR) && exists(${BW_CANONICALOBJDIR}) && \ ${.CURDIR:tA} != ${BW_CANONICALOBJDIR:tA} -rm -rf ${BW_CANONICALOBJDIR}* -chflags -R 0 ${BW_CANONICALOBJDIR} rm -rf ${BW_CANONICALOBJDIR}* .endif .if make(cleanworld) && ${MK_AUTO_OBJ} == "no" && \ (empty(BW_CANONICALOBJDIR) || ${.CURDIR:tA} == ${BW_CANONICALOBJDIR:tA}) .if ${.CURDIR} == ${.OBJDIR} || ${.CURDIR}/obj == ${.OBJDIR} # To be safe in this case, fall back to a 'make cleandir' ${_+_}@cd ${.CURDIR}; ${MAKE} cleandir .endif .endif .if ${TARGET} == ${MACHINE} && ${TARGET_ARCH} == ${MACHINE_ARCH} XDEV_CPUTYPE?=${CPUTYPE} .else XDEV_CPUTYPE?=${TARGET_CPUTYPE} .endif NOFUN=-DNO_FSCHG MK_HTML=no -DNO_LINT \ MK_MAN=no MK_NLS=no MK_PROFILE=no \ MK_KERBEROS=no MK_RESCUE=no MK_TESTS=no MK_WARNS=no \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ CPUTYPE=${XDEV_CPUTYPE} XDDIR=${TARGET_ARCH}-freebsd XDTP?=/usr/${XDDIR} .if ${XDTP:N/*} .error XDTP variable should be an absolute path .endif CDBOBJROOT= ${OBJROOT}${MACHINE}.${MACHINE_ARCH}/xdev/ CDBOBJTOP= ${CDBOBJROOT}${XDDIR} CDBENV= \ INSTALL="sh ${.CURDIR}/tools/install.sh" CDENV= ${CDBENV} \ TOOLS_PREFIX=${XDTP} CDMAKEARGS= \ OBJTOP=${CDBOBJTOP:Q} \ OBJROOT=${CDBOBJROOT:Q} CD2MAKEARGS= ${CDMAKEARGS} .if ${WANT_COMPILER_TYPE} == gcc || \ (defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == gcc) # GCC requires -isystem and -L when using a cross-compiler. --sysroot # won't set header path and -L is used to ensure the base library path # is added before the port PREFIX library path. CD2CFLAGS+= -isystem ${XDDESTDIR}/usr/include -L${XDDESTDIR}/usr/lib # GCC requires -B to find /usr/lib/crti.o when using a cross-compiler # combined with --sysroot. CD2CFLAGS+= -B${XDDESTDIR}/usr/lib # Force using libc++ for external GCC. .if defined(X_COMPILER_TYPE) && \ ${X_COMPILER_TYPE} == gcc && ${X_COMPILER_VERSION} >= 40800 CD2CXXFLAGS+= -isystem ${XDDESTDIR}/usr/include/c++/v1 -std=c++11 \ -nostdinc++ .endif .endif CD2CFLAGS+= --sysroot=${XDDESTDIR}/ CD2ENV=${CDENV} CC="${CC} ${CD2CFLAGS}" CXX="${CXX} ${CD2CXXFLAGS} ${CD2CFLAGS}" \ CPP="${CPP} ${CD2CFLAGS}" \ MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH} CDTMP= ${OBJTOP}/${XDDIR}/tmp CDMAKE=${CDENV} PATH=${CDTMP}/usr/bin:${PATH} ${MAKE} ${CDMAKEARGS} ${NOFUN} CD2MAKE=${CD2ENV} PATH=${CDTMP}/usr/bin:${XDDESTDIR}/usr/bin:${PATH} \ ${MAKE} ${CD2MAKEARGS} ${NOFUN} .if ${MK_META_MODE} != "no" # Don't rebuild build-tools targets during normal build. CD2MAKE+= BUILD_TOOLS_META=.NOMETA .endif XDDESTDIR=${DESTDIR}${XDTP} .ORDER: xdev-build xdev-install xdev-links xdev: xdev-build xdev-install .PHONY .ORDER: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools xdev-build: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools .PHONY _xb-worldtmp: .PHONY mkdir -p ${CDTMP}/usr ${WORLDTMP_MTREE} -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${CDTMP}/usr >/dev/null _xb-bootstrap-tools: .PHONY .for _tool in \ ${_clang_tblgen} \ ${_gperf} \ ${_yacc} ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ if [ -z "${NO_OBJWALK}" ]; then ${CDMAKE} DIRPRFX=${_tool}/ obj; fi; \ ${CDMAKE} DIRPRFX=${_tool}/ all; \ ${CDMAKE} DIRPRFX=${_tool}/ DESTDIR=${CDTMP} install .endfor _xb-build-tools: .PHONY ${_+_}@cd ${.CURDIR}; \ ${CDBENV} ${MAKE} ${CDMAKEARGS} -f Makefile.inc1 ${NOFUN} build-tools XDEVDIRS= \ ${_clang_libs} \ ${_lld} \ ${_binutils} \ ${_elftctools} \ usr.bin/ar \ ${_clang} \ ${_gcc} _xb-cross-tools: .PHONY .for _tool in ${XDEVDIRS} ${_+_}@${ECHODIR} "===> xdev ${_tool} (obj,all)"; \ cd ${.CURDIR}/${_tool}; \ if [ -z "${NO_OBJWALK}" ]; then ${CDMAKE} DIRPRFX=${_tool}/ obj; fi; \ ${CDMAKE} DIRPRFX=${_tool}/ all .endfor _xi-mtree: .PHONY ${_+_}@${ECHODIR} "mtree populating ${XDDESTDIR}" mkdir -p ${XDDESTDIR} ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.root.dist \ -p ${XDDESTDIR} >/dev/null ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${XDDESTDIR}/usr >/dev/null ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${XDDESTDIR}/usr/include >/dev/null .if defined(_LIBCOMPAT) ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${XDDESTDIR}/usr >/dev/null .endif .if ${MK_TESTS} != "no" mkdir -p ${XDDESTDIR}${TESTSBASE} ${DESTDIR_MTREE} -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${XDDESTDIR}${TESTSBASE} >/dev/null .endif .ORDER: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries xdev-install: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries .PHONY _xi-cross-tools: .PHONY @echo "_xi-cross-tools" .for _tool in ${XDEVDIRS} ${_+_}@${ECHODIR} "===> xdev ${_tool} (install)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ install DESTDIR=${XDDESTDIR} .endfor _xi-includes: .PHONY .if !defined(NO_OBJWALK) ${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 _obj \ DESTDIR=${XDDESTDIR} .endif ${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 includes \ DESTDIR=${XDDESTDIR} _xi-libraries: .PHONY ${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 libraries \ DESTDIR=${XDDESTDIR} xdev-links: .PHONY ${_+_}cd ${XDDESTDIR}/usr/bin; \ mkdir -p ../../../../usr/bin; \ for i in *; do \ ln -sf ../../${XDTP}/usr/bin/$$i \ ../../../../usr/bin/${XDDIR}-$$i; \ ln -sf ../../${XDTP}/usr/bin/$$i \ ../../../../usr/bin/${XDDIR}${_REVISION}-$$i; \ done Index: projects/clang1000-import/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h =================================================================== --- projects/clang1000-import/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h (revision 356919) +++ projects/clang1000-import/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h (revision 356920) @@ -1,885 +1,886 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Pawel Jakub Dawidek. All rights reserved. * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright 2019 Joyent, Inc. * Copyright (c) 2012 Martin Matuska . All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Nexenta Systems, Inc. * Copyright (c) 2019 Datto Inc. */ #ifndef _LIBZFS_H #define _LIBZFS_H #include #include #include #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif /* * Miscellaneous ZFS constants */ #define ZFS_MAXPROPLEN MAXPATHLEN #define ZPOOL_MAXPROPLEN MAXPATHLEN /* * libzfs errors */ typedef enum zfs_error { EZFS_SUCCESS = 0, /* no error -- success */ EZFS_NOMEM = 2000, /* out of memory */ EZFS_BADPROP, /* invalid property value */ EZFS_PROPREADONLY, /* cannot set readonly property */ EZFS_PROPTYPE, /* property does not apply to dataset type */ EZFS_PROPNONINHERIT, /* property is not inheritable */ EZFS_PROPSPACE, /* bad quota or reservation */ EZFS_BADTYPE, /* dataset is not of appropriate type */ EZFS_BUSY, /* pool or dataset is busy */ EZFS_EXISTS, /* pool or dataset already exists */ EZFS_NOENT, /* no such pool or dataset */ EZFS_BADSTREAM, /* bad backup stream */ EZFS_DSREADONLY, /* dataset is readonly */ EZFS_VOLTOOBIG, /* volume is too large for 32-bit system */ EZFS_INVALIDNAME, /* invalid dataset name */ EZFS_BADRESTORE, /* unable to restore to destination */ EZFS_BADBACKUP, /* backup failed */ EZFS_BADTARGET, /* bad attach/detach/replace target */ EZFS_NODEVICE, /* no such device in pool */ EZFS_BADDEV, /* invalid device to add */ EZFS_NOREPLICAS, /* no valid replicas */ EZFS_RESILVERING, /* currently resilvering */ EZFS_BADVERSION, /* unsupported version */ EZFS_POOLUNAVAIL, /* pool is currently unavailable */ EZFS_DEVOVERFLOW, /* too many devices in one vdev */ EZFS_BADPATH, /* must be an absolute path */ EZFS_CROSSTARGET, /* rename or clone across pool or dataset */ EZFS_ZONED, /* used improperly in local zone */ EZFS_MOUNTFAILED, /* failed to mount dataset */ EZFS_UMOUNTFAILED, /* failed to unmount dataset */ EZFS_UNSHARENFSFAILED, /* unshare(1M) failed */ EZFS_SHARENFSFAILED, /* share(1M) failed */ EZFS_PERM, /* permission denied */ EZFS_NOSPC, /* out of space */ EZFS_FAULT, /* bad address */ EZFS_IO, /* I/O error */ EZFS_INTR, /* signal received */ EZFS_ISSPARE, /* device is a hot spare */ EZFS_INVALCONFIG, /* invalid vdev configuration */ EZFS_RECURSIVE, /* recursive dependency */ EZFS_NOHISTORY, /* no history object */ EZFS_POOLPROPS, /* couldn't retrieve pool props */ EZFS_POOL_NOTSUP, /* ops not supported for this type of pool */ EZFS_POOL_INVALARG, /* invalid argument for this pool operation */ EZFS_NAMETOOLONG, /* dataset name is too long */ EZFS_OPENFAILED, /* open of device failed */ EZFS_NOCAP, /* couldn't get capacity */ EZFS_LABELFAILED, /* write of label failed */ EZFS_BADWHO, /* invalid permission who */ EZFS_BADPERM, /* invalid permission */ EZFS_BADPERMSET, /* invalid permission set name */ EZFS_NODELEGATION, /* delegated administration is disabled */ EZFS_UNSHARESMBFAILED, /* failed to unshare over smb */ EZFS_SHARESMBFAILED, /* failed to share over smb */ EZFS_BADCACHE, /* bad cache file */ EZFS_ISL2CACHE, /* device is for the level 2 ARC */ EZFS_VDEVNOTSUP, /* unsupported vdev type */ EZFS_NOTSUP, /* ops not supported on this dataset */ EZFS_ACTIVE_SPARE, /* pool has active shared spare devices */ EZFS_UNPLAYED_LOGS, /* log device has unplayed logs */ EZFS_REFTAG_RELE, /* snapshot release: tag not found */ EZFS_REFTAG_HOLD, /* snapshot hold: tag already exists */ EZFS_TAGTOOLONG, /* snapshot hold/rele: tag too long */ EZFS_PIPEFAILED, /* pipe create failed */ EZFS_THREADCREATEFAILED, /* thread create failed */ EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */ EZFS_SCRUBBING, /* currently scrubbing */ EZFS_NO_SCRUB, /* no active scrub */ EZFS_DIFF, /* general failure of zfs diff */ EZFS_DIFFDATA, /* bad zfs diff data */ EZFS_POOLREADONLY, /* pool is in read-only mode */ EZFS_SCRUB_PAUSED, /* scrub currently paused */ EZFS_ACTIVE_POOL, /* pool is imported on a different system */ EZFS_NO_PENDING, /* cannot cancel, no operation is pending */ EZFS_CHECKPOINT_EXISTS, /* checkpoint exists */ EZFS_DISCARDING_CHECKPOINT, /* currently discarding a checkpoint */ EZFS_NO_CHECKPOINT, /* pool has no checkpoint */ EZFS_DEVRM_IN_PROGRESS, /* a device is currently being removed */ EZFS_VDEV_TOO_BIG, /* a device is too big to be used */ EZFS_TOOMANY, /* argument list too long */ EZFS_INITIALIZING, /* currently initializing */ EZFS_NO_INITIALIZE, /* no active initialize */ EZFS_UNKNOWN } zfs_error_t; /* * UEFI boot support parameters. When creating whole disk boot pool, * zpool create should allow to create EFI System partition for UEFI boot * program. In case of BIOS, the EFI System partition is not used * even if it does exist. */ typedef enum zpool_boot_label { ZPOOL_NO_BOOT_LABEL = 0, ZPOOL_CREATE_BOOT_LABEL, ZPOOL_COPY_BOOT_LABEL } zpool_boot_label_t; /* * The following data structures are all part * of the zfs_allow_t data structure which is * used for printing 'allow' permissions. * It is a linked list of zfs_allow_t's which * then contain avl tree's for user/group/sets/... * and each one of the entries in those trees have * avl tree's for the permissions they belong to and * whether they are local,descendent or local+descendent * permissions. The AVL trees are used primarily for * sorting purposes, but also so that we can quickly find * a given user and or permission. */ typedef struct zfs_perm_node { avl_node_t z_node; char z_pname[MAXPATHLEN]; } zfs_perm_node_t; typedef struct zfs_allow_node { avl_node_t z_node; char z_key[MAXPATHLEN]; /* name, such as joe */ avl_tree_t z_localdescend; /* local+descendent perms */ avl_tree_t z_local; /* local permissions */ avl_tree_t z_descend; /* descendent permissions */ } zfs_allow_node_t; typedef struct zfs_allow { struct zfs_allow *z_next; char z_setpoint[MAXPATHLEN]; avl_tree_t z_sets; avl_tree_t z_crperms; avl_tree_t z_user; avl_tree_t z_group; avl_tree_t z_everyone; } zfs_allow_t; /* * Basic handle types */ typedef struct zfs_handle zfs_handle_t; typedef struct zpool_handle zpool_handle_t; typedef struct libzfs_handle libzfs_handle_t; /* * Library initialization */ extern libzfs_handle_t *libzfs_init(void); extern void libzfs_fini(libzfs_handle_t *); extern libzfs_handle_t *zpool_get_handle(zpool_handle_t *); extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *); extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t); extern void zfs_save_arguments(int argc, char **, char *, int); extern int zpool_log_history(libzfs_handle_t *, const char *); extern int libzfs_errno(libzfs_handle_t *); extern const char *libzfs_error_action(libzfs_handle_t *); extern const char *libzfs_error_description(libzfs_handle_t *); extern int zfs_standard_error(libzfs_handle_t *, int, const char *); extern void libzfs_mnttab_init(libzfs_handle_t *); extern void libzfs_mnttab_fini(libzfs_handle_t *); extern void libzfs_mnttab_cache(libzfs_handle_t *, boolean_t); extern int libzfs_mnttab_find(libzfs_handle_t *, const char *, struct mnttab *); extern void libzfs_mnttab_add(libzfs_handle_t *, const char *, const char *, const char *); extern void libzfs_mnttab_remove(libzfs_handle_t *, const char *); /* * Basic handle functions */ extern zpool_handle_t *zpool_open(libzfs_handle_t *, const char *); extern zpool_handle_t *zpool_open_canfail(libzfs_handle_t *, const char *); extern void zpool_close(zpool_handle_t *); extern const char *zpool_get_name(zpool_handle_t *); extern int zpool_get_state(zpool_handle_t *); extern const char *zpool_state_to_name(vdev_state_t, vdev_aux_t); extern const char *zpool_pool_state_to_name(pool_state_t); extern void zpool_free_handles(libzfs_handle_t *); extern int zpool_nextboot(libzfs_handle_t *, uint64_t, uint64_t, const char *); /* * Iterate over all active pools in the system. */ typedef int (*zpool_iter_f)(zpool_handle_t *, void *); extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *); extern boolean_t zpool_skip_pool(const char *); /* * Functions to create and destroy pools */ extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *, nvlist_t *, nvlist_t *); extern int zpool_destroy(zpool_handle_t *, const char *); extern int zpool_add(zpool_handle_t *, nvlist_t *); typedef struct splitflags { /* do not split, but return the config that would be split off */ int dryrun : 1; /* after splitting, import the pool */ int import : 1; int name_flags; } splitflags_t; /* * Functions to manipulate pool and vdev state */ extern int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t); extern int zpool_initialize(zpool_handle_t *, pool_initialize_func_t, nvlist_t *); extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *); extern int zpool_reguid(zpool_handle_t *); extern int zpool_reopen(zpool_handle_t *); extern int zpool_sync_one(zpool_handle_t *, void *); extern int zpool_vdev_online(zpool_handle_t *, const char *, int, vdev_state_t *); extern int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t); extern int zpool_vdev_attach(zpool_handle_t *, const char *, const char *, nvlist_t *, int); extern int zpool_vdev_detach(zpool_handle_t *, const char *); extern int zpool_vdev_remove(zpool_handle_t *, const char *); extern int zpool_vdev_remove_cancel(zpool_handle_t *); extern int zpool_vdev_indirect_size(zpool_handle_t *, const char *, uint64_t *); extern int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *, splitflags_t); extern int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t); extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t); extern int zpool_vdev_clear(zpool_handle_t *, uint64_t); extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, const char *, zpool_boot_label_t, uint64_t, int *); /* * Functions to manage pool properties */ extern int zpool_set_prop(zpool_handle_t *, const char *, const char *); extern int zpool_get_prop(zpool_handle_t *, zpool_prop_t, char *, size_t proplen, zprop_source_t *, boolean_t); extern uint64_t zpool_get_prop_int(zpool_handle_t *, zpool_prop_t, zprop_source_t *); extern const char *zpool_prop_to_name(zpool_prop_t); extern const char *zpool_prop_values(zpool_prop_t); /* * Pool health statistics. */ typedef enum { /* * The following correspond to faults as defined in the (fault.fs.zfs.*) * event namespace. Each is associated with a corresponding message ID. * This must be kept in sync with the zfs_msgid_table in * lib/libzfs/libzfs_status.c. */ ZPOOL_STATUS_CORRUPT_CACHE, /* corrupt /kernel/drv/zpool.cache */ ZPOOL_STATUS_MISSING_DEV_R, /* missing device with replicas */ ZPOOL_STATUS_MISSING_DEV_NR, /* missing device with no replicas */ ZPOOL_STATUS_CORRUPT_LABEL_R, /* bad device label with replicas */ ZPOOL_STATUS_CORRUPT_LABEL_NR, /* bad device label with no replicas */ ZPOOL_STATUS_BAD_GUID_SUM, /* sum of device guids didn't match */ ZPOOL_STATUS_CORRUPT_POOL, /* pool metadata is corrupted */ ZPOOL_STATUS_CORRUPT_DATA, /* data errors in user (meta)data */ ZPOOL_STATUS_FAILING_DEV, /* device experiencing errors */ ZPOOL_STATUS_VERSION_NEWER, /* newer on-disk version */ ZPOOL_STATUS_HOSTID_MISMATCH, /* last accessed by another system */ ZPOOL_STATUS_HOSTID_ACTIVE, /* currently active on another system */ ZPOOL_STATUS_HOSTID_REQUIRED, /* multihost=on and hostid=0 */ ZPOOL_STATUS_IO_FAILURE_WAIT, /* failed I/O, failmode 'wait' */ ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */ ZPOOL_STATUS_IO_FAILURE_MMP, /* failed MMP, failmode not 'panic' */ ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */ /* * If the pool has unsupported features but can still be opened in * read-only mode, its status is ZPOOL_STATUS_UNSUP_FEAT_WRITE. If the * pool has unsupported features but cannot be opened at all, its * status is ZPOOL_STATUS_UNSUP_FEAT_READ. */ ZPOOL_STATUS_UNSUP_FEAT_READ, /* unsupported features for read */ ZPOOL_STATUS_UNSUP_FEAT_WRITE, /* unsupported features for write */ /* * These faults have no corresponding message ID. At the time we are * checking the status, the original reason for the FMA fault (I/O or * checksum errors) has been lost. */ ZPOOL_STATUS_FAULTED_DEV_R, /* faulted device with replicas */ ZPOOL_STATUS_FAULTED_DEV_NR, /* faulted device with no replicas */ /* * The following are not faults per se, but still an error possibly * requiring administrative attention. There is no corresponding * message ID. */ ZPOOL_STATUS_VERSION_OLDER, /* older legacy on-disk version */ ZPOOL_STATUS_FEAT_DISABLED, /* supported features are disabled */ ZPOOL_STATUS_RESILVERING, /* device being resilvered */ ZPOOL_STATUS_OFFLINE_DEV, /* device offline */ ZPOOL_STATUS_REMOVED_DEV, /* removed device */ ZPOOL_STATUS_NON_NATIVE_ASHIFT, /* (e.g. 512e dev with ashift of 9) */ /* * Finally, the following indicates a healthy pool. */ ZPOOL_STATUS_OK } zpool_status_t; extern zpool_status_t zpool_get_status(zpool_handle_t *, char **); extern zpool_status_t zpool_import_status(nvlist_t *, char **); extern void zpool_dump_ddt(const ddt_stat_t *dds, const ddt_histogram_t *ddh); /* * Statistics and configuration functions. */ extern nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **); extern nvlist_t *zpool_get_features(zpool_handle_t *); extern int zpool_refresh_stats(zpool_handle_t *, boolean_t *); extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **); extern boolean_t zpool_is_bootable(zpool_handle_t *); /* * Import and export functions */ extern int zpool_export(zpool_handle_t *, boolean_t, const char *); extern int zpool_export_force(zpool_handle_t *, const char *); extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *, char *altroot); extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *, nvlist_t *, int); extern void zpool_print_unsup_feat(nvlist_t *config); /* * Search for pools to import */ typedef struct importargs { char **path; /* a list of paths to search */ int paths; /* number of paths to search */ char *poolname; /* name of a pool to find */ uint64_t guid; /* guid of a pool to find */ char *cachefile; /* cachefile to use for import */ int can_be_active : 1; /* can the pool be active? */ int unique : 1; /* does 'poolname' already exist? */ int exists : 1; /* set on return if pool already exists */ nvlist_t *policy; /* load policy (max txg, rewind, etc.) */ } importargs_t; extern nvlist_t *zpool_search_import(libzfs_handle_t *, importargs_t *); extern int zpool_tryimport(libzfs_handle_t *hdl, char *target, nvlist_t **configp, importargs_t *args); /* legacy pool search routines */ extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **); extern nvlist_t *zpool_find_import_cached(libzfs_handle_t *, const char *, char *, uint64_t); /* * Miscellaneous pool functions */ struct zfs_cmd; extern const char *zfs_history_event_names[]; typedef enum { VDEV_NAME_PATH = 1 << 0, VDEV_NAME_GUID = 1 << 1, VDEV_NAME_FOLLOW_LINKS = 1 << 2, VDEV_NAME_TYPE_ID = 1 << 3, } vdev_name_t; extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *, int name_flags); extern int zpool_upgrade(zpool_handle_t *, uint64_t); extern int zpool_get_history(zpool_handle_t *, nvlist_t **); extern int zpool_history_unpack(char *, uint64_t, uint64_t *, nvlist_t ***, uint_t *); extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *, size_t len); extern int zfs_ioctl(libzfs_handle_t *, int request, struct zfs_cmd *); extern int zpool_get_physpath(zpool_handle_t *, char *, size_t); extern void zpool_explain_recover(libzfs_handle_t *, const char *, int, nvlist_t *); extern int zpool_checkpoint(zpool_handle_t *); extern int zpool_discard_checkpoint(zpool_handle_t *); /* * Basic handle manipulations. These functions do not create or destroy the * underlying datasets, only the references to them. */ extern zfs_handle_t *zfs_open(libzfs_handle_t *, const char *, int); extern zfs_handle_t *zfs_handle_dup(zfs_handle_t *); extern void zfs_close(zfs_handle_t *); extern zfs_type_t zfs_get_type(const zfs_handle_t *); extern const char *zfs_get_name(const zfs_handle_t *); extern zpool_handle_t *zfs_get_pool_handle(const zfs_handle_t *); extern const char *zfs_get_pool_name(const zfs_handle_t *); /* * Property management functions. Some functions are shared with the kernel, * and are found in sys/fs/zfs.h. */ /* * zfs dataset property management */ extern const char *zfs_prop_default_string(zfs_prop_t); extern uint64_t zfs_prop_default_numeric(zfs_prop_t); extern const char *zfs_prop_column_name(zfs_prop_t); extern boolean_t zfs_prop_align_right(zfs_prop_t); extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, nvlist_t *, uint64_t, zfs_handle_t *, zpool_handle_t *, const char *); extern const char *zfs_prop_to_name(zfs_prop_t); extern int zfs_prop_set(zfs_handle_t *, const char *, const char *); extern int zfs_prop_set_list(zfs_handle_t *, nvlist_t *); extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t, zprop_source_t *, char *, size_t, boolean_t); extern int zfs_prop_get_recvd(zfs_handle_t *, const char *, char *, size_t, boolean_t); extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *, zprop_source_t *, char *, size_t); extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue); extern int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal); extern int zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue); extern int zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal); extern int zfs_prop_get_feature(zfs_handle_t *zhp, const char *propname, char *buf, size_t len); extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t); extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t); extern const char *zfs_prop_values(zfs_prop_t); extern int zfs_prop_is_string(zfs_prop_t prop); extern nvlist_t *zfs_get_user_props(zfs_handle_t *); extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *); extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *); typedef struct zprop_list { int pl_prop; char *pl_user_prop; struct zprop_list *pl_next; boolean_t pl_all; size_t pl_width; size_t pl_recvd_width; boolean_t pl_fixed; } zprop_list_t; extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **, boolean_t, boolean_t); extern void zfs_prune_proplist(zfs_handle_t *, uint8_t *); #define ZFS_MOUNTPOINT_NONE "none" #define ZFS_MOUNTPOINT_LEGACY "legacy" #define ZFS_FEATURE_DISABLED "disabled" #define ZFS_FEATURE_ENABLED "enabled" #define ZFS_FEATURE_ACTIVE "active" #define ZFS_UNSUPPORTED_INACTIVE "inactive" #define ZFS_UNSUPPORTED_READONLY "readonly" /* * zpool property management */ extern int zpool_expand_proplist(zpool_handle_t *, zprop_list_t **); extern int zpool_prop_get_feature(zpool_handle_t *, const char *, char *, size_t); extern const char *zpool_prop_default_string(zpool_prop_t); extern uint64_t zpool_prop_default_numeric(zpool_prop_t); extern const char *zpool_prop_column_name(zpool_prop_t); extern boolean_t zpool_prop_align_right(zpool_prop_t); /* * Functions shared by zfs and zpool property management. */ extern int zprop_iter(zprop_func func, void *cb, boolean_t show_all, boolean_t ordered, zfs_type_t type); extern int zprop_get_list(libzfs_handle_t *, char *, zprop_list_t **, zfs_type_t); extern void zprop_free_list(zprop_list_t *); #define ZFS_GET_NCOLS 5 typedef enum { GET_COL_NONE, GET_COL_NAME, GET_COL_PROPERTY, GET_COL_VALUE, GET_COL_RECVD, GET_COL_SOURCE } zfs_get_column_t; /* * Functions for printing zfs or zpool properties */ typedef struct zprop_get_cbdata { int cb_sources; zfs_get_column_t cb_columns[ZFS_GET_NCOLS]; int cb_colwidths[ZFS_GET_NCOLS + 1]; boolean_t cb_scripted; boolean_t cb_literal; boolean_t cb_first; zprop_list_t *cb_proplist; zfs_type_t cb_type; } zprop_get_cbdata_t; void zprop_print_one_property(const char *, zprop_get_cbdata_t *, const char *, const char *, zprop_source_t, const char *, const char *); /* * Iterator functions. */ typedef int (*zfs_iter_f)(zfs_handle_t *, void *); extern int zfs_iter_root(libzfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_dependents(zfs_handle_t *, boolean_t, zfs_iter_f, void *); extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_snapshots(zfs_handle_t *, boolean_t, zfs_iter_f, void *, uint64_t, uint64_t); extern int zfs_iter_snapshots_sorted(zfs_handle_t *, zfs_iter_f, void *, uint64_t, uint64_t); extern int zfs_iter_snapspec(zfs_handle_t *, const char *, zfs_iter_f, void *); extern int zfs_iter_bookmarks(zfs_handle_t *, zfs_iter_f, void *); typedef struct get_all_cb { zfs_handle_t **cb_handles; size_t cb_alloc; size_t cb_used; } get_all_cb_t; void zfs_foreach_mountpoint(libzfs_handle_t *, zfs_handle_t **, size_t, zfs_iter_f, void*, boolean_t); void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *); /* * Functions to create and destroy datasets. */ extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t, nvlist_t *); extern int zfs_create_ancestors(libzfs_handle_t *, const char *); extern int zfs_destroy(zfs_handle_t *, boolean_t); extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t); extern int zfs_destroy_snaps_nvl(libzfs_handle_t *, nvlist_t *, boolean_t); extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *); extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *); extern int zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props); extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t); typedef struct renameflags { /* recursive rename */ int recurse : 1; /* don't unmount file systems */ int nounmount : 1; /* force unmount file systems */ int forceunmount : 1; } renameflags_t; extern int zfs_rename(zfs_handle_t *, const char *, const char *, renameflags_t flags); typedef struct sendflags { /* print informational messages (ie, -v was specified) */ boolean_t verbose; /* recursive send (ie, -R) */ boolean_t replicate; /* for incrementals, do all intermediate snapshots */ boolean_t doall; /* if dataset is a clone, do incremental from its origin */ boolean_t fromorigin; /* do deduplication */ boolean_t dedup; /* send properties (ie, -p) */ boolean_t props; /* do not send (no-op, ie. -n) */ boolean_t dryrun; /* parsable verbose output (ie. -P) */ boolean_t parsable; /* show progress (ie. -v) */ boolean_t progress; /* large blocks (>128K) are permitted */ boolean_t largeblock; /* WRITE_EMBEDDED records of type DATA are permitted */ boolean_t embed_data; /* compressed WRITE records are permitted */ boolean_t compress; /* show progress as process title(ie. -V) */ boolean_t progressastitle; } sendflags_t; typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); extern int zfs_send(zfs_handle_t *, const char *, const char *, sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **); extern int zfs_send_one(zfs_handle_t *, const char *, int, sendflags_t flags); extern int zfs_send_resume(libzfs_handle_t *, sendflags_t *, int outfd, const char *); extern nvlist_t *zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token); extern int zfs_promote(zfs_handle_t *); extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t, int); extern int zfs_hold_nvl(zfs_handle_t *, int, nvlist_t *); extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); extern int zfs_get_holds(zfs_handle_t *, nvlist_t **); extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *); typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain, uid_t rid, uint64_t space); extern int zfs_userspace(zfs_handle_t *, zfs_userquota_prop_t, zfs_userspace_cb_t, void *); extern int zfs_get_fsacl(zfs_handle_t *, nvlist_t **); extern int zfs_set_fsacl(zfs_handle_t *, boolean_t, nvlist_t *); typedef struct recvflags { /* print informational messages (ie, -v was specified) */ boolean_t verbose; /* the destination is a prefix, not the exact fs (ie, -d) */ boolean_t isprefix; /* * Only the tail of the sent snapshot path is appended to the * destination to determine the received snapshot name (ie, -e). */ boolean_t istail; /* do not actually do the recv, just check if it would work (ie, -n) */ boolean_t dryrun; /* rollback/destroy filesystems as necessary (eg, -F) */ boolean_t force; /* set "canmount=off" on all modified filesystems */ boolean_t canmountoff; /* * Mark the file systems as "resumable" and do not destroy them if the * receive is interrupted */ boolean_t resumable; /* byteswap flag is used internally; callers need not specify */ boolean_t byteswap; /* do not mount file systems as they are extracted (private) */ boolean_t nomount; } recvflags_t; extern int zfs_receive(libzfs_handle_t *, const char *, nvlist_t *, recvflags_t *, int, avl_tree_t *); typedef enum diff_flags { ZFS_DIFF_PARSEABLE = 0x1, ZFS_DIFF_TIMESTAMP = 0x2, ZFS_DIFF_CLASSIFY = 0x4 } diff_flags_t; extern int zfs_show_diffs(zfs_handle_t *, int, const char *, const char *, int); /* * Miscellaneous functions. */ extern const char *zfs_type_to_name(zfs_type_t); extern void zfs_refresh_properties(zfs_handle_t *); extern int zfs_name_valid(const char *, zfs_type_t); extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t); extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *, zfs_type_t); extern int zfs_spa_version(zfs_handle_t *, int *); extern boolean_t zfs_bookmark_exists(const char *path); extern ulong_t get_system_hostid(void); /* * Mount support functions. */ extern boolean_t is_mounted(libzfs_handle_t *, const char *special, char **); extern boolean_t zfs_is_mounted(zfs_handle_t *, char **); extern int zfs_mount(zfs_handle_t *, const char *, int); +extern int zfs_mount_at(zfs_handle_t *, const char *, int, const char *); extern int zfs_unmount(zfs_handle_t *, const char *, int); extern int zfs_unmountall(zfs_handle_t *, int); /* * Share support functions. */ extern boolean_t zfs_is_shared(zfs_handle_t *); extern int zfs_share(zfs_handle_t *); extern int zfs_unshare(zfs_handle_t *); /* * Protocol-specific share support functions. */ extern boolean_t zfs_is_shared_nfs(zfs_handle_t *, char **); extern boolean_t zfs_is_shared_smb(zfs_handle_t *, char **); extern int zfs_share_nfs(zfs_handle_t *); extern int zfs_share_smb(zfs_handle_t *); extern int zfs_shareall(zfs_handle_t *); extern int zfs_unshare_nfs(zfs_handle_t *, const char *); extern int zfs_unshare_smb(zfs_handle_t *, const char *); extern int zfs_unshareall_nfs(zfs_handle_t *); extern int zfs_unshareall_smb(zfs_handle_t *); extern int zfs_unshareall_bypath(zfs_handle_t *, const char *); extern int zfs_unshareall(zfs_handle_t *); extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *, void *, void *, int, zfs_share_op_t); /* * FreeBSD-specific jail support function. */ extern int zfs_jail(zfs_handle_t *, int, int); /* * When dealing with nvlists, verify() is extremely useful */ #ifndef verify #ifdef NDEBUG #define verify(EX) ((void)(EX)) #else #define verify(EX) assert(EX) #endif #endif /* * Utility function to convert a number to a human-readable form. */ extern void zfs_nicenum(uint64_t, char *, size_t); extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *); /* * Given a device or file, determine if it is part of a pool. */ extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **, boolean_t *); /* * Label manipulation. */ extern int zpool_read_label(int, nvlist_t **); extern int zpool_read_all_labels(int, nvlist_t **); extern int zpool_clear_label(int); /* is this zvol valid for use as a dump device? */ extern int zvol_check_dump_config(char *); /* * Management interfaces for SMB ACL files */ int zfs_smb_acl_add(libzfs_handle_t *, char *, char *, char *); int zfs_smb_acl_remove(libzfs_handle_t *, char *, char *, char *); int zfs_smb_acl_purge(libzfs_handle_t *, char *, char *); int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *, char *); /* * Enable and disable datasets within a pool by mounting/unmounting and * sharing/unsharing them. */ extern int zpool_enable_datasets(zpool_handle_t *, const char *, int); extern int zpool_disable_datasets(zpool_handle_t *, boolean_t); /* * Mappings between vdev and FRU. */ extern void libzfs_fru_refresh(libzfs_handle_t *); extern const char *libzfs_fru_lookup(libzfs_handle_t *, const char *); extern const char *libzfs_fru_devpath(libzfs_handle_t *, const char *); extern boolean_t libzfs_fru_compare(libzfs_handle_t *, const char *, const char *); extern boolean_t libzfs_fru_notself(libzfs_handle_t *, const char *); extern int zpool_fru_set(zpool_handle_t *, uint64_t, const char *); #ifndef illumos extern int zmount(const char *, const char *, int, char *, char *, int, char *, int); #endif extern int zfs_remap_indirects(libzfs_handle_t *hdl, const char *); /* Allow consumers to initialize libshare externally for optimal performance */ extern int zfs_init_libshare_arg(libzfs_handle_t *, int, void *); /* * For most consumers, zfs_init_libshare_arg is sufficient on its own, and * zfs_uninit_libshare is unnecessary. zfs_uninit_libshare should only be called * if the caller has already initialized libshare for one set of zfs handles, * and wishes to share or unshare filesystems outside of that set. In that case, * the caller should uninitialize libshare, and then re-initialize it with the * new handles being shared or unshared. */ extern void zfs_uninit_libshare(libzfs_handle_t *); #ifdef __cplusplus } #endif #endif /* _LIBZFS_H */ Index: projects/clang1000-import/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c =================================================================== --- projects/clang1000-import/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c (revision 356919) +++ projects/clang1000-import/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_mount.c (revision 356920) @@ -1,1713 +1,1734 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2016 by Delphix. All rights reserved. * Copyright 2016 Igor Kozhukhov * Copyright 2017 Joyent, Inc. * Copyright 2017 RackTop Systems. * Copyright 2018 OmniOS Community Edition (OmniOSce) Association. */ /* * Routines to manage ZFS mounts. We separate all the nasty routines that have * to deal with the OS. The following functions are the main entry points -- * they are used by mount and unmount and when changing a filesystem's * mountpoint. * * zfs_is_mounted() * zfs_mount() * zfs_unmount() * zfs_unmountall() * * This file also contains the functions used to manage sharing filesystems via * NFS and iSCSI: * * zfs_is_shared() * zfs_share() * zfs_unshare() * * zfs_is_shared_nfs() * zfs_is_shared_smb() * zfs_share_proto() * zfs_shareall(); * zfs_unshare_nfs() * zfs_unshare_smb() * zfs_unshareall_nfs() * zfs_unshareall_smb() * zfs_unshareall() * zfs_unshareall_bypath() * * The following functions are available for pool consumers, and will * mount/unmount and share/unshare all datasets within pool: * * zpool_enable_datasets() * zpool_disable_datasets() */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "libzfs_impl.h" #include #include #define MAXISALEN 257 /* based on sysinfo(2) man page */ static int mount_tp_nthr = 512; /* tpool threads for multi-threaded mounting */ static void zfs_mount_task(void *); static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *); zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **, zfs_share_proto_t); /* * The share protocols table must be in the same order as the zfs_share_proto_t * enum in libzfs_impl.h */ typedef struct { zfs_prop_t p_prop; char *p_name; int p_share_err; int p_unshare_err; } proto_table_t; proto_table_t proto_table[PROTO_END] = { {ZFS_PROP_SHARENFS, "nfs", EZFS_SHARENFSFAILED, EZFS_UNSHARENFSFAILED}, {ZFS_PROP_SHARESMB, "smb", EZFS_SHARESMBFAILED, EZFS_UNSHARESMBFAILED}, }; zfs_share_proto_t nfs_only[] = { PROTO_NFS, PROTO_END }; zfs_share_proto_t smb_only[] = { PROTO_SMB, PROTO_END }; zfs_share_proto_t share_all_proto[] = { PROTO_NFS, PROTO_SMB, PROTO_END }; /* * Search the sharetab for the given mountpoint and protocol, returning * a zfs_share_type_t value. */ static zfs_share_type_t is_shared(libzfs_handle_t *hdl, const char *mountpoint, zfs_share_proto_t proto) { char buf[MAXPATHLEN], *tab; char *ptr; if (hdl->libzfs_sharetab == NULL) return (SHARED_NOT_SHARED); (void) fseek(hdl->libzfs_sharetab, 0, SEEK_SET); while (fgets(buf, sizeof (buf), hdl->libzfs_sharetab) != NULL) { /* the mountpoint is the first entry on each line */ if ((tab = strchr(buf, '\t')) == NULL) continue; *tab = '\0'; if (strcmp(buf, mountpoint) == 0) { #ifdef illumos /* * the protocol field is the third field * skip over second field */ ptr = ++tab; if ((tab = strchr(ptr, '\t')) == NULL) continue; ptr = ++tab; if ((tab = strchr(ptr, '\t')) == NULL) continue; *tab = '\0'; if (strcmp(ptr, proto_table[proto].p_name) == 0) { switch (proto) { case PROTO_NFS: return (SHARED_NFS); case PROTO_SMB: return (SHARED_SMB); default: return (0); } } #else if (proto == PROTO_NFS) return (SHARED_NFS); #endif } } return (SHARED_NOT_SHARED); } #ifdef illumos static boolean_t dir_is_empty_stat(const char *dirname) { struct stat st; /* * We only want to return false if the given path is a non empty * directory, all other errors are handled elsewhere. */ if (stat(dirname, &st) < 0 || !S_ISDIR(st.st_mode)) { return (B_TRUE); } /* * An empty directory will still have two entries in it, one * entry for each of "." and "..". */ if (st.st_size > 2) { return (B_FALSE); } return (B_TRUE); } static boolean_t dir_is_empty_readdir(const char *dirname) { DIR *dirp; struct dirent64 *dp; int dirfd; if ((dirfd = openat(AT_FDCWD, dirname, O_RDONLY | O_NDELAY | O_LARGEFILE | O_CLOEXEC, 0)) < 0) { return (B_TRUE); } if ((dirp = fdopendir(dirfd)) == NULL) { (void) close(dirfd); return (B_TRUE); } while ((dp = readdir64(dirp)) != NULL) { if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0) continue; (void) closedir(dirp); return (B_FALSE); } (void) closedir(dirp); return (B_TRUE); } /* * Returns true if the specified directory is empty. If we can't open the * directory at all, return true so that the mount can fail with a more * informative error message. */ static boolean_t dir_is_empty(const char *dirname) { struct statvfs64 st; /* * If the statvfs call fails or the filesystem is not a ZFS * filesystem, fall back to the slow path which uses readdir. */ if ((statvfs64(dirname, &st) != 0) || (strcmp(st.f_basetype, "zfs") != 0)) { return (dir_is_empty_readdir(dirname)); } /* * At this point, we know the provided path is on a ZFS * filesystem, so we can use stat instead of readdir to * determine if the directory is empty or not. We try to avoid * using readdir because that requires opening "dirname"; this * open file descriptor can potentially end up in a child * process if there's a concurrent fork, thus preventing the * zfs_mount() from otherwise succeeding (the open file * descriptor inherited by the child process will cause the * parent's mount to fail with EBUSY). The performance * implications of replacing the open, read, and close with a * single stat is nice; but is not the main motivation for the * added complexity. */ return (dir_is_empty_stat(dirname)); } #endif /* * Checks to see if the mount is active. If the filesystem is mounted, we fill * in 'where' with the current mountpoint, and return 1. Otherwise, we return * 0. */ boolean_t is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where) { struct mnttab entry; if (libzfs_mnttab_find(zfs_hdl, special, &entry) != 0) return (B_FALSE); if (where != NULL) *where = zfs_strdup(zfs_hdl, entry.mnt_mountp); return (B_TRUE); } boolean_t zfs_is_mounted(zfs_handle_t *zhp, char **where) { return (is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where)); } +static boolean_t +zfs_is_mountable_internal(zfs_handle_t *zhp, const char *mountpoint) +{ + + if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) && + getzoneid() == GLOBAL_ZONEID) + return (B_FALSE); + + return (B_TRUE); +} + /* * Returns true if the given dataset is mountable, false otherwise. Returns the * mountpoint in 'buf'. */ static boolean_t zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen, zprop_source_t *source) { char sourceloc[MAXNAMELEN]; zprop_source_t sourcetype; if (!zfs_prop_valid_for_type(ZFS_PROP_MOUNTPOINT, zhp->zfs_type)) return (B_FALSE); verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, buf, buflen, &sourcetype, sourceloc, sizeof (sourceloc), B_FALSE) == 0); if (strcmp(buf, ZFS_MOUNTPOINT_NONE) == 0 || strcmp(buf, ZFS_MOUNTPOINT_LEGACY) == 0) return (B_FALSE); if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_OFF) return (B_FALSE); - if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) && - getzoneid() == GLOBAL_ZONEID) + if (!zfs_is_mountable_internal(zhp, buf)) return (B_FALSE); if (source) *source = sourcetype; return (B_TRUE); } /* * Mount the given filesystem. */ int zfs_mount(zfs_handle_t *zhp, const char *options, int flags) { - struct stat buf; char mountpoint[ZFS_MAXPROPLEN]; + + if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL)) + return (0); + + return (zfs_mount_at(zhp, options, flags, mountpoint)); +} + +int +zfs_mount_at(zfs_handle_t *zhp, const char *options, int flags, + const char *mountpoint) +{ + struct stat buf; char mntopts[MNT_LINE_MAX]; libzfs_handle_t *hdl = zhp->zfs_hdl; if (options == NULL) mntopts[0] = '\0'; else (void) strlcpy(mntopts, options, sizeof (mntopts)); /* * If the pool is imported read-only then all mounts must be read-only */ if (zpool_get_prop_int(zhp->zpool_hdl, ZPOOL_PROP_READONLY, NULL)) flags |= MS_RDONLY; - if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL)) - return (0); + if (!zfs_is_mountable_internal(zhp, mountpoint)) + return (B_FALSE); /* Create the directory if it doesn't already exist */ if (lstat(mountpoint, &buf) != 0) { if (mkdirp(mountpoint, 0755) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to create mountpoint")); return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED, dgettext(TEXT_DOMAIN, "cannot mount '%s'"), mountpoint)); } } #ifdef illumos /* FreeBSD: overlay mounts are not checked. */ /* * Determine if the mountpoint is empty. If so, refuse to perform the * mount. We don't perform this check if MS_OVERLAY is specified, which * would defeat the point. We also avoid this check if 'remount' is * specified. */ if ((flags & MS_OVERLAY) == 0 && strstr(mntopts, MNTOPT_REMOUNT) == NULL && !dir_is_empty(mountpoint)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "directory is not empty")); return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED, dgettext(TEXT_DOMAIN, "cannot mount '%s'"), mountpoint)); } #endif /* perform the mount */ if (zmount(zfs_get_name(zhp), mountpoint, flags, MNTTYPE_ZFS, NULL, 0, mntopts, sizeof (mntopts)) != 0) { /* * Generic errors are nasty, but there are just way too many * from mount(), and they're well-understood. We pick a few * common ones to improve upon. */ if (errno == EBUSY) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "mountpoint or dataset is busy")); } else if (errno == EPERM) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Insufficient privileges")); } else if (errno == ENOTSUP) { char buf[256]; int spa_version; VERIFY(zfs_spa_version(zhp, &spa_version) == 0); (void) snprintf(buf, sizeof (buf), dgettext(TEXT_DOMAIN, "Can't mount a version %lld " "file system on a version %d pool. Pool must be" " upgraded to mount this file system."), (u_longlong_t)zfs_prop_get_int(zhp, ZFS_PROP_VERSION), spa_version); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, buf)); } else { zfs_error_aux(hdl, strerror(errno)); } return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED, dgettext(TEXT_DOMAIN, "cannot mount '%s'"), zhp->zfs_name)); } /* add the mounted entry into our cache */ libzfs_mnttab_add(hdl, zfs_get_name(zhp), mountpoint, mntopts); return (0); } /* * Unmount a single filesystem. */ static int unmount_one(libzfs_handle_t *hdl, const char *mountpoint, int flags) { if (umount2(mountpoint, flags) != 0) { zfs_error_aux(hdl, strerror(errno)); return (zfs_error_fmt(hdl, EZFS_UMOUNTFAILED, dgettext(TEXT_DOMAIN, "cannot unmount '%s'"), mountpoint)); } return (0); } /* * Unmount the given filesystem. */ int zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags) { libzfs_handle_t *hdl = zhp->zfs_hdl; struct mnttab entry; char *mntpt = NULL; /* check to see if we need to unmount the filesystem */ if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) && libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0)) { /* * mountpoint may have come from a call to * getmnt/getmntany if it isn't NULL. If it is NULL, * we know it comes from libzfs_mnttab_find which can * then get freed later. We strdup it to play it safe. */ if (mountpoint == NULL) mntpt = zfs_strdup(hdl, entry.mnt_mountp); else mntpt = zfs_strdup(hdl, mountpoint); /* * Unshare and unmount the filesystem */ if (zfs_unshare_proto(zhp, mntpt, share_all_proto) != 0) return (-1); if (unmount_one(hdl, mntpt, flags) != 0) { free(mntpt); (void) zfs_shareall(zhp); return (-1); } libzfs_mnttab_remove(hdl, zhp->zfs_name); free(mntpt); } return (0); } /* * Unmount this filesystem and any children inheriting the mountpoint property. * To do this, just act like we're changing the mountpoint property, but don't * remount the filesystems afterwards. */ int zfs_unmountall(zfs_handle_t *zhp, int flags) { prop_changelist_t *clp; int ret; clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, 0, flags); if (clp == NULL) return (-1); ret = changelist_prefix(clp); changelist_free(clp); return (ret); } boolean_t zfs_is_shared(zfs_handle_t *zhp) { zfs_share_type_t rc = 0; zfs_share_proto_t *curr_proto; if (ZFS_IS_VOLUME(zhp)) return (B_FALSE); for (curr_proto = share_all_proto; *curr_proto != PROTO_END; curr_proto++) rc |= zfs_is_shared_proto(zhp, NULL, *curr_proto); return (rc ? B_TRUE : B_FALSE); } int zfs_share(zfs_handle_t *zhp) { assert(!ZFS_IS_VOLUME(zhp)); return (zfs_share_proto(zhp, share_all_proto)); } int zfs_unshare(zfs_handle_t *zhp) { assert(!ZFS_IS_VOLUME(zhp)); return (zfs_unshareall(zhp)); } /* * Check to see if the filesystem is currently shared. */ zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *zhp, char **where, zfs_share_proto_t proto) { char *mountpoint; zfs_share_type_t rc; if (!zfs_is_mounted(zhp, &mountpoint)) return (SHARED_NOT_SHARED); if ((rc = is_shared(zhp->zfs_hdl, mountpoint, proto)) != SHARED_NOT_SHARED) { if (where != NULL) *where = mountpoint; else free(mountpoint); return (rc); } else { free(mountpoint); return (SHARED_NOT_SHARED); } } boolean_t zfs_is_shared_nfs(zfs_handle_t *zhp, char **where) { return (zfs_is_shared_proto(zhp, where, PROTO_NFS) != SHARED_NOT_SHARED); } boolean_t zfs_is_shared_smb(zfs_handle_t *zhp, char **where) { return (zfs_is_shared_proto(zhp, where, PROTO_SMB) != SHARED_NOT_SHARED); } /* * Make sure things will work if libshare isn't installed by using * wrapper functions that check to see that the pointers to functions * initialized in _zfs_init_libshare() are actually present. */ #ifdef illumos static sa_handle_t (*_sa_init)(int); static sa_handle_t (*_sa_init_arg)(int, void *); static void (*_sa_fini)(sa_handle_t); static sa_share_t (*_sa_find_share)(sa_handle_t, char *); static int (*_sa_enable_share)(sa_share_t, char *); static int (*_sa_disable_share)(sa_share_t, char *); static char *(*_sa_errorstr)(int); static int (*_sa_parse_legacy_options)(sa_group_t, char *, char *); static boolean_t (*_sa_needs_refresh)(sa_handle_t *); static libzfs_handle_t *(*_sa_get_zfs_handle)(sa_handle_t); static int (*_sa_zfs_process_share)(sa_handle_t, sa_group_t, sa_share_t, char *, char *, zprop_source_t, char *, char *, char *); static void (*_sa_update_sharetab_ts)(sa_handle_t); #endif /* * _zfs_init_libshare() * * Find the libshare.so.1 entry points that we use here and save the * values to be used later. This is triggered by the runtime loader. * Make sure the correct ISA version is loaded. */ #pragma init(_zfs_init_libshare) static void _zfs_init_libshare(void) { #ifdef illumos void *libshare; char path[MAXPATHLEN]; char isa[MAXISALEN]; #if defined(_LP64) if (sysinfo(SI_ARCHITECTURE_64, isa, MAXISALEN) == -1) isa[0] = '\0'; #else isa[0] = '\0'; #endif (void) snprintf(path, MAXPATHLEN, "/usr/lib/%s/libshare.so.1", isa); if ((libshare = dlopen(path, RTLD_LAZY | RTLD_GLOBAL)) != NULL) { _sa_init = (sa_handle_t (*)(int))dlsym(libshare, "sa_init"); _sa_init_arg = (sa_handle_t (*)(int, void *))dlsym(libshare, "sa_init_arg"); _sa_fini = (void (*)(sa_handle_t))dlsym(libshare, "sa_fini"); _sa_find_share = (sa_share_t (*)(sa_handle_t, char *)) dlsym(libshare, "sa_find_share"); _sa_enable_share = (int (*)(sa_share_t, char *))dlsym(libshare, "sa_enable_share"); _sa_disable_share = (int (*)(sa_share_t, char *))dlsym(libshare, "sa_disable_share"); _sa_errorstr = (char *(*)(int))dlsym(libshare, "sa_errorstr"); _sa_parse_legacy_options = (int (*)(sa_group_t, char *, char *)) dlsym(libshare, "sa_parse_legacy_options"); _sa_needs_refresh = (boolean_t (*)(sa_handle_t *)) dlsym(libshare, "sa_needs_refresh"); _sa_get_zfs_handle = (libzfs_handle_t *(*)(sa_handle_t)) dlsym(libshare, "sa_get_zfs_handle"); _sa_zfs_process_share = (int (*)(sa_handle_t, sa_group_t, sa_share_t, char *, char *, zprop_source_t, char *, char *, char *))dlsym(libshare, "sa_zfs_process_share"); _sa_update_sharetab_ts = (void (*)(sa_handle_t)) dlsym(libshare, "sa_update_sharetab_ts"); if (_sa_init == NULL || _sa_init_arg == NULL || _sa_fini == NULL || _sa_find_share == NULL || _sa_enable_share == NULL || _sa_disable_share == NULL || _sa_errorstr == NULL || _sa_parse_legacy_options == NULL || _sa_needs_refresh == NULL || _sa_get_zfs_handle == NULL || _sa_zfs_process_share == NULL || _sa_update_sharetab_ts == NULL) { _sa_init = NULL; _sa_init_arg = NULL; _sa_fini = NULL; _sa_disable_share = NULL; _sa_enable_share = NULL; _sa_errorstr = NULL; _sa_parse_legacy_options = NULL; (void) dlclose(libshare); _sa_needs_refresh = NULL; _sa_get_zfs_handle = NULL; _sa_zfs_process_share = NULL; _sa_update_sharetab_ts = NULL; } } #endif } /* * zfs_init_libshare(zhandle, service) * * Initialize the libshare API if it hasn't already been initialized. * In all cases it returns 0 if it succeeded and an error if not. The * service value is which part(s) of the API to initialize and is a * direct map to the libshare sa_init(service) interface. */ static int zfs_init_libshare_impl(libzfs_handle_t *zhandle, int service, void *arg) { #ifdef illumos /* * libshare is either not installed or we're in a branded zone. The * rest of the wrapper functions around the libshare calls already * handle NULL function pointers, but we don't want the callers of * zfs_init_libshare() to fail prematurely if libshare is not available. */ if (_sa_init == NULL) return (SA_OK); /* * Attempt to refresh libshare. This is necessary if there was a cache * miss for a new ZFS dataset that was just created, or if state of the * sharetab file has changed since libshare was last initialized. We * want to make sure so check timestamps to see if a different process * has updated any of the configuration. If there was some non-ZFS * change, we need to re-initialize the internal cache. */ if (_sa_needs_refresh != NULL && _sa_needs_refresh(zhandle->libzfs_sharehdl)) { zfs_uninit_libshare(zhandle); zhandle->libzfs_sharehdl = _sa_init_arg(service, arg); } if (zhandle && zhandle->libzfs_sharehdl == NULL) zhandle->libzfs_sharehdl = _sa_init_arg(service, arg); if (zhandle->libzfs_sharehdl == NULL) return (SA_NO_MEMORY); #endif return (SA_OK); } int zfs_init_libshare(libzfs_handle_t *zhandle, int service) { return (zfs_init_libshare_impl(zhandle, service, NULL)); } int zfs_init_libshare_arg(libzfs_handle_t *zhandle, int service, void *arg) { return (zfs_init_libshare_impl(zhandle, service, arg)); } /* * zfs_uninit_libshare(zhandle) * * Uninitialize the libshare API if it hasn't already been * uninitialized. It is OK to call multiple times. */ void zfs_uninit_libshare(libzfs_handle_t *zhandle) { if (zhandle != NULL && zhandle->libzfs_sharehdl != NULL) { #ifdef illumos if (_sa_fini != NULL) _sa_fini(zhandle->libzfs_sharehdl); #endif zhandle->libzfs_sharehdl = NULL; } } /* * zfs_parse_options(options, proto) * * Call the legacy parse interface to get the protocol specific * options using the NULL arg to indicate that this is a "parse" only. */ int zfs_parse_options(char *options, zfs_share_proto_t proto) { #ifdef illumos if (_sa_parse_legacy_options != NULL) { return (_sa_parse_legacy_options(NULL, options, proto_table[proto].p_name)); } return (SA_CONFIG_ERR); #else return (SA_OK); #endif } #ifdef illumos /* * zfs_sa_find_share(handle, path) * * wrapper around sa_find_share to find a share path in the * configuration. */ static sa_share_t zfs_sa_find_share(sa_handle_t handle, char *path) { if (_sa_find_share != NULL) return (_sa_find_share(handle, path)); return (NULL); } /* * zfs_sa_enable_share(share, proto) * * Wrapper for sa_enable_share which enables a share for a specified * protocol. */ static int zfs_sa_enable_share(sa_share_t share, char *proto) { if (_sa_enable_share != NULL) return (_sa_enable_share(share, proto)); return (SA_CONFIG_ERR); } /* * zfs_sa_disable_share(share, proto) * * Wrapper for sa_enable_share which disables a share for a specified * protocol. */ static int zfs_sa_disable_share(sa_share_t share, char *proto) { if (_sa_disable_share != NULL) return (_sa_disable_share(share, proto)); return (SA_CONFIG_ERR); } #endif /* illumos */ /* * Share the given filesystem according to the options in the specified * protocol specific properties (sharenfs, sharesmb). We rely * on "libshare" to the dirty work for us. */ static int zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto) { char mountpoint[ZFS_MAXPROPLEN]; char shareopts[ZFS_MAXPROPLEN]; char sourcestr[ZFS_MAXPROPLEN]; libzfs_handle_t *hdl = zhp->zfs_hdl; zfs_share_proto_t *curr_proto; zprop_source_t sourcetype; int error, ret; if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL)) return (0); for (curr_proto = proto; *curr_proto != PROTO_END; curr_proto++) { /* * Return success if there are no share options. */ if (zfs_prop_get(zhp, proto_table[*curr_proto].p_prop, shareopts, sizeof (shareopts), &sourcetype, sourcestr, ZFS_MAXPROPLEN, B_FALSE) != 0 || strcmp(shareopts, "off") == 0) continue; #ifdef illumos ret = zfs_init_libshare_arg(hdl, SA_INIT_ONE_SHARE_FROM_HANDLE, zhp); if (ret != SA_OK) { (void) zfs_error_fmt(hdl, EZFS_SHARENFSFAILED, dgettext(TEXT_DOMAIN, "cannot share '%s': %s"), zfs_get_name(zhp), _sa_errorstr != NULL ? _sa_errorstr(ret) : ""); return (-1); } #endif /* * If the 'zoned' property is set, then zfs_is_mountable() * will have already bailed out if we are in the global zone. * But local zones cannot be NFS servers, so we ignore it for * local zones as well. */ if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) continue; #ifdef illumos share = zfs_sa_find_share(hdl->libzfs_sharehdl, mountpoint); if (share == NULL) { /* * This may be a new file system that was just * created so isn't in the internal cache * (second time through). Rather than * reloading the entire configuration, we can * assume ZFS has done the checking and it is * safe to add this to the internal * configuration. */ if (_sa_zfs_process_share(hdl->libzfs_sharehdl, NULL, NULL, mountpoint, proto_table[*curr_proto].p_name, sourcetype, shareopts, sourcestr, zhp->zfs_name) != SA_OK) { (void) zfs_error_fmt(hdl, proto_table[*curr_proto].p_share_err, dgettext(TEXT_DOMAIN, "cannot share '%s'"), zfs_get_name(zhp)); return (-1); } share = zfs_sa_find_share(hdl->libzfs_sharehdl, mountpoint); } if (share != NULL) { int err; err = zfs_sa_enable_share(share, proto_table[*curr_proto].p_name); if (err != SA_OK) { (void) zfs_error_fmt(hdl, proto_table[*curr_proto].p_share_err, dgettext(TEXT_DOMAIN, "cannot share '%s'"), zfs_get_name(zhp)); return (-1); } } else #else if (*curr_proto != PROTO_NFS) { fprintf(stderr, "Unsupported share protocol: %d.\n", *curr_proto); continue; } if (strcmp(shareopts, "on") == 0) error = fsshare(ZFS_EXPORTS_PATH, mountpoint, ""); else error = fsshare(ZFS_EXPORTS_PATH, mountpoint, shareopts); if (error != 0) #endif { (void) zfs_error_fmt(hdl, proto_table[*curr_proto].p_share_err, dgettext(TEXT_DOMAIN, "cannot share '%s'"), zfs_get_name(zhp)); return (-1); } } return (0); } int zfs_share_nfs(zfs_handle_t *zhp) { return (zfs_share_proto(zhp, nfs_only)); } int zfs_share_smb(zfs_handle_t *zhp) { return (zfs_share_proto(zhp, smb_only)); } int zfs_shareall(zfs_handle_t *zhp) { return (zfs_share_proto(zhp, share_all_proto)); } /* * Unshare a filesystem by mountpoint. */ static int unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint, zfs_share_proto_t proto) { #ifdef illumos sa_share_t share; int err; char *mntpt; /* * Mountpoint could get trashed if libshare calls getmntany * which it does during API initialization, so strdup the * value. */ mntpt = zfs_strdup(hdl, mountpoint); /* * make sure libshare initialized, initialize everything because we * don't know what other unsharing may happen later. Functions up the * stack are allowed to initialize instead a subset of shares at the * time the set is known. */ if ((err = zfs_init_libshare_arg(hdl, SA_INIT_ONE_SHARE_FROM_NAME, (void *)name)) != SA_OK) { free(mntpt); /* don't need the copy anymore */ return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err, dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"), name, _sa_errorstr(err))); } share = zfs_sa_find_share(hdl->libzfs_sharehdl, mntpt); free(mntpt); /* don't need the copy anymore */ if (share != NULL) { err = zfs_sa_disable_share(share, proto_table[proto].p_name); if (err != SA_OK) { return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err, dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"), name, _sa_errorstr(err))); } } else { return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err, dgettext(TEXT_DOMAIN, "cannot unshare '%s': not found"), name)); } #else char buf[MAXPATHLEN]; FILE *fp; int err; if (proto != PROTO_NFS) { fprintf(stderr, "No SMB support in FreeBSD yet.\n"); return (EOPNOTSUPP); } err = fsunshare(ZFS_EXPORTS_PATH, mountpoint); if (err != 0) { zfs_error_aux(hdl, "%s", strerror(err)); return (zfs_error_fmt(hdl, EZFS_UNSHARENFSFAILED, dgettext(TEXT_DOMAIN, "cannot unshare '%s'"), name)); } #endif return (0); } /* * Unshare the given filesystem. */ int zfs_unshare_proto(zfs_handle_t *zhp, const char *mountpoint, zfs_share_proto_t *proto) { libzfs_handle_t *hdl = zhp->zfs_hdl; struct mnttab entry; char *mntpt = NULL; /* check to see if need to unmount the filesystem */ rewind(zhp->zfs_hdl->libzfs_mnttab); if (mountpoint != NULL) mountpoint = mntpt = zfs_strdup(hdl, mountpoint); if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) && libzfs_mnttab_find(hdl, zfs_get_name(zhp), &entry) == 0)) { zfs_share_proto_t *curr_proto; if (mountpoint == NULL) mntpt = zfs_strdup(zhp->zfs_hdl, entry.mnt_mountp); for (curr_proto = proto; *curr_proto != PROTO_END; curr_proto++) { if (is_shared(hdl, mntpt, *curr_proto) && unshare_one(hdl, zhp->zfs_name, mntpt, *curr_proto) != 0) { if (mntpt != NULL) free(mntpt); return (-1); } } } if (mntpt != NULL) free(mntpt); return (0); } int zfs_unshare_nfs(zfs_handle_t *zhp, const char *mountpoint) { return (zfs_unshare_proto(zhp, mountpoint, nfs_only)); } int zfs_unshare_smb(zfs_handle_t *zhp, const char *mountpoint) { return (zfs_unshare_proto(zhp, mountpoint, smb_only)); } /* * Same as zfs_unmountall(), but for NFS and SMB unshares. */ int zfs_unshareall_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto) { prop_changelist_t *clp; int ret; clp = changelist_gather(zhp, ZFS_PROP_SHARENFS, 0, 0); if (clp == NULL) return (-1); ret = changelist_unshare(clp, proto); changelist_free(clp); return (ret); } int zfs_unshareall_nfs(zfs_handle_t *zhp) { return (zfs_unshareall_proto(zhp, nfs_only)); } int zfs_unshareall_smb(zfs_handle_t *zhp) { return (zfs_unshareall_proto(zhp, smb_only)); } int zfs_unshareall(zfs_handle_t *zhp) { return (zfs_unshareall_proto(zhp, share_all_proto)); } int zfs_unshareall_bypath(zfs_handle_t *zhp, const char *mountpoint) { return (zfs_unshare_proto(zhp, mountpoint, share_all_proto)); } /* * Remove the mountpoint associated with the current dataset, if necessary. * We only remove the underlying directory if: * * - The mountpoint is not 'none' or 'legacy' * - The mountpoint is non-empty * - The mountpoint is the default or inherited * - The 'zoned' property is set, or we're in a local zone * * Any other directories we leave alone. */ void remove_mountpoint(zfs_handle_t *zhp) { char mountpoint[ZFS_MAXPROPLEN]; zprop_source_t source; if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), &source)) return; if (source == ZPROP_SRC_DEFAULT || source == ZPROP_SRC_INHERITED) { /* * Try to remove the directory, silently ignoring any errors. * The filesystem may have since been removed or moved around, * and this error isn't really useful to the administrator in * any way. */ (void) rmdir(mountpoint); } } /* * Add the given zfs handle to the cb_handles array, dynamically reallocating * the array if it is out of space */ void libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp) { if (cbp->cb_alloc == cbp->cb_used) { size_t newsz; zfs_handle_t **newhandles; newsz = cbp->cb_alloc != 0 ? cbp->cb_alloc * 2 : 64; newhandles = zfs_realloc(zhp->zfs_hdl, cbp->cb_handles, cbp->cb_alloc * sizeof (zfs_handle_t *), newsz * sizeof (zfs_handle_t *)); cbp->cb_handles = newhandles; cbp->cb_alloc = newsz; } cbp->cb_handles[cbp->cb_used++] = zhp; } /* * Recursive helper function used during file system enumeration */ static int zfs_iter_cb(zfs_handle_t *zhp, void *data) { get_all_cb_t *cbp = data; if (!(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM)) { zfs_close(zhp); return (0); } if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_NOAUTO) { zfs_close(zhp); return (0); } /* * If this filesystem is inconsistent and has a receive resume * token, we can not mount it. */ if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) && zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, NULL, 0, NULL, NULL, 0, B_TRUE) == 0) { zfs_close(zhp); return (0); } libzfs_add_handle(cbp, zhp); if (zfs_iter_filesystems(zhp, zfs_iter_cb, cbp) != 0) { zfs_close(zhp); return (-1); } return (0); } /* * Sort comparator that compares two mountpoint paths. We sort these paths so * that subdirectories immediately follow their parents. This means that we * effectively treat the '/' character as the lowest value non-nul char. * Since filesystems from non-global zones can have the same mountpoint * as other filesystems, the comparator sorts global zone filesystems to * the top of the list. This means that the global zone will traverse the * filesystem list in the correct order and can stop when it sees the * first zoned filesystem. In a non-global zone, only the delegated * filesystems are seen. * * An example sorted list using this comparator would look like: * * /foo * /foo/bar * /foo/bar/baz * /foo/baz * /foo.bar * /foo (NGZ1) * /foo (NGZ2) * * The mount code depend on this ordering to deterministically iterate * over filesystems in order to spawn parallel mount tasks. */ static int mountpoint_cmp(const void *arga, const void *argb) { zfs_handle_t *const *zap = arga; zfs_handle_t *za = *zap; zfs_handle_t *const *zbp = argb; zfs_handle_t *zb = *zbp; char mounta[MAXPATHLEN]; char mountb[MAXPATHLEN]; const char *a = mounta; const char *b = mountb; boolean_t gota, gotb; uint64_t zoneda, zonedb; zoneda = zfs_prop_get_int(za, ZFS_PROP_ZONED); zonedb = zfs_prop_get_int(zb, ZFS_PROP_ZONED); if (zoneda && !zonedb) return (1); if (!zoneda && zonedb) return (-1); gota = (zfs_get_type(za) == ZFS_TYPE_FILESYSTEM); if (gota) verify(zfs_prop_get(za, ZFS_PROP_MOUNTPOINT, mounta, sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0); gotb = (zfs_get_type(zb) == ZFS_TYPE_FILESYSTEM); if (gotb) verify(zfs_prop_get(zb, ZFS_PROP_MOUNTPOINT, mountb, sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0); if (gota && gotb) { while (*a != '\0' && (*a == *b)) { a++; b++; } if (*a == *b) return (0); if (*a == '\0') return (-1); if (*b == '\0') return (1); if (*a == '/') return (-1); if (*b == '/') return (1); return (*a < *b ? -1 : *a > *b); } if (gota) return (-1); if (gotb) return (1); /* * If neither filesystem has a mountpoint, revert to sorting by * datset name. */ return (strcmp(zfs_get_name(za), zfs_get_name(zb))); } /* * Return true if path2 is a child of path1 or path2 equals path1 or * path1 is "/" (path2 is always a child of "/"). */ static boolean_t libzfs_path_contains(const char *path1, const char *path2) { return (strcmp(path1, path2) == 0 || strcmp(path1, "/") == 0 || (strstr(path2, path1) == path2 && path2[strlen(path1)] == '/')); } static int non_descendant_idx(zfs_handle_t **handles, size_t num_handles, int idx) { char parent[ZFS_MAXPROPLEN]; char child[ZFS_MAXPROPLEN]; int i; verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, parent, sizeof (parent), NULL, NULL, 0, B_FALSE) == 0); for (i = idx + 1; i < num_handles; i++) { verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT, child, sizeof (child), NULL, NULL, 0, B_FALSE) == 0); if (!libzfs_path_contains(parent, child)) break; } return (i); } typedef struct mnt_param { libzfs_handle_t *mnt_hdl; tpool_t *mnt_tp; zfs_handle_t **mnt_zhps; /* filesystems to mount */ size_t mnt_num_handles; int mnt_idx; /* Index of selected entry to mount */ zfs_iter_f mnt_func; void *mnt_data; } mnt_param_t; /* * Allocate and populate the parameter struct for mount function, and * schedule mounting of the entry selected by idx. */ static void zfs_dispatch_mount(libzfs_handle_t *hdl, zfs_handle_t **handles, size_t num_handles, int idx, zfs_iter_f func, void *data, tpool_t *tp) { mnt_param_t *mnt_param = zfs_alloc(hdl, sizeof (mnt_param_t)); mnt_param->mnt_hdl = hdl; mnt_param->mnt_tp = tp; mnt_param->mnt_zhps = handles; mnt_param->mnt_num_handles = num_handles; mnt_param->mnt_idx = idx; mnt_param->mnt_func = func; mnt_param->mnt_data = data; (void) tpool_dispatch(tp, zfs_mount_task, (void*)mnt_param); } /* * This is the structure used to keep state of mounting or sharing operations * during a call to zpool_enable_datasets(). */ typedef struct mount_state { /* * ms_mntstatus is set to -1 if any mount fails. While multiple threads * could update this variable concurrently, no synchronization is * needed as it's only ever set to -1. */ int ms_mntstatus; int ms_mntflags; const char *ms_mntopts; } mount_state_t; static int zfs_mount_one(zfs_handle_t *zhp, void *arg) { mount_state_t *ms = arg; int ret = 0; if (zfs_mount(zhp, ms->ms_mntopts, ms->ms_mntflags) != 0) ret = ms->ms_mntstatus = -1; return (ret); } static int zfs_share_one(zfs_handle_t *zhp, void *arg) { mount_state_t *ms = arg; int ret = 0; if (zfs_share(zhp) != 0) ret = ms->ms_mntstatus = -1; return (ret); } /* * Thread pool function to mount one file system. On completion, it finds and * schedules its children to be mounted. This depends on the sorting done in * zfs_foreach_mountpoint(). Note that the degenerate case (chain of entries * each descending from the previous) will have no parallelism since we always * have to wait for the parent to finish mounting before we can schedule * its children. */ static void zfs_mount_task(void *arg) { mnt_param_t *mp = arg; int idx = mp->mnt_idx; zfs_handle_t **handles = mp->mnt_zhps; size_t num_handles = mp->mnt_num_handles; char mountpoint[ZFS_MAXPROPLEN]; verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, mountpoint, sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0); if (mp->mnt_func(handles[idx], mp->mnt_data) != 0) return; /* * We dispatch tasks to mount filesystems with mountpoints underneath * this one. We do this by dispatching the next filesystem with a * descendant mountpoint of the one we just mounted, then skip all of * its descendants, dispatch the next descendant mountpoint, and so on. * The non_descendant_idx() function skips over filesystems that are * descendants of the filesystem we just dispatched. */ for (int i = idx + 1; i < num_handles; i = non_descendant_idx(handles, num_handles, i)) { char child[ZFS_MAXPROPLEN]; verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT, child, sizeof (child), NULL, NULL, 0, B_FALSE) == 0); if (!libzfs_path_contains(mountpoint, child)) break; /* not a descendant, return */ zfs_dispatch_mount(mp->mnt_hdl, handles, num_handles, i, mp->mnt_func, mp->mnt_data, mp->mnt_tp); } free(mp); } /* * Issue the func callback for each ZFS handle contained in the handles * array. This function is used to mount all datasets, and so this function * guarantees that filesystems for parent mountpoints are called before their * children. As such, before issuing any callbacks, we first sort the array * of handles by mountpoint. * * Callbacks are issued in one of two ways: * * 1. Sequentially: If the parallel argument is B_FALSE or the ZFS_SERIAL_MOUNT * environment variable is set, then we issue callbacks sequentially. * * 2. In parallel: If the parallel argument is B_TRUE and the ZFS_SERIAL_MOUNT * environment variable is not set, then we use a tpool to dispatch threads * to mount filesystems in parallel. This function dispatches tasks to mount * the filesystems at the top-level mountpoints, and these tasks in turn * are responsible for recursively mounting filesystems in their children * mountpoints. */ void zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles, size_t num_handles, zfs_iter_f func, void *data, boolean_t parallel) { zoneid_t zoneid = getzoneid(); /* * The ZFS_SERIAL_MOUNT environment variable is an undocumented * variable that can be used as a convenience to do a/b comparison * of serial vs. parallel mounting. */ boolean_t serial_mount = !parallel || (getenv("ZFS_SERIAL_MOUNT") != NULL); /* * Sort the datasets by mountpoint. See mountpoint_cmp for details * of how these are sorted. */ qsort(handles, num_handles, sizeof (zfs_handle_t *), mountpoint_cmp); if (serial_mount) { for (int i = 0; i < num_handles; i++) { func(handles[i], data); } return; } /* * Issue the callback function for each dataset using a parallel * algorithm that uses a thread pool to manage threads. */ tpool_t *tp = tpool_create(1, mount_tp_nthr, 0, NULL); /* * There may be multiple "top level" mountpoints outside of the pool's * root mountpoint, e.g.: /foo /bar. Dispatch a mount task for each of * these. */ for (int i = 0; i < num_handles; i = non_descendant_idx(handles, num_handles, i)) { /* * Since the mountpoints have been sorted so that the zoned * filesystems are at the end, a zoned filesystem seen from * the global zone means that we're done. */ if (zoneid == GLOBAL_ZONEID && zfs_prop_get_int(handles[i], ZFS_PROP_ZONED)) break; zfs_dispatch_mount(hdl, handles, num_handles, i, func, data, tp); } tpool_wait(tp); /* wait for all scheduled mounts to complete */ tpool_destroy(tp); } /* * Mount and share all datasets within the given pool. This assumes that no * datasets within the pool are currently mounted. */ #pragma weak zpool_mount_datasets = zpool_enable_datasets int zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) { get_all_cb_t cb = { 0 }; mount_state_t ms = { 0 }; zfs_handle_t *zfsp; int ret = 0; if ((zfsp = zfs_open(zhp->zpool_hdl, zhp->zpool_name, ZFS_TYPE_DATASET)) == NULL) goto out; /* * Gather all non-snapshot datasets within the pool. Start by adding * the root filesystem for this pool to the list, and then iterate * over all child filesystems. */ libzfs_add_handle(&cb, zfsp); if (zfs_iter_filesystems(zfsp, zfs_iter_cb, &cb) != 0) goto out; /* * Mount all filesystems */ ms.ms_mntopts = mntopts; ms.ms_mntflags = flags; zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used, zfs_mount_one, &ms, B_TRUE); if (ms.ms_mntstatus != 0) ret = ms.ms_mntstatus; /* * Share all filesystems that need to be shared. This needs to be * a separate pass because libshare is not mt-safe, and so we need * to share serially. */ ms.ms_mntstatus = 0; zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used, zfs_share_one, &ms, B_FALSE); if (ms.ms_mntstatus != 0) ret = ms.ms_mntstatus; out: for (int i = 0; i < cb.cb_used; i++) zfs_close(cb.cb_handles[i]); free(cb.cb_handles); return (ret); } static int mountpoint_compare(const void *a, const void *b) { const char *mounta = *((char **)a); const char *mountb = *((char **)b); return (strcmp(mountb, mounta)); } /* alias for 2002/240 */ #pragma weak zpool_unmount_datasets = zpool_disable_datasets /* * Unshare and unmount all datasets within the given pool. We don't want to * rely on traversing the DSL to discover the filesystems within the pool, * because this may be expensive (if not all of them are mounted), and can fail * arbitrarily (on I/O error, for example). Instead, we walk /etc/mnttab and * gather all the filesystems that are currently mounted. */ int zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force) { int used, alloc; struct mnttab entry; size_t namelen; char **mountpoints = NULL; zfs_handle_t **datasets = NULL; libzfs_handle_t *hdl = zhp->zpool_hdl; int i; int ret = -1; int flags = (force ? MS_FORCE : 0); #ifdef illumos sa_init_selective_arg_t sharearg; #endif namelen = strlen(zhp->zpool_name); rewind(hdl->libzfs_mnttab); used = alloc = 0; while (getmntent(hdl->libzfs_mnttab, &entry) == 0) { /* * Ignore non-ZFS entries. */ if (entry.mnt_fstype == NULL || strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) continue; /* * Ignore filesystems not within this pool. */ if (entry.mnt_mountp == NULL || strncmp(entry.mnt_special, zhp->zpool_name, namelen) != 0 || (entry.mnt_special[namelen] != '/' && entry.mnt_special[namelen] != '\0')) continue; /* * At this point we've found a filesystem within our pool. Add * it to our growing list. */ if (used == alloc) { if (alloc == 0) { if ((mountpoints = zfs_alloc(hdl, 8 * sizeof (void *))) == NULL) goto out; if ((datasets = zfs_alloc(hdl, 8 * sizeof (void *))) == NULL) goto out; alloc = 8; } else { void *ptr; if ((ptr = zfs_realloc(hdl, mountpoints, alloc * sizeof (void *), alloc * 2 * sizeof (void *))) == NULL) goto out; mountpoints = ptr; if ((ptr = zfs_realloc(hdl, datasets, alloc * sizeof (void *), alloc * 2 * sizeof (void *))) == NULL) goto out; datasets = ptr; alloc *= 2; } } if ((mountpoints[used] = zfs_strdup(hdl, entry.mnt_mountp)) == NULL) goto out; /* * This is allowed to fail, in case there is some I/O error. It * is only used to determine if we need to remove the underlying * mountpoint, so failure is not fatal. */ datasets[used] = make_dataset_handle(hdl, entry.mnt_special); used++; } /* * At this point, we have the entire list of filesystems, so sort it by * mountpoint. */ #ifdef illumos sharearg.zhandle_arr = datasets; sharearg.zhandle_len = used; ret = zfs_init_libshare_arg(hdl, SA_INIT_SHARE_API_SELECTIVE, &sharearg); if (ret != 0) goto out; #endif qsort(mountpoints, used, sizeof (char *), mountpoint_compare); /* * Walk through and first unshare everything. */ for (i = 0; i < used; i++) { zfs_share_proto_t *curr_proto; for (curr_proto = share_all_proto; *curr_proto != PROTO_END; curr_proto++) { if (is_shared(hdl, mountpoints[i], *curr_proto) && unshare_one(hdl, mountpoints[i], mountpoints[i], *curr_proto) != 0) goto out; } } /* * Now unmount everything, removing the underlying directories as * appropriate. */ for (i = 0; i < used; i++) { if (unmount_one(hdl, mountpoints[i], flags) != 0) goto out; } for (i = 0; i < used; i++) { if (datasets[i]) remove_mountpoint(datasets[i]); } ret = 0; out: for (i = 0; i < used; i++) { if (datasets[i]) zfs_close(datasets[i]); free(mountpoints[i]); } free(datasets); free(mountpoints); return (ret); } Index: projects/clang1000-import/cddl/contrib/opensolaris/lib/libzfs =================================================================== --- projects/clang1000-import/cddl/contrib/opensolaris/lib/libzfs (revision 356919) +++ projects/clang1000-import/cddl/contrib/opensolaris/lib/libzfs (revision 356920) Property changes on: projects/clang1000-import/cddl/contrib/opensolaris/lib/libzfs ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/cddl/contrib/opensolaris/lib/libzfs:r356848-356919 Index: projects/clang1000-import/cddl/contrib/opensolaris =================================================================== --- projects/clang1000-import/cddl/contrib/opensolaris (revision 356919) +++ projects/clang1000-import/cddl/contrib/opensolaris (revision 356920) Property changes on: projects/clang1000-import/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/cddl/contrib/opensolaris:r356848-356919 Index: projects/clang1000-import/cddl =================================================================== --- projects/clang1000-import/cddl (revision 356919) +++ projects/clang1000-import/cddl (revision 356920) Property changes on: projects/clang1000-import/cddl ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/cddl:r356848-356919 Index: projects/clang1000-import/gnu/usr.bin/binutils/Makefile =================================================================== --- projects/clang1000-import/gnu/usr.bin/binutils/Makefile (revision 356919) +++ projects/clang1000-import/gnu/usr.bin/binutils/Makefile (revision 356920) @@ -1,29 +1,35 @@ # $FreeBSD$ .include SUBDIR= libiberty \ libbfd \ libopcodes SUBDIR.${MK_BINUTILS}+= doc SUBDIR.${MK_BINUTILS}+= libbinutils -SUBDIR.${MK_BINUTILS}+= as SUBDIR.${MK_BINUTILS}+= objdump + +# GNU as is used on x86 only, for a few files that cannot be assembled by +# Clang IAS. Other archs either use Clang IAS for every assembly file, or +# use external toolchain. +.if ${TARGET} == "amd64" || ${TARGET} == "i386" +SUBDIR.${MK_BINUTILS}+= as +.endif # All archs except powerpc either use lld or require external toolchain. # powerpc still needs binutils ld to link 32-bit binaries. .if ${TARGET} == "powerpc" SUBDIR.${MK_BINUTILS}+=ld .endif SUBDIR_DEPEND_libbinutils=libbfd # for bfdver.h SUBDIR_DEPEND_as=libbfd libiberty libopcodes SUBDIR_DEPEND_ld=libbfd libiberty SUBDIR_DEPEND_objdump=libbfd libiberty libbinutils libopcodes .if !make(install) SUBDIR_PARALLEL= .endif .include Index: projects/clang1000-import/gnu/usr.bin/binutils =================================================================== --- projects/clang1000-import/gnu/usr.bin/binutils (revision 356919) +++ projects/clang1000-import/gnu/usr.bin/binutils (revision 356920) Property changes on: projects/clang1000-import/gnu/usr.bin/binutils ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/gnu/usr.bin/binutils:r356848-356919 Index: projects/clang1000-import/include/stdlib.h =================================================================== --- projects/clang1000-import/include/stdlib.h (revision 356919) +++ projects/clang1000-import/include/stdlib.h (revision 356920) @@ -1,347 +1,355 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)stdlib.h 8.5 (Berkeley) 5/19/95 * $FreeBSD$ */ #ifndef _STDLIB_H_ #define _STDLIB_H_ #include #include #include __NULLABILITY_PRAGMA_PUSH #if __BSD_VISIBLE #ifndef _RUNE_T_DECLARED typedef __rune_t rune_t; #define _RUNE_T_DECLARED #endif #endif #ifndef _SIZE_T_DECLARED typedef __size_t size_t; #define _SIZE_T_DECLARED #endif #ifndef __cplusplus #ifndef _WCHAR_T_DECLARED typedef ___wchar_t wchar_t; #define _WCHAR_T_DECLARED #endif #endif typedef struct { int quot; /* quotient */ int rem; /* remainder */ } div_t; typedef struct { long quot; long rem; } ldiv_t; #define EXIT_FAILURE 1 #define EXIT_SUCCESS 0 #define RAND_MAX 0x7ffffffd __BEGIN_DECLS #ifdef _XLOCALE_H_ #include #endif extern int __mb_cur_max; extern int ___mb_cur_max(void); #define MB_CUR_MAX ((size_t)___mb_cur_max()) _Noreturn void abort(void); int abs(int) __pure2; int atexit(void (* _Nonnull)(void)); double atof(const char *); int atoi(const char *); long atol(const char *); void *bsearch(const void *, const void *, size_t, size_t, int (*)(const void * _Nonnull, const void *)); void *calloc(size_t, size_t) __malloc_like __result_use_check __alloc_size2(1, 2); div_t div(int, int) __pure2; _Noreturn void exit(int); void free(void *); char *getenv(const char *); long labs(long) __pure2; ldiv_t ldiv(long, long) __pure2; void *malloc(size_t) __malloc_like __result_use_check __alloc_size(1); int mblen(const char *, size_t); size_t mbstowcs(wchar_t * __restrict , const char * __restrict, size_t); int mbtowc(wchar_t * __restrict, const char * __restrict, size_t); void qsort(void *, size_t, size_t, int (* _Nonnull)(const void *, const void *)); int rand(void); void *realloc(void *, size_t) __result_use_check __alloc_size(2); void srand(unsigned); double strtod(const char * __restrict, char ** __restrict); float strtof(const char * __restrict, char ** __restrict); long strtol(const char * __restrict, char ** __restrict, int); long double strtold(const char * __restrict, char ** __restrict); unsigned long strtoul(const char * __restrict, char ** __restrict, int); int system(const char *); int wctomb(char *, wchar_t); size_t wcstombs(char * __restrict, const wchar_t * __restrict, size_t); /* * Functions added in C99 which we make conditionally available in the * BSD^C89 namespace if the compiler supports `long long'. * The #if test is more complicated than it ought to be because * __BSD_VISIBLE implies __ISO_C_VISIBLE == 1999 *even if* `long long' * is not supported in the compilation environment (which therefore means * that it can't really be ISO C99). * * (The only other extension made by C99 in thie header is _Exit().) */ #if __ISO_C_VISIBLE >= 1999 || defined(__cplusplus) #ifdef __LONG_LONG_SUPPORTED /* LONGLONG */ typedef struct { long long quot; long long rem; } lldiv_t; /* LONGLONG */ long long atoll(const char *); /* LONGLONG */ long long llabs(long long) __pure2; /* LONGLONG */ lldiv_t lldiv(long long, long long) __pure2; /* LONGLONG */ long long strtoll(const char * __restrict, char ** __restrict, int); /* LONGLONG */ unsigned long long strtoull(const char * __restrict, char ** __restrict, int); #endif /* __LONG_LONG_SUPPORTED */ _Noreturn void _Exit(int); #endif /* __ISO_C_VISIBLE >= 1999 */ /* * If we're in a mode greater than C99, expose C11 functions. */ #if __ISO_C_VISIBLE >= 2011 || __cplusplus >= 201103L void * aligned_alloc(size_t, size_t) __malloc_like __alloc_align(1) __alloc_size(2); int at_quick_exit(void (*)(void)); _Noreturn void quick_exit(int); #endif /* __ISO_C_VISIBLE >= 2011 */ /* * Extensions made by POSIX relative to C. */ #if __POSIX_VISIBLE >= 199506 || __XSI_VISIBLE char *realpath(const char * __restrict, char * __restrict); #endif #if __POSIX_VISIBLE >= 199506 int rand_r(unsigned *); /* (TSF) */ #endif #if __POSIX_VISIBLE >= 200112 int posix_memalign(void **, size_t, size_t); /* (ADV) */ int setenv(const char *, const char *, int); int unsetenv(const char *); #endif #if __POSIX_VISIBLE >= 200809 || __XSI_VISIBLE int getsubopt(char **, char *const *, char **); #ifndef _MKDTEMP_DECLARED char *mkdtemp(char *); #define _MKDTEMP_DECLARED #endif #ifndef _MKSTEMP_DECLARED int mkstemp(char *); #define _MKSTEMP_DECLARED #endif #endif /* __POSIX_VISIBLE >= 200809 || __XSI_VISIBLE */ /* * The only changes to the XSI namespace in revision 6 were the deletion * of the ttyslot() and valloc() functions, which FreeBSD never declared * in this header. For revision 7, ecvt(), fcvt(), and gcvt(), which * FreeBSD also does not have, and mktemp(), are to be deleted. */ #if __XSI_VISIBLE /* XXX XSI requires pollution from here. We'd rather not. */ long a64l(const char *); double drand48(void); /* char *ecvt(double, int, int * __restrict, int * __restrict); */ double erand48(unsigned short[3]); /* char *fcvt(double, int, int * __restrict, int * __restrict); */ /* char *gcvt(double, int, int * __restrict, int * __restrict); */ int grantpt(int); char *initstate(unsigned int, char *, size_t); long jrand48(unsigned short[3]); char *l64a(long); void lcong48(unsigned short[7]); long lrand48(void); #if !defined(_MKTEMP_DECLARED) && (__BSD_VISIBLE || __XSI_VISIBLE <= 600) char *mktemp(char *); #define _MKTEMP_DECLARED #endif long mrand48(void); long nrand48(unsigned short[3]); int posix_openpt(int); char *ptsname(int); int putenv(char *); long random(void); unsigned short *seed48(unsigned short[3]); char *setstate(/* const */ char *); void srand48(long); void srandom(unsigned int); int unlockpt(int); #endif /* __XSI_VISIBLE */ #if __BSD_VISIBLE extern const char *malloc_conf; extern void (*malloc_message)(void *, const char *); /* * The alloca() function can't be implemented in C, and on some * platforms it can't be implemented at all as a callable function. * The GNU C compiler provides a built-in alloca() which we can use. * On platforms where alloca() is not in libc, programs which use it * will fail to link when compiled with non-GNU compilers. */ #if __GNUC__ >= 2 || defined(__INTEL_COMPILER) #undef alloca /* some GNU bits try to get cute and define this on their own */ #define alloca(sz) __builtin_alloca(sz) #endif void abort2(const char *, int, void **) __dead2; __uint32_t arc4random(void); void arc4random_buf(void *, size_t); __uint32_t arc4random_uniform(__uint32_t); #ifdef __BLOCKS__ int atexit_b(void (^ _Nonnull)(void)); void *bsearch_b(const void *, const void *, size_t, size_t, int (^ _Nonnull)(const void *, const void *)); #endif char *getbsize(int *, long *); /* getcap(3) functions */ char *cgetcap(char *, const char *, int); int cgetclose(void); int cgetent(char **, char **, const char *); int cgetfirst(char **, char **); int cgetmatch(const char *, const char *); int cgetnext(char **, char **); int cgetnum(char *, const char *, long *); int cgetset(const char *); int cgetstr(char *, const char *, char **); int cgetustr(char *, const char *, char **); int daemon(int, int); int daemonfd(int, int); char *devname(__dev_t, __mode_t); char *devname_r(__dev_t, __mode_t, char *, int); char *fdevname(int); char *fdevname_r(int, char *, int); int getloadavg(double [], int); const char * getprogname(void); int heapsort(void *, size_t, size_t, int (* _Nonnull)(const void *, const void *)); #ifdef __BLOCKS__ int heapsort_b(void *, size_t, size_t, int (^ _Nonnull)(const void *, const void *)); void qsort_b(void *, size_t, size_t, int (^ _Nonnull)(const void *, const void *)); #endif int l64a_r(long, char *, int); int mergesort(void *, size_t, size_t, int (*)(const void *, const void *)); #ifdef __BLOCKS__ int mergesort_b(void *, size_t, size_t, int (^)(const void *, const void *)); #endif int mkostemp(char *, int); int mkostemps(char *, int, int); int mkostempsat(int, char *, int, int); void qsort_r(void *, size_t, size_t, void *, int (*)(void *, const void *, const void *)); int radixsort(const unsigned char **, int, const unsigned char *, unsigned); void *reallocarray(void *, size_t, size_t) __result_use_check __alloc_size2(2, 3); void *reallocf(void *, size_t) __result_use_check __alloc_size(2); int rpmatch(const char *); void setprogname(const char *); int sradixsort(const unsigned char **, int, const unsigned char *, unsigned); void srandomdev(void); long long strtonum(const char *, long long, long long, const char **); /* Deprecated interfaces, to be removed. */ __int64_t strtoq(const char *, char **, int); __uint64_t strtouq(const char *, char **, int); extern char *suboptarg; /* getsubopt(3) external variable */ #endif /* __BSD_VISIBLE */ #if __EXT1_VISIBLE +#ifndef _RSIZE_T_DEFINED +#define _RSIZE_T_DEFINED +typedef size_t rsize_t; +#endif + #ifndef _ERRNO_T_DEFINED #define _ERRNO_T_DEFINED typedef int errno_t; #endif /* K.3.6 */ typedef void (*constraint_handler_t)(const char * __restrict, void * __restrict, errno_t); /* K.3.6.1.1 */ constraint_handler_t set_constraint_handler_s(constraint_handler_t handler); /* K.3.6.1.2 */ _Noreturn void abort_handler_s(const char * __restrict, void * __restrict, errno_t); /* K3.6.1.3 */ void ignore_handler_s(const char * __restrict, void * __restrict, errno_t); +/* K.3.6.3.2 */ +errno_t qsort_s(void *, rsize_t, rsize_t, + int (*)(const void *, const void *, void *), void *); #endif /* __EXT1_VISIBLE */ __END_DECLS __NULLABILITY_PRAGMA_POP #endif /* !_STDLIB_H_ */ Index: projects/clang1000-import/lib/libbe/be_access.c =================================================================== --- projects/clang1000-import/lib/libbe/be_access.c (revision 356919) +++ projects/clang1000-import/lib/libbe/be_access.c (revision 356920) @@ -1,343 +1,340 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2017 Kyle J. Kneitinger * Copyright (c) 2018 Kyle Evans * Copyright (c) 2019 Wes Maag * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include "be.h" #include "be_impl.h" struct be_mountcheck_info { const char *path; char *name; }; struct be_mount_info { libbe_handle_t *lbh; const char *be; const char *mountpoint; int mntflags; int deepmount; int depth; }; static int be_mountcheck_cb(zfs_handle_t *zfs_hdl, void *data) { struct be_mountcheck_info *info; char *mountpoint; if (data == NULL) return (1); info = (struct be_mountcheck_info *)data; if (!zfs_is_mounted(zfs_hdl, &mountpoint)) return (0); if (strcmp(mountpoint, info->path) == 0) { info->name = strdup(zfs_get_name(zfs_hdl)); free(mountpoint); return (1); } free(mountpoint); return (0); } /* * Called from be_mount, uses the given zfs_handle and attempts to * mount it at the passed mountpoint. If the deepmount flag is set, continue * calling the function for each child dataset. */ static int be_mount_iter(zfs_handle_t *zfs_hdl, void *data) { int err; char *mountpoint; char tmp[BE_MAXPATHLEN], zfs_mnt[BE_MAXPATHLEN]; struct be_mount_info *info; - char opt; info = (struct be_mount_info *)data; if (zfs_is_mounted(zfs_hdl, &mountpoint)) { free(mountpoint); return (0); } /* * canmount and mountpoint are both ignored for the BE dataset, because * the rest of the system (kernel and loader) will effectively do the * same. */ if (info->depth == 0) { snprintf(tmp, BE_MAXPATHLEN, "%s", info->mountpoint); } else { if (zfs_prop_get_int(zfs_hdl, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_OFF) return (0); if (zfs_prop_get(zfs_hdl, ZFS_PROP_MOUNTPOINT, zfs_mnt, BE_MAXPATHLEN, NULL, NULL, 0, 1)) return (1); /* * We've encountered mountpoint=none at some intermediate * dataset (e.g. zroot/var) that will have children that may * need to be mounted. Skip mounting it, but iterate through * the children. */ if (strcmp("none", zfs_mnt) == 0) goto skipmount; mountpoint = be_mountpoint_augmented(info->lbh, zfs_mnt); snprintf(tmp, BE_MAXPATHLEN, "%s%s", info->mountpoint, mountpoint); } - opt = '\0'; - if ((err = zmount(zfs_get_name(zfs_hdl), tmp, info->mntflags, - __DECONST(char *, MNTTYPE_ZFS), NULL, 0, &opt, 1)) != 0) { + if ((err = zfs_mount_at(zfs_hdl, NULL, info->mntflags, tmp)) != 0) { switch (errno) { case ENAMETOOLONG: return (set_error(info->lbh, BE_ERR_PATHLEN)); case ELOOP: case ENOENT: case ENOTDIR: return (set_error(info->lbh, BE_ERR_BADPATH)); case EPERM: return (set_error(info->lbh, BE_ERR_PERMS)); case EBUSY: return (set_error(info->lbh, BE_ERR_PATHBUSY)); default: return (set_error(info->lbh, BE_ERR_UNKNOWN)); } } if (!info->deepmount) return (0); skipmount: ++info->depth; err = zfs_iter_filesystems(zfs_hdl, be_mount_iter, info); --info->depth; return (err); } static int be_umount_iter(zfs_handle_t *zfs_hdl, void *data) { int err; char *mountpoint; struct be_mount_info *info; info = (struct be_mount_info *)data; ++info->depth; if((err = zfs_iter_filesystems(zfs_hdl, be_umount_iter, info)) != 0) { return (err); } --info->depth; if (!zfs_is_mounted(zfs_hdl, &mountpoint)) { return (0); } free(mountpoint); if (zfs_unmount(zfs_hdl, NULL, info->mntflags) != 0) { switch (errno) { case ENAMETOOLONG: return (set_error(info->lbh, BE_ERR_PATHLEN)); case ELOOP: case ENOENT: case ENOTDIR: return (set_error(info->lbh, BE_ERR_BADPATH)); case EPERM: return (set_error(info->lbh, BE_ERR_PERMS)); case EBUSY: return (set_error(info->lbh, BE_ERR_PATHBUSY)); default: return (set_error(info->lbh, BE_ERR_UNKNOWN)); } } return (0); } /* * usage */ int be_mounted_at(libbe_handle_t *lbh, const char *path, nvlist_t *details) { char be[BE_MAXPATHLEN]; zfs_handle_t *root_hdl; struct be_mountcheck_info info; prop_data_t propinfo; bzero(&be, BE_MAXPATHLEN); if ((root_hdl = zfs_open(lbh->lzh, lbh->root, ZFS_TYPE_FILESYSTEM)) == NULL) return (BE_ERR_ZFSOPEN); info.path = path; info.name = NULL; zfs_iter_filesystems(root_hdl, be_mountcheck_cb, &info); zfs_close(root_hdl); if (info.name != NULL) { if (details != NULL) { if ((root_hdl = zfs_open(lbh->lzh, lbh->root, ZFS_TYPE_FILESYSTEM)) == NULL) { free(info.name); return (BE_ERR_ZFSOPEN); } propinfo.lbh = lbh; propinfo.list = details; propinfo.single_object = false; prop_list_builder_cb(root_hdl, &propinfo); zfs_close(root_hdl); } free(info.name); return (0); } return (1); } /* * usage */ int be_mount(libbe_handle_t *lbh, char *bootenv, char *mountpoint, int flags, char *result_loc) { char be[BE_MAXPATHLEN]; char mnt_temp[BE_MAXPATHLEN]; int mntflags, mntdeep; int err; struct be_mount_info info; zfs_handle_t *zhdl; if ((err = be_root_concat(lbh, bootenv, be)) != 0) return (set_error(lbh, err)); if ((err = be_exists(lbh, bootenv)) != 0) return (set_error(lbh, err)); if (is_mounted(lbh->lzh, be, NULL)) return (set_error(lbh, BE_ERR_MOUNTED)); mntdeep = (flags & BE_MNT_DEEP) ? 1 : 0; mntflags = (flags & BE_MNT_FORCE) ? MNT_FORCE : 0; /* Create mountpoint if it is not specified */ if (mountpoint == NULL) { strlcpy(mnt_temp, "/tmp/be_mount.XXXX", sizeof(mnt_temp)); if (mkdtemp(mnt_temp) == NULL) return (set_error(lbh, BE_ERR_IO)); } if ((zhdl = zfs_open(lbh->lzh, be, ZFS_TYPE_FILESYSTEM)) == NULL) return (set_error(lbh, BE_ERR_ZFSOPEN)); info.lbh = lbh; info.be = be; info.mountpoint = (mountpoint == NULL) ? mnt_temp : mountpoint; info.mntflags = mntflags; info.deepmount = mntdeep; info.depth = 0; if((err = be_mount_iter(zhdl, &info) != 0)) { zfs_close(zhdl); return (err); } zfs_close(zhdl); if (result_loc != NULL) strlcpy(result_loc, mountpoint == NULL ? mnt_temp : mountpoint, BE_MAXPATHLEN); return (BE_ERR_SUCCESS); } /* * usage */ int be_unmount(libbe_handle_t *lbh, char *bootenv, int flags) { int err; char be[BE_MAXPATHLEN]; zfs_handle_t *root_hdl; struct be_mount_info info; if ((err = be_root_concat(lbh, bootenv, be)) != 0) return (set_error(lbh, err)); if ((root_hdl = zfs_open(lbh->lzh, be, ZFS_TYPE_FILESYSTEM)) == NULL) return (set_error(lbh, BE_ERR_ZFSOPEN)); info.lbh = lbh; info.be = be; info.mountpoint = NULL; info.mntflags = (flags & BE_MNT_FORCE) ? MS_FORCE : 0; info.depth = 0; if ((err = be_umount_iter(root_hdl, &info)) != 0) { zfs_close(root_hdl); return (err); } zfs_close(root_hdl); return (BE_ERR_SUCCESS); } /* * This function will blow away the input buffer as needed if we're discovered * to be looking at a root-mount. If the mountpoint is naturally beyond the * root, however, the buffer may be left intact and a pointer to the section * past altroot will be returned instead for the caller's perusal. */ char * be_mountpoint_augmented(libbe_handle_t *lbh, char *mountpoint) { if (lbh->altroot_len == 0) return (mountpoint); if (mountpoint == NULL || *mountpoint == '\0') return (mountpoint); if (mountpoint[lbh->altroot_len] == '\0') { *(mountpoint + 1) = '\0'; return (mountpoint); } else return (mountpoint + lbh->altroot_len); } Index: projects/clang1000-import/lib/libc/stdlib/Makefile.inc =================================================================== --- projects/clang1000-import/lib/libc/stdlib/Makefile.inc (revision 356919) +++ projects/clang1000-import/lib/libc/stdlib/Makefile.inc (revision 356920) @@ -1,65 +1,66 @@ # from @(#)Makefile.inc 8.3 (Berkeley) 2/4/95 # $FreeBSD$ # machine-independent stdlib sources .PATH: ${LIBC_SRCTOP}/${LIBC_ARCH}/stdlib ${LIBC_SRCTOP}/stdlib MISRCS+=C99_Exit.c a64l.c abort.c abs.c atexit.c atof.c atoi.c atol.c atoll.c \ bsearch.c \ cxa_thread_atexit.c cxa_thread_atexit_impl.c \ div.c exit.c getenv.c getopt.c getopt_long.c \ getsubopt.c hcreate.c hcreate_r.c hdestroy_r.c heapsort.c heapsort_b.c \ hsearch_r.c imaxabs.c imaxdiv.c \ insque.c l64a.c labs.c ldiv.c llabs.c lldiv.c lsearch.c \ - merge.c mergesort_b.c ptsname.c qsort.c qsort_r.c quick_exit.c \ - radixsort.c rand.c \ + merge.c mergesort_b.c ptsname.c qsort.c qsort_r.c qsort_s.c \ + quick_exit.c radixsort.c rand.c \ random.c reallocarray.c reallocf.c realpath.c remque.c \ set_constraint_handler_s.c strfmon.c strtoimax.c \ strtol.c strtold.c strtoll.c strtoq.c strtoul.c strtonum.c strtoull.c \ strtoumax.c strtouq.c system.c tdelete.c tfind.c tsearch.c twalk.c # Work around an issue on case-insensitive file systems. # libc has both _Exit.c and _exit.s and they both yield # _exit.o (case insensitively speaking). CLEANFILES+=C99_Exit.c C99_Exit.c: ${LIBC_SRCTOP}/stdlib/_Exit.c .NOMETA ln -sf ${.ALLSRC} ${.TARGET} SYM_MAPS+= ${LIBC_SRCTOP}/stdlib/Symbol.map # machine-dependent stdlib sources .sinclude "${LIBC_SRCTOP}/${LIBC_ARCH}/stdlib/Makefile.inc" MAN+= a64l.3 abort.3 abs.3 alloca.3 atexit.3 atof.3 \ atoi.3 atol.3 at_quick_exit.3 bsearch.3 \ div.3 exit.3 getenv.3 getopt.3 getopt_long.3 getsubopt.3 \ hcreate.3 imaxabs.3 imaxdiv.3 insque.3 labs.3 ldiv.3 llabs.3 lldiv.3 \ lsearch.3 memory.3 ptsname.3 qsort.3 \ quick_exit.3 \ radixsort.3 rand.3 random.3 reallocarray.3 reallocf.3 realpath.3 \ set_constraint_handler_s.3 \ strfmon.3 strtod.3 strtol.3 strtonum.3 strtoul.3 system.3 \ tsearch.3 MLINKS+=a64l.3 l64a.3 a64l.3 l64a_r.3 MLINKS+=atol.3 atoll.3 MLINKS+=exit.3 _Exit.3 MLINKS+=getenv.3 putenv.3 getenv.3 setenv.3 getenv.3 unsetenv.3 MLINKS+=getopt_long.3 getopt_long_only.3 MLINKS+=hcreate.3 hdestroy.3 hcreate.3 hsearch.3 MLINKS+=hcreate.3 hcreate_r.3 hcreate.3 hdestroy_r.3 hcreate.3 hsearch_r.3 MLINKS+=insque.3 remque.3 MLINKS+=lsearch.3 lfind.3 MLINKS+=ptsname.3 grantpt.3 ptsname.3 unlockpt.3 -MLINKS+=qsort.3 heapsort.3 qsort.3 mergesort.3 qsort.3 qsort_r.3 +MLINKS+=qsort.3 heapsort.3 qsort.3 mergesort.3 qsort.3 qsort_r.3 \ + qsort.3 qsort_s.3 MLINKS+=rand.3 rand_r.3 rand.3 srand.3 MLINKS+=random.3 initstate.3 random.3 setstate.3 random.3 srandom.3 \ random.3 srandomdev.3 MLINKS+=radixsort.3 sradixsort.3 MLINKS+=set_constraint_handler_s.3 abort_handler_s.3 MLINKS+=set_constraint_handler_s.3 ignore_handler_s.3 MLINKS+=strfmon.3 strfmon_l.3 MLINKS+=strtod.3 strtof.3 strtod.3 strtold.3 MLINKS+=strtol.3 strtoll.3 strtol.3 strtoq.3 strtol.3 strtoimax.3 MLINKS+=strtoul.3 strtoull.3 strtoul.3 strtouq.3 strtoul.3 strtoumax.3 MLINKS+=tsearch.3 tdelete.3 tsearch.3 tfind.3 tsearch.3 twalk.3 Index: projects/clang1000-import/lib/libc/stdlib/Symbol.map =================================================================== --- projects/clang1000-import/lib/libc/stdlib/Symbol.map (revision 356919) +++ projects/clang1000-import/lib/libc/stdlib/Symbol.map (revision 356920) @@ -1,132 +1,136 @@ /* * $FreeBSD$ */ FBSD_1.0 { _Exit; a64l; abort; abs; atexit; __cxa_atexit; __cxa_finalize; atof; atoi; atol; atoll; bsearch; div; __isthreaded; exit; getenv; opterr; optind; optopt; optreset; optarg; getopt; getopt_long; getopt_long_only; suboptarg; getsubopt; grantpt; ptsname; unlockpt; hcreate; hdestroy; hsearch; heapsort; imaxabs; imaxdiv; insque; l64a; l64a_r; labs; ldiv; llabs; lldiv; lsearch; lfind; mergesort; putenv; qsort_r; qsort; radixsort; sradixsort; rand_r; rand; srand; srandom; srandomdev; initstate; setstate; random; reallocf; realpath; remque; setenv; unsetenv; strfmon; strtoimax; strtol; strtoll; strtonum; strtoq; strtoul; strtoull; strtoumax; strtouq; system; tdelete; tfind; tsearch; twalk; }; FBSD_1.3 { at_quick_exit; atof_l; atoi_l; atol_l; atoll_l; quick_exit; strtod_l; strtof_l; strtoimax_l; strtol_l; strtold_l; strtoll_l; strtoq_l; strtoul_l; strtoull_l; strtoumax_l; strtouq_l; }; FBSD_1.4 { atexit_b; bsearch_b; heapsort_b; mergesort_b; qsort_b; hcreate_r; hdestroy_r; hsearch_r; reallocarray; }; FBSD_1.5 { __cxa_thread_atexit; __cxa_thread_atexit_impl; abort_handler_s; ignore_handler_s; set_constraint_handler_s; }; +FBSD_1.6 { + qsort_s; +}; + FBSDprivate_1.0 { __system; _system; __libc_system; __cxa_thread_call_dtors; __libc_atexit; }; Index: projects/clang1000-import/lib/libc/stdlib/qsort.3 =================================================================== --- projects/clang1000-import/lib/libc/stdlib/qsort.3 (revision 356919) +++ projects/clang1000-import/lib/libc/stdlib/qsort.3 (revision 356920) @@ -1,372 +1,431 @@ .\" Copyright (c) 1990, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" This code is derived from software contributed to Berkeley by .\" the American National Standards Committee X3, on Information .\" Processing Systems. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)qsort.3 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd February 20, 2013 +.Dd January 14, 2020 .Dt QSORT 3 .Os .Sh NAME .Nm qsort , .Nm qsort_b , .Nm qsort_r , .Nm heapsort , .Nm heapsort_b , .Nm mergesort , .Nm mergesort_b .Nd sort functions .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In stdlib.h .Ft void .Fo qsort .Fa "void *base" .Fa "size_t nmemb" .Fa "size_t size" .Fa "int \*[lp]*compar\*[rp]\*[lp]const void *, const void *\*[rp]" .Fc .Ft void .Fo qsort_b .Fa "void *base" .Fa "size_t nmemb" .Fa "size_t size" .Fa "int \*[lp]^compar\*[rp]\*[lp]const void *, const void *\*[rp]" .Fc .Ft void .Fo qsort_r .Fa "void *base" .Fa "size_t nmemb" .Fa "size_t size" .Fa "void *thunk" .Fa "int \*[lp]*compar\*[rp]\*[lp]void *, const void *, const void *\*[rp]" .Fc .Ft int .Fo heapsort .Fa "void *base" .Fa "size_t nmemb" .Fa "size_t size" .Fa "int \*[lp]*compar\*[rp]\*[lp]const void *, const void *\*[rp]" .Fc .Ft int .Fo heapsort_b .Fa "void *base" .Fa "size_t nmemb" .Fa "size_t size" .Fa "int \*[lp]^compar\*[rp]\*[lp]const void *, const void *\*[rp]" .Fc .Ft int .Fo mergesort .Fa "void *base" .Fa "size_t nmemb" .Fa "size_t size" .Fa "int \*[lp]*compar\*[rp]\*[lp]const void *, const void *\*[rp]" .Fc .Ft int .Fo mergesort_b .Fa "void *base" .Fa "size_t nmemb" .Fa "size_t size" .Fa "int \*[lp]^compar\*[rp]\*[lp]const void *, const void *\*[rp]" .Fc +.Fd #define __STDC_WANT_LIB_EXT1__ 1 +.Ft errno_t +.Fo qsort_s +.Fa "void *base" +.Fa "rsize_t nmemb" +.Fa "rsize_t size" +.Fa "int \*[lp]*compar\*[rp]\*[lp]const void *, const void *, void *\*[rp]" +.Fa "void *thunk" +.Fc .Sh DESCRIPTION The .Fn qsort function is a modified partition-exchange sort, or quicksort. The .Fn heapsort function is a modified selection sort. The .Fn mergesort function is a modified merge sort with exponential search intended for sorting data with pre-existing order. .Pp The .Fn qsort and .Fn heapsort functions sort an array of .Fa nmemb objects, the initial member of which is pointed to by .Fa base . The size of each object is specified by .Fa size . The .Fn mergesort function behaves similarly, but .Em requires that .Fa size be greater than .Dq "sizeof(void *) / 2" . .Pp The contents of the array .Fa base are sorted in ascending order according to a comparison function pointed to by .Fa compar , which requires two arguments pointing to the objects being compared. .Pp The comparison function must return an integer less than, equal to, or greater than zero if the first argument is considered to be respectively less than, equal to, or greater than the second. .Pp The .Fn qsort_r function behaves identically to .Fn qsort , except that it takes an additional argument, .Fa thunk , which is passed unchanged as the first argument to function pointed to .Fa compar . This allows the comparison function to access additional data without using global variables, and thus .Fn qsort_r is suitable for use in functions which must be reentrant. The .Fn qsort_b function behaves identically to .Fn qsort , except that it takes a block, rather than a function pointer. .Pp The algorithms implemented by .Fn qsort , .Fn qsort_r , and .Fn heapsort are .Em not stable, that is, if two members compare as equal, their order in the sorted array is undefined. The .Fn heapsort_b function behaves identically to .Fn heapsort , except that it takes a block, rather than a function pointer. The .Fn mergesort algorithm is stable. The .Fn mergesort_b function behaves identically to .Fn mergesort , except that it takes a block, rather than a function pointer. .Pp The .Fn qsort and .Fn qsort_r functions are an implementation of C.A.R. Hoare's .Dq quicksort algorithm, a variant of partition-exchange sorting; in particular, see .An D.E. Knuth Ns 's .%T "Algorithm Q" . .Sy Quicksort takes O N lg N average time. This implementation uses median selection to avoid its O N**2 worst-case behavior. .Pp The .Fn heapsort function is an implementation of .An "J.W.J. William" Ns 's .Dq heapsort algorithm, a variant of selection sorting; in particular, see .An "D.E. Knuth" Ns 's .%T "Algorithm H" . .Sy Heapsort takes O N lg N worst-case time. Its .Em only advantage over .Fn qsort is that it uses almost no additional memory; while .Fn qsort does not allocate memory, it is implemented using recursion. .Pp The function .Fn mergesort requires additional memory of size .Fa nmemb * .Fa size bytes; it should be used only when space is not at a premium. The .Fn mergesort function is optimized for data with pre-existing order; its worst case time is O N lg N; its best case is O N. .Pp Normally, .Fn qsort is faster than .Fn mergesort is faster than .Fn heapsort . Memory availability and pre-existing order in the data can make this untrue. +.Pp +The +.Fn qsort_s +function behaves the same as +.Fn qsort_r , except that: +.Bl -dash +.It +The order of arguments is different +.It +The order of arguments to +.Fa compar +is different +.It +if +.Fa nmemb +or +.Fa size +are greater than +.Dv RSIZE_MAX , +or +.Fa nmemb +is not zero and +.Fa compar +is NULL, then the runtime-constraint handler is called, and +.Fn qsort_s +returns an error. +Note that the handler is called before +.Fn qsort_s +returns the error, and the handler function might not return. +.El .Sh RETURN VALUES The .Fn qsort and .Fn qsort_r functions return no value. +The +.Fn qsort_s +function returns zero on success, non-zero on error. .Pp .Rv -std heapsort mergesort .Sh EXAMPLES A sample program that sorts an array of .Vt int values in place using .Fn qsort , and then prints the sorted array to standard output is: .Bd -literal #include #include /* * Custom comparison function that compares 'int' values through pointers * passed by qsort(3). */ static int int_compare(const void *p1, const void *p2) { int left = *(const int *)p1; int right = *(const int *)p2; return ((left > right) - (left < right)); } /* * Sort an array of 'int' values and print it to standard output. */ int main(void) { int int_array[] = { 4, 5, 9, 3, 0, 1, 7, 2, 8, 6 }; size_t array_size = sizeof(int_array) / sizeof(int_array[0]); size_t k; qsort(&int_array, array_size, sizeof(int_array[0]), int_compare); for (k = 0; k < array_size; k++) printf(" %d", int_array[k]); puts(""); return (EXIT_SUCCESS); } .Ed .Sh COMPATIBILITY +The order of arguments for the comparison function used with +.Fn qsort_r +is different from the one used by +.Fn qsort_s , +and the GNU libc implementation of +.Fn qsort_r . +When porting software written for GNU libc, it is usually possible +to replace +.Fn qsort_r +with +.Fn qsort_s +to work around this problem. +.Pp Previous versions of .Fn qsort did not permit the comparison routine itself to call .Fn qsort 3 . This is no longer true. .Sh ERRORS The .Fn heapsort and .Fn mergesort functions succeed unless: .Bl -tag -width Er .It Bq Er EINVAL The .Fa size argument is zero, or, the .Fa size argument to .Fn mergesort is less than .Dq "sizeof(void *) / 2" . .It Bq Er ENOMEM The .Fn heapsort or .Fn mergesort functions were unable to allocate memory. .El .Sh SEE ALSO .Xr sort 1 , .Xr radixsort 3 .Rs .%A Hoare, C.A.R. .%D 1962 .%T "Quicksort" .%J "The Computer Journal" .%V 5:1 .%P pp. 10-15 .Re .Rs .%A Williams, J.W.J .%D 1964 .%T "Heapsort" .%J "Communications of the ACM" .%V 7:1 .%P pp. 347-348 .Re .Rs .%A Knuth, D.E. .%D 1968 .%B "The Art of Computer Programming" .%V Vol. 3 .%T "Sorting and Searching" .%P pp. 114-123, 145-149 .Re .Rs .%A McIlroy, P.M. .%T "Optimistic Sorting and Information Theoretic Complexity" .%J "Fourth Annual ACM-SIAM Symposium on Discrete Algorithms" .%V January 1992 .Re .Rs .%A Bentley, J.L. .%A McIlroy, M.D. .%T "Engineering a Sort Function" .%J "Software--Practice and Experience" .%V Vol. 23(11) .%P pp. 1249-1265 .%D November\ 1993 .Re .Sh STANDARDS The .Fn qsort function conforms to .St -isoC . +.Fn qsort_s +conforms to +.St -isoC-2011 +K.3.6.3.2. .Sh HISTORY The variants of these functions that take blocks as arguments first appeared in Mac OS X. This implementation was created by David Chisnall. Index: projects/clang1000-import/lib/libc/stdlib/qsort.c =================================================================== --- projects/clang1000-import/lib/libc/stdlib/qsort.c (revision 356919) +++ projects/clang1000-import/lib/libc/stdlib/qsort.c (revision 356920) @@ -1,200 +1,245 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)qsort.c 8.1 (Berkeley) 6/4/93"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); +#include +#include #include +#include +#include "libc_private.h" -#ifdef I_AM_QSORT_R +#if defined(I_AM_QSORT_R) typedef int cmp_t(void *, const void *, const void *); +#elif defined(I_AM_QSORT_S) +typedef int cmp_t(const void *, const void *, void *); #else typedef int cmp_t(const void *, const void *); #endif static inline char *med3(char *, char *, char *, cmp_t *, void *); #define MIN(a, b) ((a) < (b) ? a : b) /* * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function". */ static inline void swapfunc(char *a, char *b, size_t es) { char t; do { t = *a; *a++ = *b; *b++ = t; } while (--es > 0); } #define vecswap(a, b, n) \ if ((n) > 0) swapfunc(a, b, n) -#ifdef I_AM_QSORT_R +#if defined(I_AM_QSORT_R) #define CMP(t, x, y) (cmp((t), (x), (y))) +#elif defined(I_AM_QSORT_S) +#define CMP(t, x, y) (cmp((x), (y), (t))) #else #define CMP(t, x, y) (cmp((x), (y))) #endif static inline char * med3(char *a, char *b, char *c, cmp_t *cmp, void *thunk -#ifndef I_AM_QSORT_R +#if !defined(I_AM_QSORT_R) && !defined(I_AM_QSORT_S) __unused #endif ) { return CMP(thunk, a, b) < 0 ? (CMP(thunk, b, c) < 0 ? b : (CMP(thunk, a, c) < 0 ? c : a )) :(CMP(thunk, b, c) > 0 ? b : (CMP(thunk, a, c) < 0 ? a : c )); } -#ifdef I_AM_QSORT_R +#if defined(I_AM_QSORT_R) void qsort_r(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp) +#elif defined(I_AM_QSORT_S) +errno_t +qsort_s(void *a, rsize_t n, rsize_t es, cmp_t *cmp, void *thunk) #else #define thunk NULL void qsort(void *a, size_t n, size_t es, cmp_t *cmp) #endif { char *pa, *pb, *pc, *pd, *pl, *pm, *pn; size_t d1, d2; int cmp_result; int swap_cnt; +#ifdef I_AM_QSORT_S + if (n > RSIZE_MAX) { + __throw_constraint_handler_s("qsort_s : n > RSIZE_MAX", EINVAL); + return (EINVAL); + } else if (es > RSIZE_MAX) { + __throw_constraint_handler_s("qsort_s : es > RSIZE_MAX", EINVAL); + return (EINVAL); + } else if (n != 0) { + if (a == NULL) { + __throw_constraint_handler_s("qsort_s : a == NULL", EINVAL); + return (EINVAL); + } else if (cmp == NULL) { + __throw_constraint_handler_s("qsort_s : cmp == NULL", EINVAL); + return (EINVAL); + } + } +#endif + loop: swap_cnt = 0; if (n < 7) { for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es) for (pl = pm; pl > (char *)a && CMP(thunk, pl - es, pl) > 0; pl -= es) swapfunc(pl, pl - es, es); +#ifdef I_AM_QSORT_S + return (0); +#else return; +#endif } pm = (char *)a + (n / 2) * es; if (n > 7) { pl = a; pn = (char *)a + (n - 1) * es; if (n > 40) { size_t d = (n / 8) * es; pl = med3(pl, pl + d, pl + 2 * d, cmp, thunk); pm = med3(pm - d, pm, pm + d, cmp, thunk); pn = med3(pn - 2 * d, pn - d, pn, cmp, thunk); } pm = med3(pl, pm, pn, cmp, thunk); } swapfunc(a, pm, es); pa = pb = (char *)a + es; pc = pd = (char *)a + (n - 1) * es; for (;;) { while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) { if (cmp_result == 0) { swap_cnt = 1; swapfunc(pa, pb, es); pa += es; } pb += es; } while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) { if (cmp_result == 0) { swap_cnt = 1; swapfunc(pc, pd, es); pd -= es; } pc -= es; } if (pb > pc) break; swapfunc(pb, pc, es); swap_cnt = 1; pb += es; pc -= es; } if (swap_cnt == 0) { /* Switch to insertion sort */ for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es) for (pl = pm; pl > (char *)a && CMP(thunk, pl - es, pl) > 0; pl -= es) swapfunc(pl, pl - es, es); +#ifdef I_AM_QSORT_S + return (0); +#else return; +#endif } pn = (char *)a + n * es; d1 = MIN(pa - (char *)a, pb - pa); vecswap(a, pb - d1, d1); d1 = MIN(pd - pc, pn - pd - es); vecswap(pb, pn - d1, d1); d1 = pb - pa; d2 = pd - pc; if (d1 <= d2) { /* Recurse on left partition, then iterate on right partition */ if (d1 > es) { -#ifdef I_AM_QSORT_R +#if defined(I_AM_QSORT_R) qsort_r(a, d1 / es, es, thunk, cmp); +#elif defined(I_AM_QSORT_S) + qsort_s(a, d1 / es, es, cmp, thunk); #else qsort(a, d1 / es, es, cmp); #endif } if (d2 > es) { /* Iterate rather than recurse to save stack space */ /* qsort(pn - d2, d2 / es, es, cmp); */ a = pn - d2; n = d2 / es; goto loop; } } else { /* Recurse on right partition, then iterate on left partition */ if (d2 > es) { -#ifdef I_AM_QSORT_R +#if defined(I_AM_QSORT_R) qsort_r(pn - d2, d2 / es, es, thunk, cmp); +#elif defined(I_AM_QSORT_S) + qsort_s(pn - d2, d2 / es, es, cmp, thunk); #else qsort(pn - d2, d2 / es, es, cmp); #endif } if (d1 > es) { /* Iterate rather than recurse to save stack space */ /* qsort(a, d1 / es, es, cmp); */ n = d1 / es; goto loop; } } + +#ifdef I_AM_QSORT_S + return (0); +#endif } Index: projects/clang1000-import/lib/libc/stdlib/qsort_s.c =================================================================== --- projects/clang1000-import/lib/libc/stdlib/qsort_s.c (nonexistent) +++ projects/clang1000-import/lib/libc/stdlib/qsort_s.c (revision 356920) @@ -0,0 +1,8 @@ +/* + * This file is in the public domain. Originally written by Garrett + * A. Wollman. + * + * $FreeBSD$ + */ +#define I_AM_QSORT_S +#include "qsort.c" Property changes on: projects/clang1000-import/lib/libc/stdlib/qsort_s.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/clang1000-import/lib/libc/tests/stdlib/Makefile =================================================================== --- projects/clang1000-import/lib/libc/tests/stdlib/Makefile (revision 356919) +++ projects/clang1000-import/lib/libc/tests/stdlib/Makefile (revision 356920) @@ -1,71 +1,73 @@ # $FreeBSD$ .include ATF_TESTS_C+= dynthr_test ATF_TESTS_C+= heapsort_test ATF_TESTS_C+= mergesort_test ATF_TESTS_C+= qsort_test +ATF_TESTS_C+= qsort_r_test +ATF_TESTS_C+= qsort_s_test ATF_TESTS_C+= set_constraint_handler_s_test ATF_TESTS_C+= strfmon_test ATF_TESTS_C+= tsearch_test .if ${COMPILER_FEATURES:Mc++11} ATF_TESTS_CXX+= cxa_thread_atexit_test ATF_TESTS_CXX+= cxa_thread_atexit_nothr_test .endif # All architectures on FreeBSD have fenv.h CFLAGS+= -D__HAVE_FENV # Not sure why this isn't defined for all architectures, since most # have long double. .if ${MACHINE_CPUARCH} == "aarch64" || \ ${MACHINE_CPUARCH} == "amd64" || \ ${MACHINE_CPUARCH} == "i386" CFLAGS+= -D__HAVE_LONG_DOUBLE .endif # TODO: t_getenv_thread, t_mi_vector_hash, t_strtoi NETBSD_ATF_TESTS_C+= abs_test NETBSD_ATF_TESTS_C+= atoi_test NETBSD_ATF_TESTS_C+= div_test NETBSD_ATF_TESTS_C+= getenv_test NETBSD_ATF_TESTS_C+= exit_test NETBSD_ATF_TESTS_C+= hsearch_test NETBSD_ATF_TESTS_C+= posix_memalign_test NETBSD_ATF_TESTS_C+= random_test NETBSD_ATF_TESTS_C+= strtod_test NETBSD_ATF_TESTS_C+= strtol_test NETBSD_ATF_TESTS_C+= system_test # TODO: need to come up with a correct explanation of what the patch pho does # with h_atexit #ATF_TESTS_SH= atexit_test NETBSD_ATF_TESTS_SH= getopt_test .include "../Makefile.netbsd-tests" BINDIR= ${TESTSDIR} # TODO: see comment above #PROGS+= h_atexit PROGS+= h_getopt h_getopt_long CFLAGS+= -I${.CURDIR} CXXSTD.cxa_thread_atexit_test= c++11 CXXSTD.cxa_thread_atexit_nothr_test= c++11 LIBADD.cxa_thread_atexit_test+= pthread .for t in h_getopt h_getopt_long CFLAGS.$t+= -I${LIBNETBSD_SRCDIR} -I${SRCTOP}/contrib/netbsd-tests LDFLAGS.$t+= -L${LIBNETBSD_OBJDIR} LIBADD.${t}+= netbsd util .endfor LIBADD.strtod_test+= m SUBDIR+= dynthr_mod .include Index: projects/clang1000-import/lib/libc/tests/stdlib/qsort_r_test.c =================================================================== --- projects/clang1000-import/lib/libc/tests/stdlib/qsort_r_test.c (nonexistent) +++ projects/clang1000-import/lib/libc/tests/stdlib/qsort_r_test.c (revision 356920) @@ -0,0 +1,92 @@ +/*- + * Copyright (C) 2020 Edward Tomasz Napierala + * Copyright (C) 2004 Maxim Sobolev + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Test for qsort_r(3) routine. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include "test-sort.h" + +#define THUNK 42 + +static int +sorthelp_r(void *thunk, const void *a, const void *b) +{ + const int *oa, *ob; + + ATF_REQUIRE_EQ(*(int *)thunk, THUNK); + + oa = a; + ob = b; + /* Don't use "return *oa - *ob" since it's easy to cause overflow! */ + if (*oa > *ob) + return (1); + if (*oa < *ob) + return (-1); + return (0); +} + +ATF_TC_WITHOUT_HEAD(qsort_r_test); +ATF_TC_BODY(qsort_r_test, tc) +{ + int testvector[IVEC_LEN]; + int sresvector[IVEC_LEN]; + int i, j; + int thunk = THUNK; + + for (j = 2; j < IVEC_LEN; j++) { + /* Populate test vectors */ + for (i = 0; i < j; i++) + testvector[i] = sresvector[i] = initvector[i]; + + /* Sort using qsort_r(3) */ + qsort_r(testvector, j, sizeof(testvector[0]), &thunk, + sorthelp_r); + /* Sort using reference slow sorting routine */ + ssort(sresvector, j); + + /* Compare results */ + for (i = 0; i < j; i++) + ATF_CHECK_MSG(testvector[i] == sresvector[i], + "item at index %d didn't match: %d != %d", + i, testvector[i], sresvector[i]); + } +} + +ATF_TP_ADD_TCS(tp) +{ + + ATF_TP_ADD_TC(tp, qsort_r_test); + + return (atf_no_error()); +} Property changes on: projects/clang1000-import/lib/libc/tests/stdlib/qsort_r_test.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/clang1000-import/lib/libc/tests/stdlib/qsort_s_test.c =================================================================== --- projects/clang1000-import/lib/libc/tests/stdlib/qsort_s_test.c (nonexistent) +++ projects/clang1000-import/lib/libc/tests/stdlib/qsort_s_test.c (revision 356920) @@ -0,0 +1,257 @@ +/*- + * Copyright (C) 2020 Edward Tomasz Napierala + * Copyright (c) 2017 Juniper Networks. All rights reserved. + * Copyright (C) 2004 Maxim Sobolev + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Test for qsort_s(3) routine. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#define THUNK 42 + +#include "test-sort.h" + +static errno_t e; + +static int +sorthelp_s(const void *a, const void *b, void *thunk) +{ + const int *oa, *ob; + + ATF_REQUIRE_EQ(*(int *)thunk, THUNK); + + oa = a; + ob = b; + /* Don't use "return *oa - *ob" since it's easy to cause overflow! */ + if (*oa > *ob) + return (1); + if (*oa < *ob) + return (-1); + return (0); +} + +void +h(const char * restrict msg __unused, void * restrict ptr __unused, errno_t error) +{ + e = error; +} + +/* nmemb < 0 */ +ATF_TC_WITHOUT_HEAD(qsort_s_nmemb_lt_zero); +ATF_TC_BODY(qsort_s_nmemb_lt_zero, tc) +{ + int thunk = THUNK; + int b; + + ATF_CHECK(qsort_s(&b, -1, sizeof(int), sorthelp_s, &thunk) != 0); +} + +/* nmemb > rmax */ +ATF_TC_WITHOUT_HEAD(qsort_s_nmemb_gt_rmax); +ATF_TC_BODY(qsort_s_nmemb_gt_rmax, tc) +{ + int thunk = THUNK; + int b; + + ATF_CHECK(qsort_s(&b, RSIZE_MAX + 1, sizeof(int), sorthelp_s, &thunk) != 0); +} + +/* size < 0 */ +ATF_TC_WITHOUT_HEAD(qsort_s_size_lt_zero); +ATF_TC_BODY(qsort_s_size_lt_zero, tc) +{ + int thunk = THUNK; + int b; + + ATF_CHECK(qsort_s(&b, 1, -1, sorthelp_s, &thunk) != 0); +} + +/* size > rmax */ +ATF_TC_WITHOUT_HEAD(qsort_s_size_gt_rmax); +ATF_TC_BODY(qsort_s_size_gt_rmax, tc) +{ + int thunk = THUNK; + int b; + + ATF_CHECK(qsort_s(&b, 1, RSIZE_MAX + 1, sorthelp_s, &thunk) != 0); +} + +/* NULL compar */ +ATF_TC_WITHOUT_HEAD(qsort_s_null_compar); +ATF_TC_BODY(qsort_s_null_compar, tc) +{ + int thunk = THUNK; + int b; + + ATF_CHECK(qsort_s(&b, 1, sizeof(int), NULL, &thunk) != 0); +} + +/* nmemb < 0, handler */ +ATF_TC_WITHOUT_HEAD(qsort_s_nmemb_lt_zero_h); +ATF_TC_BODY(qsort_s_nmemb_lt_zero_h, tc) +{ + int thunk = THUNK; + int b[] = {81, 4, 7}; + + e = 0; + set_constraint_handler_s(h); + ATF_CHECK(qsort_s(&b, -1, sizeof(int), sorthelp_s, &thunk) != 0); + ATF_CHECK(e > 0); + ATF_CHECK_EQ(b[0], 81); + ATF_CHECK_EQ(b[1], 4); + ATF_CHECK_EQ(b[2], 7); +} + +/* nmemb > rmax, handler */ +ATF_TC_WITHOUT_HEAD(qsort_s_nmemb_gt_rmax_h); +ATF_TC_BODY(qsort_s_nmemb_gt_rmax_h, tc) +{ + int thunk = THUNK; + int b[] = {81, 4, 7}; + + e = 0; + set_constraint_handler_s(h); + ATF_CHECK(qsort_s(&b, RSIZE_MAX + 1, sizeof(int), sorthelp_s, &thunk) != 0); + ATF_CHECK(e > 0); + ATF_CHECK_EQ(b[0], 81); + ATF_CHECK_EQ(b[1], 4); + ATF_CHECK_EQ(b[2], 7); +} + +/* size < 0, handler */ +ATF_TC_WITHOUT_HEAD(qsort_s_size_lt_zero_h); +ATF_TC_BODY(qsort_s_size_lt_zero_h, tc) +{ + int thunk = THUNK; + int b[] = {81, 4, 7}; + + e = 0; + set_constraint_handler_s(h); + ATF_CHECK(qsort_s(&b, nitems(b), -1, sorthelp_s, &thunk) != 0); + ATF_CHECK(e > 0); + ATF_CHECK_EQ(b[0], 81); + ATF_CHECK_EQ(b[1], 4); + ATF_CHECK_EQ(b[2], 7); +} + +/* size > rmax, handler */ +ATF_TC_WITHOUT_HEAD(qsort_s_size_gt_rmax_h); +ATF_TC_BODY(qsort_s_size_gt_rmax_h, tc) +{ + int thunk = THUNK; + int b[] = {81, 4, 7}; + + e = 0; + set_constraint_handler_s(h); + ATF_CHECK(qsort_s(&b, nitems(b), RSIZE_MAX + 1, sorthelp_s, &thunk) != 0); + ATF_CHECK(e > 0); + ATF_CHECK_EQ(b[0], 81); + ATF_CHECK_EQ(b[1], 4); + ATF_CHECK_EQ(b[2], 7); +} + +/* NULL compar, handler */ +ATF_TC_WITHOUT_HEAD(qsort_s_null_compar_h); +ATF_TC_BODY(qsort_s_null_compar_h, tc) +{ + int thunk = THUNK; + int b[] = {81, 4, 7}; + + e = 0; + set_constraint_handler_s(h); + ATF_CHECK(qsort_s(&b, nitems(b), sizeof(int), NULL, &thunk) != 0); + ATF_CHECK(e > 0); + ATF_CHECK_EQ(b[0], 81); + ATF_CHECK_EQ(b[1], 4); + ATF_CHECK_EQ(b[2], 7); +} + +ATF_TC_WITHOUT_HEAD(qsort_s_h); +ATF_TC_BODY(qsort_s_h, tc) +{ + int thunk = THUNK; + int b[] = {81, 4, 7}; + + e = 0; + set_constraint_handler_s(h); + ATF_CHECK(qsort_s(&b, nitems(b), sizeof(int), sorthelp_s, &thunk) == 0); + ATF_CHECK(e == 0); + ATF_CHECK_EQ(b[0], 4); + ATF_CHECK_EQ(b[1], 7); + ATF_CHECK_EQ(b[2], 81); +} + +ATF_TC_WITHOUT_HEAD(qsort_s_test); +ATF_TC_BODY(qsort_s_test, tc) +{ + int testvector[IVEC_LEN]; + int sresvector[IVEC_LEN]; + int i, j; + int thunk = THUNK; + + for (j = 2; j < IVEC_LEN; j++) { + /* Populate test vectors */ + for (i = 0; i < j; i++) + testvector[i] = sresvector[i] = initvector[i]; + + /* Sort using qsort_s(3) */ + qsort_s(testvector, j, sizeof(testvector[0]), + sorthelp_s, &thunk); + /* Sort using reference slow sorting routine */ + ssort(sresvector, j); + + /* Compare results */ + for (i = 0; i < j; i++) + ATF_CHECK_MSG(testvector[i] == sresvector[i], + "item at index %d didn't match: %d != %d", + i, testvector[i], sresvector[i]); + } +} + +ATF_TP_ADD_TCS(tp) +{ + ATF_TP_ADD_TC(tp, qsort_s_nmemb_lt_zero); + ATF_TP_ADD_TC(tp, qsort_s_nmemb_gt_rmax); + ATF_TP_ADD_TC(tp, qsort_s_size_lt_zero); + ATF_TP_ADD_TC(tp, qsort_s_size_gt_rmax); + ATF_TP_ADD_TC(tp, qsort_s_null_compar); + ATF_TP_ADD_TC(tp, qsort_s_nmemb_lt_zero_h); + ATF_TP_ADD_TC(tp, qsort_s_nmemb_gt_rmax_h); + ATF_TP_ADD_TC(tp, qsort_s_size_lt_zero_h); + ATF_TP_ADD_TC(tp, qsort_s_size_gt_rmax_h); + ATF_TP_ADD_TC(tp, qsort_s_null_compar_h); + ATF_TP_ADD_TC(tp, qsort_s_h); + ATF_TP_ADD_TC(tp, qsort_s_test); + + return (atf_no_error()); +} Property changes on: projects/clang1000-import/lib/libc/tests/stdlib/qsort_s_test.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/clang1000-import/share/man/man5/src.conf.5 =================================================================== --- projects/clang1000-import/share/man/man5/src.conf.5 (revision 356919) +++ projects/clang1000-import/share/man/man5/src.conf.5 (revision 356920) @@ -1,1925 +1,1920 @@ .\" DO NOT EDIT-- this file is @generated by tools/build/options/makeman. .\" $FreeBSD$ -.Dd January 17, 2020 +.Dd January 19, 2020 .Dt SRC.CONF 5 .Os .Sh NAME .Nm src.conf .Nd "source build options" .Sh DESCRIPTION The .Nm file contains settings that will apply to every build involving the .Fx source tree; see .Xr build 7 . .Pp The .Nm file uses the standard makefile syntax. However, .Nm should not specify any dependencies to .Xr make 1 . Instead, .Nm is to set .Xr make 1 variables that control the aspects of how the system builds. .Pp The default location of .Nm is .Pa /etc/src.conf , though an alternative location can be specified in the .Xr make 1 variable .Va SRCCONF . Overriding the location of .Nm may be necessary if the system-wide settings are not suitable for a particular build. For instance, setting .Va SRCCONF to .Pa /dev/null effectively resets all build controls to their defaults. .Pp The only purpose of .Nm is to control the compilation of the .Fx source code, which is usually located in .Pa /usr/src . As a rule, the system administrator creates .Nm when the values of certain control variables need to be changed from their defaults. .Pp In addition, control variables can be specified for a particular build via the .Fl D option of .Xr make 1 or in its environment; see .Xr environ 7 . .Pp The environment of .Xr make 1 for the build can be controlled via the .Va SRC_ENV_CONF variable, which defaults to .Pa /etc/src-env.conf . Some examples that may only be set in this file are .Va WITH_DIRDEPS_BUILD , and .Va WITH_META_MODE , and .Va MAKEOBJDIRPREFIX as they are environment-only variables. .Pp The values of variables are ignored regardless of their setting; even if they would be set to .Dq Li FALSE or .Dq Li NO . The presence of an option causes it to be honored by .Xr make 1 . .Pp This list provides a name and short description for variables that can be used for source builds. .Bl -tag -width indent .It Va WITHOUT_ACCT Set to not build process accounting tools such as .Xr accton 8 and .Xr sa 8 . .It Va WITHOUT_ACPI Set to not build .Xr acpiconf 8 , .Xr acpidump 8 and related programs. .It Va WITH_AMD Set to build the legacy .Xr amd 8 automount daemon and related programs. Note that .Xr autofs 5 is the preferred automount technique. .It Va WITHOUT_APM Set to not build .Xr apm 8 , .Xr apmd 8 and related programs. .It Va WITHOUT_ASSERT_DEBUG Set to compile programs and libraries without the .Xr assert 3 checks. .It Va WITHOUT_AT Set to not build .Xr at 1 and related utilities. .It Va WITHOUT_ATM Set to not build programs and libraries related to ATM networking. .It Va WITHOUT_AUDIT Set to not build audit support into system programs. .It Va WITHOUT_AUTHPF Set to not build .Xr authpf 8 . .It Va WITHOUT_AUTOFS Set to not build .Xr autofs 5 related programs, libraries, and kernel modules. .It Va WITHOUT_AUTO_OBJ Disable automatic creation of objdirs. This is enabled by default if the wanted OBJDIR is writable by the current user. .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITH_BEARSSL Build the BearSSL library. .Pp BearSSL is a tiny SSL library suitable for embedded environments. For details see .Lk http://www.BearSSL.org/ .Pp This library is currently only used to perform signature verification and related operations for Verified Exec and .Xr loader 8 . When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITH_LOADER_EFI_SECUREBOOT (unless .Va WITHOUT_LOADER_EFI_SECUREBOOT is set explicitly) .It Va WITH_LOADER_VERIEXEC (unless .Va WITHOUT_LOADER_VERIEXEC is set explicitly) .It Va WITH_VERIEXEC (unless .Va WITHOUT_VERIEXEC is set explicitly) .El .It Va WITHOUT_BHYVE Set to not build or install .Xr bhyve 8 , associated utilities, and examples. .Pp This option only affects amd64/amd64. .It Va WITH_BIND_NOW Build all binaries with the .Dv DF_BIND_NOW flag set to indicate that the run-time loader should perform all relocation processing at process startup rather than on demand. .It Va WITHOUT_BINUTILS Do not build or install GNU .Xr as 1 , .Xr ld.bfd 1 , and .Xr objdump 1 as part of the normal system build. -The resulting system cannot build programs from source. .Pp This is a default setting on arm64/aarch64, riscv/riscv64 and riscv/riscv64sf. .It Va WITH_BINUTILS -Set to build and install GNU -.Xr as 1 , +Build and install GNU +.Xr as 1 +on i386 and amd64, .Xr objdump 1 , -and, on powerpc, +and .Xr ld.bfd 1 -as part -of the normal system build. +on powerpc as part of the normal system build. .Pp This is a default setting on amd64/amd64, arm/armv6, arm/armv7, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITHOUT_BINUTILS_BOOTSTRAP Do not build binutils (as, ld.bfd, and objdump) as part of the bootstrap process. -.Bf -symbolic -The option does not work for build targets unless some alternative -toolchain is provided. -.Ef .Pp This is a default setting on -arm64/aarch64, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. +arm/armv6, arm/armv7, arm64/aarch64, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. .It Va WITH_BINUTILS_BOOTSTRAP -Set build binutils (as, objdump, and on powerpc ld) +Build binutils (as on i386 and amd64, objdump, and ld on powerpc) as part of the bootstrap process. .Pp This is a default setting on -amd64/amd64, arm/armv6, arm/armv7, i386/i386, powerpc/powerpc and powerpc/powerpc64. +amd64/amd64, i386/i386, powerpc/powerpc and powerpc/powerpc64. .It Va WITHOUT_BLACKLIST Set this if you do not want to build .Xr blacklistd 8 and .Xr blacklistctl 8 . When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_BLACKLIST_SUPPORT (unless .Va WITH_BLACKLIST_SUPPORT is set explicitly) .El .It Va WITHOUT_BLACKLIST_SUPPORT Set to build some programs without .Xr libblacklist 3 support, like .Xr fingerd 8 , .Xr ftpd 8 , and .Xr sshd 8 . .It Va WITHOUT_BLUETOOTH Set to not build Bluetooth related kernel modules, programs and libraries. .It Va WITHOUT_BOOT Set to not build the boot blocks and loader. .It Va WITHOUT_BOOTPARAMD Set to not build or install .Xr bootparamd 8 . .It Va WITHOUT_BOOTPD Set to not build or install .Xr bootpd 8 . .It Va WITHOUT_BSDINSTALL Set to not build .Xr bsdinstall 8 , .Xr sade 8 , and related programs. .It Va WITHOUT_BSD_CPIO Set to not build the BSD licensed version of cpio based on .Xr libarchive 3 . .It Va WITHOUT_BSD_CRTBEGIN Disable the BSD licensed .Pa crtbegin.o and .Pa crtend.o . .Pp This is a default setting on sparc64/sparc64. .It Va WITH_BSD_CRTBEGIN Enable the BSD licensed .Pa crtbegin.o and .Pa crtend.o . .Pp This is a default setting on amd64/amd64, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, riscv/riscv64 and riscv/riscv64sf. .It Va WITH_BSD_GREP Install BSD-licensed grep as '[ef]grep' instead of GNU grep. .It Va WITHOUT_BSNMP Set to not build or install .Xr bsnmpd 1 and related libraries and data files. .It Va WITHOUT_BZIP2 Set to not build contributed bzip2 software as a part of the base system. .Bf -symbolic The option has no effect yet. .Ef When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_BZIP2_SUPPORT (unless .Va WITH_BZIP2_SUPPORT is set explicitly) .El .It Va WITHOUT_BZIP2_SUPPORT Set to build some programs without optional bzip2 support. .It Va WITHOUT_CALENDAR Set to not build .Xr calendar 1 . .It Va WITHOUT_CAPSICUM Set to not build Capsicum support into system programs. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_CASPER .El .It Va WITHOUT_CAROOT Set to not add the trusted certificates from the Mozilla NSS bundle to base. .It Va WITHOUT_CASPER Set to not build Casper program and related libraries. .It Va WITH_CCACHE_BUILD Set to use .Xr ccache 1 for the build. No configuration is required except to install the .Sy devel/ccache package. When using with .Xr distcc 1 , set .Sy CCACHE_PREFIX=/usr/local/bin/distcc . The default cache directory of .Pa $HOME/.ccache will be used, which can be overridden by setting .Sy CCACHE_DIR . The .Sy CCACHE_COMPILERCHECK option defaults to .Sy content when using the in-tree bootstrap compiler, and .Sy mtime when using an external compiler. The .Sy CCACHE_CPP2 option is used for Clang but not GCC. .Pp Sharing a cache between multiple work directories requires using a layout similar to .Pa /some/prefix/src .Pa /some/prefix/obj and an environment such as: .Bd -literal -offset indent CCACHE_BASEDIR='${SRCTOP:H}' MAKEOBJDIRPREFIX='${SRCTOP:H}/obj' .Ed .Pp See .Xr ccache 1 for more configuration options. .It Va WITHOUT_CCD Set to not build .Xr geom_ccd 4 and related utilities. .It Va WITHOUT_CDDL Set to not build code licensed under Sun's CDDL. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_CTF .It .Va WITHOUT_LOADER_ZFS .It .Va WITHOUT_ZFS .El .It Va WITHOUT_CLANG Set to not build the Clang C/C++ compiler during the regular phase of the build. .Pp This is a default setting on sparc64/sparc64. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_CLANG_EXTRAS .It .Va WITHOUT_CLANG_FULL .It .Va WITHOUT_LLVM_COV .El .It Va WITH_CLANG Set to build the Clang C/C++ compiler during the normal phase of the build. .Pp This is a default setting on amd64/amd64, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, riscv/riscv64 and riscv/riscv64sf. .It Va WITHOUT_CLANG_BOOTSTRAP Set to not build the Clang C/C++ compiler during the bootstrap phase of the build. To be able to build the system, either gcc or clang bootstrap must be enabled unless an alternate compiler is provided via XCC. .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf and sparc64/sparc64. .It Va WITH_CLANG_BOOTSTRAP Set to build the Clang C/C++ compiler during the bootstrap phase of the build. .Pp This is a default setting on amd64/amd64, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, powerpc/powerpc, powerpc/powerpc64, riscv/riscv64 and riscv/riscv64sf. .It Va WITH_CLANG_EXTRAS Set to build additional clang and llvm tools, such as bugpoint and clang-format. .It Va WITHOUT_CLANG_FULL Set to avoid building the ARCMigrate, Rewriter and StaticAnalyzer components of the Clang C/C++ compiler. .Pp This is a default setting on sparc64/sparc64. .It Va WITH_CLANG_FULL Set to build the ARCMigrate, Rewriter and StaticAnalyzer components of the Clang C/C++ compiler. .Pp This is a default setting on amd64/amd64, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, riscv/riscv64 and riscv/riscv64sf. .It Va WITHOUT_CLANG_IS_CC Do not install links to the Clang C/C++ compiler as .Pa /usr/bin/cc , .Pa /usr/bin/c++ and .Pa /usr/bin/cpp . If .Va WITH_GCC is set then links to the GCC C/C++ compiler will be installed instead. .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf and sparc64/sparc64. .It Va WITH_CLANG_IS_CC Install links to the Clang C/C++ compiler as .Pa /usr/bin/cc , .Pa /usr/bin/c++ and .Pa /usr/bin/cpp . .Pp This is a default setting on amd64/amd64, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, powerpc/powerpc, powerpc/powerpc64, riscv/riscv64 and riscv/riscv64sf. .It Va WITHOUT_CPP Set to not build .Xr cpp 1 . .It Va WITHOUT_CROSS_COMPILER Set to not build any cross compiler in the cross-tools stage of buildworld. When compiling a different version of .Fx than what is installed on the system, provide an alternate compiler with XCC to ensure success. When compiling with an identical version of .Fx to the host, this option may be safely used. This option may also be safe when the host version of .Fx is close to the sources being built, but all bets are off if there have been any changes to the toolchain between the versions. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_BINUTILS_BOOTSTRAP .It .Va WITHOUT_CLANG_BOOTSTRAP .It .Va WITHOUT_ELFTOOLCHAIN_BOOTSTRAP .It .Va WITHOUT_GCC_BOOTSTRAP .It .Va WITHOUT_LLD_BOOTSTRAP .El .It Va WITHOUT_CRYPT Set to not build any crypto code. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_DMAGENT .It .Va WITHOUT_KERBEROS .It .Va WITHOUT_KERBEROS_SUPPORT .It .Va WITHOUT_LDNS .It .Va WITHOUT_LDNS_UTILS .It .Va WITHOUT_OPENSSH .It .Va WITHOUT_OPENSSL .It .Va WITHOUT_UNBOUND .El .Pp When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_GSSAPI (unless .Va WITH_GSSAPI is set explicitly) .El .It Va WITH_CTF Set to compile with CTF (Compact C Type Format) data. CTF data encapsulates a reduced form of debugging information similar to DWARF and the venerable stabs and is required for DTrace. .It Va WITHOUT_CUSE Set to not build CUSE-related programs and libraries. .It Va WITHOUT_CXGBETOOL Set to not build .Xr cxgbetool 8 .Pp This is a default setting on arm/armv6, arm/armv7, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, riscv/riscv64 and riscv/riscv64sf. .It Va WITH_CXGBETOOL Set to build .Xr cxgbetool 8 .Pp This is a default setting on amd64/amd64, arm64/aarch64, i386/i386, powerpc/powerpc64 and sparc64/sparc64. .It Va WITHOUT_CXX Set to not build .Xr c++ 1 and related libraries. It will also prevent building of .Xr gperf 1 and .Xr devd 8 . When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_CLANG .It .Va WITHOUT_CLANG_EXTRAS .It .Va WITHOUT_CLANG_FULL .It .Va WITHOUT_DTRACE_TESTS .It .Va WITHOUT_GNUCXX .It .Va WITHOUT_GOOGLETEST .It .Va WITHOUT_LLVM_COV .It .Va WITHOUT_TESTS .El .It Va WITHOUT_DEBUG_FILES Set to avoid building or installing standalone debug files for each executable binary and shared library. .It Va WITHOUT_DIALOG Set to not build .Xr dialog 1 , .Xr dialog 3 , .Xr dpv 1 , and .Xr dpv 3 . When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_BSDINSTALL .El .It Va WITHOUT_DICT Set to not build the Webster dictionary files. .It Va WITH_DIRDEPS_BUILD This is an experimental build system. For details see http://www.crufty.net/sjg/docs/freebsd-meta-mode.htm. Build commands can be seen from the top-level with: .Dl make show-valid-targets The build is driven by dirdeps.mk using .Va DIRDEPS stored in Makefile.depend files found in each directory. .Pp The build can be started from anywhere, and behaves the same. The initial instance of .Xr make 1 recursively reads .Va DIRDEPS from .Pa Makefile.depend , computing a graph of tree dependencies from the current origin. Setting .Va NO_DIRDEPS skips checking dirdep dependencies and will only build in the current and child directories. .Va NO_DIRDEPS_BELOW skips building any dirdeps and only build the current directory. .Pp This also utilizes the .Va WITH_META_MODE logic for incremental builds. .Pp The build hides commands executed unless .Va NO_SILENT is defined. .Pp Note that there is currently no mass install feature for this. .Pp When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITH_INSTALL_AS_USER .El .Pp When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITH_META_MODE (unless .Va WITHOUT_META_MODE is set explicitly) .It Va WITH_STAGING (unless .Va WITHOUT_STAGING is set explicitly) .It Va WITH_STAGING_MAN (unless .Va WITHOUT_STAGING_MAN is set explicitly) .It Va WITH_STAGING_PROG (unless .Va WITHOUT_STAGING_PROG is set explicitly) .It Va WITH_SYSROOT (unless .Va WITHOUT_SYSROOT is set explicitly) .El .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITH_DIRDEPS_CACHE Cache result of dirdeps.mk which can save significant time for subsequent builds. Depends on .Va WITH_DIRDEPS_BUILD . .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITHOUT_DMAGENT Set to not build dma Mail Transport Agent. .It Va WITHOUT_DOCCOMPRESS Set to not install compressed system documentation. Only the uncompressed version will be installed. .It Va WITH_DTRACE_TESTS Set to build and install the DTrace test suite in .Pa /usr/tests/cddl/usr.sbin/dtrace . This test suite is considered experimental on architectures other than amd64/amd64 and running it may cause system instability. .It Va WITHOUT_DYNAMICROOT Set this if you do not want to link .Pa /bin and .Pa /sbin dynamically. .It Va WITHOUT_EE Set to not build and install .Xr edit 1 , .Xr ee 1 , and related programs. .It Va WITHOUT_EFI Set not to build .Xr efivar 3 and .Xr efivar 8 . .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. .It Va WITH_EFI Set to build .Xr efivar 3 and .Xr efivar 8 . .Pp This is a default setting on amd64/amd64, arm/armv6, arm/armv7, arm64/aarch64 and i386/i386. .It Va WITHOUT_ELFTOOLCHAIN_BOOTSTRAP Set to not build ELF Tool Chain tools (addr2line, nm, size, strings and strip) as part of the bootstrap process. .Bf -symbolic An alternate bootstrap tool chain must be provided. .Ef .It Va WITHOUT_EXAMPLES Set to avoid installing examples to .Pa /usr/share/examples/ . .It Va WITH_EXPERIMENTAL Set to include experimental features in the build. .It Va WITH_EXTRA_TCP_STACKS Set to build extra TCP stack modules. .It Va WITHOUT_FDT Set to not build Flattened Device Tree support as part of the base system. This includes the device tree compiler (dtc) and libfdt support library. .It Va WITHOUT_FILE Set to not build .Xr file 1 and related programs. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_SVNLITE .El .It Va WITHOUT_FINGER Set to not build or install .Xr finger 1 and .Xr fingerd 8 . .It Va WITHOUT_FLOPPY Set to not build or install programs for operating floppy disk driver. .It Va WITHOUT_FMTREE Set to not build and install .Pa /usr/sbin/fmtree . .It Va WITHOUT_FORMAT_EXTENSIONS Set to not enable .Fl fformat-extensions when compiling the kernel. Also disables all format checking. .It Va WITHOUT_FORTH Set to build bootloaders without Forth support. .It Va WITHOUT_FP_LIBC Set to build .Nm libc without floating-point support. .It Va WITHOUT_FREEBSD_UPDATE Set to not build .Xr freebsd-update 8 . .It Va WITHOUT_FTP Set to not build or install .Xr ftp 1 and .Xr ftpd 8 . .It Va WITHOUT_GAMES Set to not build games. .It Va WITH_GCC Set to build and install gcc and g++. This option is deprecated and will be removed before .Fx 13 . .It Va WITH_GCC_BOOTSTRAP Set to build gcc and g++ as part of the bootstrap process. This option is deprecated and will be removed before .Fx 13 . .It Va WITHOUT_GCOV Set to not build the .Xr gcov 1 tool. .It Va WITHOUT_GDB Set to not build .Xr gdb 1 . .Pp This is a default setting on arm64/aarch64, riscv/riscv64 and riscv/riscv64sf. .It Va WITH_GDB Set to build .Xr gdb 1 . .Pp This is a default setting on amd64/amd64, arm/armv6, arm/armv7, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITHOUT_GDB_LIBEXEC Set to install .Xr gdb 1 into .Pa /usr/bin . .Pp This is a default setting on sparc64/sparc64. .It Va WITH_GDB_LIBEXEC Set to install .Xr gdb 1 into .Pa /usr/libexec . This permits .Xr gdb 1 to be used as a fallback for .Xr crashinfo 8 if a newer version is not installed. .Pp This is a default setting on amd64/amd64, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, riscv/riscv64 and riscv/riscv64sf. .It Va WITH_GNUCXX Build the GNU C++ stack (g++, libstdc++). This option is deprecated and will be removed before .Fx 13 . .It Va WITHOUT_GNU_DIFF Set to not build GNU .Xr diff 1 and .Xr diff3 1 . .It Va WITHOUT_GNU_GREP Set to not build GNU .Xr grep 1 . .It Va WITH_GNU_GREP_COMPAT Set this option to include GNU extensions in .Xr bsdgrep 1 by linking against libgnuregex. .It Va WITHOUT_GOOGLETEST Set to neither build nor install .Lb libgmock , .Lb libgtest , and dependent tests. .It Va WITHOUT_GPIO Set to not build .Xr gpioctl 8 as part of the base system. .It Va WITH_GPL_DTC Set to build the GPL'd version of the device tree compiler from elinux.org, instead of the BSD licensed one. .It Va WITHOUT_GSSAPI Set to not build libgssapi. .It Va WITHOUT_HAST Set to not build .Xr hastd 8 and related utilities. .It Va WITH_HESIOD Set to build Hesiod support. .It Va WITHOUT_HTML Set to not build HTML docs. .It Va WITH_HTTPD Set to build and install simple_httpd .It Va WITHOUT_HYPERV Set to not build or install HyperV utilities. .Pp This is a default setting on arm/armv6, arm/armv7, arm64/aarch64, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. .It Va WITH_HYPERV Set to build or install HyperV utilities. .Pp This is a default setting on amd64/amd64 and i386/i386. .It Va WITHOUT_ICONV Set to not build iconv as part of libc. .It Va WITHOUT_INCLUDES Set to not install header files. This option used to be spelled .Va NO_INCS . .Bf -symbolic The option does not work for build targets. .Ef .It Va WITHOUT_INET Set to not build programs and libraries related to IPv4 networking. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_INET_SUPPORT .El .It Va WITHOUT_INET6 Set to not build programs and libraries related to IPv6 networking. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_INET6_SUPPORT .El .It Va WITHOUT_INET6_SUPPORT Set to build libraries, programs, and kernel modules without IPv6 support. .It Va WITHOUT_INETD Set to not build .Xr inetd 8 . .It Va WITHOUT_INET_SUPPORT Set to build libraries, programs, and kernel modules without IPv4 support. .It Va WITHOUT_INSTALLLIB Set this to not install optional libraries. For example, when creating a .Xr nanobsd 8 image. .Bf -symbolic The option does not work for build targets. .Ef .It Va WITH_INSTALL_AS_USER Set to make install targets succeed for non-root users by installing files with owner and group attributes set to that of the user running the .Xr make 1 command. The user still must set the .Va DESTDIR variable to point to a directory where the user has write permissions. .It Va WITHOUT_IPFILTER Set to not build IP Filter package. .It Va WITHOUT_IPFW Set to not build IPFW tools. .It Va WITHOUT_IPSEC_SUPPORT Set to not build the kernel with .Xr ipsec 4 support. This option is needed for .Xr ipsec 4 and .Xr tcpmd5 4 . .It Va WITHOUT_ISCSI Set to not build .Xr iscsid 8 and related utilities. .It Va WITHOUT_JAIL Set to not build tools for the support of jails; e.g., .Xr jail 8 . .It Va WITHOUT_KDUMP Set to not build .Xr kdump 1 and .Xr truss 1 . .It Va WITHOUT_KERBEROS Set this to not build Kerberos 5 (KTH Heimdal). When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_GSSAPI (unless .Va WITH_GSSAPI is set explicitly) .It Va WITHOUT_KERBEROS_SUPPORT (unless .Va WITH_KERBEROS_SUPPORT is set explicitly) .El .It Va WITHOUT_KERBEROS_SUPPORT Set to build some programs without Kerberos support, like .Xr ssh 1 , .Xr telnet 1 , .Xr sshd 8 , and .Xr telnetd 8 . .It Va WITH_KERNEL_RETPOLINE Set to enable the "retpoline" mitigation for CVE-2017-5715 in the kernel build. .It Va WITHOUT_KERNEL_SYMBOLS Set to not install kernel symbol files. .Bf -symbolic This option is recommended for those people who have small root partitions. .Ef .It Va WITHOUT_KVM Set to not build the .Nm libkvm library as a part of the base system. .Bf -symbolic The option has no effect yet. .Ef When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_KVM_SUPPORT (unless .Va WITH_KVM_SUPPORT is set explicitly) .El .It Va WITHOUT_KVM_SUPPORT Set to build some programs without optional .Nm libkvm support. .It Va WITHOUT_LDNS Setting this variable will prevent the LDNS library from being built. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_LDNS_UTILS .It .Va WITHOUT_UNBOUND .El .It Va WITHOUT_LDNS_UTILS Setting this variable will prevent building the LDNS utilities .Xr drill 1 and .Xr host 1 . .It Va WITHOUT_LEGACY_CONSOLE Set to not build programs that support a legacy PC console; e.g., .Xr kbdcontrol 1 and .Xr vidcontrol 1 . .It Va WITHOUT_LIB32 On 64-bit platforms, set to not build 32-bit library set and a .Nm ld-elf32.so.1 runtime linker. .Pp This is a default setting on arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mipsn32, mips/mipselhf, mips/mipshf, powerpc/powerpc, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. .It Va WITHOUT_LIBCPLUSPLUS Set to avoid building libcxxrt and libc++. .It Va WITHOUT_LIBPTHREAD Set to not build the .Nm libpthread providing library, .Nm libthr . When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_LIBTHR .El .It Va WITH_LIBSOFT On armv6 only, set to enable soft float ABI compatibility libraries. This option is for transitioning to the new hard float ABI. .It Va WITHOUT_LIBTHR Set to not build the .Nm libthr (1:1 threading) library. .It Va WITHOUT_LLD Set to not build LLVM's lld linker. .Pp This is a default setting on sparc64/sparc64. .It Va WITH_LLD Set to build LLVM's lld linker. .Pp This is a default setting on amd64/amd64, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, riscv/riscv64 and riscv/riscv64sf. .It Va WITHOUT_LLDB Set to not build the LLDB debugger. .Pp This is a default setting on arm/armv6, arm/armv7, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. .It Va WITH_LLDB Set to build the LLDB debugger. .Pp This is a default setting on amd64/amd64, arm64/aarch64 and i386/i386. .It Va WITHOUT_LLD_BOOTSTRAP Set to not build the LLD linker during the bootstrap phase of the build. To be able to build the system, either Binutils or LLD bootstrap must be enabled unless an alternate linker is provided via XLD. .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc and sparc64/sparc64. .It Va WITH_LLD_BOOTSTRAP Set to build the LLD linker during the bootstrap phase of the build, and use it during buildworld and buildkernel. .Pp This is a default setting on amd64/amd64, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, powerpc/powerpc64, riscv/riscv64 and riscv/riscv64sf. .It Va WITHOUT_LLD_IS_LD Set to use GNU binutils ld as the system linker, instead of LLVM's LLD. .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc and sparc64/sparc64. .It Va WITH_LLD_IS_LD Set to use LLVM's LLD as the system linker, instead of GNU binutils ld. .Pp This is a default setting on amd64/amd64, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, powerpc/powerpc64, riscv/riscv64 and riscv/riscv64sf. .It Va WITHOUT_LLVM_COV Set to not build the .Xr llvm-cov 1 tool. .Pp This is a default setting on sparc64/sparc64. .It Va WITH_LLVM_COV Set to build the .Xr llvm-cov 1 tool. .Pp This is a default setting on amd64/amd64, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, riscv/riscv64 and riscv/riscv64sf. .It Va WITHOUT_LLVM_LIBUNWIND Set to use GCC's stack unwinder (instead of LLVM's libunwind). .Pp This is a default setting on sparc64/sparc64. .It Va WITH_LLVM_LIBUNWIND Set to use LLVM's libunwind stack unwinder (instead of GCC's unwinder). .Pp This is a default setting on amd64/amd64, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, riscv/riscv64 and riscv/riscv64sf. .It Va WITHOUT_LLVM_TARGET_AARCH64 Set to not build LLVM target support for AArch64. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .It Va WITHOUT_LLVM_TARGET_ALL Set to only build the required LLVM target support. This option is preferred to specific target support options. When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_LLVM_TARGET_AARCH64 (unless .Va WITH_LLVM_TARGET_AARCH64 is set explicitly) .It Va WITHOUT_LLVM_TARGET_ARM (unless .Va WITH_LLVM_TARGET_ARM is set explicitly) .It Va WITHOUT_LLVM_TARGET_MIPS (unless .Va WITH_LLVM_TARGET_MIPS is set explicitly) .It Va WITHOUT_LLVM_TARGET_POWERPC (unless .Va WITH_LLVM_TARGET_POWERPC is set explicitly) .It Va WITHOUT_LLVM_TARGET_RISCV (unless .Va WITH_LLVM_TARGET_RISCV is set explicitly) .It Va WITHOUT_LLVM_TARGET_SPARC (unless .Va WITH_LLVM_TARGET_SPARC is set explicitly) .El .It Va WITHOUT_LLVM_TARGET_ARM Set to not build LLVM target support for ARM. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .It Va WITH_LLVM_TARGET_BPF Set to build LLVM target support for BPF. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .It Va WITHOUT_LLVM_TARGET_MIPS Set to not build LLVM target support for MIPS. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .It Va WITHOUT_LLVM_TARGET_POWERPC Set to not build LLVM target support for PowerPC. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .It Va WITHOUT_LLVM_TARGET_RISCV Set to not build LLVM target support for RISC-V. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .It Va WITHOUT_LLVM_TARGET_SPARC Set to not build LLVM target support for SPARC. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .It Va WITHOUT_LLVM_TARGET_X86 Set to not build LLVM target support for X86. The .Va LLVM_TARGET_ALL option should be used rather than this in most cases. .It Va WITH_LOADER_EFI_SECUREBOOT Enable building .Xr loader 8 with support for verification based on certificates obtained from UEFI. .Pp .It Va WITH_LOADER_FIREWIRE Enable firewire support in /boot/loader on x86. This option is a nop on all other platforms. .It Va WITH_LOADER_FORCE_LE Set to force the powerpc boot loader to launch the kernel in little endian mode. .It Va WITHOUT_LOADER_GELI Disable inclusion of GELI crypto support in the boot chain binaries. .Pp This is a default setting on powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITH_LOADER_GELI Set to build GELI bootloader support. .Pp This is a default setting on amd64/amd64, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, riscv/riscv64 and riscv/riscv64sf. .It Va WITHOUT_LOADER_LUA Set to not build LUA bindings for the boot loader. .Pp This is a default setting on powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITH_LOADER_LUA Set to build LUA bindings for the boot loader. .Pp This is a default setting on amd64/amd64, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, riscv/riscv64 and riscv/riscv64sf. .It Va WITHOUT_LOADER_OFW Disable building of openfirmware bootloader components. .Pp This is a default setting on amd64/amd64, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, riscv/riscv64 and riscv/riscv64sf. .It Va WITH_LOADER_OFW Set to build openfirmware bootloader components. .Pp This is a default setting on powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITHOUT_LOADER_UBOOT Disable building of ubldr. .Pp This is a default setting on amd64/amd64, arm64/aarch64, i386/i386, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. .It Va WITH_LOADER_UBOOT Set to build ubldr. .Pp This is a default setting on arm/armv6, arm/armv7, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc and powerpc/powerpc64. .It Va WITH_LOADER_VERBOSE Set to build with extra verbose debugging in the loader. May explode already nearly too large loader over the limit. Use with care. .It Va WITH_LOADER_VERIEXEC Enable building .Xr loader 8 with support for verification similar to Verified Exec. .Pp Depends on .Va WITH_BEARSSL . When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITH_LOADER_EFI_SECUREBOOT (unless .Va WITHOUT_LOADER_EFI_SECUREBOOT is set explicitly) .El .It Va WITH_LOADER_VERIEXEC_PASS_MANIFEST Enable building .Xr loader 8 with support to pass a verified manifest to the kernel. The kernel has to be built with a module to parse the manifest. .Pp Depends on .Va WITH_LOADER_VERIEXEC . .It Va WITHOUT_LOADER_ZFS Set to not build ZFS file system boot loader support. .It Va WITHOUT_LOCALES Set to not build localization files; see .Xr locale 1 . .It Va WITHOUT_LOCATE Set to not build .Xr locate 1 and related programs. .It Va WITHOUT_LPR Set to not build .Xr lpr 1 and related programs. .It Va WITHOUT_LS_COLORS Set to build .Xr ls 1 without support for colors to distinguish file types. .It Va WITHOUT_LZMA_SUPPORT Set to build some programs without optional lzma compression support. .It Va WITHOUT_MAIL Set to not build any mail support (MUA or MTA). When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_DMAGENT .It .Va WITHOUT_MAILWRAPPER .It .Va WITHOUT_SENDMAIL .El .It Va WITHOUT_MAILWRAPPER Set to not build the .Xr mailwrapper 8 MTA selector. .It Va WITHOUT_MAKE Set to not install .Xr make 1 and related support files. .It Va WITHOUT_MAKE_CHECK_USE_SANDBOX Set to not execute .Dq Li "make check" in limited sandbox mode. This option should be paired with .Va WITH_INSTALL_AS_USER if executed as an unprivileged user. See .Xr tests 7 for more details. .It Va WITHOUT_MAN Set to not build manual pages. When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_MAN_UTILS (unless .Va WITH_MAN_UTILS is set explicitly) .El .It Va WITHOUT_MANCOMPRESS Set to not to install compressed man pages. Only the uncompressed versions will be installed. .It Va WITHOUT_MAN_UTILS Set to not build utilities for manual pages, .Xr apropos 1 , .Xr makewhatis 1 , .Xr man 1 , .Xr whatis 1 , .Xr manctl 8 , and related support files. .It Va WITH_META_MODE Create .Xr make 1 meta files when building, which can provide a reliable incremental build when using .Xr filemon 4 . The meta file is created in OBJDIR as .Pa target.meta . These meta files track the command that was executed, its output, and the current directory. The .Xr filemon 4 module is required unless .Va NO_FILEMON is defined. When the module is loaded, any files used by the commands executed are tracked as dependencies for the target in its meta file. The target is considered out-of-date and rebuilt if any of these conditions are true compared to the last build: .Bl -bullet -compact .It The command to execute changes. .It The current working directory changes. .It The target's meta file is missing. .It The target's meta file is missing filemon data when filemon is loaded and a previous run did not have it loaded. .It [requires .Xr filemon 4 ] Files read, executed or linked to are newer than the target. .It [requires .Xr filemon 4 ] Files read, written, executed or linked are missing. .El The meta files can also be useful for debugging. .Pp The build hides commands that are executed unless .Va NO_SILENT is defined. Errors cause .Xr make 1 to show some of its environment for further debugging. .Pp The build operates as it normally would otherwise. This option originally invoked a different build system but that was renamed to .Va WITH_DIRDEPS_BUILD . .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITHOUT_MLX5TOOL Set to not build .Xr mlx5tool 8 .Pp This is a default setting on arm/armv6, arm/armv7, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, riscv/riscv64 and riscv/riscv64sf. .It Va WITH_MLX5TOOL Set to build .Xr mlx5tool 8 .Pp This is a default setting on amd64/amd64, arm64/aarch64, i386/i386, powerpc/powerpc64 and sparc64/sparc64. .It Va WITHOUT_NDIS Set to not build programs and libraries related to NDIS emulation support. .It Va WITHOUT_NETCAT Set to not build .Xr nc 1 utility. .It Va WITHOUT_NETGRAPH Set to not build applications to support .Xr netgraph 4 . When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_ATM .It .Va WITHOUT_BLUETOOTH .El .Pp When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_NETGRAPH_SUPPORT (unless .Va WITH_NETGRAPH_SUPPORT is set explicitly) .El .It Va WITHOUT_NETGRAPH_SUPPORT Set to build libraries, programs, and kernel modules without netgraph support. .It Va WITHOUT_NIS Set to not build .Xr NIS 8 support and related programs. If set, you might need to adopt your .Xr nsswitch.conf 5 and remove .Sq nis entries. .It Va WITHOUT_NLS Set to not build NLS catalogs. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_NLS_CATALOGS .El .It Va WITHOUT_NLS_CATALOGS Set to not build NLS catalog support for .Xr csh 1 . .It Va WITHOUT_NS_CACHING Set to disable name caching in the .Pa nsswitch subsystem. The generic caching daemon, .Xr nscd 8 , will not be built either if this option is set. .It Va WITHOUT_NTP Set to not build .Xr ntpd 8 and related programs. .It Va WITHOUT_NVME Set to not build nvme related tools and kernel modules. .Pp This is a default setting on arm/armv6, arm/armv7, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. .It Va WITH_NVME Set to build nvme related tools and kernel modules. .Pp This is a default setting on amd64/amd64, arm64/aarch64, i386/i386 and powerpc/powerpc64. .It Va WITH_OFED Set to build the .Dq "OpenFabrics Enterprise Distribution" Infiniband software stack. .It Va WITH_OFED_EXTRA Set to build the non-essential components of the .Dq "OpenFabrics Enterprise Distribution" Infiniband software stack, mostly examples. .It Va WITH_OPENLDAP Enable building openldap support for kerberos. .It Va WITHOUT_OPENMP Set to not build LLVM's OpenMP runtime. .Pp This is a default setting on arm/armv6, arm/armv7, arm64/aarch64, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. .It Va WITH_OPENMP Set to build LLVM's OpenMP runtime. .Pp This is a default setting on amd64/amd64, i386/i386 and powerpc/powerpc64. .It Va WITHOUT_OPENSSH Set to not build OpenSSH. .It Va WITHOUT_OPENSSL Set to not build OpenSSL. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_DMAGENT .It .Va WITHOUT_KERBEROS .It .Va WITHOUT_KERBEROS_SUPPORT .It .Va WITHOUT_LDNS .It .Va WITHOUT_LDNS_UTILS .It .Va WITHOUT_OPENSSH .It .Va WITHOUT_UNBOUND .El .Pp When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_GSSAPI (unless .Va WITH_GSSAPI is set explicitly) .El .It Va WITHOUT_PAM Set to not build PAM library and modules. .Bf -symbolic This option is deprecated and does nothing. .Ef When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_PAM_SUPPORT (unless .Va WITH_PAM_SUPPORT is set explicitly) .El .It Va WITHOUT_PAM_SUPPORT Set to build some programs without PAM support, particularly .Xr ftpd 8 and .Xr ppp 8 . .It Va WITHOUT_PF Set to not build PF firewall package. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_AUTHPF .El .It Va WITH_PIE Build dynamically linked binaries as Position-Independent Executable (PIE). .It Va WITHOUT_PKGBOOTSTRAP Set to not build .Xr pkg 7 bootstrap tool. .It Va WITHOUT_PMC Set to not build .Xr pmccontrol 8 and related programs. .It Va WITHOUT_PORTSNAP Set to not build or install .Xr portsnap 8 and related files. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_FREEBSD_UPDATE .El .It Va WITHOUT_PPP Set to not build .Xr ppp 8 and related programs. .It Va WITHOUT_PROFILE Set to not build profiled libraries for use with .Xr gprof 8 . .Pp This is a default setting on mips/mips64el, mips/mips64, mips/mips64elhf and mips/mips64hf. .It Va WITH_PROFILE Set to build profiled libraries for use with .Xr gprof 8 . .Pp This is a default setting on amd64/amd64, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mipsn32, mips/mipselhf, mips/mipshf, powerpc/powerpc, powerpc/powerpc64, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. .It Va WITHOUT_QUOTAS Set to not build .Xr quota 1 and related programs. .It Va WITHOUT_RADIUS_SUPPORT Set to not build radius support into various applications, like .Xr pam_radius 8 and .Xr ppp 8 . .It Va WITH_RATELIMIT Set to build the system with rate limit support. .Pp This makes .Dv SO_MAX_PACING_RATE effective in .Xr getsockopt 2 , and .Ar txrlimit support in .Xr ifconfig 8 , by proxy. .It Va WITHOUT_RBOOTD Set to not build or install .Xr rbootd 8 . .It Va WITH_REPRODUCIBLE_BUILD Set to exclude build metadata (such as the build time, user, or host) from the kernel, boot loaders, and uname output, so that builds produce bit-for-bit identical output. .It Va WITHOUT_RESCUE Set to not build .Xr rescue 8 . .It Va WITH_RETPOLINE Set to build the base system with the retpoline speculative execution vulnerability mitigation for CVE-2017-5715. .It Va WITHOUT_ROUTED Set to not build .Xr routed 8 utility. .It Va WITH_RPCBIND_WARMSTART_SUPPORT Set to build .Xr rpcbind 8 with warmstart support. .It Va WITHOUT_SENDMAIL Set to not build .Xr sendmail 8 and related programs. .It Va WITHOUT_SERVICESDB Set to not install .Pa /var/db/services.db . .It Va WITHOUT_SETUID_LOGIN Set this to disable the installation of .Xr login 1 as a set-user-ID root program. .It Va WITHOUT_SHAREDOCS Set to not build the .Bx 4.4 legacy docs. .It Va WITHOUT_SHARED_TOOLCHAIN Set to build the toolchain binaries as statically linked executables. The set includes .Xr cc 1 , .Xr make 1 and necessary utilities like assembler, linker and library archive manager. .It Va WITH_SORT_THREADS Set to enable threads in .Xr sort 1 . .It Va WITHOUT_SOURCELESS Set to not build kernel modules that include sourceless code (either microcode or native code for host CPU). When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_SOURCELESS_HOST .It .Va WITHOUT_SOURCELESS_UCODE .El .It Va WITHOUT_SOURCELESS_HOST Set to not build kernel modules that include sourceless native code for host CPU. .It Va WITHOUT_SOURCELESS_UCODE Set to not build kernel modules that include sourceless microcode. .It Va WITHOUT_SSP Set to not build world with propolice stack smashing protection. .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf and mips/mips64hf. .It Va WITH_SSP Set to build world with propolice stack smashing protection. .Pp This is a default setting on amd64/amd64, arm/armv6, arm/armv7, arm64/aarch64, i386/i386, powerpc/powerpc, powerpc/powerpc64, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. .It Va WITH_STAGING Enable staging of files to a stage tree. This can be best thought of as auto-install to .Va DESTDIR with some extra meta data to ensure dependencies can be tracked. Depends on .Va WITH_DIRDEPS_BUILD . When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITH_STAGING_MAN (unless .Va WITHOUT_STAGING_MAN is set explicitly) .It Va WITH_STAGING_PROG (unless .Va WITHOUT_STAGING_PROG is set explicitly) .El .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITH_STAGING_MAN Enable staging of man pages to stage tree. .It Va WITH_STAGING_PROG Enable staging of PROGs to stage tree. .It Va WITH_STALE_STAGED Check staged files are not stale. .It Va WITHOUT_STATS Set to neither build nor install .Lb libstats and dependent binaries. .It Va WITH_SVN Set to install .Xr svnlite 1 as .Xr svn 1 . .It Va WITHOUT_SVNLITE Set to not build .Xr svnlite 1 and related programs. .It Va WITHOUT_SYMVER Set to disable symbol versioning when building shared libraries. .It Va WITHOUT_SYSCONS Set to not build .Xr syscons 4 support files such as keyboard maps, fonts, and screen output maps. .It Va WITH_SYSROOT Enable use of sysroot during build. Depends on .Va WITH_DIRDEPS_BUILD . .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITHOUT_SYSTEM_COMPILER Set to not opportunistically skip building a cross-compiler during the bootstrap phase of the build. Normally, if the currently installed compiler matches the planned bootstrap compiler type and revision, then it will not be built. This does not prevent a compiler from being built for installation though, only for building one for the build itself. The .Va WITHOUT_CLANG and .Va WITHOUT_GCC options control those. .It Va WITHOUT_SYSTEM_LINKER Set to not opportunistically skip building a cross-linker during the bootstrap phase of the build. Normally, if the currently installed linker matches the planned bootstrap linker type and revision, then it will not be built. This does not prevent a linker from being built for installation though, only for building one for the build itself. The .Va WITHOUT_LLD and .Va WITHOUT_BINUTILS options control those. .Pp This option is only relevant when .Va WITH_LLD_BOOTSTRAP is set. .It Va WITHOUT_TALK Set to not build or install .Xr talk 1 and .Xr talkd 8 . .It Va WITHOUT_TCP_WRAPPERS Set to not build or install .Xr tcpd 8 , and related utilities. .It Va WITHOUT_TCSH Set to not build and install .Pa /bin/csh (which is .Xr tcsh 1 ) . .It Va WITHOUT_TELNET Set to not build .Xr telnet 1 and related programs. .It Va WITHOUT_TESTS Set to not build nor install the .Fx Test Suite in .Pa /usr/tests/ . See .Xr tests 7 for more details. This also disables the build of all test-related dependencies, including ATF. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_DTRACE_TESTS .El .Pp When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_GOOGLETEST (unless .Va WITH_GOOGLETEST is set explicitly) .It Va WITHOUT_TESTS_SUPPORT (unless .Va WITH_TESTS_SUPPORT is set explicitly) .El .It Va WITHOUT_TESTS_SUPPORT Set to disables the build of all test-related dependencies, including ATF. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_GOOGLETEST .El .It Va WITHOUT_TEXTPROC Set to not build programs used for text processing. .It Va WITHOUT_TFTP Set to not build or install .Xr tftp 1 and .Xr tftpd 8 . .It Va WITHOUT_TOOLCHAIN Set to not install header or programs used for program development, compilers, debuggers etc. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_BINUTILS .It .Va WITHOUT_CLANG .It .Va WITHOUT_CLANG_EXTRAS .It .Va WITHOUT_CLANG_FULL .It .Va WITHOUT_GCC .It .Va WITHOUT_GDB .It .Va WITHOUT_INCLUDES .It .Va WITHOUT_LLD .It .Va WITHOUT_LLDB .It .Va WITHOUT_LLVM_COV .El .It Va WITHOUT_UNBOUND Set to not build .Xr unbound 8 and related programs. .It Va WITHOUT_UNIFIED_OBJDIR Set to use the historical object directory format for .Xr build 7 targets. For native-builds and builds done directly in sub-directories the format of .Pa ${MAKEOBJDIRPREFIX}/${.CURDIR} is used, while for cross-builds .Pa ${MAKEOBJDIRPREFIX}/${TARGET}.${TARGET_ARCH}/${.CURDIR} is used. .Pp This option is transitional and will be removed before the 12.0 release, at which time .va WITH_UNIFIED_OBJDIR will be enabled permanently. .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITHOUT_USB Set to not build USB-related programs and libraries. .It Va WITHOUT_USB_GADGET_EXAMPLES Set to not build USB gadget kernel modules. .It Va WITHOUT_UTMPX Set to not build user accounting tools such as .Xr last 1 , .Xr users 1 , .Xr who 1 , .Xr ac 8 , .Xr lastlogin 8 and .Xr utx 8 . .It Va WITH_VERIEXEC Enable building .Xr veriexec 8 which loads the contents of verified manifests into the kernel for use by .Xr mac_veriexec 4 .Pp Depends on .Va WITH_BEARSSL . .It Va WITHOUT_VI Set to not build and install vi, view, ex and related programs. .It Va WITHOUT_VT Set to not build .Xr vt 4 support files (fonts and keymaps). .It Va WITHOUT_WARNS Set this to not add warning flags to the compiler invocations. Useful as a temporary workaround when code enters the tree which triggers warnings in environments that differ from the original developer. .It Va WITHOUT_WIRELESS Set to not build programs used for 802.11 wireless networks; especially .Xr wpa_supplicant 8 and .Xr hostapd 8 . When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_WIRELESS_SUPPORT (unless .Va WITH_WIRELESS_SUPPORT is set explicitly) .El .It Va WITHOUT_WIRELESS_SUPPORT Set to build libraries, programs, and kernel modules without 802.11 wireless support. .It Va WITHOUT_WPA_SUPPLICANT_EAPOL Build .Xr wpa_supplicant 8 without support for the IEEE 802.1X protocol and without support for EAP-PEAP, EAP-TLS, EAP-LEAP, and EAP-TTLS protocols (usable only via 802.1X). .It Va WITHOUT_ZFS Set to not build ZFS file system kernel module, libraries, and user commands. .It Va WITHOUT_ZONEINFO Set to not build the timezone database. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_ZONEINFO_LEAPSECONDS_SUPPORT .It .Va WITHOUT_ZONEINFO_OLD_TIMEZONES_SUPPORT .El .It Va WITH_ZONEINFO_LEAPSECONDS_SUPPORT Set to build leapsecond information in to the timezone database. .It Va WITH_ZONEINFO_OLD_TIMEZONES_SUPPORT Set to build backward compatibility timezone aliases in to the timezone database. .El .Sh FILES .Bl -tag -compact -width Pa .It Pa /etc/src.conf .It Pa /etc/src-env.conf .It Pa /usr/share/mk/bsd.own.mk .El .Sh SEE ALSO .Xr make 1 , .Xr make.conf 5 , .Xr build 7 , .Xr ports 7 .Sh HISTORY The .Nm file appeared in .Fx 7.0 . .Sh AUTHORS This manual page was autogenerated by .An tools/build/options/makeman . Index: projects/clang1000-import/share/man/man7/hier.7 =================================================================== --- projects/clang1000-import/share/man/man7/hier.7 (revision 356919) +++ projects/clang1000-import/share/man/man7/hier.7 (revision 356920) @@ -1,918 +1,912 @@ .\" Copyright (c) 1990, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)hier.7 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd September 10, 2019 +.Dd January 20, 2020 .Dt HIER 7 .Os .Sh NAME .Nm hier .Nd layout of file systems .Sh DESCRIPTION A sketch of the file system hierarchy. .Bl -tag -width "/libexec/" .It Pa / root directory of the file system .It Pa /bin/ user utilities fundamental to both single-user and multi-user environments .It Pa /boot/ programs and configuration files used during operating system bootstrap .Pp .Bl -tag -width "defaults/" -compact .It Pa defaults/ default bootstrapping configuration files; see .Xr loader.conf 5 .It Pa dtb/ Compiled flattened device tree (FDT) files; see .Xr fdt 4 and .Xr dtc 1 .It Pa efi/ Mount point for EFI System Partition (ESP) on UEFI systems. .It Pa firmware/ loadable kernel modules containing binary firmware for hardware that needs firmware downloaded to it to function .It Pa kernel/ pure kernel executable (the operating system loaded into memory at boot time) and kernel modules .It Pa modules/ third-party loadable kernel modules; see .Xr kldstat 8 .It Pa overlays/ Compiled flattened device tree (FDT) overlays; see .Xr fdt 4 and .Xr dtc 1 .It Pa zfs/ .Xr zfs 8 zpool cache files .El .It Pa /cdrom/ default mount point for CD-ROM drives .It Pa /compat/ normally a link to .Pa /usr/compat . If not, then the .Pa /usr/compat comments apply .It Pa /dev/ device special files managed by .Xr devfs 5 .Pp .Bl -tag -width "defaults/" -compact .It Pa fd/ file descriptor files; see .Xr \&fd 4 .El .It Pa /etc/ system configuration files and scripts .Pp .Bl -tag -width "defaults/" -compact .It Pa defaults/ default system configuration files; see .Xr rc 8 .It Pa bluetooth/ bluetooth configuration files .It Pa localtime local timezone information; see .Xr ctime 3 .It Pa mail/ Sendmail control files .It Pa mtree/ mtree configuration files; see .Xr mtree 8 .It Pa pam.d/ configuration files for the Pluggable Authentication Modules (PAM) library .It Pa periodic/ scripts that are run daily, weekly, and monthly, via .Xr cron 8 ; see .Xr periodic 8 .It Pa rc.d/ system and daemon startup/control scripts; see .Xr rc 8 .It Pa security/ OpenBSM audit configuration files; see .Xr audit 8 .It Pa ppp/ ppp configuration files; see .Xr ppp 8 .It Pa ssh/ OpenSSH configuration files; see .Xr ssh 1 .It Pa ssl/ OpenSSL configuration files .El .It Pa /lib/ critical system libraries needed for binaries in .Pa /bin and .Pa /sbin .Pp .Bl -tag -width "defaults/" -compact .It Pa casper/ service-specific .Xr libcasper 3 Capsicum support libraries .It Pa geom/ class-specific libraries for the .Xr geom 8 utility .It Pa nvmecontrol/ vendor-specific libraries to extend the .Xr nvmecontrol 8 utility .El .It Pa /libexec/ critical system utilities needed for binaries in .Pa /bin and .Pa /sbin .It Pa /media/ contains subdirectories to be used as mount points for removable media such as CDs, USB drives, and floppy disks .It Pa /mnt/ empty directory commonly used by system administrators as a temporary mount point .It Pa /net/ automounted NFS shares; see .Xr auto_master 5 .It Pa /proc/ process file system; see .Xr procfs 5 .It Pa /rescue/ statically linked programs for emergency recovery; see .Xr rescue 8 .It Pa /root/ root's HOME directory .It Pa /sbin/ system programs and administration utilities fundamental to both single-user and multi-user environments .It Pa /tmp/ temporary files that are not guaranteed to persist across system reboots .It Pa /usr/ contains the majority of user utilities and applications .Pp .Bl -tag -width "defaults/" -compact .It Pa bin/ common utilities, programming tools, and applications .It Pa compat/ files needed to support binary compatibility with other operating systems, such as Linux .It Pa include/ standard C include files .Pp .Bl -tag -width "kerberos5/" -compact .It Pa arpa/ C include files for Internet service protocols .It Pa bsnmp/ C include files for the SNMP daemon .It Pa c++/ C++ include files .It Pa cam/ C include files for the Common Access Methods Layer .Bl -tag -width "kerberos5/" -compact .It Pa scsi/ SCSI device on top of CAM .El .It Pa dev/ C include files for programming various .Fx devices .Bl -tag -width "kerberos5/" -compact .It Pa ic/ various header files describing driver- and bus-independent hardware circuits .It Pa ofw/ Open Firmware support .It Pa pbio/ 8255 PPI cards; see .Xr pbio 4 .It Pa ppbus/ parallel port bus; see .Xr ppbus 4 .It Pa usb/ USB subsystem .It Pa wi/ .Xr wi 4 WaveLAN driver .El .It Pa fs/ .Bl -tag -width "kerberos5/" -compact .It Pa fdescfs/ per-process file descriptors file system .It Pa msdosfs/ MS-DOS file system .It Pa nfs/ C include files for NFS (Network File System) version 2, 3 and 4 .It Pa nullfs/ loopback file system .It Pa procfs/ process file system .It Pa smbfs/ SMB/CIFS file system .It Pa udf/ UDF file system .It Pa unionfs union file system .El .It Pa geom/ GEOM framework .Bl -tag -width "kerberos5/" -compact .It Pa concat/ CONCAT GEOM class .It Pa gate/ GATE GEOM class .It Pa mirror/ MIRROR GEOM class .It Pa nop/ NOP GEOM class .It Pa raid3/ RAID3 GEOM class .It Pa stripe/ STRIPE GEOM class .El .It Pa libmilter/ C include files for libmilter, the .Xr sendmail 8 mail filter API .It Pa machine/ machine-specific C include files .It Pa net/ miscellaneous network C include files .Bl -tag -width Fl -compact .It Pa altq/ C include files for alternate queueing .El .It Pa net80211/ C include files for 802.11 wireless networking; see .Xr net80211 4 .It Pa netinet/ C include files for Internet standard protocols; see .Xr inet 4 .It Pa netinet6/ C include files for Internet protocol version 6; see .Xr inet6 4 .It Pa netipsec/ kernel key-management service; see .Xr ipsec 4 .It Pa netsmb/ SMB/CIFS requester .It Pa nfs/ C include files for NFS (Network File System) version 2 and 3 (legacy) .It Pa openssl/ OpenSSL (Cryptography/SSL toolkit) headers .It Pa protocols/ C include files for Berkeley service protocols .It Pa rpc/ remote procedure calls; see .Xr rpc 3 .It Pa rpcsvc/ definition of RPC service structures; see .Xr rpc 3 .It Pa security/ PAM; see .Xr pam 8 .It Pa sys/ system C include files (kernel data structures) .\" .It Pa tcl/ .\" Tcl language; .\" see .\" .Xr Tcl n .\" .Bl -tag -width "kerberos5/" -compact .\" .It Pa generic/ .\" ??? .\" .It Pa unix/ .\" ??? .\" .El .It Pa ufs/ C include files for UFS (The U-word File System) .Bl -tag -width "kerberos5/" -compact .It Pa ffs/ Fast file system .It Pa ufs/ UFS file system .El .It Pa vm/ virtual memory; see .Xr vmstat 8 .El .Pp .It Pa lib/ shared and archive .Xr ar 1 Ns -type libraries .Pp .Bl -tag -width Fl -compact .It Pa aout/ a.out archive libraries .It Pa compat/ shared libraries for compatibility .Bl -tag -width Fl -compact .It Pa aout/ a.out backward compatibility libraries .El .It Pa debug/ standalone debug data for the kernel and base system libraries and binaries .It Pa dtrace/ DTrace library scripts .It Pa engines/ OpenSSL (Cryptography/SSL toolkit) dynamically loadable engines .El .Pp .It Pa libdata/ miscellaneous utility data files .Pp .Bl -tag -width Fl -compact .It Pa gcc/ .Xr gcc 1 configuration data .It Pa ldscripts/ linker scripts; see .Xr ld 1 .El .Pp .It Pa libexec/ system daemons & system utilities (executed by other programs) .Pp .Bl -tag -width Fl -compact .It Pa aout/ utilities to manipulate a.out executables .It Pa elf/ utilities to manipulate ELF executables .It Pa lpr/ utilities and filters for LP print system; see .Xr lpr 1 .It Pa sendmail/ the .Xr sendmail 8 binary; see .Xr mailwrapper 8 .It Pa sm.bin/ restricted shell for .Xr sendmail 8 ; see .Xr smrsh 8 .El .Pp .It Pa local/ local executables, libraries, etc. Also used as the default destination for the .Xr ports 7 framework. Within .Pa local/ , the general layout sketched out by .Nm for .Pa /usr should be used. Exceptions are the -.Pa man -directory -.Po directly under -.Pa local/ -rather than under -.Pa local/share/ Ns Pc , ports documentation .Po in .Pa share/doc// Ns Pc , and .Pa /usr/local/etc .Po mimics .Pa /etc Ns Pc . .It Pa obj/ architecture-specific target tree produced by building the .Pa /usr/src tree .It Pa ports/ .Xr ports 7 , the .Fx ports collection. .It Pa sbin/ system daemons & system utilities (executed by users) .It Pa share/ architecture-independent files .Pp .Bl -tag -width Fl -compact .It Pa calendar/ a variety of pre-fab calendar files; see .Xr calendar 1 .It Pa dict/ word lists; see .Xr look 1 .Bl -tag -width Fl -compact .It Pa freebsd .Fx Ns -specific terms, proper names, and jargon .It Pa web2 words from Webster's 2nd International .El .It Pa doc/ miscellaneous documentation; source for most of the printed .Bx manuals (available from the .Tn USENIX association) .Bl -tag -width Fl -compact .It Pa FAQ/ Frequently Asked Questions .It Pa IPv6/ implementation notes for IPv6 .It Pa es/ Spanish translations of documents in /usr/share/doc .It Pa handbook/ .Fx Handbook .It Pa ja/ Japanese translations of documents in /usr/share/doc .It Pa legal/ License files for vendor supplied firmware files .It Pa ncurses/ HTML documents pertaining to ncurses; see .Xr ncurses 3 .It Pa ntp/ HTML documents pertaining to the Network Time Protocol .It Pa ru/ Russian translations of documents in /usr/share/doc .It Pa tutorials/ .Fx tutorials .It Pa zh/ Chinese translations of documents in /usr/share/doc .El .It Pa examples/ various examples for users and programmers .It Pa firmware/ firmware images loaded by userland programs .It Pa games/ ASCII text files used by various games .It Pa keys/ known trusted and revoked keys. .Bl -tag -width Fl -compact .It Pa pkg/ fingerprints for .Xr pkg 7 and .Xr pkg 8 .El .It Pa locale/ localization files; see .Xr setlocale 3 .It Pa man/ manual pages .It Pa misc/ miscellaneous system-wide ASCII text files .Bl -tag -width Fl -compact .It Pa fonts/ ??? .It Pa termcap terminal characteristics database; see .Xr termcap 5 .El .It Pa mk/ templates for make; see .Xr make 1 .It Pa nls/ national language support files .It Pa security/ data files for security policies such as .Xr mac_lomac 4 .It Pa sendmail/ .Xr sendmail 8 configuration files .It Pa skel/ example .Pa .\& (dot) files for new accounts .It Pa snmp/ MIBs, example files and tree definitions for the SNMP daemon. .Bl -tag -width Fl -compact .It Pa defs/ tree definition files for use with .Xr gensnmptree 1 .It Pa mibs/ MIB files .El .It Pa syscons/ files used by syscons; see .Xr syscons 4 .Bl -tag -width Fl -compact .It Pa fonts/ console fonts; see .Xr vidcontrol 1 and .Xr vidfont 1 .It Pa keymaps/ console keyboard maps; see .Xr kbdcontrol 1 and .Xr kbdmap 1 .It Pa scrnmaps/ console screen maps .El .It Pa tabset/ tab description files for a variety of terminals; used in the termcap file; see .Xr termcap 5 .It Pa vi/ localization support and utilities for .Xr vi 1 .It Pa vt/ files used by vt; see .Xr vt 4 .Bl -tag -width Fl -compact .It Pa fonts/ console fonts; see .Xr vidcontrol 1 and .Xr vidfont 1 .It Pa keymaps/ console keyboard maps; see .Xr kbdcontrol 1 and .Xr kbdmap 1 .\" .It Pa scrnmaps/ .\" console screen maps .El .It Pa zoneinfo/ timezone configuration information; see .Xr tzfile 5 .El .Pp .It Pa src/ .Bx , third-party, and/or local source files .Pp .Bl -tag -width "kerberos5/" -compact .It Pa bin/ source code for files in /bin .It Pa cddl/ utilities covered by the Common Development and Distribution License .It Pa contrib/ source code for contributed software .It Pa crypto/ source code for contributed cryptography software .It Pa etc/ source code for files in .Pa /etc .It Pa gnu/ utilities covered by the GNU General Public License .It Pa include/ source code for files in .Pa /usr/include .It Pa kerberos5/ build infrastructure for Kerberos version 5 .It Pa lib/ source code for files in .Pa /lib and .Pa /usr/lib .It Pa libexec/ source code for files in .Pa /usr/libexec .It Pa release/ files required to produce a .Fx release .It Pa rescue/ source code for files in .Pa /rescue .It Pa sbin/ source code for files in .Pa /sbin .It Pa secure/ build directory for files in .Pa /usr/src/crypto .It Pa share/ source for files in .Pa /usr/share .It Pa stand/ boot loader source code .It Pa sys/ kernel source code .Bl -tag -width Fl -compact .It Pa amd64/ AMD64 architecture support .It Pa arm/ ARM architecture support .It Pa arm64/ ARMv8 architecture support .It Pa cam/ .Xr cam 4 and .Xr ctl 4 .It Pa cddl/ CDDL-licensed optional sources, including ZFS and DTrace .It Pa ddb/ .Xr ddb 4 .It Pa fs/ most filesystems .It Pa dev/ device drivers .It Pa geom/ .Xr geom 4 .It Pa i386/ i386 (32 bit) architecture support .It Pa kern/ main part of the kernel .It Pa mips/ MIPS architecture support .It Pa net80211/ .Xr net80211 4 .It Pa netgraph/ .Xr netgraph 4 .It Pa netinet/ .Xr inet 4 .It Pa netinet6/ .Xr inet6 4 .It Pa netipsec/ .Xr ipsec 4 .It Pa netpfil/ .Xr ipfw 4 and .Xr pf 4 .It Pa opencrypto/ .Xr crypto 7 .It Pa powerpc/ PowerPC/POWER architecture support .It Pa riscv/ RISC-V architecture support .It Pa security/ .Xr audit 4 and .Xr mac 4 .It Pa sparc64/ SPARC64 architecture support .It Pa sys/ kernel headers .It Pa ufs/ Unix File System .It Pa x86/ code shared by AMD64 and i386 architectures .El .It Pa targets/ support for experimental DIRDEPS_BUILD .It Pa tests/ source code for files in .Pa /usr/tests .It Pa tools/ tools used for maintenance and testing of .Fx .It Pa usr.bin/ source code for files in .Pa /usr/bin .It Pa usr.sbin/ source code for files in .Pa /usr/sbin .El .Pp .It Pa tests/ The .Fx test suite. See .Xr tests 7 for more details. .El .It Pa /var/ multi-purpose log, temporary, transient, and spool files .Pp .Bl -tag -width "defaults/" -compact .It Pa account/ system accounting files .Pp .Bl -tag -width Fl -compact .It Pa acct execution accounting file; see .Xr acct 5 .El .Pp .It Pa at/ timed command scheduling files; see .Xr \&at 1 .Pp .Bl -tag -width Fl -compact .It Pa jobs/ directory containing job files .It Pa spool/ directory containing output spool files .El .Pp .It Pa backups/ miscellaneous backup files .It Pa cache/ miscellaneous cached files .Pp .Bl -tag -width Fl -compact .It Pa pkg/ cached packages for .Xr pkg 8 .El .Pp .It Pa crash/ default directory to store kernel crash dumps; see .Xr crash 8 and .Xr savecore 8 .It Pa cron/ files used by cron; see .Xr cron 8 .Pp .Bl -tag -width Fl -compact .It Pa tabs/ crontab files; see .Xr crontab 5 .El .Pp .It Pa db/ miscellaneous automatically generated system-specific database files .It Pa empty/ empty directory for use by programs that need a specifically empty directory. Used for instance by .Xr sshd 8 for privilege separation. .It Pa games/ miscellaneous game status and score files .It Pa heimdal/ Kerberos server databases; see .Xr kdc 8 .It Pa log/ miscellaneous system log files .Pp .Bl -tag -width Fl -compact .It Pa utx.lastlogin last login log; see .Xr getutxent 3 .It Pa utx.log login/logout log; see .Xr getutxent 3 .El .Pp .It Pa mail/ user mailbox files .It Pa msgs/ system messages database; see .Xr msgs 1 .It Pa preserve/ temporary home of files preserved after an accidental death of an editor; see .Xr \&ex 1 .It Pa quotas/ file system quota information files .It Pa run/ system information files describing various info about system since it was booted .Pp .Bl -tag -width Fl -compact .It Pa ppp/ writable by the .Dq network group for command connection sockets; see .Xr ppp 8 .It Pa utx.active database of current users; see .Xr getutxent 3 .El .Pp .It Pa rwho/ rwho data files; see .Xr rwhod 8 , .Xr rwho 1 , and .Xr ruptime 1 .It Pa spool/ miscellaneous printer and mail system spooling directories .Pp .Bl -tag -width Fl -compact .It Pa clientmqueue/ undelivered submission mail queue; see .Xr sendmail 8 .It Pa ftp/ commonly ~ftp; the anonymous ftp root directory .It Pa mqueue/ undelivered mail queue; see .Xr sendmail 8 .It Pa output/ line printer spooling directories .El .Pp .It Pa tmp/ temporary files that are kept between system reboots .Pp .Bl -tag -width Fl -compact .It Pa vi.recover/ the directory where recovery files are stored .El .Pp .It Pa yp/ the NIS maps .El .El .Sh NOTES This manual page documents the default .Fx file system layout, but the actual hierarchy on a given system is defined at the system administrator's discretion. A well-maintained installation will include a customized version of this document. .Sh SEE ALSO .Xr apropos 1 , .Xr find 1 , .Xr finger 1 , .Xr grep 1 , .Xr ls 1 , .Xr whatis 1 , .Xr whereis 1 , .Xr which 1 , .Xr fd 4 , .Xr devfs 5 , .Xr fsck 8 .Sh HISTORY A .Nm manual page appeared in .At v7 . Index: projects/clang1000-import/share/misc/committers-doc.dot =================================================================== --- projects/clang1000-import/share/misc/committers-doc.dot (revision 356919) +++ projects/clang1000-import/share/misc/committers-doc.dot (revision 356920) @@ -1,195 +1,196 @@ # $FreeBSD$ # This file is meant to list all FreeBSD doc+www committers and describe the # mentor-mentee relationships between them. # The graphical output can be generated from this file with the following # command: # $ dot -T png -o file.png committers-doc.dot # # The dot binary is part of the graphics/graphviz port. digraph doc { # Node definitions follow this example: # # foo [label="Foo Bar\nfoo@FreeBSD.org\n????/??/??"] # # ????/??/?? is the date when the commit bit was obtained, usually the one you # can find looking at svn logs for the svnadmin/conf/access file. # Use YYYY/MM/DD format. # # For returned commit bits, the node definition will follow this example: # # foo [label="Foo Bar\nfoo@FreeBSD.org\n????/??/??\n????/??/??"] # # The first date is the same as for an active committer, the second date is # the date when the commit bit has been returned. Again, check svn logs. node [color=grey62, style=filled, bgcolor=black]; # Alumni go here. Try to keep things sorted. ache [label="Andrey Chernov\nache@FreeBSD.org\n1997/06/13\n2010/12/11"] bmah [label="Bruce A. Mah\nbmah@FreeBSD.org\n2000/08/22\n2009/09/13"] bvs [label="Vitaly Bogdanov\nbvs@FreeBSD.org\n2005/10/03\n2010/12/11"] ceri [label="Ceri Davies\nceri@FreeBSD.org\n2002/03/17\n2012/02/29"] den [label="Denis Peplin\nden@FreeBSD.org\n2003/09/13\n2009/07/09"] garys [label="Gary W. Swearingen\ngarys@FreeBSD.org\n2005/08/21\n2008/03/02"] jcamou [label="Jesus R. Camou\njcamou@FreeBSD.org\n2005/03/02\n2008/12/20"] jesusr [label="Jesus Rodriguez Cuesta\njesusr@FreeBSD.org\n1998/12/10\n2010/12/11"] jim [label="Jim Mock\njim@FreeBSD.org\n1999/08/11\n2003/12/15"] josef [label="Josef El-Rayes\njosef@FreeBSD.org\n2004/01/15\n2008/03/29"] marcel [label="Marcel Moolenaar\nmarcel@FreeBSD.org\n1999/07/03\n2012/04/25"] mheinen [label="Martin Heinen\nmheinen@FreeBSD.org\n2002/10/04\n2006/04/26"] murray [label="Murray Stokely\nmurray@FreeBSD.org\n2000/04/05\n2012/04/25"] nik [label="Nik Clayton\nnik@FreeBSD.org\n1998/02/26\n2008/12/20"] pgj [label="Gabor Pali\npgj@FreeBSD.org\n2008/04/21\n2010/12/01"] roam [label="Peter Pentchev\nroam@FreeBSD.org\n2003/02/14\n2012/02/29"] node [color=lightblue2, style=filled, bgcolor=black]; # Current doc committers go here. Try to keep things sorted. ale [label="Alex Dupre\nale@FreeBSD.org\n2003/12/22"] allanjude [label="Allan Jude\nallanjude@FreeBSD.org\n2014/05/17"] bcr [label="Benedict Reuschling\nbcr@FreeBSD.org\n2009/12/24"] bhd [label="Björn Heidotting\nbhd@FreeBSD.org\n2014/10/14"] blackend [label="Marc Fonvieille\nblackend@FreeBSD.org\n2002/06/16"] brd [label="Brad Davis\nbrd@FreeBSD.org\n2005/06/01"] brueffer [label="Christian Brueffer\nbrueffer@FreeBSD.org\n2003/01/13"] carlavilla [label="Sergio Carlavilla\ncarlavilla@FreeBSD.org\n2019/05/16"] chinsan [label="Chinsan Huang\nchinsan@FreeBSD.org\n2006/09/20"] crees [label="Chris Rees\ncrees@FreeBSD.org\n2013/05/27"] danger [label="Daniel Gerzo\ndanger@FreeBSD.org\n2006/08/20"] delphij [label="Xin Li\ndelphij@FreeBSD.org\n2004/09/14"] dexter [label="Michael Dexter\ndexter@FreeBSD.org\n2016/11/15"] dru [label="Dru Lavigne\ndru@FreeBSD.org\n2013/01/22"] eadler [label="Eitan Adler\neadler@FreeBSD.org\n2012/10/15"] ebrandi [label="Edson Brandi\nebrandi@FreeBSD.org\n2012/09/13"] gabor [label="Gabor Kovesdan\ngabor@FreeBSD.org\n2007/02/02"] ganbold [label="Ganbold Tsagaankhuu\nganbold@FreeBSD.org\n2008/02/26"] gavin [label="Gavin Atkinson\ngavin@FreeBSD.org\n2011/07/18"] gjb [label="Glen Barber\ngjb@FreeBSD.org\n2010/09/01"] hrs [label="Hiroki Sato\nhrs@FreeBSD.org\n2000/07/06"] issyl0 [label="Isabell Long\nissyl0@FreeBSD.org\n2012/04/25"] jgh [label="Jason Helfman\njgh@FreeBSD.org\n2014/01/20"] jkois [label="Johann Kois\njkois@FreeBSD.org\n2004/11/11"] joel [label="Joel Dahl\njoel@FreeBSD.org\n2005/04/05"] keramida [label="Giorgos Keramidas\nkeramida@FreeBSD.org\n2001/10/12"] linimon [label="Mark Linimon\nlinimon@FreeBSD.org\n2004/03/31"] loader [label="Fukang Chen\nloader@FreeBSD.org\n2007/07/30"] manolis [label="Manolis Kiagias\nmanolis@FreeBSD.org\n2008/05/24"] marck [label="Dmitry Morozovsky\nmarck@FreeBSD.org\n2004/08/10"] maxim [label="Maxim Konovalov\nmaxim@FreeBSD.org\n2002/02/07"] miwi [label="Martin Wilke\nmiwi@FreeBSD.org\n2007/10/26"] pav [label="Pav Lucistnik\npav@FreeBSD.org\n2005/08/12"] pluknet [label="Sergey Kandaurov\npluknet@FreeBSD.org\n2012/02/14"] remko [label="Remko Lodder\nremko@FreeBSD.org\n2004/10/16"] rene [label="Rene Ladan\nrene@FreeBSD.org\n2008/11/03"] ryusuke [label="Ryusuke Suzuki\nryusuke@FreeBSD.org\n2009/12/21"] sevan [label="Sevan Janiyan\nsevan@FreeBSD.org\n2016/09/16"] sg [label="Stephen Gregoratto\nsg@FreeBSD.org\n2019/09/10"] simon [label="Simon L. Nielsen\nsimon@FreeBSD.org\n2003/07/20"] skreuzer [label="Steven Kreuzer\nskreuzer@FreeBSD.org\n2014/01/15"] taras [label="Taras Korenko\ntaras@FreeBSD.org\n2010/06/25"] trhodes [label="Tom Rhodes\ntrhodes@FreeBSD.org\n2002/03/25"] wblock [label="Warren Block\nwblock@FreeBSD.org\n2011/09/12"] ygy [label="Guangyuan Yang\nygy@FreeBSD.org\n2017/09/18"] zeising [label="Niclas Zeising\nzeising@FreeBSD.org\n2012/07/03"] # Here are the mentor/mentee relationships. # Group together all the mentees for a particular mentor. # Keep the list sorted by mentor login. bcr -> gavin bcr -> wblock bcr -> eadler bcr -> dru bcr -> crees bcr -> jgh bcr -> allanjude bcr -> bhd bcr -> sevan bcr -> dexter bcr -> sg +bcr -> carlavilla blackend -> ale brueffer -> joel ceri -> brd ceri -> brueffer ceri -> linimon ceri -> roam ceri -> simon den -> marck delphij -> chinsan delphij -> loader eadler -> allanjude gabor -> pgj gabor -> manolis gabor -> taras gabor -> issyl0 gabor -> ebrandi gabor -> carlavilla gjb -> wblock gjb -> rene gjb -> dru gjb -> crees hrs -> ryusuke hrs -> dru hrs -> skreuzer jesusr -> jcamou jim -> trhodes jkois -> miwi jkois -> bcr jkois -> gavin jkois -> gjb jkois -> eadler joel -> zeising keramida -> blackend keramida -> danger keramida -> gabor keramida -> ganbold keramida -> garys keramida -> gjb keramida -> pav marck -> bvs marck -> pluknet marck -> taras maxim -> taras mheinen -> jkois murray -> ceri murray -> delphij nik -> bmah nik -> keramida remko -> jkois remko -> rene remko -> jgh simon -> josef simon -> remko trhodes -> danger trhodes -> jcamou wblock -> jgh wblock -> allanjude } Index: projects/clang1000-import/share/mk/src.opts.mk =================================================================== --- projects/clang1000-import/share/mk/src.opts.mk (revision 356919) +++ projects/clang1000-import/share/mk/src.opts.mk (revision 356920) @@ -1,600 +1,604 @@ # $FreeBSD$ # # Option file for FreeBSD /usr/src builds. # # Users define WITH_FOO and WITHOUT_FOO on the command line or in /etc/src.conf # and /etc/make.conf files. These translate in the build system to MK_FOO={yes,no} # with sensible (usually) defaults. # # Makefiles must include bsd.opts.mk after defining specific MK_FOO options that # are applicable for that Makefile (typically there are none, but sometimes there # are exceptions). Recursive makes usually add MK_FOO=no for options that they wish # to omit from that make. # # Makefiles must include bsd.mkopt.mk before they test the value of any MK_FOO # variable. # # Makefiles may also assume that this file is included by src.opts.mk should it # need variables defined there prior to the end of the Makefile where # bsd.{subdir,lib.bin}.mk is traditionally included. # # The old-style YES_FOO and NO_FOO are being phased out. No new instances of them # should be added. Old instances should be removed since they were just to # bridge the gap between FreeBSD 4 and FreeBSD 5. # # Makefiles should never test WITH_FOO or WITHOUT_FOO directly (although an # exception is made for _WITHOUT_SRCONF which turns off this mechanism # completely inside bsd.*.mk files). # .if !target(____) ____: .include # # Define MK_* variables (which are either "yes" or "no") for users # to set via WITH_*/WITHOUT_* in /etc/src.conf and override in the # make(1) environment. # These should be tested with `== "no"' or `!= "no"' in makefiles. # The NO_* variables should only be set by makefiles for variables # that haven't been converted over. # # These options are used by the src builds. Those listed in # __DEFAULT_YES_OPTIONS default to 'yes' and will build unless turned # off. __DEFAULT_NO_OPTIONS will default to 'no' and won't build # unless turned on. Any options listed in 'BROKEN_OPTIONS' will be # hard-wired to 'no'. "Broken" here means not working or # not-appropriate and/or not supported. It doesn't imply something is # wrong with the code. There's not a single good word for this, so # BROKEN was selected as the least imperfect one considered at the # time. Options are added to BROKEN_OPTIONS list on a per-arch basis. # At this time, there's no provision for mutually incompatible options. __DEFAULT_YES_OPTIONS = \ ACCT \ ACPI \ APM \ AT \ ATM \ AUDIT \ AUTHPF \ AUTOFS \ BHYVE \ BINUTILS \ - BINUTILS_BOOTSTRAP \ BLACKLIST \ BLUETOOTH \ BOOT \ BOOTPARAMD \ BOOTPD \ BSD_CPIO \ BSD_CRTBEGIN \ BSDINSTALL \ BSNMP \ BZIP2 \ CALENDAR \ CAPSICUM \ CAROOT \ CASPER \ CCD \ CDDL \ CPP \ CROSS_COMPILER \ CRYPT \ CUSE \ CXX \ CXGBETOOL \ DIALOG \ DICT \ DMAGENT \ DYNAMICROOT \ EE \ EFI \ ELFTOOLCHAIN_BOOTSTRAP \ EXAMPLES \ FDT \ FILE \ FINGER \ FLOPPY \ FMTREE \ FORTH \ FP_LIBC \ FREEBSD_UPDATE \ FTP \ GAMES \ GCOV \ GDB \ GNU_DIFF \ GNU_GREP \ GOOGLETEST \ GPIO \ HAST \ HTML \ HYPERV \ ICONV \ INET \ INET6 \ INETD \ IPFILTER \ IPFW \ ISCSI \ JAIL \ KDUMP \ KVM \ LDNS \ LDNS_UTILS \ LEGACY_CONSOLE \ LIBPTHREAD \ LIBTHR \ LLVM_COV \ LLVM_LIBUNWIND \ LLVM_TARGET_ALL \ LOADER_GELI \ LOADER_LUA \ LOADER_OFW \ LOADER_UBOOT \ LOCALES \ LOCATE \ LPR \ LS_COLORS \ LZMA_SUPPORT \ MAIL \ MAILWRAPPER \ MAKE \ MLX5TOOL \ NDIS \ NETCAT \ NETGRAPH \ NLS_CATALOGS \ NS_CACHING \ NTP \ NVME \ OFED \ OPENSSL \ PAM \ PF \ PKGBOOTSTRAP \ PMC \ PORTSNAP \ PPP \ QUOTAS \ RADIUS_SUPPORT \ RBOOTD \ RESCUE \ ROUTED \ SENDMAIL \ SERVICESDB \ SETUID_LOGIN \ SHARED_TOOLCHAIN \ SHAREDOCS \ SOURCELESS \ SOURCELESS_HOST \ SOURCELESS_UCODE \ STATS \ SVNLITE \ SYSCONS \ SYSTEM_COMPILER \ SYSTEM_LINKER \ TALK \ TCP_WRAPPERS \ TCSH \ TELNET \ TEXTPROC \ TFTP \ UNBOUND \ USB \ UTMPX \ VI \ VT \ WIRELESS \ WPA_SUPPLICANT_EAPOL \ ZFS \ LOADER_ZFS \ ZONEINFO __DEFAULT_NO_OPTIONS = \ AMD \ BEARSSL \ BSD_GREP \ CLANG_EXTRAS \ DTRACE_TESTS \ EXPERIMENTAL \ GCC \ GCC_BOOTSTRAP \ GNUCXX \ GNU_GREP_COMPAT \ GPL_DTC \ HESIOD \ HTTPD \ LIBSOFT \ LOADER_FIREWIRE \ LOADER_FORCE_LE \ LOADER_VERBOSE \ LOADER_VERIEXEC_PASS_MANIFEST \ OFED_EXTRA \ OPENLDAP \ REPRODUCIBLE_BUILD \ RPCBIND_WARMSTART_SUPPORT \ SORT_THREADS \ SVN \ ZONEINFO_LEAPSECONDS_SUPPORT \ ZONEINFO_OLD_TIMEZONES_SUPPORT \ # LEFT/RIGHT. Left options which default to "yes" unless their corresponding # RIGHT option is disabled. __DEFAULT_DEPENDENT_OPTIONS= \ CLANG_FULL/CLANG \ LOADER_VERIEXEC/BEARSSL \ LOADER_EFI_SECUREBOOT/LOADER_VERIEXEC \ VERIEXEC/BEARSSL \ # MK_*_SUPPORT options which default to "yes" unless their corresponding # MK_* variable is set to "no". # .for var in \ BLACKLIST \ BZIP2 \ INET \ INET6 \ KERBEROS \ KVM \ NETGRAPH \ PAM \ TESTS \ WIRELESS __DEFAULT_DEPENDENT_OPTIONS+= ${var}_SUPPORT/${var} .endfor # # Default behaviour of some options depends on the architecture. Unfortunately # this means that we have to test TARGET_ARCH (the buildworld case) as well # as MACHINE_ARCH (the non-buildworld case). Normally TARGET_ARCH is not # used at all in bsd.*.mk, but we have to make an exception here if we want # to allow defaults for some things like clang to vary by target architecture. # Additional, per-target behavior should be rarely added only after much # gnashing of teeth and grinding of gears. # .if defined(TARGET_ARCH) __T=${TARGET_ARCH} .else __T=${MACHINE_ARCH} .endif .if defined(TARGET) __TT=${TARGET} .else __TT=${MACHINE} .endif # All supported backends for LLVM_TARGET_XXX __LLVM_TARGETS= \ aarch64 \ arm \ mips \ powerpc \ riscv \ sparc \ x86 __LLVM_TARGET_FILT= C/(amd64|i386)/x86/:S/sparc64/sparc/:S/arm64/aarch64/:S/powerpc64/powerpc/ .for __llt in ${__LLVM_TARGETS} # Default enable the given TARGET's LLVM_TARGET support .if ${__TT:${__LLVM_TARGET_FILT}} == ${__llt} __DEFAULT_YES_OPTIONS+= LLVM_TARGET_${__llt:${__LLVM_TARGET_FILT}:tu} # aarch64 needs arm for -m32 support. .elif ${__TT} == "arm64" && ${__llt} == "arm" __DEFAULT_DEPENDENT_OPTIONS+= LLVM_TARGET_ARM/LLVM_TARGET_AARCH64 # Default the rest of the LLVM_TARGETs to the value of MK_LLVM_TARGET_ALL. .else __DEFAULT_DEPENDENT_OPTIONS+= LLVM_TARGET_${__llt:${__LLVM_TARGET_FILT}:tu}/LLVM_TARGET_ALL .endif .endfor __DEFAULT_NO_OPTIONS+=LLVM_TARGET_BPF .include # If the compiler is not C++11 capable, disable Clang. External toolchain will # be required. .if ${COMPILER_FEATURES:Mc++11} && (${__TT} != "mips" && ${__TT} != "sparc64") # Clang is enabled, and will be installed as the default /usr/bin/cc. __DEFAULT_YES_OPTIONS+=CLANG CLANG_BOOTSTRAP CLANG_IS_CC LLD .elif ${COMPILER_FEATURES:Mc++11} && ${__T} != "sparc64" # If an external compiler that supports C++11 is used as ${CC} and Clang # supports the target, then Clang is enabled but we still require an external # toolchain. # default /usr/bin/cc. __DEFAULT_YES_OPTIONS+=CLANG LLD __DEFAULT_NO_OPTIONS+=CLANG_BOOTSTRAP CLANG_IS_CC .else # Everything else disables Clang, and uses GCC instead. __DEFAULT_NO_OPTIONS+=CLANG CLANG_BOOTSTRAP CLANG_IS_CC LLD .endif # In-tree binutils/gcc are older versions without modern architecture support. .if ${__T} == "aarch64" || ${__T:Mriscv*} != "" BROKEN_OPTIONS+=BINUTILS BINUTILS_BOOTSTRAP GCC GCC_BOOTSTRAP GDB +.endif +.if ${__T} == "amd64" || ${__T} == "i386" || ${__T:Mpowerpc*} +__DEFAULT_YES_OPTIONS+=BINUTILS_BOOTSTRAP +.else +__DEFAULT_NO_OPTIONS+=BINUTILS_BOOTSTRAP .endif .if ${__T:Mriscv*} != "" BROKEN_OPTIONS+=OFED .endif .if ${__TT} != "mips" && ${__T} != "powerpc" && ${__T} != "powerpcspe" && \ ${__T} != "sparc64" __DEFAULT_YES_OPTIONS+=LLD_BOOTSTRAP LLD_IS_LD .else __DEFAULT_NO_OPTIONS+=LLD_BOOTSTRAP LLD_IS_LD .endif .if ${__T} == "aarch64" || ${__T} == "amd64" || ${__T} == "i386" __DEFAULT_YES_OPTIONS+=LLDB .else __DEFAULT_NO_OPTIONS+=LLDB .endif # GDB in base is generally less functional than GDB in ports. Ports GDB # sparc64 kernel support has not been tested. .if ${__T} == "sparc64" __DEFAULT_NO_OPTIONS+=GDB_LIBEXEC .else __DEFAULT_YES_OPTIONS+=GDB_LIBEXEC .endif # LIB32 is supported on amd64, mips64, and powerpc64 .if (${__T} == "amd64" || ${__T:Mmips64*} || ${__T} == "powerpc64") __DEFAULT_YES_OPTIONS+=LIB32 .else BROKEN_OPTIONS+=LIB32 .endif # Only doing soft float API stuff on armv6 and armv7 .if ${__T} != "armv6" && ${__T} != "armv7" BROKEN_OPTIONS+=LIBSOFT .endif .if ${__T:Mmips*} BROKEN_OPTIONS+=SSP .endif # EFI doesn't exist on mips, powerpc, sparc or riscv. .if ${__T:Mmips*} || ${__T:Mpowerpc*} || ${__T:Msparc64} || ${__T:Mriscv*} BROKEN_OPTIONS+=EFI .endif # OFW is only for powerpc and sparc64, exclude others .if ${__T:Mpowerpc*} == "" && ${__T:Msparc64} == "" BROKEN_OPTIONS+=LOADER_OFW .endif # UBOOT is only for arm, mips and powerpc, exclude others .if ${__T:Marm*} == "" && ${__T:Mmips*} == "" && ${__T:Mpowerpc*} == "" BROKEN_OPTIONS+=LOADER_UBOOT .endif # GELI and Lua in loader currently cause boot failures on sparc64 and powerpc. # Further debugging is required -- probably they are just broken on big # endian systems generically (they jump to null pointers or try to read # crazy high addresses, which is typical of endianness problems). .if ${__T} == "sparc64" || ${__T:Mpowerpc*} BROKEN_OPTIONS+=LOADER_GELI LOADER_LUA .endif .if ${__T:Mmips64*} # profiling won't work on MIPS64 because there is only assembly for o32 BROKEN_OPTIONS+=PROFILE .endif .if ${__T} != "aarch64" && ${__T} != "amd64" && ${__T} != "i386" && \ ${__T} != "powerpc64" && ${__T} != "sparc64" BROKEN_OPTIONS+=CXGBETOOL BROKEN_OPTIONS+=MLX5TOOL .endif # HyperV is currently x86-only .if ${__T} != "amd64" && ${__T} != "i386" BROKEN_OPTIONS+=HYPERV .endif # NVME is only aarch64, x86 and powerpc64 .if ${__T} != "aarch64" && ${__T} != "amd64" && ${__T} != "i386" && \ ${__T} != "powerpc64" BROKEN_OPTIONS+=NVME .endif .if ${__T:Msparc64} # Sparc64 need extra crt*.o files - PR 239851 BROKEN_OPTIONS+=BSD_CRTBEGIN # PR 233405 BROKEN_OPTIONS+=LLVM_LIBUNWIND .endif .if ${COMPILER_FEATURES:Mc++11} && \ (${__T} == "amd64" || ${__T} == "i386" || ${__T} == "powerpc64") __DEFAULT_YES_OPTIONS+=OPENMP .else __DEFAULT_NO_OPTIONS+=OPENMP .endif .include # # MK_* options that default to "yes" if the compiler is a C++11 compiler. # .for var in \ LIBCPLUSPLUS .if !defined(MK_${var}) .if ${COMPILER_FEATURES:Mc++11} .if defined(WITHOUT_${var}) MK_${var}:= no .else MK_${var}:= yes .endif .else .if defined(WITH_${var}) MK_${var}:= yes .else MK_${var}:= no .endif .endif .endif .endfor # # Force some options off if their dependencies are off. # Order is somewhat important. # .if !${COMPILER_FEATURES:Mc++11} MK_GOOGLETEST:= no MK_LLVM_LIBUNWIND:= no .endif .if ${MK_CAPSICUM} == "no" MK_CASPER:= no .endif .if ${MK_LIBPTHREAD} == "no" MK_LIBTHR:= no .endif .if ${MK_SOURCELESS} == "no" MK_SOURCELESS_HOST:= no MK_SOURCELESS_UCODE:= no .endif .if ${MK_CDDL} == "no" MK_ZFS:= no MK_LOADER_ZFS:= no MK_CTF:= no .endif .if ${MK_CRYPT} == "no" MK_OPENSSL:= no MK_OPENSSH:= no MK_KERBEROS:= no MK_KERBEROS_SUPPORT:= no .endif .if ${MK_CXX} == "no" MK_CLANG:= no MK_GNUCXX:= no MK_GOOGLETEST:= no MK_TESTS:= no .endif .if ${MK_DIALOG} == "no" MK_BSDINSTALL:= no .endif .if ${MK_FILE} == "no" MK_SVNLITE:= no .endif .if (${__TT} == "mips" || ${__TT} == "sparc64") && ${MK_GCC} == "no" MK_BINUTILS_BOOTSTRAP:= no .endif .if ${MK_MAIL} == "no" MK_MAILWRAPPER:= no MK_SENDMAIL:= no MK_DMAGENT:= no .endif .if ${MK_NETGRAPH} == "no" MK_ATM:= no MK_BLUETOOTH:= no .endif .if ${MK_NLS} == "no" MK_NLS_CATALOGS:= no .endif .if ${MK_OPENSSL} == "no" MK_DMAGENT:= no MK_OPENSSH:= no MK_KERBEROS:= no MK_KERBEROS_SUPPORT:= no MK_LDNS:= no .endif .if ${MK_LDNS} == "no" MK_LDNS_UTILS:= no MK_UNBOUND:= no .endif .if ${MK_PF} == "no" MK_AUTHPF:= no .endif .if ${MK_OFED} == "no" MK_OFED_EXTRA:= no .endif .if ${MK_PORTSNAP} == "no" # freebsd-update depends on phttpget from portsnap MK_FREEBSD_UPDATE:= no .endif .if ${MK_TESTS} == "no" MK_DTRACE_TESTS:= no .endif .if ${MK_TESTS_SUPPORT} == "no" MK_GOOGLETEST:= no .endif .if ${MK_ZONEINFO} == "no" MK_ZONEINFO_LEAPSECONDS_SUPPORT:= no MK_ZONEINFO_OLD_TIMEZONES_SUPPORT:= no .endif .if ${MK_CROSS_COMPILER} == "no" MK_BINUTILS_BOOTSTRAP:= no MK_CLANG_BOOTSTRAP:= no MK_ELFTOOLCHAIN_BOOTSTRAP:= no MK_GCC_BOOTSTRAP:= no MK_LLD_BOOTSTRAP:= no .endif .if ${MK_TOOLCHAIN} == "no" MK_BINUTILS:= no MK_CLANG:= no MK_GCC:= no MK_GDB:= no MK_INCLUDES:= no MK_LLD:= no MK_LLDB:= no .endif .if ${MK_CLANG} == "no" MK_CLANG_EXTRAS:= no MK_CLANG_FULL:= no MK_LLVM_COV:= no .endif .if ${MK_LOADER_VERIEXEC} == "no" MK_LOADER_VERIEXEC_PASS_MANIFEST := no .endif # # MK_* options whose default value depends on another option. # .for vv in \ GSSAPI/KERBEROS \ MAN_UTILS/MAN .if defined(WITH_${vv:H}) MK_${vv:H}:= yes .elif defined(WITHOUT_${vv:H}) MK_${vv:H}:= no .else MK_${vv:H}:= ${MK_${vv:T}} .endif .endfor # # Set defaults for the MK_*_SUPPORT variables. # .if !${COMPILER_FEATURES:Mc++11} MK_LLDB:= no .endif # gcc 4.8 and newer supports libc++, so suppress gnuc++ in that case. # while in theory we could build it with that, we don't want to do # that since it creates too much confusion for too little gain. # XXX: This is incomplete and needs X_COMPILER_TYPE/VERSION checks too # to prevent Makefile.inc1 from bootstrapping unneeded dependencies # and to support 'make delete-old' when supplying an external toolchain. .if ${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} >= 40800 MK_GNUCXX:=no MK_GCC:=no .endif .endif # !target(____) Index: projects/clang1000-import/stand/powerpc/uboot/start.S =================================================================== --- projects/clang1000-import/stand/powerpc/uboot/start.S (revision 356919) +++ projects/clang1000-import/stand/powerpc/uboot/start.S (revision 356920) @@ -1,94 +1,99 @@ /*- * Copyright (c) 2007 Semihalf, Rafal Jaworowski * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include /* * Entry point to the loader that U-Boot passes control to. */ .text .globl _start _start: /* Hint where to look for the API signature */ lis %r11, uboot_address@ha addi %r11, %r11, uboot_address@l stw %r1, 0(%r11) - /* Save U-Boot's r14 */ + /* Save U-Boot's r14 and r30 */ lis %r11, saved_regs@ha addi %r11, %r11, saved_regs@l stw %r14, 0(%r11) + stw %r30, 4(%r11) /* Disable interrupts */ mfmsr %r11 andi. %r11, %r11, ~0x8000@l mtmsr %r11 b main /* * syscall() */ ENTRY(syscall) - stwu %r1, -16(%r1) + stwu %r1, -32(%r1) mflr %r0 stw %r14, 8(%r1) - stw %r0, 20(%r1) - /* Restore U-Boot's r14 */ + stw %r30, 12(%r1) + stw %r0, 36(%r1) + /* Restore U-Boot's r14 and r30 */ lis %r11, saved_regs@ha addi %r11, %r11, saved_regs@l lwz %r14, 0(%r11) + lwz %r30, 4(%r11) /* Enable interrupts */ mfmsr %r11 ori %r11, %r11, 0x8000@l mtmsr %r11 /* Call into U-Boot */ lis %r11, syscall_ptr@ha addi %r11, %r11, syscall_ptr@l lwz %r11, 0(%r11) mtctr %r11 bctrl /* Disable interrupts */ mfmsr %r11 andi. %r11, %r11, ~0x8000@l mtmsr %r11 /* Epilogue */ lwz %r11, 0(%r1) lwz %r0, 4(%r11) mtlr %r0 lwz %r14, 8(%r1) + lwz %r30, 12(%r1) mr %r1, %r11 blr /* * Data section */ .data GLOBAL(syscall_ptr) .long 0 GLOBAL(saved_regs) .long 0 /* R14 */ + .long 0 /* R30 */ GLOBAL(uboot_address) .long 0 Index: projects/clang1000-import/sys/amd64/linux/Makefile =================================================================== --- projects/clang1000-import/sys/amd64/linux/Makefile (revision 356919) +++ projects/clang1000-import/sys/amd64/linux/Makefile (revision 356920) @@ -1,28 +1,7 @@ # Makefile for syscall tables # # $FreeBSD$ -# Don't use an OBJDIR -.OBJDIR: ${.CURDIR} +GENERATED_PREFIX= linux_ -.include - -MAKESYSCALLS= ../../tools/makesyscalls.lua -SRCS= syscalls.conf \ - syscalls.master -GENERATED= linux_proto.h \ - linux_syscall.h \ - linux_syscalls.c \ - linux_sysent.c \ - linux_systrace_args.c - -all: - @echo "make sysent only" - -# We .ORDER these explicitly so that we only run MAKESYSCALLS once, rather than -# potentially once for each ${GENERATED} file. -.ORDER: ${GENERATED} -sysent: ${GENERATED} - -${GENERATED}: ${MAKESYSCALLS} ${SRCS} - ${LUA} ${MAKESYSCALLS} syscalls.master syscalls.conf +.include "../../conf/sysent.mk" Index: projects/clang1000-import/sys/amd64/linux32/Makefile =================================================================== --- projects/clang1000-import/sys/amd64/linux32/Makefile (revision 356919) +++ projects/clang1000-import/sys/amd64/linux32/Makefile (revision 356920) @@ -1,28 +1,7 @@ # Makefile for syscall tables # # $FreeBSD$ -# Don't use an OBJDIR -.OBJDIR: ${.CURDIR} +GENERATED_PREFIX= linux32_ -.include - -MAKESYSCALLS= ../../tools/makesyscalls.lua -SRCS= syscalls.conf \ - syscalls.master -GENERATED= linux32_proto.h \ - linux32_syscall.h \ - linux32_syscalls.c \ - linux32_sysent.c \ - linux32_systrace_args.c - -all: - @echo "make sysent only" - -# We .ORDER these explicitly so that we only run MAKESYSCALLS once, rather than -# potentially once for each ${GENERATED} file. -.ORDER: ${GENERATED} -sysent: ${GENERATED} - -${GENERATED}: ${MAKESYSCALLS} ${SRCS} - ${LUA} ${MAKESYSCALLS} syscalls.master syscalls.conf +.include "../../conf/sysent.mk" Index: projects/clang1000-import/sys/arm/allwinner/a10/a10_padconf.c =================================================================== --- projects/clang1000-import/sys/arm/allwinner/a10/a10_padconf.c (revision 356919) +++ projects/clang1000-import/sys/arm/allwinner/a10/a10_padconf.c (revision 356920) @@ -1,230 +1,230 @@ /*- * Copyright (c) 2016 Emmanuel Vadot * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #ifdef SOC_ALLWINNER_A10 const static struct allwinner_pins a10_pins[] = { {"PA0", 0, 0, {"gpio_in", "gpio_out", "emac", "spi1", "uart2", NULL, NULL, NULL}}, {"PA1", 0, 1, {"gpio_in", "gpio_out", "emac", "spi1", "uart2", NULL, NULL, NULL}}, {"PA2", 0, 2, {"gpio_in", "gpio_out", "emac", "spi1", "uart2", NULL, NULL, NULL}}, {"PA3", 0, 3, {"gpio_in", "gpio_out", "emac", "spi1", "uart2", NULL, NULL, NULL}}, {"PA4", 0, 4, {"gpio_in", "gpio_out", "emac", "spi1", NULL, NULL, NULL, NULL}}, {"PA5", 0, 5, {"gpio_in", "gpio_out", "emac", "spi3", NULL, NULL, NULL, NULL}}, {"PA6", 0, 6, {"gpio_in", "gpio_out", "emac", "spi3", NULL, NULL, NULL, NULL}}, {"PA7", 0, 7, {"gpio_in", "gpio_out", "emac", "spi3", NULL, NULL, NULL, NULL}}, {"PA8", 0, 8, {"gpio_in", "gpio_out", "emac", "spi3", NULL, NULL, NULL, NULL}}, {"PA9", 0, 9, {"gpio_in", "gpio_out", "emac", "spi3", NULL, NULL, NULL, NULL}}, {"PA10", 0, 10, {"gpio_in", "gpio_out", "emac", NULL, "uart1", NULL, NULL, NULL}}, {"PA11", 0, 11, {"gpio_in", "gpio_out", "emac", NULL, "uart1", NULL, NULL, NULL}}, {"PA12", 0, 12, {"gpio_in", "gpio_out", "emac", "uart6", "uart1", NULL, NULL, NULL}}, {"PA13", 0, 13, {"gpio_in", "gpio_out", "emac", "uart6", "uart1", NULL, NULL, NULL}}, {"PA14", 0, 14, {"gpio_in", "gpio_out", "emac", "uart7", "uart1", NULL, NULL, NULL}}, {"PA15", 0, 15, {"gpio_in", "gpio_out", "emac", "uart7", "uart1", NULL, NULL, NULL}}, {"PA16", 0, 16, {"gpio_in", "gpio_out", NULL, "can", "uart1", NULL, NULL, NULL}}, {"PA17", 0, 17, {"gpio_in", "gpio_out", NULL, "can", "uart1", NULL, NULL, NULL}}, {"PB0", 1, 0, {"gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, NULL, NULL}}, {"PB1", 1, 1, {"gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, NULL, NULL}}, {"PB2", 1, 2, {"gpio_in", "gpio_out", "pwm", NULL, NULL, NULL, NULL, NULL}}, {"PB3", 1, 3, {"gpio_in", "gpio_out", "ir0", NULL, NULL, NULL, NULL, NULL}}, {"PB4", 1, 4, {"gpio_in", "gpio_out", "ir0", NULL, NULL, NULL, NULL, NULL}}, {"PB5", 1, 5, {"gpio_in", "gpio_out", "i2s", "ac97", NULL, NULL, NULL, NULL}}, {"PB6", 1, 6, {"gpio_in", "gpio_out", "i2s", "ac97", NULL, NULL, NULL, NULL}}, {"PB7", 1, 7, {"gpio_in", "gpio_out", "i2s", "ac97", NULL, NULL, NULL, NULL}}, {"PB8", 1, 8, {"gpio_in", "gpio_out", "i2s", "ac97", NULL, NULL, NULL, NULL}}, {"PB9", 1, 9, {"gpio_in", "gpio_out", "i2s", NULL, NULL, NULL, NULL, NULL}}, {"PB10", 1, 10, {"gpio_in", "gpio_out", "i2s", NULL, NULL, NULL, NULL, NULL}}, {"PB11", 1, 11, {"gpio_in", "gpio_out", "i2s", NULL, NULL, NULL, NULL, NULL}}, {"PB12", 1, 12, {"gpio_in", "gpio_out", "i2s", "ac97", NULL, NULL, NULL, NULL}}, {"PB13", 1, 13, {"gpio_in", "gpio_out", "spi2", NULL, NULL, NULL, NULL, NULL}}, {"PB14", 1, 14, {"gpio_in", "gpio_out", "spi2", "jtag", NULL, NULL, NULL, NULL}}, {"PB15", 1, 15, {"gpio_in", "gpio_out", "spi2", "jtag", NULL, NULL, NULL, NULL}}, {"PB16", 1, 16, {"gpio_in", "gpio_out", "spi2", "jtag", NULL, NULL, NULL, NULL}}, {"PB17", 1, 17, {"gpio_in", "gpio_out", "spi2", "jtag", NULL, NULL, NULL, NULL}}, {"PB18", 1, 18, {"gpio_in", "gpio_out", "i2c1", NULL, NULL, NULL, NULL, NULL}}, {"PB19", 1, 19, {"gpio_in", "gpio_out", "i2c1", NULL, NULL, NULL, NULL, NULL}}, {"PB20", 1, 20, {"gpio_in", "gpio_out", "i2c2", NULL, NULL, NULL, NULL, NULL}}, {"PB21", 1, 21, {"gpio_in", "gpio_out", "i2c2", NULL, NULL, NULL, NULL, NULL}}, {"PB22", 1, 22, {"gpio_in", "gpio_out", "uart0", "ir1", NULL, NULL, NULL, NULL}}, {"PB23", 1, 23, {"gpio_in", "gpio_out", "uart0", "ir1", NULL, NULL, NULL, NULL}}, {"PC0", 2, 0, {"gpio_in", "gpio_out", "nand", "spi0", NULL, NULL, NULL, NULL}}, {"PC1", 2, 1, {"gpio_in", "gpio_out", "nand", "spi0", NULL, NULL, NULL, NULL}}, {"PC2", 2, 2, {"gpio_in", "gpio_out", "nand", "spi0", NULL, NULL, NULL, NULL}}, {"PC3", 2, 3, {"gpio_in", "gpio_out", "nand", NULL, NULL, NULL, NULL, NULL}}, {"PC4", 2, 4, {"gpio_in", "gpio_out", "nand", NULL, NULL, NULL, NULL, NULL}}, {"PC5", 2, 5, {"gpio_in", "gpio_out", "nand", NULL, NULL, NULL, NULL, NULL}}, {"PC6", 2, 6, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC7", 2, 7, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC8", 2, 8, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC9", 2, 9, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC10", 2, 10, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC11", 2, 11, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC12", 2, 12, {"gpio_in", "gpio_out", "nand", NULL, NULL, NULL, NULL, NULL}}, {"PC13", 2, 13, {"gpio_in", "gpio_out", "nand", NULL, NULL, NULL, NULL, NULL}}, {"PC14", 2, 14, {"gpio_in", "gpio_out", "nand", NULL, NULL, NULL, NULL, NULL}}, {"PC15", 2, 15, {"gpio_in", "gpio_out", "nand", NULL, NULL, NULL, NULL, NULL}}, {"PC16", 2, 16, {"gpio_in", "gpio_out", "nand", NULL, NULL, NULL, NULL, NULL}}, {"PC17", 2, 17, {"gpio_in", "gpio_out", "nand", NULL, NULL, NULL, NULL, NULL}}, {"PC18", 2, 18, {"gpio_in", "gpio_out", "nand", NULL, NULL, NULL, NULL, NULL}}, {"PC19", 2, 19, {"gpio_in", "gpio_out", "nand", "spi2", NULL, NULL, NULL, NULL}}, {"PC20", 2, 20, {"gpio_in", "gpio_out", "nand", "spi2", NULL, NULL, NULL, NULL}}, {"PC21", 2, 21, {"gpio_in", "gpio_out", "nand", "spi2", NULL, NULL, NULL, NULL}}, {"PC22", 2, 22, {"gpio_in", "gpio_out", "nand", "spi2", NULL, NULL, NULL, NULL}}, {"PC23", 2, 23, {"gpio_in", "gpio_out", "spi0", NULL, NULL, NULL, NULL, NULL}}, {"PC24", 2, 24, {"gpio_in", "gpio_out", "nand", NULL, NULL, NULL, NULL, NULL}}, {"PD0", 3, 0, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD1", 3, 1, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD2", 3, 2, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD3", 3, 3, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD4", 3, 4, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD5", 3, 5, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD6", 3, 6, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD7", 3, 7, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD8", 3, 8, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD9", 3, 9, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD10", 3, 10, {"gpio_in", "gpio_out", "lcd0", "lvds1", NULL, NULL, NULL, NULL}}, {"PD11", 3, 11, {"gpio_in", "gpio_out", "lcd0", "lvds1", NULL, NULL, NULL, NULL}}, {"PD12", 3, 12, {"gpio_in", "gpio_out", "lcd0", "lvds1", NULL, NULL, NULL, NULL}}, {"PD13", 3, 13, {"gpio_in", "gpio_out", "lcd0", "lvds1", NULL, NULL, NULL, NULL}}, {"PD14", 3, 14, {"gpio_in", "gpio_out", "lcd0", "lvds1", NULL, NULL, NULL, NULL}}, {"PD15", 3, 15, {"gpio_in", "gpio_out", "lcd0", "lvds1", NULL, NULL, NULL, NULL}}, {"PD16", 3, 16, {"gpio_in", "gpio_out", "lcd0", "lvds1", NULL, NULL, NULL, NULL}}, {"PD17", 3, 17, {"gpio_in", "gpio_out", "lcd0", "lvds1", NULL, NULL, NULL, NULL}}, {"PD18", 3, 18, {"gpio_in", "gpio_out", "lcd0", "lvds1", NULL, NULL, NULL, NULL}}, {"PD19", 3, 19, {"gpio_in", "gpio_out", "lcd0", "lvds1", NULL, NULL, NULL, NULL}}, {"PD20", 3, 20, {"gpio_in", "gpio_out", "lcd0", "csi1", NULL, NULL, NULL, NULL}}, {"PD21", 3, 21, {"gpio_in", "gpio_out", "lcd0", "sim", NULL, NULL, NULL, NULL}}, {"PD22", 3, 22, {"gpio_in", "gpio_out", "lcd0", "sim", NULL, NULL, NULL, NULL}}, {"PD23", 3, 23, {"gpio_in", "gpio_out", "lcd0", "sim", NULL, NULL, NULL, NULL}}, {"PD24", 3, 24, {"gpio_in", "gpio_out", "lcd0", "sim", NULL, NULL, NULL, NULL}}, {"PD25", 3, 25, {"gpio_in", "gpio_out", "lcd0", "sim", NULL, NULL, NULL, NULL}}, {"PD26", 3, 26, {"gpio_in", "gpio_out", "lcd0", "sim", NULL, NULL, NULL, NULL}}, {"PD27", 3, 27, {"gpio_in", "gpio_out", "lcd0", "sim", NULL, NULL, NULL, NULL}}, {"PE0", 4, 0, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE1", 4, 1, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE2", 4, 2, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE3", 4, 3, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE4", 4, 4, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE5", 4, 5, {"gpio_in", "gpio_out", "ts0", "csi0", "sim", NULL, NULL, NULL}}, {"PE6", 4, 6, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE7", 4, 7, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE8", 4, 8, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE9", 4, 9, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE10", 4, 10, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE11", 4, 11, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PF0", 5, 0, {"gpio_in", "gpio_out", "mmc0", NULL, "jtag", NULL, NULL, NULL}}, {"PF1", 5, 1, {"gpio_in", "gpio_out", "mmc0", NULL, "jtag", NULL, NULL, NULL}}, {"PF2", 5, 2, {"gpio_in", "gpio_out", "mmc0", NULL, "uart0", NULL, NULL, NULL}}, {"PF3", 5, 3, {"gpio_in", "gpio_out", "mmc0", NULL, "jtag", NULL, NULL, NULL}}, {"PF4", 5, 4, {"gpio_in", "gpio_out", "mmc0", NULL, "jtag", NULL, NULL, NULL}}, {"PF5", 5, 5, {"gpio_in", "gpio_out", "mmc0", NULL, "jtag", NULL, NULL, NULL}}, {"PG0", 6, 0, {"gpio_in", "gpio_out", "ts1", "csi1", "mmc1", NULL, NULL, NULL}}, {"PG1", 6, 1, {"gpio_in", "gpio_out", "ts1", "csi1", "mmc1", NULL, NULL, NULL}}, {"PG2", 6, 2, {"gpio_in", "gpio_out", "ts1", "csi1", "mmc1", NULL, NULL, NULL}}, {"PG3", 6, 3, {"gpio_in", "gpio_out", "ts1", "csi1", "mmc1", NULL, NULL, NULL}}, {"PG4", 6, 4, {"gpio_in", "gpio_out", "ts1", "csi1", "mmc1", "csi0", NULL, NULL}}, {"PG5", 6, 5, {"gpio_in", "gpio_out", "ts1", "csi1", "mmc1", "csi0", NULL, NULL}}, {"PG6", 6, 6, {"gpio_in", "gpio_out", "ts1", "csi1", "uart3", "csi0", NULL, NULL}}, {"PG7", 6, 7, {"gpio_in", "gpio_out", "ts1", "csi1", "uart3", "csi0", NULL, NULL}}, {"PG8", 6, 8, {"gpio_in", "gpio_out", "ts1", "csi1", "uart3", "csi0", NULL, NULL}}, {"PG9", 6, 9, {"gpio_in", "gpio_out", "ts1", "csi1", "uart3", "csi0", NULL, NULL}}, {"PG10", 6, 10, {"gpio_in", "gpio_out", "ts1", "csi1", "uart4", "csi0", NULL, NULL}}, {"PG11", 6, 11, {"gpio_in", "gpio_out", "ts1", "csi1", "uart4", "csi0", NULL, NULL}}, - {"PH0", 7, 0, {"gpio_in", "gpio_out", "lcd1", "pata", "uart3", NULL, "eint0", "csi1"}, 6, 0}, - {"PH1", 7, 1, {"gpio_in", "gpio_out", "lcd1", "pata", "uart3", NULL, "eint1", "csi1"}, 6, 1}, - {"PH2", 7, 2, {"gpio_in", "gpio_out", "lcd1", "pata", "uart3", NULL, "eint2", "csi1"}, 6, 2}, - {"PH3", 7, 3, {"gpio_in", "gpio_out", "lcd1", "pata", "uart3", NULL, "eint3", "csi1"}, 6, 3}, - {"PH4", 7, 4, {"gpio_in", "gpio_out", "lcd1", "pata", "uart4", NULL, "eint4", "csi1"}, 6, 4}, - {"PH5", 7, 5, {"gpio_in", "gpio_out", "lcd1", "pata", "uart4", NULL, "eint5", "csi1"}, 6, 5}, - {"PH6", 7, 6, {"gpio_in", "gpio_out", "lcd1", "pata", "uart5", "ms", "eint6", "csi1"}, 6, 6}, - {"PH7", 7, 7, {"gpio_in", "gpio_out", "lcd1", "pata", "uart5", "ms", "eint7", "csi1"}, 6, 7}, - {"PH8", 7, 8, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "ms", "eint8", "csi1"}, 6, 8}, - {"PH9", 7, 9, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "ms", "eint9", "csi1"}, 6, 9}, - {"PH10", 7, 10, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "ms", "eint10", "csi1"}, 6, 10}, - {"PH11", 7, 11, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "ms", "eint11", "csi1"}, 6, 11}, - {"PH12", 7, 12, {"gpio_in", "gpio_out", "lcd1", "pata", "ps2", NULL, "eint12", "csi1"}, 6, 12}, - {"PH13", 7, 13, {"gpio_in", "gpio_out", "lcd1", "pata", "ps2", "sim", "eint13", "csi1"}, 6, 13}, - {"PH14", 7, 14, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "sim", "eint14", "csi1"}, 6, 14}, - {"PH15", 7, 15, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "sim", "eint15", "csi1"}, 6, 15}, - {"PH16", 7, 16, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", NULL, "eint16", "csi1"}, 6, 16}, - {"PH17", 7, 17, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "sim", "eint17", "csi1"}, 6, 17}, - {"PH18", 7, 18, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "sim", "eint18", "csi1"}, 6, 18}, - {"PH19", 7, 19, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "sim", "eint19", "csi1"}, 6, 19}, - {"PH20", 7, 20, {"gpio_in", "gpio_out", "lcd1", "pata", "can", NULL, "eint20", "csi1"}, 6, 20}, - {"PH21", 7, 21, {"gpio_in", "gpio_out", "lcd1", "pata", "can", NULL, "eint21", "csi1"}, 6, 21}, + {"PH0", 7, 0, {"gpio_in", "gpio_out", "lcd1", "pata", "uart3", NULL, "ph_eint0", "csi1"}, 6, 0, 0}, + {"PH1", 7, 1, {"gpio_in", "gpio_out", "lcd1", "pata", "uart3", NULL, "ph_eint1", "csi1"}, 6, 1, 0}, + {"PH2", 7, 2, {"gpio_in", "gpio_out", "lcd1", "pata", "uart3", NULL, "ph_eint2", "csi1"}, 6, 2, 0}, + {"PH3", 7, 3, {"gpio_in", "gpio_out", "lcd1", "pata", "uart3", NULL, "ph_eint3", "csi1"}, 6, 3, 0}, + {"PH4", 7, 4, {"gpio_in", "gpio_out", "lcd1", "pata", "uart4", NULL, "ph_eint4", "csi1"}, 6, 4, 0}, + {"PH5", 7, 5, {"gpio_in", "gpio_out", "lcd1", "pata", "uart4", NULL, "ph_eint5", "csi1"}, 6, 5, 0}, + {"PH6", 7, 6, {"gpio_in", "gpio_out", "lcd1", "pata", "uart5", "ms", "ph_eint6", "csi1"}, 6, 6, 0}, + {"PH7", 7, 7, {"gpio_in", "gpio_out", "lcd1", "pata", "uart5", "ms", "ph_eint7", "csi1"}, 6, 7, 0}, + {"PH8", 7, 8, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "ms", "ph_eint8", "csi1"}, 6, 8, 0}, + {"PH9", 7, 9, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "ms", "ph_eint9", "csi1"}, 6, 9, 0}, + {"PH10", 7, 10, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "ms", "ph_eint10", "csi1"}, 6, 10, 0}, + {"PH11", 7, 11, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "ms", "ph_eint11", "csi1"}, 6, 11, 0}, + {"PH12", 7, 12, {"gpio_in", "gpio_out", "lcd1", "pata", "ps2", NULL, "ph_eint12", "csi1"}, 6, 12, 0}, + {"PH13", 7, 13, {"gpio_in", "gpio_out", "lcd1", "pata", "ps2", "sim", "ph_eint13", "csi1"}, 6, 13, 0}, + {"PH14", 7, 14, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "sim", "ph_eint14", "csi1"}, 6, 14, 0}, + {"PH15", 7, 15, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "sim", "ph_eint15", "csi1"}, 6, 15, 0}, + {"PH16", 7, 16, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", NULL, "ph_eint16", "csi1"}, 6, 16, 0}, + {"PH17", 7, 17, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "sim", "ph_eint17", "csi1"}, 6, 17, 0}, + {"PH18", 7, 18, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "sim", "ph_eint18", "csi1"}, 6, 18, 0}, + {"PH19", 7, 19, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "sim", "ph_eint19", "csi1"}, 6, 19, 0}, + {"PH20", 7, 20, {"gpio_in", "gpio_out", "lcd1", "pata", "can", NULL, "ph_eint20", "csi1"}, 6, 20, 0}, + {"PH21", 7, 21, {"gpio_in", "gpio_out", "lcd1", "pata", "can", NULL, "ph_eint21", "csi1"}, 6, 21, 0}, {"PH22", 7, 22, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "mmc1", NULL, "csi1"}}, {"PH23", 7, 23, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "mmc1", NULL, "csi1"}}, {"PH24", 7, 24, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "mmc1", NULL, "csi1"}}, {"PH25", 7, 25, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "mmc1", NULL, "csi1"}}, {"PH26", 7, 26, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "mmc1", NULL, "csi1"}}, {"PH27", 7, 27, {"gpio_in", "gpio_out", "lcd1", "pata", "keypad", "mmc1", NULL, "csi1"}}, {"PI0", 8, 0, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}}, {"PI1", 8, 1, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}}, {"PI2", 8, 2, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}}, {"PI3", 8, 3, {"gpio_in", "gpio_out", "pwm", NULL, NULL, NULL, NULL, NULL}}, {"PI4", 8, 4, {"gpio_in", "gpio_out", "mmc3", NULL, NULL, NULL, NULL, NULL}}, {"PI5", 8, 5, {"gpio_in", "gpio_out", "mmc3", NULL, NULL, NULL, NULL, NULL}}, {"PI6", 8, 6, {"gpio_in", "gpio_out", "mmc3", NULL, NULL, NULL, NULL, NULL}}, {"PI7", 8, 7, {"gpio_in", "gpio_out", "mmc3", NULL, NULL, NULL, NULL, NULL}}, {"PI8", 8, 8, {"gpio_in", "gpio_out", "mmc3", NULL, NULL, NULL, NULL, NULL}}, {"PI9", 8, 9, {"gpio_in", "gpio_out", "mmc3", NULL, NULL, NULL, NULL, NULL}}, - {"PI10", 8, 10, {"gpio_in", "gpio_out", "spi0", "uart5", NULL, NULL, "eint22", NULL}, 6, 22}, - {"PI11", 8, 11, {"gpio_in", "gpio_out", "spi0", "uart5", NULL, NULL, "eint23", NULL}, 6, 23}, - {"PI12", 8, 12, {"gpio_in", "gpio_out", "spi0", "uart6", NULL, NULL, "eint24", NULL}, 6, 24}, - {"PI13", 8, 13, {"gpio_in", "gpio_out", "spi0", "uart6", NULL, NULL, "eint25", NULL}, 6, 25}, - {"PI14", 8, 14, {"gpio_in", "gpio_out", "spi0", "ps2", "timer4", NULL, "eint26", NULL}, 6, 26}, - {"PI15", 8, 15, {"gpio_in", "gpio_out", "spi1", "ps2", "timer5", NULL, "eint27", NULL}, 6, 27}, - {"PI16", 8, 16, {"gpio_in", "gpio_out", "spi1", "uart2", NULL, NULL, "eint28", NULL}, 6, 28}, - {"PI17", 8, 17, {"gpio_in", "gpio_out", "spi1", "uart2", NULL, NULL, "eint29", NULL}, 6, 29}, - {"PI18", 8, 18, {"gpio_in", "gpio_out", "spi1", "uart2", NULL, NULL, "eint30", NULL}, 6, 30}, - {"PI19", 8, 19, {"gpio_in", "gpio_out", "spi1", "uart2", NULL, NULL, "eint31", NULL}, 6, 31}, + {"PI10", 8, 10, {"gpio_in", "gpio_out", "spi0", "uart5", NULL, NULL, "pi_eint22", NULL}, 6, 22, 0}, + {"PI11", 8, 11, {"gpio_in", "gpio_out", "spi0", "uart5", NULL, NULL, "pi_eint23", NULL}, 6, 23, 0}, + {"PI12", 8, 12, {"gpio_in", "gpio_out", "spi0", "uart6", NULL, NULL, "pi_eint24", NULL}, 6, 24, 0}, + {"PI13", 8, 13, {"gpio_in", "gpio_out", "spi0", "uart6", NULL, NULL, "pi_eint25", NULL}, 6, 25, 0}, + {"PI14", 8, 14, {"gpio_in", "gpio_out", "spi0", "ps2", "timer4", NULL, "pi_eint26", NULL}, 6, 26, 0}, + {"PI15", 8, 15, {"gpio_in", "gpio_out", "spi1", "ps2", "timer5", NULL, "pi_eint27", NULL}, 6, 27, 0}, + {"PI16", 8, 16, {"gpio_in", "gpio_out", "spi1", "uart2", NULL, NULL, "pi_eint28", NULL}, 6, 28, 0}, + {"PI17", 8, 17, {"gpio_in", "gpio_out", "spi1", "uart2", NULL, NULL, "pi_eint29", NULL}, 6, 29, 0}, + {"PI18", 8, 18, {"gpio_in", "gpio_out", "spi1", "uart2", NULL, NULL, "pi_eint30", NULL}, 6, 30, 0}, + {"PI19", 8, 19, {"gpio_in", "gpio_out", "spi1", "uart2", NULL, NULL, "pi_eint31", NULL}, 6, 31, 0}, {"PI20", 8, 20, {"gpio_in", "gpio_out", "ps2", "uart7", "hdmi", NULL, NULL, NULL}}, {"PI21", 8, 21, {"gpio_in", "gpio_out", "ps2", "uart7", "hdmi", NULL, NULL, NULL}}, }; const struct allwinner_padconf a10_padconf = { .npins = sizeof(a10_pins) / sizeof(struct allwinner_pins), .pins = a10_pins, }; #endif /* SOC_ALLWINNER_A10 */ Index: projects/clang1000-import/sys/arm/allwinner/a13/a13_padconf.c =================================================================== --- projects/clang1000-import/sys/arm/allwinner/a13/a13_padconf.c (revision 356919) +++ projects/clang1000-import/sys/arm/allwinner/a13/a13_padconf.c (revision 356920) @@ -1,128 +1,128 @@ /*- * Copyright (c) 2016 Emmanuel Vadot * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #ifdef SOC_ALLWINNER_A13 const static struct allwinner_pins a13_pins[] = { {"PB0", 1, 0, {"gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, NULL, NULL}}, {"PB1", 1, 1, {"gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, NULL, NULL}}, - {"PB2", 1, 2, {"gpio_in", "gpio_out", "pwm", NULL, NULL, NULL, "eint16", NULL}, 6, 16}, - {"PB3", 1, 3, {"gpio_in", "gpio_out", "ir0", NULL, NULL, NULL, "eint17", NULL}, 6, 17}, - {"PB4", 1, 4, {"gpio_in", "gpio_out", "ir0", NULL, NULL, NULL, "eint18", NULL}, 6, 18}, - {"PB10", 1, 10, {"gpio_in", "gpio_out", "spi2", NULL, NULL, NULL, "eint24", NULL}, 6, 24}, + {"PB2", 1, 2, {"gpio_in", "gpio_out", "pwm", NULL, NULL, NULL, "pb_eint16", NULL}, 6, 16, 0}, + {"PB3", 1, 3, {"gpio_in", "gpio_out", "ir0", NULL, NULL, NULL, "pb_eint17", NULL}, 6, 17, 0}, + {"PB4", 1, 4, {"gpio_in", "gpio_out", "ir0", NULL, NULL, NULL, "pb_eint18", NULL}, 6, 18, 0}, + {"PB10", 1, 10, {"gpio_in", "gpio_out", "spi2", NULL, NULL, NULL, "pb_eint24", NULL}, 6, 24, 0}, {"PB15", 1, 15, {"gpio_in", "gpio_out", "i2c1", NULL, NULL, NULL, NULL, NULL}}, {"PB16", 1, 16, {"gpio_in", "gpio_out", "i2c1", NULL, NULL, NULL, NULL, NULL}}, {"PB17", 1, 17, {"gpio_in", "gpio_out", "i2c2", NULL, NULL, NULL, NULL, NULL}}, {"PB18", 1, 18, {"gpio_in", "gpio_out", "i2c2", NULL, NULL, NULL, NULL, NULL}}, {"PC0", 2, 0, {"gpio_in", "gpio_out", "nand", "spi0", NULL, NULL, NULL, NULL}}, {"PC1", 2, 1, {"gpio_in", "gpio_out", "nand", "spi0", NULL, NULL, NULL, NULL}}, {"PC2", 2, 2, {"gpio_in", "gpio_out", "nand", "spi0", NULL, NULL, NULL, NULL}}, {"PC3", 2, 3, {"gpio_in", "gpio_out", "nand", "spi0", NULL, NULL, NULL, NULL}}, {"PC4", 2, 4, {"gpio_in", "gpio_out", "nand", NULL, NULL, NULL, NULL, NULL}}, {"PC5", 2, 5, {"gpio_in", "gpio_out", "nand", NULL, NULL, NULL, NULL, NULL}}, {"PC6", 2, 6, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC7", 2, 7, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC8", 2, 8, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC9", 2, 9, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC10", 2, 10, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC11", 2, 11, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC12", 2, 12, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC13", 2, 13, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC14", 2, 14, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC15", 2, 15, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC19", 2, 19, {"gpio_in", "gpio_out", "nand", NULL, "uart3", NULL, NULL, NULL}}, {"PD2", 3, 2, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD3", 3, 3, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD4", 3, 4, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD5", 3, 5, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD6", 3, 6, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD7", 3, 7, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD10", 3, 10, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD11", 3, 11, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD12", 3, 12, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD13", 3, 13, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD14", 3, 14, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD15", 3, 15, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD18", 3, 18, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD19", 3, 19, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD20", 3, 20, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD21", 3, 21, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD22", 3, 22, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD23", 3, 23, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD24", 3, 24, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD25", 3, 25, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD26", 3, 26, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD27", 3, 27, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, - {"PE0", 4, 0, {"gpio_in", NULL, NULL, "csi0", "spi2", NULL, "eint14", NULL}, 6, 14}, - {"PE1", 4, 1, {"gpio_in", NULL, NULL, "csi0", "spi2", NULL, "eint15", NULL}, 6, 15}, + {"PE0", 4, 0, {"gpio_in", NULL, NULL, "csi0", "spi2", NULL, "pe_eint14", NULL}, 6, 14, 0}, + {"PE1", 4, 1, {"gpio_in", NULL, NULL, "csi0", "spi2", NULL, "pe_eint15", NULL}, 6, 15, 0}, {"PE2", 4, 2, {"gpio_in", NULL, NULL, "csi0", "spi2", NULL, NULL, NULL}}, {"PE3", 4, 3, {"gpio_in", "gpio_out", NULL, "csi0", "spi2", NULL, NULL, NULL}}, {"PE4", 4, 4, {"gpio_in", "gpio_out", NULL, "csi0", "mmc2", NULL, NULL, NULL}}, {"PE5", 4, 5, {"gpio_in", "gpio_out", NULL, "csi0", "mmc2", NULL, NULL, NULL}}, {"PE6", 4, 6, {"gpio_in", "gpio_out", NULL, "csi0", "mmc2", NULL, NULL, NULL}}, {"PE7", 4, 7, {"gpio_in", "gpio_out", NULL, "csi0", "mmc2", NULL, NULL, NULL}}, {"PE8", 4, 8, {"gpio_in", "gpio_out", NULL, "csi0", "mmc2", NULL, NULL, NULL}}, {"PE9", 4, 9, {"gpio_in", "gpio_out", NULL, "csi0", "mmc2", NULL, NULL, NULL}}, {"PE10", 4, 10, {"gpio_in", "gpio_out", NULL, "csi0", "uart1", NULL, NULL, NULL}}, {"PE11", 4, 11, {"gpio_in", "gpio_out", NULL, "csi0", "uart1", NULL, NULL, NULL}}, {"PF0", 5, 0, {"gpio_in", "gpio_out", "mmc0", NULL, NULL, NULL, NULL, NULL}}, {"PF1", 5, 1, {"gpio_in", "gpio_out", "mmc0", NULL, NULL, NULL, NULL, NULL}}, {"PF2", 5, 2, {"gpio_in", "gpio_out", "mmc0", NULL, NULL, NULL, NULL, NULL}}, {"PF3", 5, 3, {"gpio_in", "gpio_out", "mmc0", NULL, NULL, NULL, NULL, NULL}}, {"PF4", 5, 4, {"gpio_in", "gpio_out", "mmc0", NULL, NULL, NULL, NULL, NULL}}, {"PF5", 5, 5, {"gpio_in", "gpio_out", "mmc0", NULL, NULL, NULL, NULL, NULL}}, - {"PG0", 6, 0, {"gpio_in", NULL, NULL, NULL, NULL, NULL, "eint0", NULL}, 6, 0}, - {"PG1", 6, 1, {"gpio_in", NULL, NULL, NULL, NULL, NULL, "eint1", NULL}, 6, 1}, - {"PG2", 6, 2, {"gpio_in", NULL, NULL, NULL, NULL, NULL, "eint2", NULL}, 6, 2}, - {"PG3", 6, 3, {"gpio_in", "gpio_out", "mmc1", NULL, "uart1", NULL, "eint3", NULL}, 6, 3}, - {"PG4", 6, 4, {"gpio_in", "gpio_out", "mmc1", NULL, "uart1", NULL, "eint4", NULL}, 6, 4}, - {"PG9", 6, 9, {"gpio_in", "gpio_out", "spi1", "uart3", NULL, NULL, "eint9", NULL}, 6, 9}, - {"PG10", 6, 10, {"gpio_in", "gpio_out", "spi1", "uart3", NULL, NULL, "eint10", NULL}, 6, 10}, - {"PG11", 6, 11, {"gpio_in", "gpio_out", "spi1", "uart3", NULL, NULL, "eint11", NULL}, 6, 11}, - {"PG12", 6, 12, {"gpio_in", "gpio_out", "spi1", "uart3", NULL, NULL, "eint12", NULL}, 6, 12}, + {"PG0", 6, 0, {"gpio_in", NULL, NULL, NULL, NULL, NULL, "pg_eint0", NULL}, 6, 0, 6}, + {"PG1", 6, 1, {"gpio_in", NULL, NULL, NULL, NULL, NULL, "pg_eint1", NULL}, 6, 1, 6}, + {"PG2", 6, 2, {"gpio_in", NULL, NULL, NULL, NULL, NULL, "pg_eint2", NULL}, 6, 2, 6}, + {"PG3", 6, 3, {"gpio_in", "gpio_out", "mmc1", NULL, "uart1", NULL, "pg_eint3", NULL}, 6, 3, 0}, + {"PG4", 6, 4, {"gpio_in", "gpio_out", "mmc1", NULL, "uart1", NULL, "pg_eint4", NULL}, 6, 4, 0}, + {"PG9", 6, 9, {"gpio_in", "gpio_out", "spi1", "uart3", NULL, NULL, "pg_eint9", NULL}, 6, 9, 0}, + {"PG10", 6, 10, {"gpio_in", "gpio_out", "spi1", "uart3", NULL, NULL, "pg_eint10", NULL}, 6, 10, 0}, + {"PG11", 6, 11, {"gpio_in", "gpio_out", "spi1", "uart3", NULL, NULL, "pg_eint11", NULL}, 6, 11, 0}, + {"PG12", 6, 12, {"gpio_in", "gpio_out", "spi1", "uart3", NULL, NULL, "pg_eint12", NULL}, 6, 12, 0}, }; const struct allwinner_padconf a13_padconf = { .npins = sizeof(a13_pins) / sizeof(struct allwinner_pins), .pins = a13_pins, }; #endif /* SOC_ALLWINNER_A13 */ Index: projects/clang1000-import/sys/arm/allwinner/a20/a20_padconf.c =================================================================== --- projects/clang1000-import/sys/arm/allwinner/a20/a20_padconf.c (revision 356919) +++ projects/clang1000-import/sys/arm/allwinner/a20/a20_padconf.c (revision 356920) @@ -1,230 +1,230 @@ /*- * Copyright (c) 2016 Emmanuel Vadot * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #ifdef SOC_ALLWINNER_A20 const static struct allwinner_pins a20_pins[] = { {"PA0", 0, 0, {"gpio_in", "gpio_out", "emac", "spi1", "uart2", "gmac", NULL, NULL}}, {"PA1", 0, 1, {"gpio_in", "gpio_out", "emac", "spi1", "uart2", "gmac", NULL, NULL}}, {"PA2", 0, 2, {"gpio_in", "gpio_out", "emac", "spi1", "uart2", "gmac", NULL, NULL}}, {"PA3", 0, 3, {"gpio_in", "gpio_out", "emac", "spi1", "uart2", "gmac", NULL, NULL}}, {"PA4", 0, 4, {"gpio_in", "gpio_out", "emac", "spi1", NULL, "gmac", NULL, NULL}}, {"PA5", 0, 5, {"gpio_in", "gpio_out", "emac", "spi3", NULL, "gmac", NULL, NULL}}, {"PA6", 0, 6, {"gpio_in", "gpio_out", "emac", "spi3", NULL, "gmac", NULL, NULL}}, {"PA7", 0, 7, {"gpio_in", "gpio_out", "emac", "spi3", NULL, "gmac", NULL, NULL}}, {"PA8", 0, 8, {"gpio_in", "gpio_out", "emac", "spi3", NULL, "gmac", NULL, NULL}}, {"PA9", 0, 9, {"gpio_in", "gpio_out", "emac", "spi3", NULL, "gmac", "i2c1", NULL}}, {"PA10", 0, 10, {"gpio_in", "gpio_out", "emac", NULL, "uart1", "gmac", NULL, NULL}}, {"PA11", 0, 11, {"gpio_in", "gpio_out", "emac", NULL, "uart1", "gmac", NULL, NULL}}, {"PA12", 0, 12, {"gpio_in", "gpio_out", "emac", "uart6", "uart1", "gmac", NULL, NULL}}, {"PA13", 0, 13, {"gpio_in", "gpio_out", "emac", "uart6", "uart1", "gmac", NULL, NULL}}, {"PA14", 0, 14, {"gpio_in", "gpio_out", "emac", "uart7", "uart1", "gmac", "i2c1", NULL}}, {"PA15", 0, 15, {"gpio_in", "gpio_out", "emac", "uart7", "uart1", "gmac", "i2c1", NULL}}, {"PA16", 0, 16, {"gpio_in", "gpio_out", NULL, "can", "uart1", "gmac", "i2c1", NULL}}, {"PA17", 0, 17, {"gpio_in", "gpio_out", NULL, "can", "uart1", "gmac", "i2c1", NULL}}, {"PB0", 1, 0, {"gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, NULL, NULL}}, {"PB1", 1, 1, {"gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, NULL, NULL}}, {"PB2", 1, 2, {"gpio_in", "gpio_out", "pwm", NULL, NULL, NULL, NULL, NULL}}, {"PB3", 1, 3, {"gpio_in", "gpio_out", "ir0", NULL, "spdif", NULL, NULL, NULL}}, {"PB4", 1, 4, {"gpio_in", "gpio_out", "ir0", NULL, NULL, NULL, NULL, NULL}}, {"PB5", 1, 5, {"gpio_in", "gpio_out", "i2s0", "ac97", NULL, NULL, NULL, NULL}}, {"PB6", 1, 6, {"gpio_in", "gpio_out", "i2c0", "ac97", NULL, NULL, NULL, NULL}}, {"PB7", 1, 7, {"gpio_in", "gpio_out", "i2c0", "ac97", NULL, NULL, NULL, NULL}}, {"PB8", 1, 8, {"gpio_in", "gpio_out", "i2c0", "ac97", NULL, NULL, NULL, NULL}}, {"PB9", 1, 9, {"gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, NULL, NULL}}, {"PB10", 1, 10, {"gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, NULL, NULL}}, {"PB11", 1, 11, {"gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, NULL, NULL}}, {"PB12", 1, 12, {"gpio_in", "gpio_out", "i2c0", "ac97", "spdif", NULL, NULL, NULL}}, {"PB13", 1, 13, {"gpio_in", "gpio_out", "spi2", NULL, "spdif", NULL, NULL, NULL}}, {"PB14", 1, 14, {"gpio_in", "gpio_out", "spi2", "jtag", NULL, NULL, NULL, NULL}}, {"PB15", 1, 15, {"gpio_in", "gpio_out", "spi2", "jtag", NULL, NULL, NULL, NULL}}, {"PB16", 1, 16, {"gpio_in", "gpio_out", "spi2", "jtag", NULL, NULL, NULL, NULL}}, {"PB17", 1, 17, {"gpio_in", "gpio_out", "spi2", "jtag", NULL, NULL, NULL, NULL}}, {"PB18", 1, 18, {"gpio_in", "gpio_out", "i2c1", NULL, NULL, NULL, NULL, NULL}}, {"PB19", 1, 19, {"gpio_in", "gpio_out", "i2c1", NULL, NULL, NULL, NULL, NULL}}, {"PB20", 1, 20, {"gpio_in", "gpio_out", "i2c2", NULL, NULL, NULL, NULL, NULL}}, {"PB21", 1, 21, {"gpio_in", "gpio_out", "i2c2", NULL, NULL, NULL, NULL, NULL}}, {"PB22", 1, 22, {"gpio_in", "gpio_out", "uart0", "ir1", NULL, NULL, NULL, NULL}}, {"PB23", 1, 23, {"gpio_in", "gpio_out", "uart0", "ir1", NULL, NULL, NULL, NULL}}, {"PC0", 2, 0, {"gpio_in", "gpio_out", "nand0", "spi0", NULL, NULL, NULL, NULL}}, {"PC1", 2, 1, {"gpio_in", "gpio_out", "nand0", "spi0", NULL, NULL, NULL, NULL}}, {"PC2", 2, 2, {"gpio_in", "gpio_out", "nand0", "spi0", NULL, NULL, NULL, NULL}}, {"PC3", 2, 3, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC4", 2, 4, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC5", 2, 5, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC6", 2, 6, {"gpio_in", "gpio_out", "nand0", "mmc2", NULL, NULL, NULL, NULL}}, {"PC7", 2, 7, {"gpio_in", "gpio_out", "nand0", "mmc2", NULL, NULL, NULL, NULL}}, {"PC8", 2, 8, {"gpio_in", "gpio_out", "nand0", "mmc2", NULL, NULL, NULL, NULL}}, {"PC9", 2, 9, {"gpio_in", "gpio_out", "nand0", "mmc2", NULL, NULL, NULL, NULL}}, {"PC10", 2, 10, {"gpio_in", "gpio_out", "nand0", "mmc2", NULL, NULL, NULL, NULL}}, {"PC11", 2, 11, {"gpio_in", "gpio_out", "nand0", "mmc2", NULL, NULL, NULL, NULL}}, {"PC12", 2, 12, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC13", 2, 13, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC14", 2, 14, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC15", 2, 15, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC16", 2, 16, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC17", 2, 17, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC18", 2, 18, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, - {"PC19", 2, 19, {"gpio_in", "gpio_out", "nand0", "spi2", NULL, NULL, "eint12", NULL}, 6, 12}, - {"PC20", 2, 20, {"gpio_in", "gpio_out", "nand0", "spi2", NULL, NULL, "eint13", NULL}, 6, 13}, - {"PC21", 2, 21, {"gpio_in", "gpio_out", "nand0", "spi2", NULL, NULL, "eint14", NULL}, 6, 14}, - {"PC22", 2, 22, {"gpio_in", "gpio_out", "nand0", "spi2", NULL, NULL, "eint15", NULL}, 6, 15}, + {"PC19", 2, 19, {"gpio_in", "gpio_out", "nand0", "spi2", NULL, NULL, NULL, NULL}}, + {"PC20", 2, 20, {"gpio_in", "gpio_out", "nand0", "spi2", NULL, NULL, NULL, NULL}}, + {"PC21", 2, 21, {"gpio_in", "gpio_out", "nand0", "spi2", NULL, NULL, NULL, NULL}}, + {"PC22", 2, 22, {"gpio_in", "gpio_out", "nand0", "spi2", NULL, NULL, NULL, NULL}}, {"PC23", 2, 23, {"gpio_in", "gpio_out", NULL, "spi0", NULL, NULL, NULL, NULL}}, {"PC24", 2, 24, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PD0", 3, 0, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD1", 3, 1, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD2", 3, 2, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD3", 3, 3, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD4", 3, 4, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD5", 3, 5, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD6", 3, 6, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD7", 3, 7, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD8", 3, 8, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD9", 3, 9, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD10", 3, 10, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD11", 3, 11, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD12", 3, 12, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD13", 3, 13, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD14", 3, 14, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD15", 3, 15, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD16", 3, 16, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD17", 3, 17, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD18", 3, 18, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD19", 3, 19, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD20", 3, 20, {"gpio_in", "gpio_out", "lcd0", "csi1", NULL, NULL, NULL, NULL}}, {"PD21", 3, 21, {"gpio_in", "gpio_out", "lcd0", "sim", NULL, NULL, NULL, NULL}}, {"PD22", 3, 22, {"gpio_in", "gpio_out", "lcd0", "sim", NULL, NULL, NULL, NULL}}, {"PD23", 3, 23, {"gpio_in", "gpio_out", "lcd0", "sim", NULL, NULL, NULL, NULL}}, {"PD24", 3, 24, {"gpio_in", "gpio_out", "lcd0", "sim", NULL, NULL, NULL, NULL}}, {"PD25", 3, 25, {"gpio_in", "gpio_out", "lcd0", "sim", NULL, NULL, NULL, NULL}}, {"PD26", 3, 26, {"gpio_in", "gpio_out", "lcd0", "sim", NULL, NULL, NULL, NULL}}, {"PD27", 3, 27, {"gpio_in", "gpio_out", "lcd0", "sim", NULL, NULL, NULL, NULL}}, {"PE0", 4, 0, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE1", 4, 1, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE2", 4, 2, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE3", 4, 3, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE4", 4, 4, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE5", 4, 5, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE6", 4, 6, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE7", 4, 7, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE8", 4, 8, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE9", 4, 9, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE10", 4, 10, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PE11", 4, 11, {"gpio_in", "gpio_out", "ts0", "csi0", NULL, NULL, NULL, NULL}}, {"PF0", 5, 0, {"gpio_in", "gpio_out", "mmc0", NULL, "jtag", NULL, NULL, NULL}}, {"PF1", 5, 1, {"gpio_in", "gpio_out", "mmc0", NULL, "jtag", NULL, NULL, NULL}}, {"PF2", 5, 2, {"gpio_in", "gpio_out", "mmc0", NULL, "uart0", NULL, NULL, NULL}}, {"PF3", 5, 3, {"gpio_in", "gpio_out", "mmc0", NULL, "jtag", NULL, NULL, NULL}}, {"PF4", 5, 4, {"gpio_in", "gpio_out", "mmc0", NULL, "uart0", NULL, NULL, NULL}}, {"PF5", 5, 5, {"gpio_in", "gpio_out", "mmc0", NULL, "jtag", NULL, NULL, NULL}}, {"PG0", 6, 0, {"gpio_in", "gpio_out", "ts1", "csi1", "mmc1", NULL, NULL, NULL}}, {"PG1", 6, 1, {"gpio_in", "gpio_out", "ts1", "csi1", "mmc1", NULL, NULL, NULL}}, {"PG2", 6, 2, {"gpio_in", "gpio_out", "ts1", "csi1", "mmc1", NULL, NULL, NULL}}, {"PG3", 6, 3, {"gpio_in", "gpio_out", "ts1", "csi1", "mmc1", NULL, NULL, NULL}}, {"PG4", 6, 4, {"gpio_in", "gpio_out", "ts1", "csi1", "mmc1", "csi0", NULL, NULL}}, {"PG5", 6, 5, {"gpio_in", "gpio_out", "ts1", "csi1", "mmc1", "csi0", NULL, NULL}}, {"PG6", 6, 6, {"gpio_in", "gpio_out", "ts1", "csi1", "uart3", "csi0", NULL, NULL}}, {"PG7", 6, 7, {"gpio_in", "gpio_out", "ts1", "csi1", "uart3", "csi0", NULL, NULL}}, {"PG8", 6, 8, {"gpio_in", "gpio_out", "ts1", "csi1", "uart3", "csi0", NULL, NULL}}, {"PG9", 6, 9, {"gpio_in", "gpio_out", "ts1", "csi1", "uart3", "csi0", NULL, NULL}}, {"PG10", 6, 10, {"gpio_in", "gpio_out", "ts1", "csi1", "uart4", "csi0", NULL, NULL}}, {"PG11", 6, 11, {"gpio_in", "gpio_out", "ts1", "csi1", "uart4", "csi0", NULL, NULL}}, - {"PH0", 7, 0, {"gpio_in", "gpio_out", "lcd1", NULL, "uart3", NULL, "eint0", "csi1"}, 6, 0}, - {"PH1", 7, 1, {"gpio_in", "gpio_out", "lcd1", NULL, "uart3", NULL, "eint1", "csi1"}, 6, 1}, - {"PH2", 7, 2, {"gpio_in", "gpio_out", "lcd1", NULL, "uart3", NULL, "eint2", "csi1"}, 6, 2}, - {"PH3", 7, 3, {"gpio_in", "gpio_out", "lcd1", NULL, "uart3", NULL, "eint3", "csi1"}, 6, 3}, - {"PH4", 7, 4, {"gpio_in", "gpio_out", "lcd1", NULL, "uart4", NULL, "eint4", "csi1"}, 6, 4}, - {"PH5", 7, 5, {"gpio_in", "gpio_out", "lcd1", NULL, "uart4", NULL, "eint5", "csi1"}, 6, 5}, - {"PH6", 7, 6, {"gpio_in", "gpio_out", "lcd1", NULL, "uart5", "ms", "eint6", "csi1"}, 6, 6}, - {"PH7", 7, 7, {"gpio_in", "gpio_out", "lcd1", NULL, "uart5", "ms", "eint7", "csi1"}, 6, 7}, - {"PH8", 7, 8, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "ms", "eint8", "csi1"}, 6, 8}, - {"PH9", 7, 9, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "ms", "eint9", "csi1"}, 6, 9}, - {"PH10", 7, 10, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "ms", "eint10", "csi1"}, 6, 10}, - {"PH11", 7, 11, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "ms", "eint11", "csi1"}, 6, 11}, - {"PH12", 7, 12, {"gpio_in", "gpio_out", "lcd1", NULL, "ps2", NULL, "eint12", "csi1"}, 6, 12}, - {"PH13", 7, 13, {"gpio_in", "gpio_out", "lcd1", NULL, "ps2", "sim", "eint13", "csi1"}, 6, 13}, - {"PH14", 7, 14, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "sim", "eint14", "csi1"}, 6, 14}, - {"PH15", 7, 15, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "sim", "eint15", "csi1"}, 6, 15}, - {"PH16", 7, 16, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "sim", "eint16", "csi1"}, 6, 16}, - {"PH17", 7, 17, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "sim", "eint17", "csi1"}, 6, 17}, - {"PH18", 7, 18, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "sim", "eint18", "csi1"}, 6, 18}, - {"PH19", 7, 19, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "sim", "eint19", "csi1"}, 6, 19}, - {"PH20", 7, 20, {"gpio_in", "gpio_out", "lcd1", NULL, "can", NULL, "eint20", "csi1"}, 6, 20}, - {"PH21", 7, 21, {"gpio_in", "gpio_out", "lcd1", NULL, "can", NULL, "eint21", "csi1"}, 6, 21}, + {"PH0", 7, 0, {"gpio_in", "gpio_out", "lcd1", NULL, "uart3", NULL, "ph_eint0", "csi1"}, 6, 0, 0}, + {"PH1", 7, 1, {"gpio_in", "gpio_out", "lcd1", NULL, "uart3", NULL, "ph_eint1", "csi1"}, 6, 1, 0}, + {"PH2", 7, 2, {"gpio_in", "gpio_out", "lcd1", NULL, "uart3", NULL, "ph_eint2", "csi1"}, 6, 2, 0}, + {"PH3", 7, 3, {"gpio_in", "gpio_out", "lcd1", NULL, "uart3", NULL, "ph_eint3", "csi1"}, 6, 3, 0}, + {"PH4", 7, 4, {"gpio_in", "gpio_out", "lcd1", NULL, "uart4", NULL, "ph_eint4", "csi1"}, 6, 4, 0}, + {"PH5", 7, 5, {"gpio_in", "gpio_out", "lcd1", NULL, "uart4", NULL, "ph_eint5", "csi1"}, 6, 5, 0}, + {"PH6", 7, 6, {"gpio_in", "gpio_out", "lcd1", NULL, "uart5", "ms", "ph_eint6", "csi1"}, 6, 6, 0}, + {"PH7", 7, 7, {"gpio_in", "gpio_out", "lcd1", NULL, "uart5", "ms", "ph_eint7", "csi1"}, 6, 7, 0}, + {"PH8", 7, 8, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "ms", "ph_eint8", "csi1"}, 6, 8, 0}, + {"PH9", 7, 9, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "ms", "ph_eint9", "csi1"}, 6, 9, 0}, + {"PH10", 7, 10, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "ms", "ph_eint10", "csi1"}, 6, 10, 0}, + {"PH11", 7, 11, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "ms", "ph_eint11", "csi1"}, 6, 11, 0}, + {"PH12", 7, 12, {"gpio_in", "gpio_out", "lcd1", NULL, "ps2", NULL, "ph_eint12", "csi1"}, 6, 12, 0}, + {"PH13", 7, 13, {"gpio_in", "gpio_out", "lcd1", NULL, "ps2", "sim", "ph_eint13", "csi1"}, 6, 13, 0}, + {"PH14", 7, 14, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "sim", "ph_eint14", "csi1"}, 6, 14, 0}, + {"PH15", 7, 15, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "sim", "ph_eint15", "csi1"}, 6, 15, 0}, + {"PH16", 7, 16, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "sim", "ph_eint16", "csi1"}, 6, 16, 0}, + {"PH17", 7, 17, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "sim", "ph_eint17", "csi1"}, 6, 17, 0}, + {"PH18", 7, 18, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "sim", "ph_eint18", "csi1"}, 6, 18, 0}, + {"PH19", 7, 19, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "sim", "ph_eint19", "csi1"}, 6, 19, 0}, + {"PH20", 7, 20, {"gpio_in", "gpio_out", "lcd1", NULL, "can", NULL, "ph_eint20", "csi1"}, 6, 20, 0}, + {"PH21", 7, 21, {"gpio_in", "gpio_out", "lcd1", NULL, "can", NULL, "ph_eint21", "csi1"}, 6, 21, 0}, {"PH22", 7, 22, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "mmc1", NULL, "csi1"}}, {"PH23", 7, 23, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "mmc1", NULL, "csi1"}}, {"PH24", 7, 24, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "mmc1", NULL, "csi1"}}, {"PH25", 7, 25, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "mmc1", NULL, "csi1"}}, {"PH26", 7, 26, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "mmc1", NULL, "csi1"}}, {"PH27", 7, 27, {"gpio_in", "gpio_out", "lcd1", NULL, "keypad", "mmc1", NULL, "csi1"}}, {"PI0", 8, 0, {"gpio_in", "gpio_out", NULL, "i2c3", NULL, NULL, NULL, NULL}}, {"PI1", 8, 1, {"gpio_in", "gpio_out", NULL, "i2c3", NULL, NULL, NULL, NULL}}, {"PI2", 8, 2, {"gpio_in", "gpio_out", NULL, "i2c4", NULL, NULL, NULL, NULL}}, {"PI3", 8, 3, {"gpio_in", "gpio_out", "pwm", "i2c4", NULL, NULL, NULL, NULL}}, {"PI4", 8, 4, {"gpio_in", "gpio_out", "mmc3", NULL, NULL, NULL, NULL, NULL}}, {"PI5", 8, 5, {"gpio_in", "gpio_out", "mmc3", NULL, NULL, NULL, NULL, NULL}}, {"PI6", 8, 6, {"gpio_in", "gpio_out", "mmc3", NULL, NULL, NULL, NULL, NULL}}, {"PI7", 8, 7, {"gpio_in", "gpio_out", "mmc3", NULL, NULL, NULL, NULL, NULL}}, {"PI8", 8, 8, {"gpio_in", "gpio_out", "mmc3", NULL, NULL, NULL, NULL, NULL}}, {"PI9", 8, 9, {"gpio_in", "gpio_out", "mmc3", NULL, NULL, NULL, NULL, NULL}}, - {"PI10", 8, 10, {"gpio_in", "gpio_out", "spi0", "uart5", NULL, NULL, "eint", NULL}}, - {"PI11", 8, 11, {"gpio_in", "gpio_out", "spi0", "uart5", NULL, NULL, "eint", NULL}}, - {"PI12", 8, 12, {"gpio_in", "gpio_out", "spi0", "uart6", "clk_out_a", NULL, "eint", NULL}}, - {"PI13", 8, 13, {"gpio_in", "gpio_out", "spi0", "uart6", "clk_out_b", NULL, "eint", NULL}}, - {"PI14", 8, 14, {"gpio_in", "gpio_out", "spi0", "ps2", "timer4", NULL, "eint", NULL}}, - {"PI15", 8, 15, {"gpio_in", "gpio_out", "spi1", "ps2", "timer5", NULL, "eint", NULL}}, - {"PI16", 8, 16, {"gpio_in", "gpio_out", "spi1", "uart2", NULL, NULL, "eint", NULL}}, - {"PI17", 8, 17, {"gpio_in", "gpio_out", "spi1", "uart2", NULL, NULL, "eint", NULL}}, - {"PI18", 8, 18, {"gpio_in", "gpio_out", "spi1", "uart2", NULL, NULL, "eint", NULL}}, - {"PI19", 8, 19, {"gpio_in", "gpio_out", "spi1", "uart2", NULL, NULL, "eint", NULL}}, + {"PI10", 8, 10, {"gpio_in", "gpio_out", "spi0", "uart5", NULL, NULL, "pi_eint22", NULL}, 6, 22, 0}, + {"PI11", 8, 11, {"gpio_in", "gpio_out", "spi0", "uart5", NULL, NULL, "pi_eint23", NULL}, 6, 23, 0}, + {"PI12", 8, 12, {"gpio_in", "gpio_out", "spi0", "uart6", "clk_out_a", NULL, "pi_eint24", NULL}, 6, 24, 0}, + {"PI13", 8, 13, {"gpio_in", "gpio_out", "spi0", "uart6", "clk_out_b", NULL, "pi_eint25", NULL}, 6, 25, 0}, + {"PI14", 8, 14, {"gpio_in", "gpio_out", "spi0", "ps2", "timer4", NULL, "pi_eint26", NULL}, 6, 26, 0}, + {"PI15", 8, 15, {"gpio_in", "gpio_out", "spi1", "ps2", "timer5", NULL, "pi_eint27", NULL}, 6, 27, 0}, + {"PI16", 8, 16, {"gpio_in", "gpio_out", "spi1", "uart2", NULL, NULL, "pi_eint28", NULL}, 6, 28, 0}, + {"PI17", 8, 17, {"gpio_in", "gpio_out", "spi1", "uart2", NULL, NULL, "pi_eint29", NULL}, 6, 29, 0}, + {"PI18", 8, 18, {"gpio_in", "gpio_out", "spi1", "uart2", NULL, NULL, "pi_eint30", NULL}, 6, 30, 0}, + {"PI19", 8, 19, {"gpio_in", "gpio_out", "spi1", "uart2", NULL, NULL, "pi_eint31", NULL}, 6, 31, 0}, {"PI20", 8, 20, {"gpio_in", "gpio_out", "ps2", "uart7", "hdmi", NULL, NULL, NULL}}, {"PI21", 8, 21, {"gpio_in", "gpio_out", "ps2", "uart7", "hdmi", NULL, NULL, NULL}}, }; const struct allwinner_padconf a20_padconf = { .npins = sizeof(a20_pins) / sizeof(struct allwinner_pins), .pins = a20_pins, }; #endif /* SOC_ALLWINNER_A20 */ Index: projects/clang1000-import/sys/arm/allwinner/a31/a31_padconf.c =================================================================== --- projects/clang1000-import/sys/arm/allwinner/a31/a31_padconf.c (revision 356919) +++ projects/clang1000-import/sys/arm/allwinner/a31/a31_padconf.c (revision 356920) @@ -1,219 +1,219 @@ /*- * Copyright (c) 2016 Emmanuel Vadot * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #ifdef SOC_ALLWINNER_A31 const static struct allwinner_pins a31_pins[] = { - {"PA0", 0, 0, {"gpio_in", "gpio_out", "gmac", "lcd1", "uart1", NULL, "pa_eint0", NULL}, 6, 0}, - {"PA1", 0, 1, {"gpio_in", "gpio_out", "gmac", "lcd1", "uart1", NULL, "pa_eint1", NULL}, 6, 1}, - {"PA2", 0, 2, {"gpio_in", "gpio_out", "gmac", "lcd1", "uart1", NULL, "pa_eint2", NULL}, 6, 2}, - {"PA3", 0, 3, {"gpio_in", "gpio_out", "gmac", "lcd1", "uart1", NULL, "pa_eint3", NULL}, 6, 3}, - {"PA4", 0, 4, {"gpio_in", "gpio_out", "gmac", "lcd1", "uart1", NULL, "pa_eint4", NULL}, 6, 4}, - {"PA5", 0, 5, {"gpio_in", "gpio_out", "gmac", "lcd1", "uart1", NULL, "pa_eint5", NULL}, 6, 5}, - {"PA6", 0, 6, {"gpio_in", "gpio_out", "gmac", "lcd1", "uart1", NULL, "pa_eint6", NULL}, 6, 6}, - {"PA7", 0, 7, {"gpio_in", "gpio_out", "gmac", "lcd1", "uart1", NULL, "pa_eint7", NULL}, 6, 7}, - {"PA8", 0, 8, {"gpio_in", "gpio_out", "gmac", "lcd1", NULL, NULL, "pa_eint8", NULL}, 6, 8}, - {"PA9", 0, 9, {"gpio_in", "gpio_out", "gmac", "lcd1", NULL, "mmc2", "pa_eint9", NULL}, 6, 9}, - {"PA10", 0, 10, {"gpio_in", "gpio_out", "gmac", "lcd1", "mmc3", "mmc2", "pa_eint10", NULL}, 6, 10}, - {"PA11", 0, 11, {"gpio_in", "gpio_out", "gmac", "lcd1", "mmc3", "mmc2", "pa_eint11", NULL}, 6, 11}, - {"PA12", 0, 12, {"gpio_in", "gpio_out", "gmac", "lcd1", "mmc3", "mmc2", "pa_eint12", NULL}, 6, 12}, - {"PA13", 0, 13, {"gpio_in", "gpio_out", "gmac", "lcd1", "mmc3", "mmc2", "pa_eint13", NULL}, 6, 13}, - {"PA14", 0, 14, {"gpio_in", "gpio_out", "gmac", "lcd1", "mmc3", "mmc2", "pa_eint14", NULL}, 6, 14}, - {"PA15", 0, 15, {"gpio_in", "gpio_out", "gmac", "lcd1", "clk_out_a", NULL, "pa_eint15", NULL}, 6, 15}, - {"PA16", 0, 16, {"gpio_in", "gpio_out", "gmac", "lcd1", "dmic", NULL, "pa_eint16", NULL}, 6, 16}, - {"PA17", 0, 17, {"gpio_in", "gpio_out", "gmac", "lcd1", "dmic", NULL, "pa_eint17", NULL}, 6, 17}, - {"PA18", 0, 18, {"gpio_in", "gpio_out", "gmac", "lcd1", "clk_out_b", NULL, "pa_eint18", NULL}, 6, 18}, - {"PA19", 0, 19, {"gpio_in", "gpio_out", "gmac", "lcd1", "pwm3", NULL, "pa_eint19", NULL}, 6, 19}, - {"PA20", 0, 20, {"gpio_in", "gpio_out", "gmac", "lcd1", "pwm3", NULL, "pa_eint20", NULL}, 6, 20}, - {"PA21", 0, 21, {"gpio_in", "gpio_out", "gmac", "lcd1", "spi3", NULL, "pa_eint21", NULL}, 6, 21}, - {"PA22", 0, 22, {"gpio_in", "gpio_out", "gmac", "lcd1", "spi3", NULL, "pa_eint22", NULL}, 6, 22}, - {"PA23", 0, 23, {"gpio_in", "gpio_out", "gmac", "lcd1", "spi3", NULL, "pa_eint23", NULL}, 6, 23}, - {"PA24", 0, 24, {"gpio_in", "gpio_out", "gmac", "lcd1", "spi3", NULL, "pa_eint24", NULL}, 6, 24}, - {"PA25", 0, 25, {"gpio_in", "gpio_out", "gmac", "lcd1", "spi3", NULL, "pa_eint25", NULL}, 6, 25}, - {"PA26", 0, 26, {"gpio_in", "gpio_out", "gmac", "lcd1", "clk_out_c", NULL, "pa_eint26", NULL}, 6, 26}, - {"PA27", 0, 27, {"gpio_in", "gpio_out", "gmac", "lcd1", NULL, NULL, "pa_eint27", NULL}, 6, 27}, + {"PA0", 0, 0, {"gpio_in", "gpio_out", "gmac", "lcd1", "uart1", NULL, "pa_eint0", NULL}, 6, 0, 0}, + {"PA1", 0, 1, {"gpio_in", "gpio_out", "gmac", "lcd1", "uart1", NULL, "pa_eint1", NULL}, 6, 1, 0}, + {"PA2", 0, 2, {"gpio_in", "gpio_out", "gmac", "lcd1", "uart1", NULL, "pa_eint2", NULL}, 6, 2, 0}, + {"PA3", 0, 3, {"gpio_in", "gpio_out", "gmac", "lcd1", "uart1", NULL, "pa_eint3", NULL}, 6, 3, 0}, + {"PA4", 0, 4, {"gpio_in", "gpio_out", "gmac", "lcd1", "uart1", NULL, "pa_eint4", NULL}, 6, 4, 0}, + {"PA5", 0, 5, {"gpio_in", "gpio_out", "gmac", "lcd1", "uart1", NULL, "pa_eint5", NULL}, 6, 5, 0}, + {"PA6", 0, 6, {"gpio_in", "gpio_out", "gmac", "lcd1", "uart1", NULL, "pa_eint6", NULL}, 6, 6, 0}, + {"PA7", 0, 7, {"gpio_in", "gpio_out", "gmac", "lcd1", "uart1", NULL, "pa_eint7", NULL}, 6, 7, 0}, + {"PA8", 0, 8, {"gpio_in", "gpio_out", "gmac", "lcd1", NULL, NULL, "pa_eint8", NULL}, 6, 8, 0}, + {"PA9", 0, 9, {"gpio_in", "gpio_out", "gmac", "lcd1", NULL, "mmc2", "pa_eint9", NULL}, 6, 9, 0}, + {"PA10", 0, 10, {"gpio_in", "gpio_out", "gmac", "lcd1", "mmc3", "mmc2", "pa_eint10", NULL}, 6, 10, 0}, + {"PA11", 0, 11, {"gpio_in", "gpio_out", "gmac", "lcd1", "mmc3", "mmc2", "pa_eint11", NULL}, 6, 11, 0}, + {"PA12", 0, 12, {"gpio_in", "gpio_out", "gmac", "lcd1", "mmc3", "mmc2", "pa_eint12", NULL}, 6, 12, 0}, + {"PA13", 0, 13, {"gpio_in", "gpio_out", "gmac", "lcd1", "mmc3", "mmc2", "pa_eint13", NULL}, 6, 13, 0}, + {"PA14", 0, 14, {"gpio_in", "gpio_out", "gmac", "lcd1", "mmc3", "mmc2", "pa_eint14", NULL}, 6, 14, 0}, + {"PA15", 0, 15, {"gpio_in", "gpio_out", "gmac", "lcd1", "clk_out_a", NULL, "pa_eint15", NULL}, 6, 15, 0}, + {"PA16", 0, 16, {"gpio_in", "gpio_out", "gmac", "lcd1", "dmic", NULL, "pa_eint16", NULL}, 6, 16, 0}, + {"PA17", 0, 17, {"gpio_in", "gpio_out", "gmac", "lcd1", "dmic", NULL, "pa_eint17", NULL}, 6, 17, 0}, + {"PA18", 0, 18, {"gpio_in", "gpio_out", "gmac", "lcd1", "clk_out_b", NULL, "pa_eint18", NULL}, 6, 18, 0}, + {"PA19", 0, 19, {"gpio_in", "gpio_out", "gmac", "lcd1", "pwm3", NULL, "pa_eint19", NULL}, 6, 19, 0}, + {"PA20", 0, 20, {"gpio_in", "gpio_out", "gmac", "lcd1", "pwm3", NULL, "pa_eint20", NULL}, 6, 20, 0}, + {"PA21", 0, 21, {"gpio_in", "gpio_out", "gmac", "lcd1", "spi3", NULL, "pa_eint21", NULL}, 6, 21, 0}, + {"PA22", 0, 22, {"gpio_in", "gpio_out", "gmac", "lcd1", "spi3", NULL, "pa_eint22", NULL}, 6, 22, 0}, + {"PA23", 0, 23, {"gpio_in", "gpio_out", "gmac", "lcd1", "spi3", NULL, "pa_eint23", NULL}, 6, 23, 0}, + {"PA24", 0, 24, {"gpio_in", "gpio_out", "gmac", "lcd1", "spi3", NULL, "pa_eint24", NULL}, 6, 24, 0}, + {"PA25", 0, 25, {"gpio_in", "gpio_out", "gmac", "lcd1", "spi3", NULL, "pa_eint25", NULL}, 6, 25, 0}, + {"PA26", 0, 26, {"gpio_in", "gpio_out", "gmac", "lcd1", "clk_out_c", NULL, "pa_eint26", NULL}, 6, 26, 0}, + {"PA27", 0, 27, {"gpio_in", "gpio_out", "gmac", "lcd1", NULL, NULL, "pa_eint27", NULL}, 6, 27, 0}, - {"PB0", 1, 0, {"gpio_in", "gpio_out", "i2s0", "uart3", "csi", NULL, "pb_eint0", NULL}, 6, 0}, - {"PB1", 1, 1, {"gpio_in", "gpio_out", "i2s0", NULL, NULL, NULL, "pb_eint1", NULL}, 6, 1}, - {"PB2", 1, 2, {"gpio_in", "gpio_out", "i2s0", NULL, NULL, NULL, "pb_eint2", NULL}, 6, 2}, - {"PB3", 1, 3, {"gpio_in", "gpio_out", "i2s0", NULL, NULL, NULL, "pb_eint3", NULL}, 6, 3}, - {"PB4", 1, 4, {"gpio_in", "gpio_out", "i2s0", "uart3", NULL, NULL, "pb_eint4", NULL}, 6, 4}, - {"PB5", 1, 5, {"gpio_in", "gpio_out", "i2s0", "uart3", "i2c3", NULL, "pb_eint5", NULL}, 6, 5}, - {"PB6", 1, 6, {"gpio_in", "gpio_out", "i2s0", "uart3", "i2c3", NULL, "pb_eint6", NULL}, 6, 6}, - {"PB7", 1, 7, {"gpio_in", "gpio_out", "i2s0", NULL, NULL, NULL, "pb_eint7", NULL}, 6, 7}, + {"PB0", 1, 0, {"gpio_in", "gpio_out", "i2s0", "uart3", "csi", NULL, "pb_eint0", NULL}, 6, 0, 1}, + {"PB1", 1, 1, {"gpio_in", "gpio_out", "i2s0", NULL, NULL, NULL, "pb_eint1", NULL}, 6, 1, 1}, + {"PB2", 1, 2, {"gpio_in", "gpio_out", "i2s0", NULL, NULL, NULL, "pb_eint2", NULL}, 6, 2, 1}, + {"PB3", 1, 3, {"gpio_in", "gpio_out", "i2s0", NULL, NULL, NULL, "pb_eint3", NULL}, 6, 3, 1}, + {"PB4", 1, 4, {"gpio_in", "gpio_out", "i2s0", "uart3", NULL, NULL, "pb_eint4", NULL}, 6, 4, 1}, + {"PB5", 1, 5, {"gpio_in", "gpio_out", "i2s0", "uart3", "i2c3", NULL, "pb_eint5", NULL}, 6, 5, 1}, + {"PB6", 1, 6, {"gpio_in", "gpio_out", "i2s0", "uart3", "i2c3", NULL, "pb_eint6", NULL}, 6, 6, 1}, + {"PB7", 1, 7, {"gpio_in", "gpio_out", "i2s0", NULL, NULL, NULL, "pb_eint7", NULL}, 6, 7, 1}, {"PC0", 2, 0, {"gpio_in", "gpio_out", "nand0", "spi0", NULL, NULL, NULL, NULL}}, {"PC1", 2, 1, {"gpio_in", "gpio_out", "nand0", "spi0", NULL, NULL, NULL, NULL}}, {"PC2", 2, 2, {"gpio_in", "gpio_out", "nand0", "spi0", NULL, NULL, NULL, NULL}}, {"PC3", 2, 3, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC4", 2, 4, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC5", 2, 5, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC6", 2, 6, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC7", 2, 7, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC8", 2, 8, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC9", 2, 9, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC10", 2, 10, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC11", 2, 11, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC12", 2, 12, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC13", 2, 13, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC14", 2, 14, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC15", 2, 15, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC16", 2, 16, {"gpio_in", "gpio_out", "nand0", "nand1", NULL, NULL, NULL, NULL}}, {"PC17", 2, 17, {"gpio_in", "gpio_out", "nand0", "nand1", NULL, NULL, NULL, NULL}}, {"PC18", 2, 18, {"gpio_in", "gpio_out", "nand0", "nand1", NULL, NULL, NULL, NULL}}, {"PC19", 2, 19, {"gpio_in", "gpio_out", "nand0", "nand1", NULL, NULL, NULL, NULL}}, {"PC20", 2, 20, {"gpio_in", "gpio_out", "nand0", "nand1", NULL, NULL, NULL, NULL}}, {"PC21", 2, 21, {"gpio_in", "gpio_out", "nand0", "nand1", NULL, NULL, NULL, NULL}}, {"PC22", 2, 22, {"gpio_in", "gpio_out", "nand0", "nand1", NULL, NULL, NULL, NULL}}, {"PC23", 2, 23, {"gpio_in", "gpio_out", "nand0", "nand1", NULL, NULL, NULL, NULL}}, {"PC24", 2, 24, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC25", 2, 25, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC26", 2, 26, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC27", 2, 27, {"gpio_in", "gpio_out", NULL, "spi0",NULL, NULL, NULL, NULL}}, {"PD0", 3, 0, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD1", 3, 1, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD2", 3, 2, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD3", 3, 3, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD4", 3, 4, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD5", 3, 5, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD6", 3, 6, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD7", 3, 7, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD8", 3, 8, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD9", 3, 9, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD10", 3, 10, {"gpio_in", "gpio_out", "lcd0", "lvds1", NULL, NULL, NULL, NULL}}, {"PD11", 3, 11, {"gpio_in", "gpio_out", "lcd0", "lvds1", NULL, NULL, NULL, NULL}}, {"PD12", 3, 12, {"gpio_in", "gpio_out", "lcd0", "lvds1", NULL, NULL, NULL, NULL}}, {"PD13", 3, 13, {"gpio_in", "gpio_out", "lcd0", "lvds1", NULL, NULL, NULL, NULL}}, {"PD14", 3, 14, {"gpio_in", "gpio_out", "lcd0", "lvds1", NULL, NULL, NULL, NULL}}, {"PD15", 3, 15, {"gpio_in", "gpio_out", "lcd0", "lvds1", NULL, NULL, NULL, NULL}}, {"PD16", 3, 16, {"gpio_in", "gpio_out", "lcd0", "lvds1", NULL, NULL, NULL, NULL}}, {"PD17", 3, 17, {"gpio_in", "gpio_out", "lcd0", "lvds1", NULL, NULL, NULL, NULL}}, {"PD18", 3, 18, {"gpio_in", "gpio_out", "lcd0", "lvds1", NULL, NULL, NULL, NULL}}, {"PD19", 3, 19, {"gpio_in", "gpio_out", "lcd0", "lvds1", NULL, NULL, NULL, NULL}}, {"PD20", 3, 20, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD21", 3, 21, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD22", 3, 22, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD23", 3, 23, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD24", 3, 24, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD25", 3, 25, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD26", 3, 26, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD27", 3, 27, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, - {"PE0", 4, 0, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint0", NULL}, 6, 0}, - {"PE1", 4, 1, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint1", NULL}, 6, 1}, - {"PE2", 4, 2, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint2", NULL}, 6, 2}, - {"PE3", 4, 3, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint3", NULL}, 6, 3}, - {"PE4", 4, 4, {"gpio_in", "gpio_out", "csi", "uart5", NULL, NULL, "pe_eint4", NULL}, 6, 4}, - {"PE5", 4, 5, {"gpio_in", "gpio_out", "csi", "uart5", NULL, NULL, "pe_eint5", NULL}, 6, 5}, - {"PE6", 4, 6, {"gpio_in", "gpio_out", "csi", "uart5", NULL, NULL, "pe_eint6", NULL}, 6, 6}, - {"PE7", 4, 7, {"gpio_in", "gpio_out", "csi", "uart5", NULL, NULL, "pe_eint7", NULL}, 6, 7}, - {"PE8", 4, 8, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint8", NULL}, 6, 8}, - {"PE9", 4, 9, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint9", NULL}, 6, 9}, - {"PE10", 4, 10, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint10", NULL}, 6, 10}, - {"PE11", 4, 11, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint11", NULL}, 6, 11}, - {"PE12", 4, 12, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint12", NULL}, 6, 12}, - {"PE13", 4, 13, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint13", NULL}, 6, 13}, - {"PE14", 4, 14, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint14", NULL}, 6, 14}, - {"PE15", 4, 15, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint15", NULL}, 6, 15}, - {"PE16", 4, 16, {"gpio_in", "gpio_out", "csi", NULL, NULL, NULL, "pe_eint16", NULL}, 6, 16}, + {"PE0", 4, 0, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint0", NULL}, 6, 0, 4}, + {"PE1", 4, 1, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint1", NULL}, 6, 1, 4}, + {"PE2", 4, 2, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint2", NULL}, 6, 2, 4}, + {"PE3", 4, 3, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint3", NULL}, 6, 3, 4}, + {"PE4", 4, 4, {"gpio_in", "gpio_out", "csi", "uart5", NULL, NULL, "pe_eint4", NULL}, 6, 4, 4}, + {"PE5", 4, 5, {"gpio_in", "gpio_out", "csi", "uart5", NULL, NULL, "pe_eint5", NULL}, 6, 5, 4}, + {"PE6", 4, 6, {"gpio_in", "gpio_out", "csi", "uart5", NULL, NULL, "pe_eint6", NULL}, 6, 6, 4}, + {"PE7", 4, 7, {"gpio_in", "gpio_out", "csi", "uart5", NULL, NULL, "pe_eint7", NULL}, 6, 7, 4}, + {"PE8", 4, 8, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint8", NULL}, 6, 8, 4}, + {"PE9", 4, 9, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint9", NULL}, 6, 9, 4}, + {"PE10", 4, 10, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint10", NULL}, 6, 10, 4}, + {"PE11", 4, 11, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint11", NULL}, 6, 11, 4}, + {"PE12", 4, 12, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint12", NULL}, 6, 12, 4}, + {"PE13", 4, 13, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint13", NULL}, 6, 13, 4}, + {"PE14", 4, 14, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint14", NULL}, 6, 14, 4}, + {"PE15", 4, 15, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint15", NULL}, 6, 15, 4}, + {"PE16", 4, 16, {"gpio_in", "gpio_out", "csi", NULL, NULL, NULL, "pe_eint16", NULL}, 6, 16, 4}, {"PF0", 5, 0, {"gpio_in", "gpio_out", "mmc0", NULL, "jtag", NULL, NULL, NULL}}, {"PF1", 5, 1, {"gpio_in", "gpio_out", "mmc0", NULL, "jtag", NULL, NULL, NULL}}, {"PF2", 5, 2, {"gpio_in", "gpio_out", "mmc0", NULL, "uart0", NULL, NULL, NULL}}, {"PF3", 5, 3, {"gpio_in", "gpio_out", "mmc0", NULL, "jtag", NULL, NULL, NULL}}, {"PF4", 5, 4, {"gpio_in", "gpio_out", "mmc0", NULL, "uart0", NULL, NULL, NULL}}, {"PF5", 5, 5, {"gpio_in", "gpio_out", "mmc0", NULL, "jtag", NULL, NULL, NULL}}, - {"PG0", 6, 0, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint0", NULL}, 6, 0}, - {"PG1", 6, 1, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint1", NULL}, 6, 1}, - {"PG2", 6, 2, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint2", NULL}, 6, 2}, - {"PG3", 6, 3, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint3", NULL}, 6, 3}, - {"PG4", 6, 4, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint4", NULL}, 6, 4}, - {"PG5", 6, 5, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint5", NULL}, 6, 5}, - {"PG6", 6, 6, {"gpio_in", "gpio_out", "uart2", NULL, NULL, NULL, "pg_eint6", NULL}, 6, 6}, - {"PG7", 6, 7, {"gpio_in", "gpio_out", "uart2", NULL, NULL, NULL, "pg_eint7", NULL}, 6, 7}, - {"PG8", 6, 8, {"gpio_in", "gpio_out", "uart2", NULL, NULL, NULL, "pg_eint8", NULL}, 6, 8}, - {"PG9", 6, 9, {"gpio_in", "gpio_out", "uart2", NULL, NULL, NULL, "pg_eint9", NULL}, 6, 9}, - {"PG10", 6, 10, {"gpio_in", "gpio_out", "i2c3", "usb", NULL, NULL, "pg_eint10", NULL}, 6, 10}, - {"PG11", 6, 11, {"gpio_in", "gpio_out", "i2c3", "usb", NULL, NULL, "pg_eint11", NULL}, 6, 11}, - {"PG12", 6, 12, {"gpio_in", "gpio_out", "spi1", "i2s1", NULL, NULL, "pg_eint12", NULL}, 6, 12}, - {"PG13", 6, 13, {"gpio_in", "gpio_out", "spi1", "i2s1", NULL, NULL, "pg_eint13", NULL}, 6, 13}, - {"PG14", 6, 14, {"gpio_in", "gpio_out", "spi1", "i2s1", NULL, NULL, "pg_eint14", NULL}, 6, 14}, - {"PG15", 6, 15, {"gpio_in", "gpio_out", "spi1", "i2s1", NULL, NULL, "pg_eint15", NULL}, 6, 15}, - {"PG16", 6, 16, {"gpio_in", "gpio_out", "spi1", "i2s1", NULL, NULL, "pg_eint16", NULL}, 6, 16}, - {"PG17", 6, 17, {"gpio_in", "gpio_out", "uart4", NULL, NULL, NULL, "pg_eint17", NULL}, 6, 17}, - {"PG18", 6, 18, {"gpio_in", "gpio_out", "uart4", NULL, NULL, NULL, "pg_eint18", NULL}, 6, 18}, + {"PG0", 6, 0, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint0", NULL}, 6, 0, 6}, + {"PG1", 6, 1, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint1", NULL}, 6, 1, 6}, + {"PG2", 6, 2, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint2", NULL}, 6, 2, 6}, + {"PG3", 6, 3, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint3", NULL}, 6, 3, 6}, + {"PG4", 6, 4, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint4", NULL}, 6, 4, 6}, + {"PG5", 6, 5, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint5", NULL}, 6, 5, 6}, + {"PG6", 6, 6, {"gpio_in", "gpio_out", "uart2", NULL, NULL, NULL, "pg_eint6", NULL}, 6, 6, 6}, + {"PG7", 6, 7, {"gpio_in", "gpio_out", "uart2", NULL, NULL, NULL, "pg_eint7", NULL}, 6, 7, 6}, + {"PG8", 6, 8, {"gpio_in", "gpio_out", "uart2", NULL, NULL, NULL, "pg_eint8", NULL}, 6, 8, 6}, + {"PG9", 6, 9, {"gpio_in", "gpio_out", "uart2", NULL, NULL, NULL, "pg_eint9", NULL}, 6, 9, 6}, + {"PG10", 6, 10, {"gpio_in", "gpio_out", "i2c3", "usb", NULL, NULL, "pg_eint10", NULL}, 6, 10, 6}, + {"PG11", 6, 11, {"gpio_in", "gpio_out", "i2c3", "usb", NULL, NULL, "pg_eint11", NULL}, 6, 11, 6}, + {"PG12", 6, 12, {"gpio_in", "gpio_out", "spi1", "i2s1", NULL, NULL, "pg_eint12", NULL}, 6, 12, 6}, + {"PG13", 6, 13, {"gpio_in", "gpio_out", "spi1", "i2s1", NULL, NULL, "pg_eint13", NULL}, 6, 13, 6}, + {"PG14", 6, 14, {"gpio_in", "gpio_out", "spi1", "i2s1", NULL, NULL, "pg_eint14", NULL}, 6, 14, 6}, + {"PG15", 6, 15, {"gpio_in", "gpio_out", "spi1", "i2s1", NULL, NULL, "pg_eint15", NULL}, 6, 15, 6}, + {"PG16", 6, 16, {"gpio_in", "gpio_out", "spi1", "i2s1", NULL, NULL, "pg_eint16", NULL}, 6, 16, 6}, + {"PG17", 6, 17, {"gpio_in", "gpio_out", "uart4", NULL, NULL, NULL, "pg_eint17", NULL}, 6, 17, 6}, + {"PG18", 6, 18, {"gpio_in", "gpio_out", "uart4", NULL, NULL, NULL, "pg_eint18", NULL}, 6, 18, 6}, {"PH0", 7, 0, {"gpio_in", "gpio_out", "nand1", NULL, NULL, NULL, NULL, NULL}}, {"PH1", 7, 1, {"gpio_in", "gpio_out", "nand1", NULL, NULL, NULL, NULL, NULL}}, {"PH2", 7, 2, {"gpio_in", "gpio_out", "nand1", NULL, NULL, NULL, NULL, NULL}}, {"PH3", 7, 3, {"gpio_in", "gpio_out", "nand1", NULL, NULL, NULL, NULL, NULL}}, {"PH4", 7, 4, {"gpio_in", "gpio_out", "nand1", NULL, NULL, NULL, NULL, NULL}}, {"PH5", 7, 5, {"gpio_in", "gpio_out", "nand1", NULL, NULL, NULL, NULL, NULL}}, {"PH6", 7, 6, {"gpio_in", "gpio_out", "nand1", NULL, NULL, NULL, NULL, NULL}}, {"PH7", 7, 7, {"gpio_in", "gpio_out", "nand1", NULL, NULL, NULL, NULL, NULL}}, {"PH8", 7, 8, {"gpio_in", "gpio_out", "nand1", NULL, NULL, NULL, NULL, NULL}}, {"PH9", 7, 9, {"gpio_in", "gpio_out", "spi2", "jtag", "pwm1", NULL, NULL, NULL}}, {"PH10", 7, 10, {"gpio_in", "gpio_out", "spi2", "jtag", "pwm1", NULL, NULL, NULL}}, {"PH11", 7, 11, {"gpio_in", "gpio_out", "spi2", "jtag", "pwm2", NULL, NULL, NULL}}, {"PH12", 7, 12, {"gpio_in", "gpio_out", "spi2", "jtag", "pwm2", NULL, NULL, NULL}}, {"PH13", 7, 13, {"gpio_in", "gpio_out", "pwm0", NULL, NULL, NULL, NULL, NULL}}, {"PH14", 7, 14, {"gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, NULL, NULL}}, {"PH15", 7, 15, {"gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, NULL, NULL}}, {"PH16", 7, 16, {"gpio_in", "gpio_out", "i2c1", NULL, NULL, NULL, NULL, NULL}}, {"PH17", 7, 17, {"gpio_in", "gpio_out", "i2c1", NULL, NULL, NULL, NULL, NULL}}, {"PH18", 7, 18, {"gpio_in", "gpio_out", "i2c2", NULL, NULL, NULL, NULL, NULL}}, {"PH19", 7, 19, {"gpio_in", "gpio_out", "i2c2", NULL, NULL, NULL, NULL, NULL}}, {"PH20", 7, 20, {"gpio_in", "gpio_out", "uart0", NULL, NULL, NULL, NULL, NULL}}, {"PH21", 7, 21, {"gpio_in", "gpio_out", "uart0", NULL, NULL, NULL, NULL, NULL}}, {"PH22", 7, 22, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}}, {"PH23", 7, 23, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}}, {"PH24", 7, 24, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}}, {"PH25", 7, 25, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}}, {"PH26", 7, 26, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}}, {"PH27", 7, 27, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}}, {"PH28", 7, 28, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}}, {"PH29", 7, 29, {"gpio_in", "gpio_out", "nand1", NULL, NULL, NULL, NULL, NULL}}, {"PH30", 7, 30, {"gpio_in", "gpio_out", "nand1", NULL, NULL, NULL, NULL, NULL}}, }; const struct allwinner_padconf a31_padconf = { .npins = nitems(a31_pins), .pins = a31_pins, }; #endif /* SOC_ALLWINNER_A31 */ Index: projects/clang1000-import/sys/arm/allwinner/a31/a31s_padconf.c =================================================================== --- projects/clang1000-import/sys/arm/allwinner/a31/a31s_padconf.c (revision 356919) +++ projects/clang1000-import/sys/arm/allwinner/a31/a31s_padconf.c (revision 356920) @@ -1,199 +1,199 @@ /*- * Copyright (c) 2016 Emmanuel Vadot * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #ifdef SOC_ALLWINNER_A31S const static struct allwinner_pins a31s_pins[] = { - {"PA0", 0, 0, {"gpio_in", "gpio_out", "gmac", NULL, "uart1", NULL, "pa_eint0", NULL}, 6, 0}, - {"PA1", 0, 1, {"gpio_in", "gpio_out", "gmac", NULL, "uart1", NULL, "pa_eint1", NULL}, 6, 1}, - {"PA2", 0, 2, {"gpio_in", "gpio_out", "gmac", NULL, "uart1", NULL, "pa_eint2", NULL}, 6, 2}, - {"PA3", 0, 3, {"gpio_in", "gpio_out", "gmac", NULL, "uart1", NULL, "pa_eint3", NULL}, 6, 3}, - {"PA4", 0, 4, {"gpio_in", "gpio_out", "gmac", NULL, "uart1", NULL, "pa_eint4", NULL}, 6, 4}, - {"PA5", 0, 5, {"gpio_in", "gpio_out", "gmac", NULL, "uart1", NULL, "pa_eint5", NULL}, 6, 5}, - {"PA6", 0, 6, {"gpio_in", "gpio_out", "gmac", NULL, "uart1", NULL, "pa_eint6", NULL}, 6, 6}, - {"PA7", 0, 7, {"gpio_in", "gpio_out", "gmac", NULL, "uart1", NULL, "pa_eint7", NULL}, 6, 7}, - {"PA8", 0, 8, {"gpio_in", "gpio_out", "gmac", NULL, NULL, NULL, "pa_eint8", NULL}, 6, 8}, - {"PA9", 0, 9, {"gpio_in", "gpio_out", "gmac", NULL, NULL, NULL, "pa_eint9", NULL}, 6, 9}, - {"PA10", 0, 10, {"gpio_in", "gpio_out", "gmac", NULL, "mmc3", "mmc2", "pa_eint10", NULL}, 6, 10}, - {"PA11", 0, 11, {"gpio_in", "gpio_out", "gmac", NULL, "mmc3", "mmc2", "pa_eint11", NULL}, 6, 11}, - {"PA12", 0, 12, {"gpio_in", "gpio_out", "gmac", NULL, "mmc3", "mmc2", "pa_eint12", NULL}, 6, 12}, - {"PA13", 0, 13, {"gpio_in", "gpio_out", "gmac", NULL, "mmc3", "mmc2", "pa_eint13", NULL}, 6, 13}, - {"PA14", 0, 14, {"gpio_in", "gpio_out", "gmac", NULL, "mmc3", "mmc2", "pa_eint14", NULL}, 6, 14}, - {"PA15", 0, 15, {"gpio_in", "gpio_out", "gmac", NULL, "dmic", NULL, "pa_eint15", NULL}, 6, 15}, - {"PA16", 0, 16, {"gpio_in", "gpio_out", "gmac", NULL, "dmic", NULL, "pa_eint16", NULL}, 6, 16}, - {"PA17", 0, 17, {"gpio_in", "gpio_out", "gmac", NULL, "clk_out_b", NULL, "pa_eint17", NULL}, 6, 17}, - {"PA18", 0, 18, {"gpio_in", "gpio_out", "gmac", NULL, "pwm3", NULL, "pa_eint18", NULL}, 6, 18}, - {"PA19", 0, 19, {"gpio_in", "gpio_out", "gmac", NULL, "pwm3", NULL, "pa_eint19", NULL}, 6, 19}, - {"PA20", 0, 20, {"gpio_in", "gpio_out", "gmac", NULL, "spi3", NULL, "pa_eint20", NULL}, 6, 20}, - {"PA21", 0, 21, {"gpio_in", "gpio_out", "gmac", NULL, "spi3", NULL, "pa_eint21", NULL}, 6, 21}, - {"PA22", 0, 22, {"gpio_in", "gpio_out", "gmac", NULL, "spi3", NULL, "pa_eint22", NULL}, 6, 22}, - {"PA23", 0, 23, {"gpio_in", "gpio_out", "gmac", NULL, "spi3", NULL, "pa_eint23", NULL}, 6, 23}, - {"PA24", 0, 24, {"gpio_in", "gpio_out", "gmac", NULL, "spi3", NULL, "pa_eint24", NULL}, 6, 24}, - {"PA25", 0, 25, {"gpio_in", "gpio_out", "gmac", NULL, "spi3", NULL, "pa_eint25", NULL}, 6, 25}, - {"PA26", 0, 26, {"gpio_in", "gpio_out", "gmac", NULL, "clk_out_c", NULL, "pa_eint26", NULL}, 6, 26}, - {"PA27", 0, 27, {"gpio_in", "gpio_out", "gmac", NULL, NULL, NULL, "pa_eint27", NULL}, 6, 27}, + {"PA0", 0, 0, {"gpio_in", "gpio_out", "gmac", NULL, "uart1", NULL, "pa_eint0", NULL}, 6, 0, 0}, + {"PA1", 0, 1, {"gpio_in", "gpio_out", "gmac", NULL, "uart1", NULL, "pa_eint1", NULL}, 6, 1, 0}, + {"PA2", 0, 2, {"gpio_in", "gpio_out", "gmac", NULL, "uart1", NULL, "pa_eint2", NULL}, 6, 2, 0}, + {"PA3", 0, 3, {"gpio_in", "gpio_out", "gmac", NULL, "uart1", NULL, "pa_eint3", NULL}, 6, 3, 0}, + {"PA4", 0, 4, {"gpio_in", "gpio_out", "gmac", NULL, "uart1", NULL, "pa_eint4", NULL}, 6, 4, 0}, + {"PA5", 0, 5, {"gpio_in", "gpio_out", "gmac", NULL, "uart1", NULL, "pa_eint5", NULL}, 6, 5, 0}, + {"PA6", 0, 6, {"gpio_in", "gpio_out", "gmac", NULL, "uart1", NULL, "pa_eint6", NULL}, 6, 6, 0}, + {"PA7", 0, 7, {"gpio_in", "gpio_out", "gmac", NULL, "uart1", NULL, "pa_eint7", NULL}, 6, 7, 0}, + {"PA8", 0, 8, {"gpio_in", "gpio_out", "gmac", NULL, NULL, NULL, "pa_eint8", NULL}, 6, 8, 0}, + {"PA9", 0, 9, {"gpio_in", "gpio_out", "gmac", NULL, NULL, NULL, "pa_eint9", NULL}, 6, 9, 0}, + {"PA10", 0, 10, {"gpio_in", "gpio_out", "gmac", NULL, "mmc3", "mmc2", "pa_eint10", NULL}, 6, 10, 0}, + {"PA11", 0, 11, {"gpio_in", "gpio_out", "gmac", NULL, "mmc3", "mmc2", "pa_eint11", NULL}, 6, 11, 0}, + {"PA12", 0, 12, {"gpio_in", "gpio_out", "gmac", NULL, "mmc3", "mmc2", "pa_eint12", NULL}, 6, 12, 0}, + {"PA13", 0, 13, {"gpio_in", "gpio_out", "gmac", NULL, "mmc3", "mmc2", "pa_eint13", NULL}, 6, 13, 0}, + {"PA14", 0, 14, {"gpio_in", "gpio_out", "gmac", NULL, "mmc3", "mmc2", "pa_eint14", NULL}, 6, 14, 0}, + {"PA15", 0, 15, {"gpio_in", "gpio_out", "gmac", NULL, "dmic", NULL, "pa_eint15", NULL}, 6, 15, 0}, + {"PA16", 0, 16, {"gpio_in", "gpio_out", "gmac", NULL, "dmic", NULL, "pa_eint16", NULL}, 6, 16, 0}, + {"PA17", 0, 17, {"gpio_in", "gpio_out", "gmac", NULL, "clk_out_b", NULL, "pa_eint17", NULL}, 6, 17, 0}, + {"PA18", 0, 18, {"gpio_in", "gpio_out", "gmac", NULL, "pwm3", NULL, "pa_eint18", NULL}, 6, 18, 0}, + {"PA19", 0, 19, {"gpio_in", "gpio_out", "gmac", NULL, "pwm3", NULL, "pa_eint19", NULL}, 6, 19, 0}, + {"PA20", 0, 20, {"gpio_in", "gpio_out", "gmac", NULL, "spi3", NULL, "pa_eint20", NULL}, 6, 20, 0}, + {"PA21", 0, 21, {"gpio_in", "gpio_out", "gmac", NULL, "spi3", NULL, "pa_eint21", NULL}, 6, 21, 0}, + {"PA22", 0, 22, {"gpio_in", "gpio_out", "gmac", NULL, "spi3", NULL, "pa_eint22", NULL}, 6, 22, 0}, + {"PA23", 0, 23, {"gpio_in", "gpio_out", "gmac", NULL, "spi3", NULL, "pa_eint23", NULL}, 6, 23, 0}, + {"PA24", 0, 24, {"gpio_in", "gpio_out", "gmac", NULL, "spi3", NULL, "pa_eint24", NULL}, 6, 24, 0}, + {"PA25", 0, 25, {"gpio_in", "gpio_out", "gmac", NULL, "spi3", NULL, "pa_eint25", NULL}, 6, 25, 0}, + {"PA26", 0, 26, {"gpio_in", "gpio_out", "gmac", NULL, "clk_out_c", NULL, "pa_eint26", NULL}, 6, 26, 0}, + {"PA27", 0, 27, {"gpio_in", "gpio_out", "gmac", NULL, NULL, NULL, "pa_eint27", NULL}, 6, 27, 0}, - {"PB0", 1, 0, {"gpio_in", "gpio_out", "i2s0", "uart3", NULL , NULL, "pb_eint0", NULL}, 6, 0}, - {"PB1", 1, 1, {"gpio_in", "gpio_out", "i2s0", NULL, NULL, NULL, "pb_eint1", NULL}, 6, 1}, - {"PB2", 1, 2, {"gpio_in", "gpio_out", "i2s0", NULL, NULL, NULL, "pb_eint2", NULL}, 6, 2}, - {"PB3", 1, 3, {"gpio_in", "gpio_out", "i2s0", NULL, NULL, NULL, "pb_eint3", NULL}, 6, 3}, - {"PB4", 1, 4, {"gpio_in", "gpio_out", "i2s0", "uart3", NULL, NULL, "pb_eint4", NULL}, 6, 4}, - {"PB5", 1, 5, {"gpio_in", "gpio_out", "i2s0", "uart3", "i2c3", NULL, "pb_eint5", NULL}, 6, 5}, - {"PB6", 1, 6, {"gpio_in", "gpio_out", "i2s0", "uart3", "i2c3", NULL, "pb_eint6", NULL}, 6, 6}, - {"PB7", 1, 7, {"gpio_in", "gpio_out", "i2s0", NULL, NULL, NULL, "pb_eint7", NULL}, 6, 7}, + {"PB0", 1, 0, {"gpio_in", "gpio_out", "i2s0", "uart3", NULL , NULL, "pb_eint0", NULL}, 6, 0, 1}, + {"PB1", 1, 1, {"gpio_in", "gpio_out", "i2s0", NULL, NULL, NULL, "pb_eint1", NULL}, 6, 1, 1}, + {"PB2", 1, 2, {"gpio_in", "gpio_out", "i2s0", NULL, NULL, NULL, "pb_eint2", NULL}, 6, 2, 1}, + {"PB3", 1, 3, {"gpio_in", "gpio_out", "i2s0", NULL, NULL, NULL, "pb_eint3", NULL}, 6, 3, 1}, + {"PB4", 1, 4, {"gpio_in", "gpio_out", "i2s0", "uart3", NULL, NULL, "pb_eint4", NULL}, 6, 4, 1}, + {"PB5", 1, 5, {"gpio_in", "gpio_out", "i2s0", "uart3", "i2c3", NULL, "pb_eint5", NULL}, 6, 5, 1}, + {"PB6", 1, 6, {"gpio_in", "gpio_out", "i2s0", "uart3", "i2c3", NULL, "pb_eint6", NULL}, 6, 6, 1}, + {"PB7", 1, 7, {"gpio_in", "gpio_out", "i2s0", NULL, NULL, NULL, "pb_eint7", NULL}, 6, 7, 1}, {"PC0", 2, 0, {"gpio_in", "gpio_out", "nand0", "spi0", NULL, NULL, NULL, NULL}}, {"PC1", 2, 1, {"gpio_in", "gpio_out", "nand0", "spi0", NULL, NULL, NULL, NULL}}, {"PC2", 2, 2, {"gpio_in", "gpio_out", "nand0", "spi0", NULL, NULL, NULL, NULL}}, {"PC3", 2, 3, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC4", 2, 4, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC5", 2, 5, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC6", 2, 6, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC7", 2, 7, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC8", 2, 8, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC9", 2, 9, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC10", 2, 10, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC11", 2, 11, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC12", 2, 12, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC13", 2, 13, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC14", 2, 14, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC15", 2, 15, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC24", 2, 24, {"gpio_in", "gpio_out", "nand0", "mmc2", "mmc3", NULL, NULL, NULL}}, {"PC25", 2, 25, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC26", 2, 26, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC27", 2, 27, {"gpio_in", "gpio_out", NULL, "spi0",NULL, NULL, NULL, NULL}}, {"PD0", 3, 0, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD1", 3, 1, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD2", 3, 2, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD3", 3, 3, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD4", 3, 4, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD5", 3, 5, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD6", 3, 6, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD7", 3, 7, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD8", 3, 8, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD9", 3, 9, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD10", 3, 10, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD11", 3, 11, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD12", 3, 12, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD13", 3, 13, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD14", 3, 14, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD15", 3, 15, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD16", 3, 16, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD17", 3, 17, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD18", 3, 18, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD19", 3, 19, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD20", 3, 20, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD21", 3, 21, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD22", 3, 22, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD23", 3, 23, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD24", 3, 24, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD25", 3, 25, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD26", 3, 26, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD27", 3, 27, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, - {"PE0", 4, 0, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint0", NULL}, 6, 0}, - {"PE1", 4, 1, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint1", NULL}, 6, 1}, - {"PE2", 4, 2, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint2", NULL}, 6, 2}, - {"PE3", 4, 3, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint3", NULL}, 6, 3}, - {"PE4", 4, 4, {"gpio_in", "gpio_out", "csi", "uart5", NULL, NULL, "pe_eint4", NULL}, 6, 4}, - {"PE5", 4, 5, {"gpio_in", "gpio_out", "csi", "uart5", NULL, NULL, "pe_eint5", NULL}, 6, 5}, - {"PE6", 4, 6, {"gpio_in", "gpio_out", "csi", "uart5", NULL, NULL, "pe_eint6", NULL}, 6, 6}, - {"PE7", 4, 7, {"gpio_in", "gpio_out", "csi", "uart5", NULL, NULL, "pe_eint7", NULL}, 6, 7}, - {"PE8", 4, 8, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint8", NULL}, 6, 8}, - {"PE9", 4, 9, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint9", NULL}, 6, 9}, - {"PE10", 4, 10, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint10", NULL}, 6, 10}, - {"PE11", 4, 11, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint11", NULL}, 6, 11}, - {"PE12", 4, 12, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint12", NULL}, 6, 12}, - {"PE13", 4, 13, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint13", NULL}, 6, 13}, - {"PE14", 4, 14, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint14", NULL}, 6, 14}, - {"PE15", 4, 15, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint15", NULL}, 6, 15}, + {"PE0", 4, 0, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint0", NULL}, 6, 0, 4}, + {"PE1", 4, 1, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint1", NULL}, 6, 1, 4}, + {"PE2", 4, 2, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint2", NULL}, 6, 2, 4}, + {"PE3", 4, 3, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint3", NULL}, 6, 3, 4}, + {"PE4", 4, 4, {"gpio_in", "gpio_out", "csi", "uart5", NULL, NULL, "pe_eint4", NULL}, 6, 4, 4}, + {"PE5", 4, 5, {"gpio_in", "gpio_out", "csi", "uart5", NULL, NULL, "pe_eint5", NULL}, 6, 5, 4}, + {"PE6", 4, 6, {"gpio_in", "gpio_out", "csi", "uart5", NULL, NULL, "pe_eint6", NULL}, 6, 6, 4}, + {"PE7", 4, 7, {"gpio_in", "gpio_out", "csi", "uart5", NULL, NULL, "pe_eint7", NULL}, 6, 7, 4}, + {"PE8", 4, 8, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint8", NULL}, 6, 8, 4}, + {"PE9", 4, 9, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint9", NULL}, 6, 9, 4}, + {"PE10", 4, 10, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint10", NULL}, 6, 10, 4}, + {"PE11", 4, 11, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint11", NULL}, 6, 11, 4}, + {"PE12", 4, 12, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint12", NULL}, 6, 12, 4}, + {"PE13", 4, 13, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint13", NULL}, 6, 13, 4}, + {"PE14", 4, 14, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint14", NULL}, 6, 14, 4}, + {"PE15", 4, 15, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, "pe_eint15", NULL}, 6, 15, 4}, {"PF0", 5, 0, {"gpio_in", "gpio_out", "mmc0", NULL, "jtag", NULL, NULL, NULL}}, {"PF1", 5, 1, {"gpio_in", "gpio_out", "mmc0", NULL, "jtag", NULL, NULL, NULL}}, {"PF2", 5, 2, {"gpio_in", "gpio_out", "mmc0", NULL, "uart0", NULL, NULL, NULL}}, {"PF3", 5, 3, {"gpio_in", "gpio_out", "mmc0", NULL, "jtag", NULL, NULL, NULL}}, {"PF4", 5, 4, {"gpio_in", "gpio_out", "mmc0", NULL, "uart0", NULL, NULL, NULL}}, {"PF5", 5, 5, {"gpio_in", "gpio_out", "mmc0", NULL, "jtag", NULL, NULL, NULL}}, - {"PG0", 6, 0, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint0", NULL}, 6, 0}, - {"PG1", 6, 1, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint1", NULL}, 6, 1}, - {"PG2", 6, 2, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint2", NULL}, 6, 2}, - {"PG3", 6, 3, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint3", NULL}, 6, 3}, - {"PG4", 6, 4, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint4", NULL}, 6, 4}, - {"PG5", 6, 5, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint5", NULL}, 6, 5}, - {"PG6", 6, 6, {"gpio_in", "gpio_out", "uart2", NULL, NULL, NULL, "pg_eint6", NULL}, 6, 6}, - {"PG7", 6, 7, {"gpio_in", "gpio_out", "uart2", NULL, NULL, NULL, "pg_eint7", NULL}, 6, 7}, - {"PG8", 6, 8, {"gpio_in", "gpio_out", "uart2", NULL, NULL, NULL, "pg_eint8", NULL}, 6, 8}, - {"PG9", 6, 9, {"gpio_in", "gpio_out", "uart2", NULL, NULL, NULL, "pg_eint9", NULL}, 6, 9}, - {"PG10", 6, 10, {"gpio_in", "gpio_out", "i2c3", NULL, NULL, NULL, "pg_eint10", NULL}, 6, 10}, - {"PG11", 6, 11, {"gpio_in", "gpio_out", "i2c3", NULL, NULL, NULL, "pg_eint11", NULL}, 6, 11}, - {"PG12", 6, 12, {"gpio_in", "gpio_out", "spi1", "i2s1", NULL, NULL, "pg_eint12", NULL}, 6, 12}, - {"PG13", 6, 13, {"gpio_in", "gpio_out", "spi1", "i2s1", NULL, NULL, "pg_eint13", NULL}, 6, 13}, - {"PG14", 6, 14, {"gpio_in", "gpio_out", "spi1", "i2s1", NULL, NULL, "pg_eint14", NULL}, 6, 14}, - {"PG15", 6, 15, {"gpio_in", "gpio_out", "spi1", "i2s1", NULL, NULL, "pg_eint15", NULL}, 6, 15}, - {"PG16", 6, 16, {"gpio_in", "gpio_out", "spi1", "i2s1", NULL, NULL, "pg_eint16", NULL}, 6, 16}, - {"PG17", 6, 17, {"gpio_in", "gpio_out", "uart4", NULL, NULL, NULL, "pg_eint17", NULL}, 6, 17}, - {"PG18", 6, 18, {"gpio_in", "gpio_out", "uart4", NULL, NULL, NULL, "pg_eint18", NULL}, 6, 18}, + {"PG0", 6, 0, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint0", NULL}, 6, 0, 6}, + {"PG1", 6, 1, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint1", NULL}, 6, 1, 6}, + {"PG2", 6, 2, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint2", NULL}, 6, 2, 6}, + {"PG3", 6, 3, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint3", NULL}, 6, 3, 6}, + {"PG4", 6, 4, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint4", NULL}, 6, 4, 6}, + {"PG5", 6, 5, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint5", NULL}, 6, 5, 6}, + {"PG6", 6, 6, {"gpio_in", "gpio_out", "uart2", NULL, NULL, NULL, "pg_eint6", NULL}, 6, 6, 6}, + {"PG7", 6, 7, {"gpio_in", "gpio_out", "uart2", NULL, NULL, NULL, "pg_eint7", NULL}, 6, 7, 6}, + {"PG8", 6, 8, {"gpio_in", "gpio_out", "uart2", NULL, NULL, NULL, "pg_eint8", NULL}, 6, 8, 6}, + {"PG9", 6, 9, {"gpio_in", "gpio_out", "uart2", NULL, NULL, NULL, "pg_eint9", NULL}, 6, 9, 6}, + {"PG10", 6, 10, {"gpio_in", "gpio_out", "i2c3", NULL, NULL, NULL, "pg_eint10", NULL}, 6, 10, 6}, + {"PG11", 6, 11, {"gpio_in", "gpio_out", "i2c3", NULL, NULL, NULL, "pg_eint11", NULL}, 6, 11, 6}, + {"PG12", 6, 12, {"gpio_in", "gpio_out", "spi1", "i2s1", NULL, NULL, "pg_eint12", NULL}, 6, 12, 6}, + {"PG13", 6, 13, {"gpio_in", "gpio_out", "spi1", "i2s1", NULL, NULL, "pg_eint13", NULL}, 6, 13, 6}, + {"PG14", 6, 14, {"gpio_in", "gpio_out", "spi1", "i2s1", NULL, NULL, "pg_eint14", NULL}, 6, 14, 6}, + {"PG15", 6, 15, {"gpio_in", "gpio_out", "spi1", "i2s1", NULL, NULL, "pg_eint15", NULL}, 6, 15, 6}, + {"PG16", 6, 16, {"gpio_in", "gpio_out", "spi1", "i2s1", NULL, NULL, "pg_eint16", NULL}, 6, 16, 6}, + {"PG17", 6, 17, {"gpio_in", "gpio_out", "uart4", NULL, NULL, NULL, "pg_eint17", NULL}, 6, 17, 6}, + {"PG18", 6, 18, {"gpio_in", "gpio_out", "uart4", NULL, NULL, NULL, "pg_eint18", NULL}, 6, 18, 6}, {"PH9", 7, 9, {"gpio_in", "gpio_out", "spi2", "jtag", "pwm1", NULL, NULL, NULL}}, {"PH10", 7, 10, {"gpio_in", "gpio_out", "spi2", "jtag", "pwm1", NULL, NULL, NULL}}, {"PH11", 7, 11, {"gpio_in", "gpio_out", "spi2", "jtag", "pwm2", NULL, NULL, NULL}}, {"PH12", 7, 12, {"gpio_in", "gpio_out", "spi2", "jtag", "pwm2", NULL, NULL, NULL}}, {"PH13", 7, 13, {"gpio_in", "gpio_out", "pwm0", NULL, NULL, NULL, NULL, NULL}}, {"PH14", 7, 14, {"gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, NULL, NULL}}, {"PH15", 7, 15, {"gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, NULL, NULL}}, {"PH16", 7, 16, {"gpio_in", "gpio_out", "i2c1", NULL, NULL, NULL, NULL, NULL}}, {"PH17", 7, 17, {"gpio_in", "gpio_out", "i2c1", NULL, NULL, NULL, NULL, NULL}}, {"PH18", 7, 18, {"gpio_in", "gpio_out", "i2c2", NULL, NULL, NULL, NULL, NULL}}, {"PH19", 7, 19, {"gpio_in", "gpio_out", "i2c2", NULL, NULL, NULL, NULL, NULL}}, {"PH20", 7, 20, {"gpio_in", "gpio_out", "uart0", NULL, NULL, NULL, NULL, NULL}}, {"PH21", 7, 21, {"gpio_in", "gpio_out", "uart0", NULL, NULL, NULL, NULL, NULL}}, {"PH22", 7, 22, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}}, {"PH23", 7, 23, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}}, {"PH24", 7, 24, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}}, {"PH25", 7, 25, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}}, {"PH26", 7, 26, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}}, {"PH27", 7, 27, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}}, {"PH28", 7, 27, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}}, }; const struct allwinner_padconf a31s_padconf = { .npins = nitems(a31s_pins), .pins = a31s_pins, }; #endif /* SOC_ALLWINNER_A31S */ Index: projects/clang1000-import/sys/arm/allwinner/a33/a33_padconf.c =================================================================== --- projects/clang1000-import/sys/arm/allwinner/a33/a33_padconf.c (revision 356919) +++ projects/clang1000-import/sys/arm/allwinner/a33/a33_padconf.c (revision 356920) @@ -1,148 +1,148 @@ /*- * Copyright (c) 2016 Emmanuel Vadot * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #ifdef SOC_ALLWINNER_A33 const static struct allwinner_pins a33_pins[] = { - {"PB0", 1, 0, {"gpio_in", "gpio_out", "uart2", "uart0", "pb_eint0", NULL}, 4, 0}, - {"PB1", 1, 1, {"gpio_in", "gpio_out", "uart2", "uart0", "pb_eint1", NULL}, 4, 1}, - {"PB2", 1, 2, {"gpio_in", "gpio_out", "uart2", NULL, "pb_eint2", NULL}, 4, 2}, - {"PB3", 1, 3, {"gpio_in", "gpio_out", "uart2", NULL, "pb_eint3", NULL}, 4, 3}, - {"PB4", 1, 4, {"gpio_in", "gpio_out", "i2s0", "aif2", "pb_eint4", NULL}, 4, 4}, - {"PB5", 1, 5, {"gpio_in", "gpio_out", "i2s0", "aif2", "pb_eint5", NULL}, 4, 5}, - {"PB6", 1, 6, {"gpio_in", "gpio_out", "i2s0", "aif2", "pb_eint6", NULL}, 4, 6}, - {"PB7", 1, 7, {"gpio_in", "gpio_out", "i2s0", "aif2", "pb_eint7", NULL}, 4, 7}, + {"PB0", 1, 0, {"gpio_in", "gpio_out", "uart2", "uart0", "pb_eint0", NULL}, 4, 0, 1}, + {"PB1", 1, 1, {"gpio_in", "gpio_out", "uart2", "uart0", "pb_eint1", NULL}, 4, 1, 1}, + {"PB2", 1, 2, {"gpio_in", "gpio_out", "uart2", NULL, "pb_eint2", NULL}, 4, 2, 1}, + {"PB3", 1, 3, {"gpio_in", "gpio_out", "uart2", NULL, "pb_eint3", NULL}, 4, 3, 1}, + {"PB4", 1, 4, {"gpio_in", "gpio_out", "i2s0", "aif2", "pb_eint4", NULL}, 4, 4, 1}, + {"PB5", 1, 5, {"gpio_in", "gpio_out", "i2s0", "aif2", "pb_eint5", NULL}, 4, 5, 1}, + {"PB6", 1, 6, {"gpio_in", "gpio_out", "i2s0", "aif2", "pb_eint6", NULL}, 4, 6, 1}, + {"PB7", 1, 7, {"gpio_in", "gpio_out", "i2s0", "aif2", "pb_eint7", NULL}, 4, 7, 1}, {"PC0", 2, 0, {"gpio_in", "gpio_out", "nand0", "spi0", NULL, NULL, NULL, NULL}}, {"PC1", 2, 1, {"gpio_in", "gpio_out", "nand0", "spi0", NULL, NULL, NULL, NULL}}, {"PC2", 2, 2, {"gpio_in", "gpio_out", "nand0", "spi0", NULL, NULL, NULL, NULL}}, {"PC3", 2, 3, {"gpio_in", "gpio_out", "nand0", "spi0", NULL, NULL, NULL, NULL}}, {"PC4", 2, 4, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC5", 2, 5, {"gpio_in", "gpio_out", "nand0", "mmc2", NULL, NULL, NULL, NULL}}, {"PC6", 2, 6, {"gpio_in", "gpio_out", "nand0", "mmc2", NULL, NULL, NULL, NULL}}, {"PC7", 2, 7, {"gpio_in", "gpio_out", "nand0", NULL, NULL, NULL, NULL, NULL}}, {"PC8", 2, 8, {"gpio_in", "gpio_out", "nand0", "mmc2", NULL, NULL, NULL, NULL}}, {"PC9", 2, 9, {"gpio_in", "gpio_out", "nand0", "mmc2", NULL, NULL, NULL, NULL}}, {"PC10", 2, 10, {"gpio_in", "gpio_out", "nand0", "mmc2", NULL, NULL, NULL, NULL}}, {"PC11", 2, 11, {"gpio_in", "gpio_out", "nand0", "mmc2", NULL, NULL, NULL, NULL}}, {"PC12", 2, 12, {"gpio_in", "gpio_out", "nand0", "mmc2", NULL, NULL, NULL, NULL}}, {"PC13", 2, 13, {"gpio_in", "gpio_out", "nand0", "mmc2", NULL, NULL, NULL, NULL}}, {"PC14", 2, 14, {"gpio_in", "gpio_out", "nand0", "mmc2", NULL, NULL, NULL, NULL}}, {"PC15", 2, 15, {"gpio_in", "gpio_out", "nand0", "mmc2", NULL, NULL, NULL, NULL}}, {"PC16", 2, 16, {"gpio_in", "gpio_out", "nand0", "mmc2", NULL, NULL, NULL, NULL}}, {"PD2", 3, 2, {"gpio_in", "gpio_out", "lcd0", "mmc1", NULL, NULL, NULL, NULL}}, {"PD3", 3, 3, {"gpio_in", "gpio_out", "lcd0", "mmc1", NULL, NULL, NULL, NULL}}, {"PD4", 3, 4, {"gpio_in", "gpio_out", "lcd0", "mmc1", NULL, NULL, NULL, NULL}}, {"PD5", 3, 5, {"gpio_in", "gpio_out", "lcd0", "mmc1", NULL, NULL, NULL, NULL}}, {"PD6", 3, 6, {"gpio_in", "gpio_out", "lcd0", "mmc1", NULL, NULL, NULL, NULL}}, {"PD7", 3, 7, {"gpio_in", "gpio_out", "lcd0", "mmc1", NULL, NULL, NULL, NULL}}, {"PD10", 3, 10, {"gpio_in", "gpio_out", "lcd0", "uart1", NULL, NULL, NULL, NULL}}, {"PD11", 3, 11, {"gpio_in", "gpio_out", "lcd0", "uart1", NULL, NULL, NULL, NULL}}, {"PD12", 3, 12, {"gpio_in", "gpio_out", "lcd0", "uart1", NULL, NULL, NULL, NULL}}, {"PD13", 3, 13, {"gpio_in", "gpio_out", "lcd0", "uart1", NULL, NULL, NULL, NULL}}, {"PD14", 3, 14, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD15", 3, 15, {"gpio_in", "gpio_out", "lcd0", NULL, NULL, NULL, NULL, NULL}}, {"PD18", 3, 18, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD19", 3, 19, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD20", 3, 20, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD21", 3, 21, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD22", 3, 22, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD23", 3, 23, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD24", 3, 24, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD25", 3, 25, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD26", 3, 26, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PD27", 3, 27, {"gpio_in", "gpio_out", "lcd0", "lvds0", NULL, NULL, NULL, NULL}}, {"PE0", 4, 0, {"gpio_in", "gpio_out", "csi", NULL, NULL, NULL, NULL, NULL}, 0, 0}, {"PE1", 4, 1, {"gpio_in", "gpio_out", "csi", NULL, NULL, NULL, NULL, NULL}, 0, 0}, {"PE2", 4, 2, {"gpio_in", "gpio_out", "csi", NULL, NULL, NULL, NULL, NULL}, 0, 0}, {"PE3", 4, 3, {"gpio_in", "gpio_out", "csi", NULL, NULL, NULL, NULL, NULL}, 0, 0}, {"PE4", 4, 4, {"gpio_in", "gpio_out", "csi", NULL, NULL, NULL, NULL, NULL}, 0, 0}, {"PE5", 4, 5, {"gpio_in", "gpio_out", "csi", NULL, NULL, NULL, NULL, NULL}, 0, 0}, {"PE6", 4, 6, {"gpio_in", "gpio_out", "csi", NULL, NULL, NULL, NULL, NULL}, 0, 0}, {"PE7", 4, 7, {"gpio_in", "gpio_out", "csi", NULL, NULL, NULL, NULL, NULL}, 0, 0}, {"PE8", 4, 8, {"gpio_in", "gpio_out", "csi", NULL, NULL, NULL, NULL, NULL}, 0, 0}, {"PE9", 4, 9, {"gpio_in", "gpio_out", "csi", NULL, NULL, NULL, NULL, NULL}, 0, 0}, {"PE10", 4, 10, {"gpio_in", "gpio_out", "csi", NULL, NULL, NULL, NULL, NULL}, 0, 0}, {"PE11", 4, 11, {"gpio_in", "gpio_out", "csi", NULL, NULL, NULL, NULL, NULL}, 0, 0}, {"PE12", 4, 12, {"gpio_in", "gpio_out", "csi", "i2c2", NULL, NULL, NULL, NULL}, 0, 0}, {"PE13", 4, 13, {"gpio_in", "gpio_out", "csi", "i2c2", NULL, NULL, NULL, NULL}, 0, 0}, {"PE14", 4, 14, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}, 0, 0}, {"PE15", 4, 15, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}, 0, 0}, {"PE16", 4, 16, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}, 0, 0}, {"PE17", 4, 16, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}, 0, 0}, {"PF0", 5, 0, {"gpio_in", "gpio_out", "mmc0", "jtag", NULL, NULL, NULL}}, {"PF1", 5, 1, {"gpio_in", "gpio_out", "mmc0", "jtag", NULL, NULL, NULL}}, {"PF2", 5, 2, {"gpio_in", "gpio_out", "mmc0", "uart0", NULL, NULL, NULL}}, {"PF3", 5, 3, {"gpio_in", "gpio_out", "mmc0", "jtag", NULL, NULL, NULL}}, {"PF4", 5, 4, {"gpio_in", "gpio_out", "mmc0", "uart0", NULL, NULL, NULL}}, {"PF5", 5, 5, {"gpio_in", "gpio_out", "mmc0", "jtag", NULL, NULL, NULL}}, - {"PG0", 6, 0, {"gpio_in", "gpio_out", "mmc1", NULL, "pg_eint0", NULL}, 4, 0}, - {"PG1", 6, 1, {"gpio_in", "gpio_out", "mmc1", NULL, "pg_eint1", NULL}, 4, 1}, - {"PG2", 6, 2, {"gpio_in", "gpio_out", "mmc1", NULL, "pg_eint2", NULL}, 4, 2}, - {"PG3", 6, 3, {"gpio_in", "gpio_out", "mmc1", NULL, "pg_eint3", NULL}, 4, 3}, - {"PG4", 6, 4, {"gpio_in", "gpio_out", "mmc1", NULL, "pg_eint4", NULL}, 4, 4}, - {"PG5", 6, 5, {"gpio_in", "gpio_out", "mmc1", NULL, "pg_eint5", NULL}, 4, 5}, - {"PG6", 6, 6, {"gpio_in", "gpio_out", "uart1", NULL, "pg_eint6", NULL}, 4, 6}, - {"PG7", 6, 7, {"gpio_in", "gpio_out", "uart1", NULL, "pg_eint7", NULL}, 4, 7}, - {"PG8", 6, 8, {"gpio_in", "gpio_out", "uart1", NULL, "pg_eint8", NULL}, 4, 8}, - {"PG9", 6, 9, {"gpio_in", "gpio_out", "uart1", NULL, "pg_eint9", NULL}, 4, 9}, - {"PG10", 6, 10, {"gpio_in", "gpio_out", "i2s1", "aif3", "pg_eint10", NULL}, 4, 10}, - {"PG11", 6, 11, {"gpio_in", "gpio_out", "i2s1", "aif3", "pg_eint11", NULL}, 4, 11}, - {"PG12", 6, 12, {"gpio_in", "gpio_out", "i2s1", "aif3", "pg_eint12", NULL}, 4, 12}, - {"PG13", 6, 13, {"gpio_in", "gpio_out", "i2s1", "aif3", "pg_eint13", NULL}, 4, 13}, + {"PG0", 6, 0, {"gpio_in", "gpio_out", "mmc1", NULL, "pg_eint0", NULL}, 4, 0, 6}, + {"PG1", 6, 1, {"gpio_in", "gpio_out", "mmc1", NULL, "pg_eint1", NULL}, 4, 1, 6}, + {"PG2", 6, 2, {"gpio_in", "gpio_out", "mmc1", NULL, "pg_eint2", NULL}, 4, 2, 6}, + {"PG3", 6, 3, {"gpio_in", "gpio_out", "mmc1", NULL, "pg_eint3", NULL}, 4, 3, 6}, + {"PG4", 6, 4, {"gpio_in", "gpio_out", "mmc1", NULL, "pg_eint4", NULL}, 4, 4, 6}, + {"PG5", 6, 5, {"gpio_in", "gpio_out", "mmc1", NULL, "pg_eint5", NULL}, 4, 5, 6}, + {"PG6", 6, 6, {"gpio_in", "gpio_out", "uart1", NULL, "pg_eint6", NULL}, 4, 6, 6}, + {"PG7", 6, 7, {"gpio_in", "gpio_out", "uart1", NULL, "pg_eint7", NULL}, 4, 7, 6}, + {"PG8", 6, 8, {"gpio_in", "gpio_out", "uart1", NULL, "pg_eint8", NULL}, 4, 8, 6}, + {"PG9", 6, 9, {"gpio_in", "gpio_out", "uart1", NULL, "pg_eint9", NULL}, 4, 9, 6}, + {"PG10", 6, 10, {"gpio_in", "gpio_out", "i2s1", "aif3", "pg_eint10", NULL}, 4, 10, 6}, + {"PG11", 6, 11, {"gpio_in", "gpio_out", "i2s1", "aif3", "pg_eint11", NULL}, 4, 11, 6}, + {"PG12", 6, 12, {"gpio_in", "gpio_out", "i2s1", "aif3", "pg_eint12", NULL}, 4, 12, 6}, + {"PG13", 6, 13, {"gpio_in", "gpio_out", "i2s1", "aif3", "pg_eint13", NULL}, 4, 13, 6}, {"PH0", 7, 0, {"gpio_in", "gpio_out", "pwm0", NULL, NULL, NULL, NULL, NULL}}, {"PH1", 7, 1, {"gpio_in", "gpio_out", "pwm1", NULL, NULL, NULL, NULL, NULL}}, {"PH2", 7, 2, {"gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, NULL, NULL}}, {"PH3", 7, 3, {"gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, NULL, NULL}}, {"PH4", 7, 4, {"gpio_in", "gpio_out", "i2c1", NULL, NULL, NULL, NULL, NULL}}, {"PH5", 7, 5, {"gpio_in", "gpio_out", "i2c1", NULL, NULL, NULL, NULL, NULL}}, {"PH6", 7, 6, {"gpio_in", "gpio_out", "spi0", "uart3", NULL, NULL, NULL, NULL}}, {"PH7", 7, 7, {"gpio_in", "gpio_out", "spi0", "uart3", NULL, NULL, NULL, NULL}}, {"PH8", 7, 8, {"gpio_in", "gpio_out", "spi0", "uart3", NULL, NULL, NULL, NULL}}, {"PH9", 7, 9, {"gpio_in", "gpio_out", "spi0", "uart3", NULL, NULL, NULL, NULL}}, }; const struct allwinner_padconf a33_padconf = { .npins = nitems(a33_pins), .pins = a33_pins, }; #endif /* SOC_ALLWINNER_A33 */ Index: projects/clang1000-import/sys/arm/allwinner/a64/a64_padconf.c =================================================================== --- projects/clang1000-import/sys/arm/allwinner/a64/a64_padconf.c (revision 356919) +++ projects/clang1000-import/sys/arm/allwinner/a64/a64_padconf.c (revision 356920) @@ -1,159 +1,159 @@ /*- * Copyright (c) 2016 Jared McNeill * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "opt_soc.h" #ifdef SOC_ALLWINNER_A64 static const struct allwinner_pins a64_pins[] = { - { "PB0", 1, 0, { "gpio_in", "gpio_out", "uart2", NULL, "jtag", NULL, "pb_eint0" }, 6, 0}, - { "PB1", 1, 1, { "gpio_in", "gpio_out", "uart2", NULL, "jtag", "sim", "pb_eint1" }, 6, 1}, - { "PB2", 1, 2, { "gpio_in", "gpio_out", "uart2", NULL, "jtag", "sim", "pb_eint2" }, 6, 2}, - { "PB3", 1, 3, { "gpio_in", "gpio_out", "uart2", "i2s0", "jtag", "sim", "pb_eint3" }, 6, 3}, - { "PB4", 1, 4, { "gpio_in", "gpio_out", "aif2", "pcm0", NULL, "sim", "pb_eint4" }, 6, 4}, - { "PB5", 1, 5, { "gpio_in", "gpio_out", "aif2", "pcm0", NULL, "sim", "pb_eint5" }, 6, 5}, - { "PB6", 1, 6, { "gpio_in", "gpio_out", "aif2", "pcm0", NULL, "sim", "pb_eint6" }, 6, 6}, - { "PB7", 1, 7, { "gpio_in", "gpio_out", "aif2", "pcm0", NULL, "sim", "pb_eint7" }, 6, 7}, - { "PB8", 1, 8, { "gpio_in", "gpio_out", NULL, NULL, "uart0", NULL, "pb_eint8" }, 6, 8}, - { "PB9", 1, 9, { "gpio_in", "gpio_out", NULL, NULL, "uart0", NULL, "pb_eint9" }, 6, 9}, + { "PB0", 1, 0, { "gpio_in", "gpio_out", "uart2", NULL, "jtag", NULL, "pb_eint0" }, 6, 0, 0}, + { "PB1", 1, 1, { "gpio_in", "gpio_out", "uart2", NULL, "jtag", "sim", "pb_eint1" }, 6, 1, 0}, + { "PB2", 1, 2, { "gpio_in", "gpio_out", "uart2", NULL, "jtag", "sim", "pb_eint2" }, 6, 2, 0}, + { "PB3", 1, 3, { "gpio_in", "gpio_out", "uart2", "i2s0", "jtag", "sim", "pb_eint3" }, 6, 3, 0}, + { "PB4", 1, 4, { "gpio_in", "gpio_out", "aif2", "pcm0", NULL, "sim", "pb_eint4" }, 6, 4, 0}, + { "PB5", 1, 5, { "gpio_in", "gpio_out", "aif2", "pcm0", NULL, "sim", "pb_eint5" }, 6, 5, 0}, + { "PB6", 1, 6, { "gpio_in", "gpio_out", "aif2", "pcm0", NULL, "sim", "pb_eint6" }, 6, 6, 0}, + { "PB7", 1, 7, { "gpio_in", "gpio_out", "aif2", "pcm0", NULL, "sim", "pb_eint7" }, 6, 7, 0}, + { "PB8", 1, 8, { "gpio_in", "gpio_out", NULL, NULL, "uart0", NULL, "pb_eint8" }, 6, 8, 0}, + { "PB9", 1, 9, { "gpio_in", "gpio_out", NULL, NULL, "uart0", NULL, "pb_eint9" }, 6, 9, 0}, { "PC0", 2, 0, { "gpio_in", "gpio_out", "nand", NULL, "spi0" } }, { "PC1", 2, 1, { "gpio_in", "gpio_out", "nand", "mmc2", "spi0" } }, { "PC2", 2, 2, { "gpio_in", "gpio_out", "nand", NULL, "spi0" } }, { "PC3", 2, 3, { "gpio_in", "gpio_out", "nand", NULL, "spi0" } }, { "PC4", 2, 4, { "gpio_in", "gpio_out", "nand" } }, { "PC5", 2, 5, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC6", 2, 6, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC7", 2, 7, { "gpio_in", "gpio_out", "nand" } }, { "PC8", 2, 8, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC9", 2, 9, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC10", 2, 10, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC11", 2, 11, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC12", 2, 12, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC13", 2, 13, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC14", 2, 14, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC15", 2, 15, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC16", 2, 16, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PD0", 3, 0, { "gpio_in", "gpio_out", "lcd", "uart3", "spi1", "ccir" } }, { "PD1", 3, 1, { "gpio_in", "gpio_out", "lcd", "uart3", "spi1", "ccir" } }, { "PD2", 3, 2, { "gpio_in", "gpio_out", "lcd", "uart4", "spi1", "ccir" } }, { "PD3", 3, 3, { "gpio_in", "gpio_out", "lcd", "uart4", "spi1", "ccir" } }, { "PD4", 3, 4, { "gpio_in", "gpio_out", "lcd", "uart4", "spi1", "ccir" } }, { "PD5", 3, 5, { "gpio_in", "gpio_out", "lcd", "uart4", "spi1", "ccir" } }, { "PD6", 3, 6, { "gpio_in", "gpio_out", "lcd", NULL, NULL, "ccir" } }, { "PD7", 3, 7, { "gpio_in", "gpio_out", "lcd", NULL, NULL, "ccir" } }, { "PD8", 3, 8, { "gpio_in", "gpio_out", "lcd", NULL, "emac", "ccir" } }, { "PD9", 3, 9, { "gpio_in", "gpio_out", "lcd", NULL, "emac", "ccir" } }, { "PD10", 3, 10, { "gpio_in", "gpio_out", "lcd", NULL, "emac" } }, { "PD11", 3, 11, { "gpio_in", "gpio_out", "lcd", NULL, "emac" } }, { "PD12", 3, 12, { "gpio_in", "gpio_out", "lcd", "lvds", "emac" } }, { "PD13", 3, 13, { "gpio_in", "gpio_out", "lcd", "lvds", "emac" } }, { "PD14", 3, 14, { "gpio_in", "gpio_out", "lcd", "lvds", "emac" } }, { "PD15", 3, 15, { "gpio_in", "gpio_out", "lcd", "lvds", "emac", "ccir" } }, { "PD16", 3, 16, { "gpio_in", "gpio_out", "lcd", "lvds", "emac", "ccir" } }, { "PD17", 3, 17, { "gpio_in", "gpio_out", "lcd", "lvds", "emac" } }, { "PD18", 3, 18, { "gpio_in", "gpio_out", "lcd", "lvds", "emac" } }, { "PD19", 3, 19, { "gpio_in", "gpio_out", "lcd", "lvds", "emac" } }, { "PD20", 3, 20, { "gpio_in", "gpio_out", "lcd", "lvds", "emac" } }, { "PD21", 3, 21, { "gpio_in", "gpio_out", "lcd", "lvds", "emac" } }, { "PD22", 3, 22, { "gpio_in", "gpio_out", "pwm0", NULL, "emac" } }, { "PD23", 3, 23, { "gpio_in", "gpio_out", NULL, NULL, "emac" } }, { "PD24", 3, 24, { "gpio_in", "gpio_out" } }, { "PE0", 4, 0, { "gpio_in", "gpio_out", "csi", NULL, "ts" } }, { "PE1", 4, 1, { "gpio_in", "gpio_out", "csi", NULL, "ts" } }, { "PE2", 4, 2, { "gpio_in", "gpio_out", "csi", NULL, "ts" } }, { "PE3", 4, 3, { "gpio_in", "gpio_out", "csi", NULL, "ts" } }, { "PE4", 4, 4, { "gpio_in", "gpio_out", "csi", NULL, "ts" } }, { "PE5", 4, 5, { "gpio_in", "gpio_out", "csi", NULL, "ts" } }, { "PE6", 4, 6, { "gpio_in", "gpio_out", "csi", NULL, "ts" } }, { "PE7", 4, 7, { "gpio_in", "gpio_out", "csi", NULL, "ts" } }, { "PE8", 4, 8, { "gpio_in", "gpio_out", "csi", NULL, "ts" } }, { "PE9", 4, 9, { "gpio_in", "gpio_out", "csi", NULL, "ts" } }, { "PE10", 4, 10, { "gpio_in", "gpio_out", "csi", NULL, "ts" } }, { "PE11", 4, 11, { "gpio_in", "gpio_out", "csi", NULL, "ts" } }, { "PE12", 4, 12, { "gpio_in", "gpio_out", "csi" } }, { "PE13", 4, 13, { "gpio_in", "gpio_out", "csi" } }, { "PE14", 4, 14, { "gpio_in", "gpio_out", "pll_lock", "twi2" } }, { "PE15", 4, 15, { "gpio_in", "gpio_out", NULL, "twi2" } }, { "PE16", 4, 16, { "gpio_in", "gpio_out" } }, { "PE17", 4, 17, { "gpio_in", "gpio_out" } }, { "PF0", 5, 0, { "gpio_in", "gpio_out", "mmc0", "jtag" } }, { "PF1", 5, 1, { "gpio_in", "gpio_out", "mmc0", "jtag" } }, { "PF2", 5, 2, { "gpio_in", "gpio_out", "mmc0", "uart0" } }, { "PF3", 5, 3, { "gpio_in", "gpio_out", "mmc0", "jtag" } }, { "PF4", 5, 4, { "gpio_in", "gpio_out", "mmc0", "uart0" } }, { "PF5", 5, 5, { "gpio_in", "gpio_out", "mmc0", "jtag" } }, { "PF6", 5, 6, { "gpio_in", "gpio_out" } }, - { "PG0", 6, 0, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint0" }, 6, 0}, - { "PG1", 6, 1, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint1" }, 6, 1}, - { "PG2", 6, 2, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint2" }, 6, 2}, - { "PG3", 6, 3, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint3" }, 6, 3}, - { "PG4", 6, 4, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint4" }, 6, 4}, - { "PG5", 6, 5, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint5" }, 6, 5}, - { "PG6", 6, 6, { "gpio_in", "gpio_out", "uart1", NULL, NULL, NULL, "pg_eint6" }, 6, 6}, - { "PG7", 6, 7, { "gpio_in", "gpio_out", "uart1", NULL, NULL, NULL, "pg_eint7" }, 6, 7}, - { "PG8", 6, 8, { "gpio_in", "gpio_out", "uart1", NULL, NULL, NULL, "pg_eint8" }, 6, 8}, - { "PG9", 6, 9, { "gpio_in", "gpio_out", "uart1", NULL, NULL, NULL, "pg_eint9" }, 6, 9}, - { "PG10", 6, 10, { "gpio_in", "gpio_out", "aif3", "pcm1", NULL, NULL, "pg_eint10" }, 6, 10}, - { "PG11", 6, 11, { "gpio_in", "gpio_out", "aif3", "pcm1", NULL, NULL, "pg_eint11" }, 6, 11}, - { "PG12", 6, 12, { "gpio_in", "gpio_out", "aif3", "pcm1", NULL, NULL, "pg_eint12" }, 6, 12}, - { "PG13", 6, 13, { "gpio_in", "gpio_out", "aif3", "pcm1", NULL, NULL, "pg_eint13" }, 6, 13}, + { "PG0", 6, 0, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint0" }, 6, 0, 1}, + { "PG1", 6, 1, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint1" }, 6, 1, 1}, + { "PG2", 6, 2, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint2" }, 6, 2, 1}, + { "PG3", 6, 3, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint3" }, 6, 3, 1}, + { "PG4", 6, 4, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint4" }, 6, 4, 1}, + { "PG5", 6, 5, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint5" }, 6, 5, 1}, + { "PG6", 6, 6, { "gpio_in", "gpio_out", "uart1", NULL, NULL, NULL, "pg_eint6" }, 6, 6, 1}, + { "PG7", 6, 7, { "gpio_in", "gpio_out", "uart1", NULL, NULL, NULL, "pg_eint7" }, 6, 7, 1}, + { "PG8", 6, 8, { "gpio_in", "gpio_out", "uart1", NULL, NULL, NULL, "pg_eint8" }, 6, 8, 1}, + { "PG9", 6, 9, { "gpio_in", "gpio_out", "uart1", NULL, NULL, NULL, "pg_eint9" }, 6, 9, 1}, + { "PG10", 6, 10, { "gpio_in", "gpio_out", "aif3", "pcm1", NULL, NULL, "pg_eint10" }, 6, 10, 1}, + { "PG11", 6, 11, { "gpio_in", "gpio_out", "aif3", "pcm1", NULL, NULL, "pg_eint11" }, 6, 11, 1}, + { "PG12", 6, 12, { "gpio_in", "gpio_out", "aif3", "pcm1", NULL, NULL, "pg_eint12" }, 6, 12, 1}, + { "PG13", 6, 13, { "gpio_in", "gpio_out", "aif3", "pcm1", NULL, NULL, "pg_eint13" }, 6, 13, 1}, - { "PH0", 7, 0, { "gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, "ph_eint0" }, 6, 0}, - { "PH1", 7, 1, { "gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, "ph_eint1" }, 6, 1}, - { "PH2", 7, 2, { "gpio_in", "gpio_out", "i2c1", NULL, NULL, NULL, "ph_eint2" }, 6, 2}, - { "PH3", 7, 3, { "gpio_in", "gpio_out", "i2c1", NULL, NULL, NULL, "ph_eint3" }, 6, 3}, - { "PH4", 7, 4, { "gpio_in", "gpio_out", "uart3", NULL, NULL, NULL, "ph_eint4" }, 6, 4}, - { "PH5", 7, 5, { "gpio_in", "gpio_out", "uart3", NULL, NULL, NULL, "ph_eint5" }, 6, 5}, - { "PH6", 7, 6, { "gpio_in", "gpio_out", "uart3", NULL, NULL, NULL, "ph_eint6" }, 6, 6}, - { "PH7", 7, 7, { "gpio_in", "gpio_out", "uart3", NULL, NULL, NULL, "ph_eint7" }, 6, 7}, - { "PH8", 7, 8, { "gpio_in", "gpio_out", "owa", NULL, NULL, NULL, "ph_eint8" }, 6, 8}, - { "PH9", 7, 9, { "gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "ph_eint9" }, 6, 9}, - { "PH10", 7, 10, { "gpio_in", "gpio_out", "mic", NULL, NULL, NULL, "ph_eint10" }, 6, 10}, - { "PH11", 7, 11, { "gpio_in", "gpio_out", "mic", NULL, NULL, NULL, "ph_eint11" }, 6, 11}, + { "PH0", 7, 0, { "gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, "ph_eint0" }, 6, 0, 2}, + { "PH1", 7, 1, { "gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, "ph_eint1" }, 6, 1, 2}, + { "PH2", 7, 2, { "gpio_in", "gpio_out", "i2c1", NULL, NULL, NULL, "ph_eint2" }, 6, 2, 2}, + { "PH3", 7, 3, { "gpio_in", "gpio_out", "i2c1", NULL, NULL, NULL, "ph_eint3" }, 6, 3, 2}, + { "PH4", 7, 4, { "gpio_in", "gpio_out", "uart3", NULL, NULL, NULL, "ph_eint4" }, 6, 4, 2}, + { "PH5", 7, 5, { "gpio_in", "gpio_out", "uart3", NULL, NULL, NULL, "ph_eint5" }, 6, 5, 2}, + { "PH6", 7, 6, { "gpio_in", "gpio_out", "uart3", NULL, NULL, NULL, "ph_eint6" }, 6, 6, 2}, + { "PH7", 7, 7, { "gpio_in", "gpio_out", "uart3", NULL, NULL, NULL, "ph_eint7" }, 6, 7, 2}, + { "PH8", 7, 8, { "gpio_in", "gpio_out", "owa", NULL, NULL, NULL, "ph_eint8" }, 6, 8, 2}, + { "PH9", 7, 9, { "gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "ph_eint9" }, 6, 9, 2}, + { "PH10", 7, 10, { "gpio_in", "gpio_out", "mic", NULL, NULL, NULL, "ph_eint10" }, 6, 10, 2}, + { "PH11", 7, 11, { "gpio_in", "gpio_out", "mic", NULL, NULL, NULL, "ph_eint11" }, 6, 11, 2}, }; const struct allwinner_padconf a64_padconf = { .npins = nitems(a64_pins), .pins = a64_pins, }; #endif /* !SOC_ALLWINNER_A64 */ Index: projects/clang1000-import/sys/arm/allwinner/a64/a64_r_padconf.c =================================================================== --- projects/clang1000-import/sys/arm/allwinner/a64/a64_r_padconf.c (revision 356919) +++ projects/clang1000-import/sys/arm/allwinner/a64/a64_r_padconf.c (revision 356920) @@ -1,63 +1,63 @@ /*- * Copyright (c) 2016 Jared McNeill * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "opt_soc.h" #ifdef SOC_ALLWINNER_A64 static const struct allwinner_pins a64_r_pins[] = { - { "PL0", 0, 0, { "gpio_in", "gpio_out", "s_rsb", "s_i2c", NULL, NULL, "pl_eint0" }, 6, 0}, - { "PL1", 0, 1, { "gpio_in", "gpio_out", "s_rsb", "s_i2c", NULL, NULL, "pl_eint1" }, 6, 1}, - { "PL2", 0, 2, { "gpio_in", "gpio_out", "s_uart", NULL, NULL, NULL, "pl_eint2" }, 6, 2}, - { "PL3", 0, 3, { "gpio_in", "gpio_out", "s_uart", NULL, NULL, NULL, "pl_eint3" }, 6, 3}, - { "PL4", 0, 4, { "gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint4" }, 6, 4}, - { "PL5", 0, 5, { "gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint5" }, 6, 5}, - { "PL6", 0, 6, { "gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint6" }, 6, 6}, - { "PL7", 0, 7, { "gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint7" }, 6, 7}, - { "PL8", 0, 8, { "gpio_in", "gpio_out", "s_i2c", NULL, NULL, NULL, "pl_eint8" }, 6, 8}, - { "PL9", 0, 9, { "gpio_in", "gpio_out", "s_i2c", NULL, NULL, NULL, "pl_eint9" }, 6, 9}, - { "PL10", 0, 10, { "gpio_in", "gpio_out", "s_pwm", NULL, NULL, NULL, "pl_eint10" }, 6, 10}, - { "PL11", 0, 11, { "gpio_in", "gpio_out", "s_cir", NULL, NULL, NULL, "pl_eint11" }, 6, 11}, - { "PL12", 0, 12, { "gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "pl_eint12" }, 6, 12}, + { "PL0", 0, 0, { "gpio_in", "gpio_out", "s_rsb", "s_i2c", NULL, NULL, "pl_eint0" }, 6, 0, 0}, + { "PL1", 0, 1, { "gpio_in", "gpio_out", "s_rsb", "s_i2c", NULL, NULL, "pl_eint1" }, 6, 1, 0}, + { "PL2", 0, 2, { "gpio_in", "gpio_out", "s_uart", NULL, NULL, NULL, "pl_eint2" }, 6, 2, 0}, + { "PL3", 0, 3, { "gpio_in", "gpio_out", "s_uart", NULL, NULL, NULL, "pl_eint3" }, 6, 3, 0}, + { "PL4", 0, 4, { "gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint4" }, 6, 4, 0}, + { "PL5", 0, 5, { "gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint5" }, 6, 5, 0}, + { "PL6", 0, 6, { "gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint6" }, 6, 6, 0}, + { "PL7", 0, 7, { "gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint7" }, 6, 7, 0}, + { "PL8", 0, 8, { "gpio_in", "gpio_out", "s_i2c", NULL, NULL, NULL, "pl_eint8" }, 6, 8, 0}, + { "PL9", 0, 9, { "gpio_in", "gpio_out", "s_i2c", NULL, NULL, NULL, "pl_eint9" }, 6, 9, 0}, + { "PL10", 0, 10, { "gpio_in", "gpio_out", "s_pwm", NULL, NULL, NULL, "pl_eint10" }, 6, 10, 0}, + { "PL11", 0, 11, { "gpio_in", "gpio_out", "s_cir", NULL, NULL, NULL, "pl_eint11" }, 6, 11, 0}, + { "PL12", 0, 12, { "gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "pl_eint12" }, 6, 12, 0}, }; const struct allwinner_padconf a64_r_padconf = { .npins = nitems(a64_r_pins), .pins = a64_r_pins, }; #endif /* !SOC_ALLWINNER_A64 */ Index: projects/clang1000-import/sys/arm/allwinner/a83t/a83t_padconf.c =================================================================== --- projects/clang1000-import/sys/arm/allwinner/a83t/a83t_padconf.c (revision 356919) +++ projects/clang1000-import/sys/arm/allwinner/a83t/a83t_padconf.c (revision 356920) @@ -1,161 +1,161 @@ /*- * Copyright (c) 2016 Jared McNeill * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #ifdef SOC_ALLWINNER_A83T static const struct allwinner_pins a83t_pins[] = { - { "PB0", 1, 0, { "gpio_in", "gpio_out", "uart2", "jtag", NULL, NULL, "eint" } }, - { "PB1", 1, 1, { "gpio_in", "gpio_out", "uart2", "jtag", NULL, NULL, "eint" } }, - { "PB2", 1, 2, { "gpio_in", "gpio_out", "uart2", "jtag", NULL, NULL, "eint" } }, - { "PB3", 1, 3, { "gpio_in", "gpio_out", "uart2", "jtag", NULL, NULL, "eint" } }, - { "PB4", 1, 4, { "gpio_in", "gpio_out", "i2s0", "tdm", NULL, NULL, "eint" } }, - { "PB5", 1, 5, { "gpio_in", "gpio_out", "i2s0", "tdm", NULL, NULL, "eint" } }, - { "PB6", 1, 6, { "gpio_in", "gpio_out", "i2s0", "tdm", NULL, NULL, "eint" } }, - { "PB7", 1, 7, { "gpio_in", "gpio_out", "i2s0", "tdm", NULL, NULL, "eint" } }, - { "PB8", 1, 8, { "gpio_in", "gpio_out", "i2s0", "tdm", NULL, NULL, "eint" } }, - { "PB9", 1, 9, { "gpio_in", "gpio_out", "uart0", NULL, NULL, NULL, "eint" } }, - { "PB10", 1, 10, { "gpio_in", "gpio_out", "uart0", NULL, NULL, NULL, "eint" } }, + { "PB0", 1, 0, { "gpio_in", "gpio_out", "uart2", "jtag", NULL, NULL, "pb_eint0" }, 6, 0, 0}, + { "PB1", 1, 1, { "gpio_in", "gpio_out", "uart2", "jtag", NULL, NULL, "pb_eint1" }, 6, 1, 0}, + { "PB2", 1, 2, { "gpio_in", "gpio_out", "uart2", "jtag", NULL, NULL, "pb_eint2" }, 6, 2, 0}, + { "PB3", 1, 3, { "gpio_in", "gpio_out", "uart2", "jtag", NULL, NULL, "pb_eint3" }, 6, 3, 0}, + { "PB4", 1, 4, { "gpio_in", "gpio_out", "i2s0", "tdm", NULL, NULL, "pb_eint4" }, 6, 4, 0}, + { "PB5", 1, 5, { "gpio_in", "gpio_out", "i2s0", "tdm", NULL, NULL, "pb_eint5" }, 6, 5, 0}, + { "PB6", 1, 6, { "gpio_in", "gpio_out", "i2s0", "tdm", NULL, NULL, "pb_eint6" }, 6, 6, 0}, + { "PB7", 1, 7, { "gpio_in", "gpio_out", "i2s0", "tdm", NULL, NULL, "pb_eint7" }, 6, 7, 0}, + { "PB8", 1, 8, { "gpio_in", "gpio_out", "i2s0", "tdm", NULL, NULL, "pb_eint8" }, 6, 8, 0}, + { "PB9", 1, 9, { "gpio_in", "gpio_out", "uart0", NULL, NULL, NULL, "pb_eint9" }, 6, 9, 0}, + { "PB10", 1, 10, { "gpio_in", "gpio_out", "uart0", NULL, NULL, NULL, "pb_eint10" }, 6, 10, 0}, { "PC0", 2, 0, { "gpio_in", "gpio_out", "nand", "spi0" } }, { "PC1", 2, 1, { "gpio_in", "gpio_out", "nand", "spi0" } }, { "PC2", 2, 2, { "gpio_in", "gpio_out", "nand", "spi0" } }, { "PC3", 2, 3, { "gpio_in", "gpio_out", "nand", "spi0" } }, { "PC4", 2, 4, { "gpio_in", "gpio_out", "nand" } }, { "PC5", 2, 5, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC6", 2, 6, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC7", 2, 7, { "gpio_in", "gpio_out", "nand" } }, { "PC8", 2, 8, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC9", 2, 9, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC10", 2, 10, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC11", 2, 11, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC12", 2, 12, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC13", 2, 13, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC14", 2, 14, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC15", 2, 15, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC16", 2, 16, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC17", 2, 17, { "gpio_in", "gpio_out", "nand" } }, { "PC18", 2, 18, { "gpio_in", "gpio_out", "nand" } }, { "PD2", 3, 2, { "gpio_in", "gpio_out", "lcd", NULL, "gmac" } }, { "PD3", 3, 3, { "gpio_in", "gpio_out", "lcd", NULL, "gmac" } }, { "PD4", 3, 4, { "gpio_in", "gpio_out", "lcd", NULL, "gmac" } }, { "PD5", 3, 5, { "gpio_in", "gpio_out", "lcd", NULL, "gmac" } }, { "PD6", 3, 6, { "gpio_in", "gpio_out", "lcd", NULL, "gmac" } }, { "PD7", 3, 7, { "gpio_in", "gpio_out", "lcd", NULL, "gmac" } }, { "PD10", 3, 10, { "gpio_in", "gpio_out", "lcd", NULL, "gmac" } }, { "PD11", 3, 11, { "gpio_in", "gpio_out", "lcd", NULL, "gmac" } }, { "PD12", 3, 12, { "gpio_in", "gpio_out", "lcd", NULL, "gmac" } }, { "PD13", 3, 13, { "gpio_in", "gpio_out", "lcd", NULL, "gmac" } }, { "PD14", 3, 14, { "gpio_in", "gpio_out", "lcd", NULL, "gmac" } }, { "PD15", 3, 15, { "gpio_in", "gpio_out", "lcd", NULL, "gmac" } }, { "PD18", 3, 18, { "gpio_in", "gpio_out", "lcd", "lvds", "gmac" } }, { "PD19", 3, 19, { "gpio_in", "gpio_out", "lcd", "lvds", "gmac" } }, { "PD20", 3, 20, { "gpio_in", "gpio_out", "lcd", "lvds", "gmac" } }, { "PD21", 3, 21, { "gpio_in", "gpio_out", "lcd", "lvds", "gmac" } }, { "PD22", 3, 22, { "gpio_in", "gpio_out", "lcd", "lvds", "gmac" } }, { "PD23", 3, 23, { "gpio_in", "gpio_out", "lcd", "lvds", "gmac" } }, { "PD24", 3, 24, { "gpio_in", "gpio_out", "lcd", "lvds" } }, { "PD25", 3, 25, { "gpio_in", "gpio_out", "lcd", "lvds" } }, { "PD26", 3, 26, { "gpio_in", "gpio_out", "lcd", "lvds" } }, { "PD27", 3, 27, { "gpio_in", "gpio_out", "lcd", "lvds" } }, { "PD28", 3, 28, { "gpio_in", "gpio_out", "pwm" } }, { "PD29", 3, 29, { "gpio_in", "gpio_out" } }, { "PE0", 4, 0, { "gpio_in", "gpio_out", "csi", NULL, "ccir" } }, { "PE1", 4, 1, { "gpio_in", "gpio_out", "csi", NULL, "ccir" } }, { "PE2", 4, 2, { "gpio_in", "gpio_out", "csi", NULL, "ccir" } }, { "PE3", 4, 3, { "gpio_in", "gpio_out", "csi", NULL, "ccir" } }, { "PE4", 4, 4, { "gpio_in", "gpio_out", "csi" } }, { "PE5", 4, 5, { "gpio_in", "gpio_out", "csi" } }, { "PE6", 4, 6, { "gpio_in", "gpio_out", "csi", NULL, "ccir" } }, { "PE7", 4, 7, { "gpio_in", "gpio_out", "csi", NULL, "ccir" } }, { "PE8", 4, 8, { "gpio_in", "gpio_out", "csi", NULL, "ccir" } }, { "PE9", 4, 9, { "gpio_in", "gpio_out", "csi", NULL, "ccir" } }, { "PE10", 4, 10, { "gpio_in", "gpio_out", "csi", "uart4", "ccir" } }, { "PE11", 4, 11, { "gpio_in", "gpio_out", "csi", "uart4", "ccir" } }, { "PE12", 4, 12, { "gpio_in", "gpio_out", "csi", "uart4", "ccir" } }, { "PE13", 4, 13, { "gpio_in", "gpio_out", "csi", "uart4", "ccir" } }, { "PE14", 4, 14, { "gpio_in", "gpio_out", "csi", "twi2" } }, { "PE15", 4, 15, { "gpio_in", "gpio_out", "csi", "twi2" } }, { "PE16", 4, 16, { "gpio_in", "gpio_out" } }, { "PE17", 4, 17, { "gpio_in", "gpio_out" } }, { "PE18", 4, 18, { "gpio_in", "gpio_out", NULL, "owa" } }, { "PE19", 4, 19, { "gpio_in", "gpio_out" } }, { "PF0", 5, 0, { "gpio_in", "gpio_out", "mmc0", "jtag" } }, { "PF1", 5, 1, { "gpio_in", "gpio_out", "mmc0", "jtag" } }, { "PF2", 5, 2, { "gpio_in", "gpio_out", "mmc0", "uart0" } }, { "PF3", 5, 3, { "gpio_in", "gpio_out", "mmc0", "jtag" } }, { "PF4", 5, 4, { "gpio_in", "gpio_out", "mmc0", "uart0" } }, { "PF5", 5, 5, { "gpio_in", "gpio_out", "mmc0", "jtag" } }, { "PF6", 5, 6, { "gpio_in", "gpio_out" } }, - { "PG0", 6, 0, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "eint" } }, - { "PG1", 6, 1, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "eint" } }, - { "PG2", 6, 2, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "eint" } }, - { "PG3", 6, 3, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "eint" } }, - { "PG4", 6, 4, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "eint" } }, - { "PG5", 6, 5, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "eint" } }, - { "PG6", 6, 6, { "gpio_in", "gpio_out", "uart1", "spi1", NULL, NULL, "eint" } }, - { "PG7", 6, 7, { "gpio_in", "gpio_out", "uart1", "spi1", NULL, NULL, "eint" } }, - { "PG8", 6, 8, { "gpio_in", "gpio_out", "uart1", "spi1", NULL, NULL, "eint" } }, - { "PG9", 6, 9, { "gpio_in", "gpio_out", "uart1", "spi1", NULL, NULL, "eint" } }, - { "PG10", 6, 10, { "gpio_in", "gpio_out", "i2s1", "uart3", NULL, NULL, "eint" } }, - { "PG11", 6, 11, { "gpio_in", "gpio_out", "i2s1", "uart3", NULL, NULL, "eint" } }, - { "PG12", 6, 12, { "gpio_in", "gpio_out", "i2s1", "uart3", NULL, NULL, "eint" } }, - { "PG13", 6, 13, { "gpio_in", "gpio_out", "i2s1", "uart3", NULL, NULL, "eint" } }, + { "PG0", 6, 0, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint0" }, 6, 0, 1}, + { "PG1", 6, 1, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint1" }, 6, 1, 1}, + { "PG2", 6, 2, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint2" }, 6, 2, 1}, + { "PG3", 6, 3, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint3" }, 6, 3, 1}, + { "PG4", 6, 4, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint4" }, 6, 4, 1}, + { "PG5", 6, 5, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint5" }, 6, 5, 1}, + { "PG6", 6, 6, { "gpio_in", "gpio_out", "uart1", "spi1", NULL, NULL, "pg_eint6" }, 6, 6, 1}, + { "PG7", 6, 7, { "gpio_in", "gpio_out", "uart1", "spi1", NULL, NULL, "pg_eint7" }, 6, 7, 1}, + { "PG8", 6, 8, { "gpio_in", "gpio_out", "uart1", "spi1", NULL, NULL, "pg_eint8" }, 6, 8, 1}, + { "PG9", 6, 9, { "gpio_in", "gpio_out", "uart1", "spi1", NULL, NULL, "pg_eint9" }, 6, 9, 1}, + { "PG10", 6, 10, { "gpio_in", "gpio_out", "i2s1", "uart3", NULL, NULL, "pg_eint10" }, 6, 10, 1}, + { "PG11", 6, 11, { "gpio_in", "gpio_out", "i2s1", "uart3", NULL, NULL, "pg_eint11" }, 6, 11, 1}, + { "PG12", 6, 12, { "gpio_in", "gpio_out", "i2s1", "uart3", NULL, NULL, "pg_eint12" }, 6, 12, 1}, + { "PG13", 6, 13, { "gpio_in", "gpio_out", "i2s1", "uart3", NULL, NULL, "pg_eint13" }, 6, 13, 1}, - { "PH0", 7, 0, { "gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, "eint" } }, - { "PH1", 7, 1, { "gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, "eint" } }, - { "PH2", 7, 2, { "gpio_in", "gpio_out", "i2c1", NULL, NULL, NULL, "eint" } }, - { "PH3", 7, 3, { "gpio_in", "gpio_out", "i2c1", NULL, NULL, NULL, "eint" } }, - { "PH4", 7, 4, { "gpio_in", "gpio_out", "i2c2", NULL, NULL, NULL, "eint" } }, - { "PH5", 7, 5, { "gpio_in", "gpio_out", "i2c2", NULL, NULL, NULL, "eint" } }, - { "PH6", 7, 6, { "gpio_in", "gpio_out", "hdmiddc", NULL, NULL, NULL, "eint" } }, - { "PH7", 7, 7, { "gpio_in", "gpio_out", "hdmiddc", NULL, NULL, NULL, "eint" } }, - { "PH8", 7, 8, { "gpio_in", "gpio_out", "hdmiddc", NULL, NULL, NULL, "eint" } }, - { "PH9", 7, 9, { "gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "eint" } }, - { "PH10", 7, 10, { "gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "eint" } }, - { "PH11", 7, 11, { "gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "eint" } }, + { "PH0", 7, 0, { "gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, "ph_eint0" }, 6, 0, 2}, + { "PH1", 7, 1, { "gpio_in", "gpio_out", "i2c0", NULL, NULL, NULL, "ph_eint1" }, 6, 1, 2}, + { "PH2", 7, 2, { "gpio_in", "gpio_out", "i2c1", NULL, NULL, NULL, "ph_eint2" }, 6, 2, 2}, + { "PH3", 7, 3, { "gpio_in", "gpio_out", "i2c1", NULL, NULL, NULL, "ph_eint3" }, 6, 3, 2}, + { "PH4", 7, 4, { "gpio_in", "gpio_out", "i2c2", NULL, NULL, NULL, "ph_eint4" }, 6, 4, 2}, + { "PH5", 7, 5, { "gpio_in", "gpio_out", "i2c2", NULL, NULL, NULL, "ph_eint5" }, 6, 5, 2}, + { "PH6", 7, 6, { "gpio_in", "gpio_out", "hdmiddc", NULL, NULL, NULL, "ph_eint6" }, 6, 6, 2}, + { "PH7", 7, 7, { "gpio_in", "gpio_out", "hdmiddc", NULL, NULL, NULL, "ph_eint7" }, 6, 7, 2}, + { "PH8", 7, 8, { "gpio_in", "gpio_out", "hdmiddc", NULL, NULL, NULL, "ph_eint8" }, 6, 8, 2}, + { "PH9", 7, 9, { "gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "ph_eint9" }, 6, 9, 2}, + { "PH10", 7, 10, { "gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "ph_eint10" }, 6, 10, 2}, + { "PH11", 7, 11, { "gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "ph_eint11" }, 6, 11, 2}, }; const struct allwinner_padconf a83t_padconf = { .npins = nitems(a83t_pins), .pins = a83t_pins, }; #endif /* !SOC_ALLWINNER_A83T */ Index: projects/clang1000-import/sys/arm/allwinner/allwinner_pinctrl.h =================================================================== --- projects/clang1000-import/sys/arm/allwinner/allwinner_pinctrl.h (revision 356919) +++ projects/clang1000-import/sys/arm/allwinner/allwinner_pinctrl.h (revision 356920) @@ -1,47 +1,48 @@ /*- * Copyright (c) 2016 Emmanuel Vadot * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _ALLWINNER_PINCTRL_H_ #define _ALLWINNER_PINCTRL_H_ #define AW_MAX_FUNC_BY_PIN 8 struct allwinner_pins { const char *name; uint8_t port; uint8_t pin; const char *functions[8]; uint8_t eint_func; uint8_t eint_num; + uint8_t eint_bank; }; struct allwinner_padconf { uint32_t npins; const struct allwinner_pins * pins; }; #endif /* _ALLWINNER_PINCTRL_H_ */ Index: projects/clang1000-import/sys/arm/allwinner/aw_gpio.c =================================================================== --- projects/clang1000-import/sys/arm/allwinner/aw_gpio.c (revision 356919) +++ projects/clang1000-import/sys/arm/allwinner/aw_gpio.c (revision 356920) @@ -1,1132 +1,1498 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 Ganbold Tsagaankhuu * Copyright (c) 2012 Oleksandr Tymoshenko * Copyright (c) 2012 Luiz Otavio O Souza. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__aarch64__) #include "opt_soc.h" #endif +#ifdef INTRNG +#include "pic_if.h" +#endif + #include "gpio_if.h" #define AW_GPIO_DEFAULT_CAPS (GPIO_PIN_INPUT | GPIO_PIN_OUTPUT | \ - GPIO_PIN_PULLUP | GPIO_PIN_PULLDOWN) + GPIO_PIN_PULLUP | GPIO_PIN_PULLDOWN); +#define AW_GPIO_INTR_CAPS (GPIO_INTR_LEVEL_LOW | GPIO_INTR_LEVEL_HIGH | \ + GPIO_INTR_EDGE_RISING | GPIO_INTR_EDGE_FALLING | GPIO_INTR_EDGE_BOTH) + #define AW_GPIO_NONE 0 #define AW_GPIO_PULLUP 1 #define AW_GPIO_PULLDOWN 2 #define AW_GPIO_INPUT 0 #define AW_GPIO_OUTPUT 1 #define AW_GPIO_DRV_MASK 0x3 #define AW_GPIO_PUD_MASK 0x3 #define AW_PINCTRL 1 #define AW_R_PINCTRL 2 struct aw_gpio_conf { struct allwinner_padconf *padconf; const char *banks; }; /* Defined in aw_padconf.c */ #ifdef SOC_ALLWINNER_A10 extern struct allwinner_padconf a10_padconf; struct aw_gpio_conf a10_gpio_conf = { .padconf = &a10_padconf, .banks = "abcdefghi", }; #endif /* Defined in a13_padconf.c */ #ifdef SOC_ALLWINNER_A13 extern struct allwinner_padconf a13_padconf; struct aw_gpio_conf a13_gpio_conf = { .padconf = &a13_padconf, .banks = "bcdefg", }; #endif /* Defined in a20_padconf.c */ #ifdef SOC_ALLWINNER_A20 extern struct allwinner_padconf a20_padconf; struct aw_gpio_conf a20_gpio_conf = { .padconf = &a20_padconf, .banks = "abcdefghi", }; #endif /* Defined in a31_padconf.c */ #ifdef SOC_ALLWINNER_A31 extern struct allwinner_padconf a31_padconf; struct aw_gpio_conf a31_gpio_conf = { .padconf = &a31_padconf, .banks = "abcdefgh", }; #endif /* Defined in a31s_padconf.c */ #ifdef SOC_ALLWINNER_A31S extern struct allwinner_padconf a31s_padconf; struct aw_gpio_conf a31s_gpio_conf = { .padconf = &a31s_padconf, .banks = "abcdefgh", }; #endif #if defined(SOC_ALLWINNER_A31) || defined(SOC_ALLWINNER_A31S) extern struct allwinner_padconf a31_r_padconf; struct aw_gpio_conf a31_r_gpio_conf = { .padconf = &a31_r_padconf, .banks = "lm", }; #endif /* Defined in a33_padconf.c */ #ifdef SOC_ALLWINNER_A33 extern struct allwinner_padconf a33_padconf; struct aw_gpio_conf a33_gpio_conf = { .padconf = &a33_padconf, .banks = "bcdefgh", }; #endif /* Defined in h3_padconf.c */ #if defined(SOC_ALLWINNER_H3) || defined(SOC_ALLWINNER_H5) extern struct allwinner_padconf h3_padconf; extern struct allwinner_padconf h3_r_padconf; struct aw_gpio_conf h3_gpio_conf = { .padconf = &h3_padconf, .banks = "acdefg", }; struct aw_gpio_conf h3_r_gpio_conf = { .padconf = &h3_r_padconf, .banks = "l", }; #endif /* Defined in a83t_padconf.c */ #ifdef SOC_ALLWINNER_A83T extern struct allwinner_padconf a83t_padconf; extern struct allwinner_padconf a83t_r_padconf; struct aw_gpio_conf a83t_gpio_conf = { .padconf = &a83t_padconf, .banks = "bcdefgh" }; struct aw_gpio_conf a83t_r_gpio_conf = { .padconf = &a83t_r_padconf, .banks = "l", }; #endif /* Defined in a64_padconf.c */ #ifdef SOC_ALLWINNER_A64 extern struct allwinner_padconf a64_padconf; extern struct allwinner_padconf a64_r_padconf; struct aw_gpio_conf a64_gpio_conf = { .padconf = &a64_padconf, .banks = "bcdefgh", }; struct aw_gpio_conf a64_r_gpio_conf = { .padconf = &a64_r_padconf, .banks = "l", }; #endif /* Defined in h6_padconf.c */ #ifdef SOC_ALLWINNER_H6 extern struct allwinner_padconf h6_padconf; extern struct allwinner_padconf h6_r_padconf; struct aw_gpio_conf h6_gpio_conf = { .padconf = &h6_padconf, .banks = "cdfgh", }; struct aw_gpio_conf h6_r_gpio_conf = { .padconf = &h6_r_padconf, .banks = "lm", }; #endif static struct ofw_compat_data compat_data[] = { #ifdef SOC_ALLWINNER_A10 {"allwinner,sun4i-a10-pinctrl", (uintptr_t)&a10_gpio_conf}, #endif #ifdef SOC_ALLWINNER_A13 {"allwinner,sun5i-a13-pinctrl", (uintptr_t)&a13_gpio_conf}, #endif #ifdef SOC_ALLWINNER_A20 {"allwinner,sun7i-a20-pinctrl", (uintptr_t)&a20_gpio_conf}, #endif #ifdef SOC_ALLWINNER_A31 {"allwinner,sun6i-a31-pinctrl", (uintptr_t)&a31_gpio_conf}, #endif #ifdef SOC_ALLWINNER_A31S {"allwinner,sun6i-a31s-pinctrl", (uintptr_t)&a31s_gpio_conf}, #endif #if defined(SOC_ALLWINNER_A31) || defined(SOC_ALLWINNER_A31S) {"allwinner,sun6i-a31-r-pinctrl", (uintptr_t)&a31_r_gpio_conf}, #endif #ifdef SOC_ALLWINNER_A33 {"allwinner,sun6i-a33-pinctrl", (uintptr_t)&a33_gpio_conf}, #endif #ifdef SOC_ALLWINNER_A83T {"allwinner,sun8i-a83t-pinctrl", (uintptr_t)&a83t_gpio_conf}, {"allwinner,sun8i-a83t-r-pinctrl", (uintptr_t)&a83t_r_gpio_conf}, #endif #if defined(SOC_ALLWINNER_H3) || defined(SOC_ALLWINNER_H5) {"allwinner,sun8i-h3-pinctrl", (uintptr_t)&h3_gpio_conf}, {"allwinner,sun50i-h5-pinctrl", (uintptr_t)&h3_gpio_conf}, {"allwinner,sun8i-h3-r-pinctrl", (uintptr_t)&h3_r_gpio_conf}, #endif #ifdef SOC_ALLWINNER_A64 {"allwinner,sun50i-a64-pinctrl", (uintptr_t)&a64_gpio_conf}, {"allwinner,sun50i-a64-r-pinctrl", (uintptr_t)&a64_r_gpio_conf}, #endif #ifdef SOC_ALLWINNER_H6 {"allwinner,sun50i-h6-pinctrl", (uintptr_t)&h6_gpio_conf}, {"allwinner,sun50i-h6-r-pinctrl", (uintptr_t)&h6_r_gpio_conf}, #endif {NULL, 0} }; struct clk_list { TAILQ_ENTRY(clk_list) next; clk_t clk; }; +#ifdef INTRNG +struct gpio_irqsrc { + struct intr_irqsrc isrc; + u_int irq; + uint32_t mode; + uint32_t pin; + uint32_t bank; + uint32_t intnum; + uint32_t intfunc; + uint32_t oldfunc; + bool enabled; +}; +#endif + +#define AW_GPIO_MEMRES 0 +#define AW_GPIO_IRQRES 1 +#define AW_GPIO_RESSZ 2 + struct aw_gpio_softc { device_t sc_dev; device_t sc_busdev; + struct resource * sc_res[AW_GPIO_RESSZ]; struct mtx sc_mtx; struct resource * sc_mem_res; struct resource * sc_irq_res; - bus_space_tag_t sc_bst; - bus_space_handle_t sc_bsh; void * sc_intrhand; struct aw_gpio_conf *conf; TAILQ_HEAD(, clk_list) clk_list; + +#ifdef INTRNG + struct gpio_irqsrc *gpio_pic_irqsrc; + int nirqs; +#endif }; +static struct resource_spec aw_gpio_res_spec[] = { + { SYS_RES_MEMORY, 0, RF_ACTIVE }, + { SYS_RES_IRQ, 0, RF_ACTIVE | RF_SHAREABLE }, + { -1, 0, 0 } +}; + #define AW_GPIO_LOCK(_sc) mtx_lock_spin(&(_sc)->sc_mtx) #define AW_GPIO_UNLOCK(_sc) mtx_unlock_spin(&(_sc)->sc_mtx) #define AW_GPIO_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_mtx, MA_OWNED) #define AW_GPIO_GP_CFG(_bank, _idx) 0x00 + ((_bank) * 0x24) + ((_idx) << 2) #define AW_GPIO_GP_DAT(_bank) 0x10 + ((_bank) * 0x24) #define AW_GPIO_GP_DRV(_bank, _idx) 0x14 + ((_bank) * 0x24) + ((_idx) << 2) #define AW_GPIO_GP_PUL(_bank, _idx) 0x1c + ((_bank) * 0x24) + ((_idx) << 2) -#define AW_GPIO_GP_INT_CFG0 0x200 -#define AW_GPIO_GP_INT_CFG1 0x204 -#define AW_GPIO_GP_INT_CFG2 0x208 -#define AW_GPIO_GP_INT_CFG3 0x20c +#define AW_GPIO_GP_INT_BASE(_bank) (0x200 + 0x20 * _bank) -#define AW_GPIO_GP_INT_CTL 0x210 -#define AW_GPIO_GP_INT_STA 0x214 -#define AW_GPIO_GP_INT_DEB 0x218 +#define AW_GPIO_GP_INT_CFG(_bank, _pin) (AW_GPIO_GP_INT_BASE(_bank) + (0x4 * ((_pin) / 8))) +#define AW_GPIO_GP_INT_CTL(_bank) (AW_GPIO_GP_INT_BASE(_bank) + 0x10) +#define AW_GPIO_GP_INT_STA(_bank) (AW_GPIO_GP_INT_BASE(_bank) + 0x14) +#define AW_GPIO_GP_INT_DEB(_bank) (AW_GPIO_GP_INT_BASE(_bank) + 0x18) +#define AW_GPIO_INT_EDGE_POSITIVE 0x0 +#define AW_GPIO_INT_EDGE_NEGATIVE 0x1 +#define AW_GPIO_INT_LEVEL_HIGH 0x2 +#define AW_GPIO_INT_LEVEL_LOW 0x3 +#define AW_GPIO_INT_EDGE_BOTH 0x4 + static char *aw_gpio_parse_function(phandle_t node); static const char **aw_gpio_parse_pins(phandle_t node, int *pins_nb); static uint32_t aw_gpio_parse_bias(phandle_t node); static int aw_gpio_parse_drive_strength(phandle_t node, uint32_t *drive); static int aw_gpio_pin_get(device_t dev, uint32_t pin, unsigned int *value); static int aw_gpio_pin_set(device_t dev, uint32_t pin, unsigned int value); static int aw_gpio_pin_get_locked(struct aw_gpio_softc *sc, uint32_t pin, unsigned int *value); static int aw_gpio_pin_set_locked(struct aw_gpio_softc *sc, uint32_t pin, unsigned int value); +static void aw_gpio_intr(void *arg); +static void aw_gpio_pic_disable_intr(device_t dev, struct intr_irqsrc *isrc); +static void aw_gpio_pic_disable_intr_locked(struct aw_gpio_softc *sc, struct intr_irqsrc *isrc); +static void aw_gpio_pic_post_filter(device_t dev, struct intr_irqsrc *isrc); +static int aw_gpio_register_isrcs(struct aw_gpio_softc *sc); + #define AW_GPIO_WRITE(_sc, _off, _val) \ - bus_space_write_4(_sc->sc_bst, _sc->sc_bsh, _off, _val) + bus_write_4((_sc)->sc_res[AW_GPIO_MEMRES], _off, _val) #define AW_GPIO_READ(_sc, _off) \ - bus_space_read_4(_sc->sc_bst, _sc->sc_bsh, _off) + bus_read_4((_sc)->sc_res[AW_GPIO_MEMRES], _off) static uint32_t aw_gpio_get_function(struct aw_gpio_softc *sc, uint32_t pin) { uint32_t bank, func, offset; /* Must be called with lock held. */ AW_GPIO_LOCK_ASSERT(sc); if (pin > sc->conf->padconf->npins) return (0); bank = sc->conf->padconf->pins[pin].port; pin = sc->conf->padconf->pins[pin].pin; offset = ((pin & 0x07) << 2); func = AW_GPIO_READ(sc, AW_GPIO_GP_CFG(bank, pin >> 3)); return ((func >> offset) & 0x7); } static int aw_gpio_set_function(struct aw_gpio_softc *sc, uint32_t pin, uint32_t f) { uint32_t bank, data, offset; /* Check if the function exists in the padconf data */ if (sc->conf->padconf->pins[pin].functions[f] == NULL) return (EINVAL); /* Must be called with lock held. */ AW_GPIO_LOCK_ASSERT(sc); bank = sc->conf->padconf->pins[pin].port; pin = sc->conf->padconf->pins[pin].pin; offset = ((pin & 0x07) << 2); data = AW_GPIO_READ(sc, AW_GPIO_GP_CFG(bank, pin >> 3)); data &= ~(7 << offset); data |= (f << offset); AW_GPIO_WRITE(sc, AW_GPIO_GP_CFG(bank, pin >> 3), data); return (0); } static uint32_t aw_gpio_get_pud(struct aw_gpio_softc *sc, uint32_t pin) { uint32_t bank, offset, val; /* Must be called with lock held. */ AW_GPIO_LOCK_ASSERT(sc); bank = sc->conf->padconf->pins[pin].port; pin = sc->conf->padconf->pins[pin].pin; offset = ((pin & 0x0f) << 1); val = AW_GPIO_READ(sc, AW_GPIO_GP_PUL(bank, pin >> 4)); return ((val >> offset) & AW_GPIO_PUD_MASK); } static void aw_gpio_set_pud(struct aw_gpio_softc *sc, uint32_t pin, uint32_t state) { uint32_t bank, offset, val; if (aw_gpio_get_pud(sc, pin) == state) return; /* Must be called with lock held. */ AW_GPIO_LOCK_ASSERT(sc); bank = sc->conf->padconf->pins[pin].port; pin = sc->conf->padconf->pins[pin].pin; offset = ((pin & 0x0f) << 1); val = AW_GPIO_READ(sc, AW_GPIO_GP_PUL(bank, pin >> 4)); val &= ~(AW_GPIO_PUD_MASK << offset); val |= (state << offset); AW_GPIO_WRITE(sc, AW_GPIO_GP_PUL(bank, pin >> 4), val); } static uint32_t aw_gpio_get_drv(struct aw_gpio_softc *sc, uint32_t pin) { uint32_t bank, offset, val; /* Must be called with lock held. */ AW_GPIO_LOCK_ASSERT(sc); bank = sc->conf->padconf->pins[pin].port; pin = sc->conf->padconf->pins[pin].pin; offset = ((pin & 0x0f) << 1); val = AW_GPIO_READ(sc, AW_GPIO_GP_DRV(bank, pin >> 4)); return ((val >> offset) & AW_GPIO_DRV_MASK); } static void aw_gpio_set_drv(struct aw_gpio_softc *sc, uint32_t pin, uint32_t drive) { uint32_t bank, offset, val; if (aw_gpio_get_drv(sc, pin) == drive) return; /* Must be called with lock held. */ AW_GPIO_LOCK_ASSERT(sc); bank = sc->conf->padconf->pins[pin].port; pin = sc->conf->padconf->pins[pin].pin; offset = ((pin & 0x0f) << 1); val = AW_GPIO_READ(sc, AW_GPIO_GP_DRV(bank, pin >> 4)); val &= ~(AW_GPIO_DRV_MASK << offset); val |= (drive << offset); AW_GPIO_WRITE(sc, AW_GPIO_GP_DRV(bank, pin >> 4), val); } static int aw_gpio_pin_configure(struct aw_gpio_softc *sc, uint32_t pin, uint32_t flags) { u_int val; int err = 0; /* Must be called with lock held. */ AW_GPIO_LOCK_ASSERT(sc); if (pin > sc->conf->padconf->npins) return (EINVAL); /* Manage input/output. */ if (flags & GPIO_PIN_INPUT) { err = aw_gpio_set_function(sc, pin, AW_GPIO_INPUT); } else if ((flags & GPIO_PIN_OUTPUT) && aw_gpio_get_function(sc, pin) != AW_GPIO_OUTPUT) { if (flags & GPIO_PIN_PRESET_LOW) { aw_gpio_pin_set_locked(sc, pin, 0); } else if (flags & GPIO_PIN_PRESET_HIGH) { aw_gpio_pin_set_locked(sc, pin, 1); } else { /* Read the pin and preset output to current state. */ err = aw_gpio_set_function(sc, pin, AW_GPIO_INPUT); if (err == 0) { aw_gpio_pin_get_locked(sc, pin, &val); aw_gpio_pin_set_locked(sc, pin, val); } } if (err == 0) err = aw_gpio_set_function(sc, pin, AW_GPIO_OUTPUT); } if (err) return (err); /* Manage Pull-up/pull-down. */ if (flags & GPIO_PIN_PULLUP) aw_gpio_set_pud(sc, pin, AW_GPIO_PULLUP); else if (flags & GPIO_PIN_PULLDOWN) aw_gpio_set_pud(sc, pin, AW_GPIO_PULLDOWN); else aw_gpio_set_pud(sc, pin, AW_GPIO_NONE); return (0); } static device_t aw_gpio_get_bus(device_t dev) { struct aw_gpio_softc *sc; sc = device_get_softc(dev); return (sc->sc_busdev); } static int aw_gpio_pin_max(device_t dev, int *maxpin) { struct aw_gpio_softc *sc; sc = device_get_softc(dev); *maxpin = sc->conf->padconf->npins - 1; return (0); } static int aw_gpio_pin_getcaps(device_t dev, uint32_t pin, uint32_t *caps) { struct aw_gpio_softc *sc; sc = device_get_softc(dev); if (pin >= sc->conf->padconf->npins) return (EINVAL); *caps = AW_GPIO_DEFAULT_CAPS; + if (sc->conf->padconf->pins[pin].eint_func != 0) + *caps |= AW_GPIO_INTR_CAPS; return (0); } static int aw_gpio_pin_getflags(device_t dev, uint32_t pin, uint32_t *flags) { struct aw_gpio_softc *sc; uint32_t func; uint32_t pud; sc = device_get_softc(dev); if (pin >= sc->conf->padconf->npins) return (EINVAL); AW_GPIO_LOCK(sc); func = aw_gpio_get_function(sc, pin); switch (func) { case AW_GPIO_INPUT: *flags = GPIO_PIN_INPUT; break; case AW_GPIO_OUTPUT: *flags = GPIO_PIN_OUTPUT; break; default: *flags = 0; break; } pud = aw_gpio_get_pud(sc, pin); switch (pud) { case AW_GPIO_PULLDOWN: *flags |= GPIO_PIN_PULLDOWN; break; case AW_GPIO_PULLUP: *flags |= GPIO_PIN_PULLUP; break; default: break; } AW_GPIO_UNLOCK(sc); return (0); } static int aw_gpio_pin_getname(device_t dev, uint32_t pin, char *name) { struct aw_gpio_softc *sc; sc = device_get_softc(dev); if (pin >= sc->conf->padconf->npins) return (EINVAL); snprintf(name, GPIOMAXNAME - 1, "%s", sc->conf->padconf->pins[pin].name); name[GPIOMAXNAME - 1] = '\0'; return (0); } static int aw_gpio_pin_setflags(device_t dev, uint32_t pin, uint32_t flags) { struct aw_gpio_softc *sc; int err; sc = device_get_softc(dev); if (pin > sc->conf->padconf->npins) return (EINVAL); AW_GPIO_LOCK(sc); err = aw_gpio_pin_configure(sc, pin, flags); AW_GPIO_UNLOCK(sc); return (err); } static int aw_gpio_pin_set_locked(struct aw_gpio_softc *sc, uint32_t pin, unsigned int value) { uint32_t bank, data; AW_GPIO_LOCK_ASSERT(sc); if (pin > sc->conf->padconf->npins) return (EINVAL); bank = sc->conf->padconf->pins[pin].port; pin = sc->conf->padconf->pins[pin].pin; data = AW_GPIO_READ(sc, AW_GPIO_GP_DAT(bank)); if (value) data |= (1 << pin); else data &= ~(1 << pin); AW_GPIO_WRITE(sc, AW_GPIO_GP_DAT(bank), data); return (0); } static int aw_gpio_pin_set(device_t dev, uint32_t pin, unsigned int value) { struct aw_gpio_softc *sc; int ret; sc = device_get_softc(dev); AW_GPIO_LOCK(sc); ret = aw_gpio_pin_set_locked(sc, pin, value); AW_GPIO_UNLOCK(sc); return (ret); } static int aw_gpio_pin_get_locked(struct aw_gpio_softc *sc,uint32_t pin, unsigned int *val) { uint32_t bank, reg_data; AW_GPIO_LOCK_ASSERT(sc); if (pin > sc->conf->padconf->npins) return (EINVAL); bank = sc->conf->padconf->pins[pin].port; pin = sc->conf->padconf->pins[pin].pin; reg_data = AW_GPIO_READ(sc, AW_GPIO_GP_DAT(bank)); *val = (reg_data & (1 << pin)) ? 1 : 0; return (0); } static char * aw_gpio_parse_function(phandle_t node) { char *function; if (OF_getprop_alloc(node, "function", (void **)&function) != -1) return (function); if (OF_getprop_alloc(node, "allwinner,function", (void **)&function) != -1) return (function); return (NULL); } static const char ** aw_gpio_parse_pins(phandle_t node, int *pins_nb) { const char **pinlist; *pins_nb = ofw_bus_string_list_to_array(node, "pins", &pinlist); if (*pins_nb > 0) return (pinlist); *pins_nb = ofw_bus_string_list_to_array(node, "allwinner,pins", &pinlist); if (*pins_nb > 0) return (pinlist); return (NULL); } static uint32_t aw_gpio_parse_bias(phandle_t node) { uint32_t bias; if (OF_getencprop(node, "pull", &bias, sizeof(bias)) != -1) return (bias); if (OF_getencprop(node, "allwinner,pull", &bias, sizeof(bias)) != -1) return (bias); if (OF_hasprop(node, "bias-disable")) return (AW_GPIO_NONE); if (OF_hasprop(node, "bias-pull-up")) return (AW_GPIO_PULLUP); if (OF_hasprop(node, "bias-pull-down")) return (AW_GPIO_PULLDOWN); return (AW_GPIO_NONE); } static int aw_gpio_parse_drive_strength(phandle_t node, uint32_t *drive) { uint32_t drive_str; if (OF_getencprop(node, "drive", drive, sizeof(*drive)) != -1) return (0); if (OF_getencprop(node, "allwinner,drive", drive, sizeof(*drive)) != -1) return (0); if (OF_getencprop(node, "drive-strength", &drive_str, sizeof(drive_str)) != -1) { *drive = (drive_str / 10) - 1; return (0); } return (1); } static int aw_gpio_pin_get(device_t dev, uint32_t pin, unsigned int *val) { struct aw_gpio_softc *sc; int ret; sc = device_get_softc(dev); AW_GPIO_LOCK(sc); ret = aw_gpio_pin_get_locked(sc, pin, val); AW_GPIO_UNLOCK(sc); return (ret); } static int aw_gpio_pin_toggle(device_t dev, uint32_t pin) { struct aw_gpio_softc *sc; uint32_t bank, data; sc = device_get_softc(dev); if (pin > sc->conf->padconf->npins) return (EINVAL); bank = sc->conf->padconf->pins[pin].port; pin = sc->conf->padconf->pins[pin].pin; AW_GPIO_LOCK(sc); data = AW_GPIO_READ(sc, AW_GPIO_GP_DAT(bank)); if (data & (1 << pin)) data &= ~(1 << pin); else data |= (1 << pin); AW_GPIO_WRITE(sc, AW_GPIO_GP_DAT(bank), data); AW_GPIO_UNLOCK(sc); return (0); } static int aw_gpio_pin_access_32(device_t dev, uint32_t first_pin, uint32_t clear_pins, uint32_t change_pins, uint32_t *orig_pins) { struct aw_gpio_softc *sc; uint32_t bank, data, pin; sc = device_get_softc(dev); if (first_pin > sc->conf->padconf->npins) return (EINVAL); /* * We require that first_pin refers to the first pin in a bank, because * this API is not about convenience, it's for making a set of pins * change simultaneously (required) with reasonably high performance * (desired); we need to do a read-modify-write on a single register. */ bank = sc->conf->padconf->pins[first_pin].port; pin = sc->conf->padconf->pins[first_pin].pin; if (pin != 0) return (EINVAL); AW_GPIO_LOCK(sc); data = AW_GPIO_READ(sc, AW_GPIO_GP_DAT(bank)); if ((clear_pins | change_pins) != 0) AW_GPIO_WRITE(sc, AW_GPIO_GP_DAT(bank), (data & ~clear_pins) ^ change_pins); AW_GPIO_UNLOCK(sc); if (orig_pins != NULL) *orig_pins = data; return (0); } static int aw_gpio_pin_config_32(device_t dev, uint32_t first_pin, uint32_t num_pins, uint32_t *pin_flags) { struct aw_gpio_softc *sc; uint32_t bank, pin; int err; sc = device_get_softc(dev); if (first_pin > sc->conf->padconf->npins) return (EINVAL); bank = sc->conf->padconf->pins[first_pin].port; if (sc->conf->padconf->pins[first_pin].pin != 0) return (EINVAL); /* * The configuration for a bank of pins is scattered among several * registers; we cannot g'tee to simultaneously change the state of all * the pins in the flags array. So just loop through the array * configuring each pin for now. If there was a strong need, it might * be possible to support some limited simultaneous config, such as * adjacent groups of 8 pins that line up the same as the config regs. */ for (err = 0, pin = first_pin; err == 0 && pin < num_pins; ++pin) { if (pin_flags[pin] & (GPIO_PIN_INPUT | GPIO_PIN_OUTPUT)) err = aw_gpio_pin_configure(sc, pin, pin_flags[pin]); } return (err); } static int +aw_gpio_map_gpios(device_t bus, phandle_t dev, phandle_t gparent, int gcells, + pcell_t *gpios, uint32_t *pin, uint32_t *flags) +{ + struct aw_gpio_softc *sc; + int i; + + sc = device_get_softc(bus); + + /* The GPIO pins are mapped as: . */ + for (i = 0; i < sc->conf->padconf->npins; i++) + if (sc->conf->padconf->pins[i].port == gpios[0] && + sc->conf->padconf->pins[i].pin == gpios[1]) { + *pin = i; + break; + } + *flags = gpios[gcells - 1]; + + return (0); +} + +static int aw_find_pinnum_by_name(struct aw_gpio_softc *sc, const char *pinname) { int i; for (i = 0; i < sc->conf->padconf->npins; i++) if (!strcmp(pinname, sc->conf->padconf->pins[i].name)) return i; return (-1); } static int aw_find_pin_func(struct aw_gpio_softc *sc, int pin, const char *func) { int i; for (i = 0; i < AW_MAX_FUNC_BY_PIN; i++) if (sc->conf->padconf->pins[pin].functions[i] && !strcmp(func, sc->conf->padconf->pins[pin].functions[i])) return (i); return (-1); } static int aw_fdt_configure_pins(device_t dev, phandle_t cfgxref) { struct aw_gpio_softc *sc; phandle_t node; const char **pinlist = NULL; char *pin_function = NULL; uint32_t pin_drive, pin_pull; int pins_nb, pin_num, pin_func, i, ret; bool set_drive; sc = device_get_softc(dev); node = OF_node_from_xref(cfgxref); ret = 0; set_drive = false; /* Getting all prop for configuring pins */ pinlist = aw_gpio_parse_pins(node, &pins_nb); if (pinlist == NULL) return (ENOENT); pin_function = aw_gpio_parse_function(node); if (pin_function == NULL) { ret = ENOENT; goto out; } if (aw_gpio_parse_drive_strength(node, &pin_drive) == 0) set_drive = true; pin_pull = aw_gpio_parse_bias(node); /* Configure each pin to the correct function, drive and pull */ for (i = 0; i < pins_nb; i++) { pin_num = aw_find_pinnum_by_name(sc, pinlist[i]); if (pin_num == -1) { ret = ENOENT; goto out; } pin_func = aw_find_pin_func(sc, pin_num, pin_function); if (pin_func == -1) { ret = ENOENT; goto out; } AW_GPIO_LOCK(sc); if (aw_gpio_get_function(sc, pin_num) != pin_func) aw_gpio_set_function(sc, pin_num, pin_func); if (set_drive) aw_gpio_set_drv(sc, pin_num, pin_drive); if (pin_pull != AW_GPIO_NONE) aw_gpio_set_pud(sc, pin_num, pin_pull); AW_GPIO_UNLOCK(sc); } out: OF_prop_free(pinlist); OF_prop_free(pin_function); return (ret); } static void aw_gpio_enable_bank_supply(void *arg) { struct aw_gpio_softc *sc = arg; regulator_t vcc_supply; char bank_reg_name[16]; int i, nbanks; nbanks = strlen(sc->conf->banks); for (i = 0; i < nbanks; i++) { snprintf(bank_reg_name, sizeof(bank_reg_name), "vcc-p%c-supply", sc->conf->banks[i]); if (regulator_get_by_ofw_property(sc->sc_dev, 0, bank_reg_name, &vcc_supply) == 0) { if (bootverbose) device_printf(sc->sc_dev, "Enabling regulator for gpio bank %c\n", sc->conf->banks[i]); if (regulator_enable(vcc_supply) != 0) { device_printf(sc->sc_dev, "Cannot enable regulator for bank %c\n", sc->conf->banks[i]); } } } } static int aw_gpio_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (ofw_bus_search_compatible(dev, compat_data)->ocd_data == 0) return (ENXIO); device_set_desc(dev, "Allwinner GPIO/Pinmux controller"); return (BUS_PROBE_DEFAULT); } static int aw_gpio_attach(device_t dev) { - int rid, error; + int error; phandle_t gpio; struct aw_gpio_softc *sc; struct clk_list *clkp, *clkp_tmp; clk_t clk; hwreset_t rst = NULL; int off, err, clkret; sc = device_get_softc(dev); sc->sc_dev = dev; mtx_init(&sc->sc_mtx, "aw gpio", "gpio", MTX_SPIN); - rid = 0; - sc->sc_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, - RF_ACTIVE); - if (!sc->sc_mem_res) { - device_printf(dev, "cannot allocate memory window\n"); - goto fail; + if (bus_alloc_resources(dev, aw_gpio_res_spec, sc->sc_res) != 0) { + device_printf(dev, "cannot allocate device resources\n"); + return (ENXIO); } - sc->sc_bst = rman_get_bustag(sc->sc_mem_res); - sc->sc_bsh = rman_get_bushandle(sc->sc_mem_res); - - rid = 0; - sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, - RF_ACTIVE); - if (!sc->sc_irq_res) { - device_printf(dev, "cannot allocate interrupt\n"); + if (bus_setup_intr(dev, sc->sc_res[AW_GPIO_IRQRES], + INTR_TYPE_CLK | INTR_MPSAFE, NULL, aw_gpio_intr, sc, + &sc->sc_intrhand)) { + device_printf(dev, "cannot setup interrupt handler\n"); goto fail; } /* Find our node. */ gpio = ofw_bus_get_node(sc->sc_dev); if (!OF_hasprop(gpio, "gpio-controller")) /* Node is not a GPIO controller. */ goto fail; /* Use the right pin data for the current SoC */ sc->conf = (struct aw_gpio_conf *)ofw_bus_search_compatible(dev, compat_data)->ocd_data; if (hwreset_get_by_ofw_idx(dev, 0, 0, &rst) == 0) { error = hwreset_deassert(rst); if (error != 0) { device_printf(dev, "cannot de-assert reset\n"); goto fail; } } TAILQ_INIT(&sc->clk_list); for (off = 0, clkret = 0; clkret == 0; off++) { clkret = clk_get_by_ofw_index(dev, 0, off, &clk); if (clkret != 0) break; err = clk_enable(clk); if (err != 0) { device_printf(dev, "Could not enable clock %s\n", clk_get_name(clk)); goto fail; } clkp = malloc(sizeof(*clkp), M_DEVBUF, M_WAITOK | M_ZERO); clkp->clk = clk; TAILQ_INSERT_TAIL(&sc->clk_list, clkp, next); } if (clkret != 0 && clkret != ENOENT) { device_printf(dev, "Could not find clock at offset %d (%d)\n", off, clkret); goto fail; } +#ifdef INTRNG + aw_gpio_register_isrcs(sc); + intr_pic_register(dev, OF_xref_from_node(ofw_bus_get_node(dev))); +#endif + sc->sc_busdev = gpiobus_attach_bus(dev); if (sc->sc_busdev == NULL) goto fail; /* * Register as a pinctrl device */ fdt_pinctrl_register(dev, "pins"); fdt_pinctrl_configure_tree(dev); fdt_pinctrl_register(dev, "allwinner,pins"); fdt_pinctrl_configure_tree(dev); config_intrhook_oneshot(aw_gpio_enable_bank_supply, sc); return (0); fail: if (sc->sc_irq_res) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq_res); if (sc->sc_mem_res) bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->sc_mem_res); mtx_destroy(&sc->sc_mtx); /* Disable clock */ TAILQ_FOREACH_SAFE(clkp, &sc->clk_list, next, clkp_tmp) { err = clk_disable(clkp->clk); if (err != 0) device_printf(dev, "Could not disable clock %s\n", clk_get_name(clkp->clk)); err = clk_release(clkp->clk); if (err != 0) device_printf(dev, "Could not release clock %s\n", clk_get_name(clkp->clk)); TAILQ_REMOVE(&sc->clk_list, clkp, next); free(clkp, M_DEVBUF); } /* Assert resets */ if (rst) { hwreset_assert(rst); hwreset_release(rst); } return (ENXIO); } static int aw_gpio_detach(device_t dev) { return (EBUSY); } -static phandle_t -aw_gpio_get_node(device_t dev, device_t bus) +static void +aw_gpio_intr(void *arg) { + struct aw_gpio_softc *sc; + struct intr_irqsrc *isrc; + uint32_t reg; + int irq; - /* We only have one child, the GPIO bus, which needs our own node. */ - return (ofw_bus_get_node(dev)); + sc = (struct aw_gpio_softc *)arg; + + AW_GPIO_LOCK(sc); + for (irq = 0; irq < sc->nirqs; irq++) { + if (!sc->gpio_pic_irqsrc[irq].enabled) + continue; + + reg = AW_GPIO_READ(sc, AW_GPIO_GP_INT_STA(sc->gpio_pic_irqsrc[irq].bank)); + if (!(reg & (1 << sc->gpio_pic_irqsrc[irq].intnum))) + continue; + + isrc = &sc->gpio_pic_irqsrc[irq].isrc; + if (intr_isrc_dispatch(isrc, curthread->td_intr_frame) != 0) { + aw_gpio_pic_disable_intr_locked(sc, isrc); + aw_gpio_pic_post_filter(sc->sc_dev, isrc); + device_printf(sc->sc_dev, "Stray irq %u disabled\n", irq); + } + } + AW_GPIO_UNLOCK(sc); } +/* + * Interrupts support + */ + static int -aw_gpio_map_gpios(device_t bus, phandle_t dev, phandle_t gparent, int gcells, - pcell_t *gpios, uint32_t *pin, uint32_t *flags) +aw_gpio_register_isrcs(struct aw_gpio_softc *sc) { + const char *name; + int nirqs; + int pin; + int err; + + name = device_get_nameunit(sc->sc_dev); + + for (nirqs = 0, pin = 0; pin < sc->conf->padconf->npins; pin++) { + if (sc->conf->padconf->pins[pin].eint_func == 0) + continue; + + nirqs++; + } + + sc->gpio_pic_irqsrc = malloc(sizeof(*sc->gpio_pic_irqsrc) * nirqs, + M_DEVBUF, M_WAITOK | M_ZERO); + for (nirqs = 0, pin = 0; pin < sc->conf->padconf->npins; pin++) { + if (sc->conf->padconf->pins[pin].eint_func == 0) + continue; + + sc->gpio_pic_irqsrc[nirqs].pin = pin; + sc->gpio_pic_irqsrc[nirqs].bank = sc->conf->padconf->pins[pin].eint_bank; + sc->gpio_pic_irqsrc[nirqs].intnum = sc->conf->padconf->pins[pin].eint_num; + sc->gpio_pic_irqsrc[nirqs].intfunc = sc->conf->padconf->pins[pin].eint_func; + sc->gpio_pic_irqsrc[nirqs].irq = nirqs; + sc->gpio_pic_irqsrc[nirqs].mode = GPIO_INTR_CONFORM; + + err = intr_isrc_register(&sc->gpio_pic_irqsrc[nirqs].isrc, + sc->sc_dev, 0, "%s,%s", name, + sc->conf->padconf->pins[pin].functions[sc->conf->padconf->pins[pin].eint_func]); + if (err) { + device_printf(sc->sc_dev, "intr_isrs_register failed for irq %d\n", nirqs); + } + + nirqs++; + } + + sc->nirqs = nirqs; + + return (0); +} + +static void +aw_gpio_pic_disable_intr_locked(struct aw_gpio_softc *sc, struct intr_irqsrc *isrc) +{ + u_int irq; + uint32_t reg; + + AW_GPIO_LOCK_ASSERT(sc); + irq = ((struct gpio_irqsrc *)isrc)->irq; + reg = AW_GPIO_READ(sc, AW_GPIO_GP_INT_CTL(sc->gpio_pic_irqsrc[irq].bank)); + reg &= ~(1 << sc->gpio_pic_irqsrc[irq].intnum); + AW_GPIO_WRITE(sc, AW_GPIO_GP_INT_CTL(sc->gpio_pic_irqsrc[irq].bank), reg); + + sc->gpio_pic_irqsrc[irq].enabled = false; +} + +static void +aw_gpio_pic_disable_intr(device_t dev, struct intr_irqsrc *isrc) +{ struct aw_gpio_softc *sc; - int i; - sc = device_get_softc(bus); + sc = device_get_softc(dev); - /* The GPIO pins are mapped as: . */ - for (i = 0; i < sc->conf->padconf->npins; i++) - if (sc->conf->padconf->pins[i].port == gpios[0] && - sc->conf->padconf->pins[i].pin == gpios[1]) { - *pin = i; + AW_GPIO_LOCK(sc); + aw_gpio_pic_disable_intr_locked(sc, isrc); + AW_GPIO_UNLOCK(sc); +} + +static void +aw_gpio_pic_enable_intr(device_t dev, struct intr_irqsrc *isrc) +{ + struct aw_gpio_softc *sc; + u_int irq; + uint32_t reg; + + sc = device_get_softc(dev); + irq = ((struct gpio_irqsrc *)isrc)->irq; + AW_GPIO_LOCK(sc); + reg = AW_GPIO_READ(sc, AW_GPIO_GP_INT_CTL(sc->gpio_pic_irqsrc[irq].bank)); + reg |= 1 << sc->gpio_pic_irqsrc[irq].intnum; + AW_GPIO_WRITE(sc, AW_GPIO_GP_INT_CTL(sc->gpio_pic_irqsrc[irq].bank), reg); + AW_GPIO_UNLOCK(sc); + + sc->gpio_pic_irqsrc[irq].enabled = true; +} + +static int +aw_gpio_pic_map_gpio(struct aw_gpio_softc *sc, struct intr_map_data_gpio *dag, + u_int *irqp, u_int *mode) +{ + u_int irq; + int pin; + + irq = dag->gpio_pin_num; + + for (pin = 0; pin < sc->nirqs; pin++) + if (sc->gpio_pic_irqsrc[pin].pin == irq) break; - } - *flags = gpios[gcells - 1]; + if (pin == sc->nirqs) { + device_printf(sc->sc_dev, "Invalid interrupt number %u\n", irq); + return (EINVAL); + } + switch (dag->gpio_intr_mode) { + case GPIO_INTR_LEVEL_LOW: + case GPIO_INTR_LEVEL_HIGH: + case GPIO_INTR_EDGE_RISING: + case GPIO_INTR_EDGE_FALLING: + case GPIO_INTR_EDGE_BOTH: + break; + default: + device_printf(sc->sc_dev, "Unsupported interrupt mode 0x%8x\n", + dag->gpio_intr_mode); + return (EINVAL); + } + + *irqp = pin; + if (mode != NULL) + *mode = dag->gpio_intr_mode; + return (0); } +static int +aw_gpio_pic_map_intr(device_t dev, struct intr_map_data *data, + struct intr_irqsrc **isrcp) +{ + struct aw_gpio_softc *sc; + u_int irq; + int err; + + sc = device_get_softc(dev); + switch (data->type) { + case INTR_MAP_DATA_GPIO: + err = aw_gpio_pic_map_gpio(sc, + (struct intr_map_data_gpio *)data, + &irq, NULL); + break; + default: + return (ENOTSUP); + }; + + if (err == 0) + *isrcp = &sc->gpio_pic_irqsrc[irq].isrc; + return (0); +} + +static int +aw_gpio_pic_setup_intr(device_t dev, struct intr_irqsrc *isrc, + struct resource *res, struct intr_map_data *data) +{ + struct aw_gpio_softc *sc; + struct gpio_irqsrc *gi; + uint32_t irqcfg; + uint32_t pinidx, reg; + u_int irq, mode; + int err; + + sc = device_get_softc(dev); + gi = (struct gpio_irqsrc *)isrc; + + switch (data->type) { + case INTR_MAP_DATA_GPIO: + err = aw_gpio_pic_map_gpio(sc, + (struct intr_map_data_gpio *)data, + &irq, &mode); + break; + default: + return (ENOTSUP); + }; + + pinidx = (sc->gpio_pic_irqsrc[irq].intnum % 8) * 4; + + AW_GPIO_LOCK(sc); + switch (mode) { + case GPIO_INTR_LEVEL_LOW: + irqcfg = AW_GPIO_INT_LEVEL_LOW << pinidx; + break; + case GPIO_INTR_LEVEL_HIGH: + irqcfg = AW_GPIO_INT_LEVEL_HIGH << pinidx; + break; + case GPIO_INTR_EDGE_RISING: + irqcfg = AW_GPIO_INT_EDGE_POSITIVE << pinidx; + break; + case GPIO_INTR_EDGE_FALLING: + irqcfg = AW_GPIO_INT_EDGE_NEGATIVE << pinidx; + break; + case GPIO_INTR_EDGE_BOTH: + irqcfg = AW_GPIO_INT_EDGE_BOTH << pinidx; + break; + } + + /* Switch the pin to interrupt mode */ + sc->gpio_pic_irqsrc[irq].oldfunc = aw_gpio_get_function(sc, + sc->gpio_pic_irqsrc[irq].pin); + aw_gpio_set_function(sc, sc->gpio_pic_irqsrc[irq].pin, + sc->gpio_pic_irqsrc[irq].intfunc); + + /* Write interrupt mode */ + reg = AW_GPIO_READ(sc, + AW_GPIO_GP_INT_CFG(sc->gpio_pic_irqsrc[irq].bank, + sc->gpio_pic_irqsrc[irq].intnum)); + reg &= ~(0xF << pinidx); + reg |= irqcfg; + AW_GPIO_WRITE(sc, + AW_GPIO_GP_INT_CFG(sc->gpio_pic_irqsrc[irq].bank, + sc->gpio_pic_irqsrc[irq].intnum), + reg); + + AW_GPIO_UNLOCK(sc); + + return (0); +} + +static int +aw_gpio_pic_teardown_intr(device_t dev, struct intr_irqsrc *isrc, + struct resource *res, struct intr_map_data *data) +{ + struct aw_gpio_softc *sc; + struct gpio_irqsrc *gi; + + sc = device_get_softc(dev); + gi = (struct gpio_irqsrc *)isrc; + + /* Switch back the pin to it's original function */ + AW_GPIO_LOCK(sc); + aw_gpio_set_function(sc, gi->pin, gi->oldfunc); + AW_GPIO_UNLOCK(sc); + + return (0); +} + +static void +aw_gpio_pic_post_filter(device_t dev, struct intr_irqsrc *isrc) +{ + struct aw_gpio_softc *sc; + struct gpio_irqsrc *gi; + + sc = device_get_softc(dev); + gi = (struct gpio_irqsrc *)isrc; + + arm_irq_memory_barrier(0); + AW_GPIO_WRITE(sc, AW_GPIO_GP_INT_STA(gi->bank), 1 << gi->intnum); +} + +static void +aw_gpio_pic_post_ithread(device_t dev, struct intr_irqsrc *isrc) +{ + struct aw_gpio_softc *sc; + struct gpio_irqsrc *gi; + + sc = device_get_softc(dev); + gi = (struct gpio_irqsrc *)isrc; + + arm_irq_memory_barrier(0); + AW_GPIO_WRITE(sc, AW_GPIO_GP_INT_STA(gi->bank), 1 << gi->intnum); + aw_gpio_pic_enable_intr(dev, isrc); +} + +static void +aw_gpio_pic_pre_ithread(device_t dev, struct intr_irqsrc *isrc) +{ + struct aw_gpio_softc *sc; + + sc = device_get_softc(dev); + aw_gpio_pic_disable_intr_locked(sc, isrc); +} + +/* + * OFWBUS Interface + */ +static phandle_t +aw_gpio_get_node(device_t dev, device_t bus) +{ + + /* We only have one child, the GPIO bus, which needs our own node. */ + return (ofw_bus_get_node(dev)); +} + static device_method_t aw_gpio_methods[] = { /* Device interface */ DEVMETHOD(device_probe, aw_gpio_probe), DEVMETHOD(device_attach, aw_gpio_attach), DEVMETHOD(device_detach, aw_gpio_detach), + +#ifdef INTRNG + /* Interrupt controller interface */ + DEVMETHOD(pic_disable_intr, aw_gpio_pic_disable_intr), + DEVMETHOD(pic_enable_intr, aw_gpio_pic_enable_intr), + DEVMETHOD(pic_map_intr, aw_gpio_pic_map_intr), + DEVMETHOD(pic_setup_intr, aw_gpio_pic_setup_intr), + DEVMETHOD(pic_teardown_intr, aw_gpio_pic_teardown_intr), + DEVMETHOD(pic_post_filter, aw_gpio_pic_post_filter), + DEVMETHOD(pic_post_ithread, aw_gpio_pic_post_ithread), + DEVMETHOD(pic_pre_ithread, aw_gpio_pic_pre_ithread), +#endif /* GPIO protocol */ DEVMETHOD(gpio_get_bus, aw_gpio_get_bus), DEVMETHOD(gpio_pin_max, aw_gpio_pin_max), DEVMETHOD(gpio_pin_getname, aw_gpio_pin_getname), DEVMETHOD(gpio_pin_getflags, aw_gpio_pin_getflags), DEVMETHOD(gpio_pin_getcaps, aw_gpio_pin_getcaps), DEVMETHOD(gpio_pin_setflags, aw_gpio_pin_setflags), DEVMETHOD(gpio_pin_get, aw_gpio_pin_get), DEVMETHOD(gpio_pin_set, aw_gpio_pin_set), DEVMETHOD(gpio_pin_toggle, aw_gpio_pin_toggle), DEVMETHOD(gpio_pin_access_32, aw_gpio_pin_access_32), DEVMETHOD(gpio_pin_config_32, aw_gpio_pin_config_32), DEVMETHOD(gpio_map_gpios, aw_gpio_map_gpios), /* ofw_bus interface */ DEVMETHOD(ofw_bus_get_node, aw_gpio_get_node), /* fdt_pinctrl interface */ DEVMETHOD(fdt_pinctrl_configure,aw_fdt_configure_pins), DEVMETHOD_END }; static devclass_t aw_gpio_devclass; static driver_t aw_gpio_driver = { "gpio", aw_gpio_methods, sizeof(struct aw_gpio_softc), }; EARLY_DRIVER_MODULE(aw_gpio, simplebus, aw_gpio_driver, aw_gpio_devclass, 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_LATE); Index: projects/clang1000-import/sys/arm/allwinner/h3/h3_padconf.c =================================================================== --- projects/clang1000-import/sys/arm/allwinner/h3/h3_padconf.c (revision 356919) +++ projects/clang1000-import/sys/arm/allwinner/h3/h3_padconf.c (revision 356920) @@ -1,150 +1,150 @@ /*- * Copyright (c) 2016-2017 Emmanuel Vadot * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #if defined(__aarch64__) #include "opt_soc.h" #endif #include #if defined(SOC_ALLWINNER_H3) || defined(SOC_ALLWINNER_H5) const static struct allwinner_pins h3_pins[] = { {"PA0", 0, 0, {"gpio_in", "gpio_out", "uart2", "jtag", NULL, NULL, "pa_eint0", NULL}, 6, 0}, {"PA1", 0, 1, {"gpio_in", "gpio_out", "uart2", "jtag", NULL, NULL, "pa_eint1", NULL}, 6, 1}, {"PA2", 0, 2, {"gpio_in", "gpio_out", "uart2", "jtag", NULL, NULL, "pa_eint2", NULL}, 6, 2}, {"PA3", 0, 3, {"gpio_in", "gpio_out", "uart2", "jtag", NULL, NULL, "pa_eint3", NULL}, 6, 3}, {"PA4", 0, 4, {"gpio_in", "gpio_out", "uart0", NULL, NULL, NULL, "pa_eint4", NULL}, 6, 4}, {"PA5", 0, 5, {"gpio_in", "gpio_out", "uart0", "pwm0", NULL, NULL, "pa_eint5", NULL}, 6, 5}, {"PA6", 0, 6, {"gpio_in", "gpio_out", "sim", NULL, NULL, NULL, "pa_eint6", NULL}, 6, 6}, {"PA7", 0, 7, {"gpio_in", "gpio_out", "sim", NULL, NULL, NULL, "pa_eint7", NULL}, 6, 7}, {"PA8", 0, 8, {"gpio_in", "gpio_out", "sim", NULL, NULL, NULL, "pa_eint8", NULL}, 6, 8}, {"PA9", 0, 9, {"gpio_in", "gpio_out", "sim", NULL, NULL, NULL, "pa_eint9", NULL}, 6, 9}, {"PA10", 0, 10, {"gpio_in", "gpio_out", "sim", NULL, NULL, NULL, "pa_eint10", NULL}, 6, 10}, {"PA11", 0, 11, {"gpio_in", "gpio_out", "i2c0", "di", NULL, NULL, "pa_eint11", NULL}, 6, 11}, {"PA12", 0, 12, {"gpio_in", "gpio_out", "i2c0", "di", NULL, NULL, "pa_eint12", NULL}, 6, 12}, {"PA13", 0, 13, {"gpio_in", "gpio_out", "spi1", "uart3", NULL, NULL, "pa_eint13", NULL}, 6, 13}, {"PA14", 0, 14, {"gpio_in", "gpio_out", "spi1", "uart3", NULL, NULL, "pa_eint14", NULL}, 6, 14}, {"PA15", 0, 15, {"gpio_in", "gpio_out", "spi1", "uart3", NULL, NULL, "pa_eint15", NULL}, 6, 15}, {"PA16", 0, 16, {"gpio_in", "gpio_out", "spi1", "uart3", NULL, NULL, "pa_eint16", NULL}, 6, 16}, {"PA17", 0, 17, {"gpio_in", "gpio_out", "spdif", NULL, NULL, NULL, "pa_eint17", NULL}, 6, 17}, {"PA18", 0, 18, {"gpio_in", "gpio_out", "i2s0", "i2c1", NULL, NULL, "pa_eint18", NULL}, 6, 18}, {"PA19", 0, 19, {"gpio_in", "gpio_out", "i2s0", "i2c1", NULL, NULL, "pa_eint19", NULL}, 6, 19}, {"PA20", 0, 20, {"gpio_in", "gpio_out", "i2s0", "sim", NULL, NULL, "pa_eint20", NULL}, 6, 20}, {"PA21", 0, 21, {"gpio_in", "gpio_out", "i2s0", "sim", NULL, NULL, "pa_eint21", NULL}, 6, 21}, {"PC0", 2, 0, {"gpio_in", "gpio_out", "nand", "spi0", NULL, NULL, NULL, NULL}}, {"PC1", 2, 1, {"gpio_in", "gpio_out", "nand", "spi0", NULL, NULL, NULL, NULL}}, {"PC2", 2, 2, {"gpio_in", "gpio_out", "nand", "spi0", NULL, NULL, NULL, NULL}}, {"PC3", 2, 3, {"gpio_in", "gpio_out", "nand", "spi0", NULL, NULL, NULL, NULL}}, {"PC4", 2, 4, {"gpio_in", "gpio_out", "nand", NULL, NULL, NULL, NULL, NULL}}, {"PC5", 2, 5, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC6", 2, 6, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC7", 2, 7, {"gpio_in", "gpio_out", "nand", NULL, NULL, NULL, NULL, NULL}}, {"PC8", 2, 8, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC9", 2, 9, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC10", 2, 10, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC11", 2, 11, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC12", 2, 12, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC13", 2, 13, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC14", 2, 14, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC15", 2, 15, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PC16", 2, 16, {"gpio_in", "gpio_out", "nand", "mmc2", NULL, NULL, NULL, NULL}}, {"PD0", 3, 0, {"gpio_in", "gpio_out", "emac", NULL, NULL, NULL, NULL, NULL}}, {"PD1", 3, 1, {"gpio_in", "gpio_out", "emac", NULL, NULL, NULL, NULL, NULL}}, {"PD2", 3, 2, {"gpio_in", "gpio_out", "emac", NULL, NULL, NULL, NULL, NULL}}, {"PD3", 3, 3, {"gpio_in", "gpio_out", "emac", NULL, NULL, NULL, NULL, NULL}}, {"PD4", 3, 4, {"gpio_in", "gpio_out", "emac", NULL, NULL, NULL, NULL, NULL}}, {"PD5", 3, 5, {"gpio_in", "gpio_out", "emac", NULL, NULL, NULL, NULL, NULL}}, {"PD6", 3, 6, {"gpio_in", "gpio_out", "emac", NULL, NULL, NULL, NULL, NULL}}, {"PD7", 3, 7, {"gpio_in", "gpio_out", "emac", NULL, NULL, NULL, NULL, NULL}}, {"PD8", 3, 8, {"gpio_in", "gpio_out", "emac", NULL, NULL, NULL, NULL, NULL}}, {"PD9", 3, 9, {"gpio_in", "gpio_out", "emac", NULL, NULL, NULL, NULL, NULL}}, {"PD10", 3, 10, {"gpio_in", "gpio_out", "emac", NULL, NULL, NULL, NULL, NULL}}, {"PD11", 3, 11, {"gpio_in", "gpio_out", "emac", NULL, NULL, NULL, NULL, NULL}}, {"PD12", 3, 12, {"gpio_in", "gpio_out", "emac", NULL, NULL, NULL, NULL, NULL}}, {"PD13", 3, 13, {"gpio_in", "gpio_out", "emac", NULL, NULL, NULL, NULL, NULL}}, {"PD14", 3, 14, {"gpio_in", "gpio_out", "emac", NULL, NULL, NULL, NULL, NULL}}, {"PD15", 3, 15, {"gpio_in", "gpio_out", "emac", NULL, NULL, NULL, NULL, NULL}}, {"PD16", 3, 16, {"gpio_in", "gpio_out", "emac", NULL, NULL, NULL, NULL, NULL}}, {"PD17", 3, 17, {"gpio_in", "gpio_out", "emac", NULL, NULL, NULL, NULL, NULL}}, {"PE0", 4, 0, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, NULL, NULL}}, {"PE1", 4, 1, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, NULL, NULL}}, {"PE2", 4, 2, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, NULL, NULL}}, {"PE3", 4, 3, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, NULL, NULL}}, {"PE4", 4, 4, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, NULL, NULL}}, {"PE5", 4, 5, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, NULL, NULL}}, {"PE6", 4, 6, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, NULL, NULL}}, {"PE7", 4, 7, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, NULL, NULL}}, {"PE8", 4, 8, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, NULL, NULL}}, {"PE9", 4, 9, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, NULL, NULL}}, {"PE10", 4, 10, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, NULL, NULL}}, {"PE11", 4, 11, {"gpio_in", "gpio_out", "csi", "ts", NULL, NULL, NULL, NULL}}, {"PE12", 4, 12, {"gpio_in", "gpio_out", "csi", "i2c2", NULL, NULL, NULL, NULL}}, {"PE13", 4, 13, {"gpio_in", "gpio_out", "csi", "i2c2", NULL, NULL, NULL, NULL}}, {"PE14", 4, 14, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}}, {"PE15", 4, 15, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}}, {"PF0", 5, 0, {"gpio_in", "gpio_out", "mmc0", "jtag", NULL, NULL, NULL, NULL}}, {"PF1", 5, 1, {"gpio_in", "gpio_out", "mmc0", "jtag", NULL, NULL, NULL, NULL}}, {"PF2", 5, 2, {"gpio_in", "gpio_out", "mmc0", "uart0", NULL, NULL, NULL, NULL}}, {"PF3", 5, 3, {"gpio_in", "gpio_out", "mmc0", "jtag", NULL, NULL, NULL, NULL}}, {"PF4", 5, 4, {"gpio_in", "gpio_out", "mmc0", "uart0", NULL, NULL, NULL, NULL}}, {"PF5", 5, 5, {"gpio_in", "gpio_out", "mmc0", "jtag", NULL, NULL, NULL, NULL}}, {"PF6", 5, 6, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, NULL, NULL}}, - {"PG0", 6, 0, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint0", NULL}, 6, 0}, - {"PG1", 6, 1, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint1", NULL}, 6, 1}, - {"PG2", 6, 2, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint2", NULL}, 6, 2}, - {"PG3", 6, 3, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint3", NULL}, 6, 3}, - {"PG4", 6, 4, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint4", NULL}, 6, 4}, - {"PG5", 6, 5, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint5", NULL}, 6, 5}, - {"PG6", 6, 6, {"gpio_in", "gpio_out", "uart1", NULL, NULL, NULL, "pg_eint6", NULL}, 6, 6}, - {"PG7", 6, 7, {"gpio_in", "gpio_out", "uart1", NULL, NULL, NULL, "pg_eint7", NULL}, 6, 7}, - {"PG8", 6, 8, {"gpio_in", "gpio_out", "uart1", NULL, NULL, NULL, "pg_eint8", NULL}, 6, 8}, - {"PG9", 6, 9, {"gpio_in", "gpio_out", "uart1", NULL, NULL, NULL, "pg_eint9", NULL}, 6, 9}, - {"PG10", 6, 10, {"gpio_in", "gpio_out", "i2s1", NULL, NULL, NULL, "pg_eint10", NULL}, 6, 10}, - {"PG11", 6, 11, {"gpio_in", "gpio_out", "i2s1", NULL, NULL, NULL, "pg_eint11", NULL}, 6, 11}, - {"PG12", 6, 12, {"gpio_in", "gpio_out", "i2s1", NULL, NULL, NULL, "pg_eint12", NULL}, 6, 12}, - {"PG13", 6, 13, {"gpio_in", "gpio_out", "i2s1", NULL, NULL, NULL, "pg_eint13", NULL}, 6, 13}, + {"PG0", 6, 0, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint0", NULL}, 6, 0, 1}, + {"PG1", 6, 1, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint1", NULL}, 6, 1, 1}, + {"PG2", 6, 2, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint2", NULL}, 6, 2, 1}, + {"PG3", 6, 3, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint3", NULL}, 6, 3, 1}, + {"PG4", 6, 4, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint4", NULL}, 6, 4, 1}, + {"PG5", 6, 5, {"gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint5", NULL}, 6, 5, 1}, + {"PG6", 6, 6, {"gpio_in", "gpio_out", "uart1", NULL, NULL, NULL, "pg_eint6", NULL}, 6, 6, 1}, + {"PG7", 6, 7, {"gpio_in", "gpio_out", "uart1", NULL, NULL, NULL, "pg_eint7", NULL}, 6, 7, 1}, + {"PG8", 6, 8, {"gpio_in", "gpio_out", "uart1", NULL, NULL, NULL, "pg_eint8", NULL}, 6, 8, 1}, + {"PG9", 6, 9, {"gpio_in", "gpio_out", "uart1", NULL, NULL, NULL, "pg_eint9", NULL}, 6, 9, 1}, + {"PG10", 6, 10, {"gpio_in", "gpio_out", "i2s1", NULL, NULL, NULL, "pg_eint10", NULL}, 6, 10, 1}, + {"PG11", 6, 11, {"gpio_in", "gpio_out", "i2s1", NULL, NULL, NULL, "pg_eint11", NULL}, 6, 11, 1}, + {"PG12", 6, 12, {"gpio_in", "gpio_out", "i2s1", NULL, NULL, NULL, "pg_eint12", NULL}, 6, 12, 1}, + {"PG13", 6, 13, {"gpio_in", "gpio_out", "i2s1", NULL, NULL, NULL, "pg_eint13", NULL}, 6, 13, 1}, }; const struct allwinner_padconf h3_padconf = { .npins = nitems(h3_pins), .pins = h3_pins, }; #endif /* SOC_ALLWINNER_H3 */ Index: projects/clang1000-import/sys/arm/allwinner/h3/h3_r_padconf.c =================================================================== --- projects/clang1000-import/sys/arm/allwinner/h3/h3_r_padconf.c (revision 356919) +++ projects/clang1000-import/sys/arm/allwinner/h3/h3_r_padconf.c (revision 356920) @@ -1,63 +1,63 @@ /*- * Copyright (c) 2016-2017 Emmanuel Vadot * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #if defined(__aarch64__) #include "opt_soc.h" #endif #include #if defined(SOC_ALLWINNER_H3) || defined(SOC_ALLWINNER_H5) const static struct allwinner_pins h3_r_pins[] = { - {"PL0", 0, 0, {"gpio_in", "gpio_out", "s_twi", NULL, NULL, NULL, "pl_eint0", NULL}, 6, 0}, - {"PL1", 0, 1, {"gpio_in", "gpio_out", "s_twi", NULL, NULL, NULL, "pl_eint1", NULL}, 6, 1}, - {"PL2", 0, 2, {"gpio_in", "gpio_out", "s_uart", NULL, NULL, NULL, "pl_eint2", NULL}, 6, 2}, - {"PL3", 0, 3, {"gpio_in", "gpio_out", "s_uart", NULL, NULL, NULL, "pl_eint3", NULL}, 6, 3}, - {"PL4", 0, 4, {"gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint4", NULL}, 6, 4}, - {"PL5", 0, 5, {"gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint5", NULL}, 6, 5}, - {"PL6", 0, 6, {"gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint6", NULL}, 6, 6}, - {"PL7", 0, 7, {"gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint7", NULL}, 6, 7}, - {"PL8", 0, 8, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "pl_eint8", NULL}, 6, 8}, - {"PL9", 0, 9, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "pl_eint9", NULL}, 6, 9}, - {"PL10", 0, 10, {"gpio_in", "gpio_out", "s_pwm", NULL, NULL, NULL, "pl_eint10", NULL}, 6, 10}, - {"PL11", 0, 11, {"gpio_in", "gpio_out", "s_cir_rx", NULL, NULL, NULL, "pl_eint11", NULL}, 6, 11}, + {"PL0", 0, 0, {"gpio_in", "gpio_out", "s_twi", NULL, NULL, NULL, "pl_eint0", NULL}, 6, 0, 0}, + {"PL1", 0, 1, {"gpio_in", "gpio_out", "s_twi", NULL, NULL, NULL, "pl_eint1", NULL}, 6, 1, 0}, + {"PL2", 0, 2, {"gpio_in", "gpio_out", "s_uart", NULL, NULL, NULL, "pl_eint2", NULL}, 6, 2, 0}, + {"PL3", 0, 3, {"gpio_in", "gpio_out", "s_uart", NULL, NULL, NULL, "pl_eint3", NULL}, 6, 3, 0}, + {"PL4", 0, 4, {"gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint4", NULL}, 6, 4, 0}, + {"PL5", 0, 5, {"gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint5", NULL}, 6, 5, 0}, + {"PL6", 0, 6, {"gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint6", NULL}, 6, 6, 0}, + {"PL7", 0, 7, {"gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint7", NULL}, 6, 7, 0}, + {"PL8", 0, 8, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "pl_eint8", NULL}, 6, 8, 0}, + {"PL9", 0, 9, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "pl_eint9", NULL}, 6, 9, 0}, + {"PL10", 0, 10, {"gpio_in", "gpio_out", "s_pwm", NULL, NULL, NULL, "pl_eint10", NULL}, 6, 10, 0}, + {"PL11", 0, 11, {"gpio_in", "gpio_out", "s_cir_rx", NULL, NULL, NULL, "pl_eint11", NULL}, 6, 11, 0}, }; const struct allwinner_padconf h3_r_padconf = { .npins = nitems(h3_r_pins), .pins = h3_r_pins, }; #endif /* SOC_ALLWINNER_H3 */ Index: projects/clang1000-import/sys/arm/allwinner/h6/h6_padconf.c =================================================================== --- projects/clang1000-import/sys/arm/allwinner/h6/h6_padconf.c (revision 356919) +++ projects/clang1000-import/sys/arm/allwinner/h6/h6_padconf.c (revision 356920) @@ -1,126 +1,126 @@ /*- * Copyright (c) 2019 Emmanuel Vadot * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "opt_soc.h" static const struct allwinner_pins h6_pins[] = { { "PC0", 2, 0, { "gpio_in", "gpio_out", "nand", NULL, "spi0" } }, { "PC1", 2, 1, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC2", 2, 2, { "gpio_in", "gpio_out", "nand", NULL, "spi0" } }, { "PC3", 2, 3, { "gpio_in", "gpio_out", "nand", NULL, "spi0" } }, { "PC4", 2, 4, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC5", 2, 5, { "gpio_in", "gpio_out", "nand", "mmc2", "spi0" } }, { "PC6", 2, 6, { "gpio_in", "gpio_out", "nand", "mmc2", "spi0" } }, { "PC7", 2, 7, { "gpio_in", "gpio_out", "nand", "mmc2", "spi0" } }, { "PC8", 2, 8, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC9", 2, 9, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC10", 2, 10, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC11", 2, 11, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC12", 2, 12, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC13", 2, 13, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC14", 2, 14, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC15", 2, 15, { "gpio_in", "gpio_out", "nand", "mmc2" } }, { "PC16", 2, 16, { "gpio_in", "gpio_out", "nand", } }, { "PD0", 3, 0, { "gpio_in", "gpio_out", "lcd0", "ts0", "csi", "emac" } }, { "PD1", 3, 1, { "gpio_in", "gpio_out", "lcd0", "ts0", "csi", "emac" } }, { "PD2", 3, 2, { "gpio_in", "gpio_out", "lcd0", "ts0", "csi", "emac" } }, { "PD3", 3, 3, { "gpio_in", "gpio_out", "lcd0", "ts0", "csi", "emac" } }, { "PD4", 3, 4, { "gpio_in", "gpio_out", "lcd0", "ts0", "csi", "emac" } }, { "PD5", 3, 5, { "gpio_in", "gpio_out", "lcd0", "ts0", "csi", "emac" } }, { "PD6", 3, 6, { "gpio_in", "gpio_out", "lcd0", "ts0", "csi", "emac" } }, { "PD7", 3, 7, { "gpio_in", "gpio_out", "lcd0", "ts0", "csi", "emac" } }, { "PD8", 3, 8, { "gpio_in", "gpio_out", "lcd0", "ts0", "csi", "emac" } }, { "PD9", 3, 9, { "gpio_in", "gpio_out", "lcd0", "ts0", "csi", "emac" } }, { "PD10", 3, 10, { "gpio_in", "gpio_out", "lcd0", "ts0", "csi", "emac" } }, { "PD11", 3, 11, { "gpio_in", "gpio_out", "lcd0", "ts0", "csi", "emac" } }, { "PD12", 3, 12, { "gpio_in", "gpio_out", "lcd0", "ts1", "csi", "emac" } }, { "PD13", 3, 13, { "gpio_in", "gpio_out", "lcd0", "ts1", "csi", "emac" } }, { "PD14", 3, 14, { "gpio_in", "gpio_out", "lcd0", "ts1", "dmic", "csi" } }, { "PD15", 3, 15, { "gpio_in", "gpio_out", "lcd0", "ts1", "dmic", "csi" } }, { "PD16", 3, 16, { "gpio_in", "gpio_out", "lcd0", "ts1", "dmic" } }, { "PD17", 3, 17, { "gpio_in", "gpio_out", "lcd0", "ts2", "dmic" } }, { "PD18", 3, 18, { "gpio_in", "gpio_out", "lcd0", "ts2", "dmic" } }, { "PD19", 3, 19, { "gpio_in", "gpio_out", "lcd0", "ts2", "uart2", "emac" } }, { "PD20", 3, 20, { "gpio_in", "gpio_out", "lcd0", "ts2", "uart2", "emac" } }, { "PD21", 3, 21, { "gpio_in", "gpio_out", "lcd0", "ts2", "uart2" } }, { "PD22", 3, 22, { "gpio_in", "gpio_out", "pwm0", "ts3", "uart2" } }, { "PD23", 3, 23, { "gpio_in", "gpio_out", "i2c2", "ts3", "uart3", "jtag" } }, { "PD24", 3, 24, { "gpio_in", "gpio_out", "i2c2", "ts3", "uart3", "jtag" } }, { "PD25", 3, 25, { "gpio_in", "gpio_out", "i2c0", "ts3", "uart3", "jtag" } }, { "PD26", 3, 26, { "gpio_in", "gpio_out", "i2c0", "ts3", "uart3", "jtag" } }, - { "PF0", 5, 0, { "gpio_in", "gpio_out", "mmc0", "jtag", NULL, NULL, "pf_eint0" } }, - { "PF1", 5, 1, { "gpio_in", "gpio_out", "mmc0", "jtag", NULL, NULL, "pf_eint1" } }, - { "PF2", 5, 2, { "gpio_in", "gpio_out", "mmc0", "uart0", NULL, NULL, "pf_eint2" } }, - { "PF3", 5, 3, { "gpio_in", "gpio_out", "mmc0", "jtag", NULL, NULL, "pf_eint3" } }, - { "PF4", 5, 4, { "gpio_in", "gpio_out", "mmc0", "uart0", NULL, NULL, "pf_eint4" } }, - { "PF5", 5, 5, { "gpio_in", "gpio_out", "mmc0", "jtag", NULL, NULL, "pf_eint5" } }, - { "PF6", 5, 6, { "gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "pf_eint6" } }, + { "PF0", 5, 0, { "gpio_in", "gpio_out", "mmc0", "jtag", NULL, NULL, "pf_eint0" }, 6, 0, 5 }, + { "PF1", 5, 1, { "gpio_in", "gpio_out", "mmc0", "jtag", NULL, NULL, "pf_eint1" }, 6, 1, 5 }, + { "PF2", 5, 2, { "gpio_in", "gpio_out", "mmc0", "uart0", NULL, NULL, "pf_eint2" }, 6, 2, 5 }, + { "PF3", 5, 3, { "gpio_in", "gpio_out", "mmc0", "jtag", NULL, NULL, "pf_eint3" }, 6, 3, 5 }, + { "PF4", 5, 4, { "gpio_in", "gpio_out", "mmc0", "uart0", NULL, NULL, "pf_eint4" }, 6, 4, 5 }, + { "PF5", 5, 5, { "gpio_in", "gpio_out", "mmc0", "jtag", NULL, NULL, "pf_eint5" }, 6, 5, 5 }, + { "PF6", 5, 6, { "gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "pf_eint6" }, 6, 6, 5 }, - { "PG0", 6, 0, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint0" }, 6, 0 }, - { "PG1", 6, 1, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint1" }, 6, 1 }, - { "PG2", 6, 2, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint2" }, 6, 2 }, - { "PG3", 6, 3, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint3" }, 6, 3 }, - { "PG4", 6, 4, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint4" }, 6, 4 }, - { "PG5", 6, 5, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint5" }, 6, 5 }, - { "PG6", 6, 6, { "gpio_in", "gpio_out", "uart1", NULL, NULL, NULL, "pg_eint6" }, 6, 6 }, - { "PG7", 6, 7, { "gpio_in", "gpio_out", "uart1", NULL, NULL, NULL, "pg_eint7" }, 6, 7 }, - { "PG8", 6, 8, { "gpio_in", "gpio_out", "uart1", NULL, "sim0", NULL, "pg_eint8" }, 6, 8 }, - { "PG9", 6, 9, { "gpio_in", "gpio_out", "uart1", NULL, "sim0", NULL, "pg_eint9" }, 6, 9 }, - { "PG10", 6, 10, { "gpio_in", "gpio_out", "i2s2", "h_i2s2", "sim0", NULL, "pg_eint10" }, 6, 10 }, - { "PG11", 6, 11, { "gpio_in", "gpio_out", "i2s2", "h_i2s2", "sim0", NULL, "pg_eint11" }, 6, 11 }, - { "PG12", 6, 12, { "gpio_in", "gpio_out", "i2s2", "h_i2s2", "sim0", NULL, "pg_eint12" }, 6, 12 }, - { "PG13", 6, 13, { "gpio_in", "gpio_out", "i2s2", "h_i2s2", "sim0", NULL, "pg_eint13" }, 6, 13 }, - { "PG14", 6, 14, { "gpio_in", "gpio_out", "i2s2", "h_i2s2", "sim0", NULL, "pg_eint14" }, 6, 13 }, + { "PG0", 6, 0, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint0" }, 6, 0, 6}, + { "PG1", 6, 1, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint1" }, 6, 1, 6}, + { "PG2", 6, 2, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint2" }, 6, 2, 6}, + { "PG3", 6, 3, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint3" }, 6, 3, 6}, + { "PG4", 6, 4, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint4" }, 6, 4, 6}, + { "PG5", 6, 5, { "gpio_in", "gpio_out", "mmc1", NULL, NULL, NULL, "pg_eint5" }, 6, 5, 6}, + { "PG6", 6, 6, { "gpio_in", "gpio_out", "uart1", NULL, NULL, NULL, "pg_eint6" }, 6, 6, 6}, + { "PG7", 6, 7, { "gpio_in", "gpio_out", "uart1", NULL, NULL, NULL, "pg_eint7" }, 6, 7, 6}, + { "PG8", 6, 8, { "gpio_in", "gpio_out", "uart1", NULL, "sim0", NULL, "pg_eint8" }, 6, 8, 6}, + { "PG9", 6, 9, { "gpio_in", "gpio_out", "uart1", NULL, "sim0", NULL, "pg_eint9" }, 6, 9, 6}, + { "PG10", 6, 10, { "gpio_in", "gpio_out", "i2s2", "h_i2s2", "sim0", NULL, "pg_eint10" }, 6, 10, 6}, + { "PG11", 6, 11, { "gpio_in", "gpio_out", "i2s2", "h_i2s2", "sim0", NULL, "pg_eint11" }, 6, 11, 6}, + { "PG12", 6, 12, { "gpio_in", "gpio_out", "i2s2", "h_i2s2", "sim0", NULL, "pg_eint12" }, 6, 12, 6}, + { "PG13", 6, 13, { "gpio_in", "gpio_out", "i2s2", "h_i2s2", "sim0", NULL, "pg_eint13" }, 6, 13, 6}, + { "PG14", 6, 14, { "gpio_in", "gpio_out", "i2s2", "h_i2s2", "sim0", NULL, "pg_eint14" }, 6, 13, 6}, - { "PH0", 7, 0, { "gpio_in", "gpio_out", "uart0", "i2s0", "h_i2s0", "sim1", "ph_eint0" }, 6, 0 }, - { "PH1", 7, 1, { "gpio_in", "gpio_out", "uart0", "i2s0", "h_i2s0", "sim1", "ph_eint1" }, 6, 1 }, - { "PH2", 7, 2, { "gpio_in", "gpio_out", "cir", "i2s0", "h_i2s0", "sim1", "ph_eint2" }, 6, 2 }, - { "PH3", 7, 3, { "gpio_in", "gpio_out", "spi1", "i2s0", "h_i2s0", "sim1", "ph_eint3" }, 6, 3 }, - { "PH4", 7, 4, { "gpio_in", "gpio_out", "spi1", "i2s0", "h_i2s0", "sim1", "ph_eint4" }, 6, 4 }, - { "PH5", 7, 5, { "gpio_in", "gpio_out", "spi1", "spdif", "i2c1", "sim1", "ph_eint5" }, 6, 5 }, - { "PH6", 7, 6, { "gpio_in", "gpio_out", "spi1", "spdif", "i2c1", "sim1", "ph_eint6" }, 6, 6 }, - { "PH7", 7, 7, { "gpio_in", "gpio_out", NULL, "spdif", NULL, NULL, "ph_eint7" }, 6, 7 }, - { "PH8", 7, 8, { "gpio_in", "gpio_out", "hdmi", NULL, NULL, NULL, "ph_eint8" }, 6, 8 }, - { "PH9", 7, 9, { "gpio_in", "gpio_out", "hdmi", NULL, NULL, NULL, "ph_eint9" }, 6, 9 }, - { "PH10", 7, 10, { "gpio_in", "gpio_out", "hdmi", NULL, NULL, NULL, "ph_eint10" }, 6, 10 }, + { "PH0", 7, 0, { "gpio_in", "gpio_out", "uart0", "i2s0", "h_i2s0", "sim1", "ph_eint0" }, 6, 0, 7}, + { "PH1", 7, 1, { "gpio_in", "gpio_out", "uart0", "i2s0", "h_i2s0", "sim1", "ph_eint1" }, 6, 1, 7}, + { "PH2", 7, 2, { "gpio_in", "gpio_out", "cir", "i2s0", "h_i2s0", "sim1", "ph_eint2" }, 6, 2, 7}, + { "PH3", 7, 3, { "gpio_in", "gpio_out", "spi1", "i2s0", "h_i2s0", "sim1", "ph_eint3" }, 6, 3, 7}, + { "PH4", 7, 4, { "gpio_in", "gpio_out", "spi1", "i2s0", "h_i2s0", "sim1", "ph_eint4" }, 6, 4, 7}, + { "PH5", 7, 5, { "gpio_in", "gpio_out", "spi1", "spdif", "i2c1", "sim1", "ph_eint5" }, 6, 5, 7}, + { "PH6", 7, 6, { "gpio_in", "gpio_out", "spi1", "spdif", "i2c1", "sim1", "ph_eint6" }, 6, 6, 7}, + { "PH7", 7, 7, { "gpio_in", "gpio_out", NULL, "spdif", NULL, NULL, "ph_eint7" }, 6, 7, 7}, + { "PH8", 7, 8, { "gpio_in", "gpio_out", "hdmi", NULL, NULL, NULL, "ph_eint8" }, 6, 8, 7}, + { "PH9", 7, 9, { "gpio_in", "gpio_out", "hdmi", NULL, NULL, NULL, "ph_eint9" }, 6, 9, 7}, + { "PH10", 7, 10, { "gpio_in", "gpio_out", "hdmi", NULL, NULL, NULL, "ph_eint10" }, 6, 10, 7}, }; const struct allwinner_padconf h6_padconf = { .npins = nitems(h6_pins), .pins = h6_pins, }; Index: projects/clang1000-import/sys/arm/allwinner/h6/h6_r_padconf.c =================================================================== --- projects/clang1000-import/sys/arm/allwinner/h6/h6_r_padconf.c (revision 356919) +++ projects/clang1000-import/sys/arm/allwinner/h6/h6_r_padconf.c (revision 356920) @@ -1,60 +1,60 @@ /*- * Copyright (c) 2019 Emmanuel Vadot * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include const static struct allwinner_pins h6_r_pins[] = { - {"PL0", 0, 0, {"gpio_in", "gpio_out", NULL, "s_i2c", NULL, NULL, "pl_eint0", NULL}, 6, 0}, - {"PL1", 0, 1, {"gpio_in", "gpio_out", NULL, "s_i2c", NULL, NULL, "pl_eint1", NULL}, 6, 1}, - {"PL2", 0, 2, {"gpio_in", "gpio_out", "s_uart", NULL, NULL, NULL, "pl_eint2", NULL}, 6, 2}, - {"PL3", 0, 3, {"gpio_in", "gpio_out", "s_uart", NULL, NULL, NULL, "pl_eint3", NULL}, 6, 3}, - {"PL4", 0, 4, {"gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint4", NULL}, 6, 4}, - {"PL5", 0, 5, {"gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint5", NULL}, 6, 5}, - {"PL6", 0, 6, {"gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint6", NULL}, 6, 6}, - {"PL7", 0, 7, {"gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint7", NULL}, 6, 7}, - {"PL8", 0, 8, {"gpio_in", "gpio_out", "s_i2c", NULL, NULL, NULL, "pl_eint8", NULL}, 6, 8}, - {"PL9", 0, 9, {"gpio_in", "gpio_out", "s_cir", NULL, NULL, NULL, "pl_eint9", NULL}, 6, 9}, - {"PL10", 0, 10, {"gpio_in", "gpio_out", "s_spdif", NULL, NULL, NULL, "pl_eint10", NULL}, 6, 10}, + {"PL0", 0, 0, {"gpio_in", "gpio_out", NULL, "s_i2c", NULL, NULL, "pl_eint0", NULL}, 6, 0, 0}, + {"PL1", 0, 1, {"gpio_in", "gpio_out", NULL, "s_i2c", NULL, NULL, "pl_eint1", NULL}, 6, 1, 0}, + {"PL2", 0, 2, {"gpio_in", "gpio_out", "s_uart", NULL, NULL, NULL, "pl_eint2", NULL}, 6, 2, 0}, + {"PL3", 0, 3, {"gpio_in", "gpio_out", "s_uart", NULL, NULL, NULL, "pl_eint3", NULL}, 6, 3, 0}, + {"PL4", 0, 4, {"gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint4", NULL}, 6, 4, 0}, + {"PL5", 0, 5, {"gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint5", NULL}, 6, 5, 0}, + {"PL6", 0, 6, {"gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint6", NULL}, 6, 6, 0}, + {"PL7", 0, 7, {"gpio_in", "gpio_out", "s_jtag", NULL, NULL, NULL, "pl_eint7", NULL}, 6, 7, 0}, + {"PL8", 0, 8, {"gpio_in", "gpio_out", "s_i2c", NULL, NULL, NULL, "pl_eint8", NULL}, 6, 8, 0}, + {"PL9", 0, 9, {"gpio_in", "gpio_out", "s_cir", NULL, NULL, NULL, "pl_eint9", NULL}, 6, 9, 0}, + {"PL10", 0, 10, {"gpio_in", "gpio_out", "s_spdif", NULL, NULL, NULL, "pl_eint10", NULL}, 6, 10, 0}, - {"PM0", 0, 0, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "pm_eint0", NULL}, 6, 0}, - {"PM1", 0, 0, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "pm_eint1", NULL}, 6, 1}, - {"PM2", 0, 0, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "pm_eint2", NULL}, 6, 2}, - {"PM3", 0, 0, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "pm_eint3", NULL}, 6, 3}, - {"PM4", 0, 0, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "pm_eint4", NULL}, 6, 4}, + {"PM0", 0, 0, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "pm_eint0", NULL}, 6, 0, 1}, + {"PM1", 0, 0, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "pm_eint1", NULL}, 6, 1, 1}, + {"PM2", 0, 0, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "pm_eint2", NULL}, 6, 2, 1}, + {"PM3", 0, 0, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "pm_eint3", NULL}, 6, 3, 1}, + {"PM4", 0, 0, {"gpio_in", "gpio_out", NULL, NULL, NULL, NULL, "pm_eint4", NULL}, 6, 4, 1}, }; const struct allwinner_padconf h6_r_padconf = { .npins = nitems(h6_r_pins), .pins = h6_r_pins, }; Index: projects/clang1000-import/sys/arm/broadcom/bcm2835/bcm2835_vcbus.c =================================================================== --- projects/clang1000-import/sys/arm/broadcom/bcm2835/bcm2835_vcbus.c (revision 356919) +++ projects/clang1000-import/sys/arm/broadcom/bcm2835/bcm2835_vcbus.c (revision 356920) @@ -1,301 +1,280 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2019 Kyle Evans * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); /* * This file contains facilities for runtime determination of address space * mappings for use in DMA/mailbox interactions. This is only used for the * arm64 SoC because the 32-bit SoC used the same mappings. */ -#if defined (__aarch64__) -#include "opt_soc.h" -#endif #include #include #include #include #include #include #include /* * This structure describes mappings that need to take place when transforming * ARM core addresses into vcbus addresses for use with the DMA/mailbox * interfaces. Currently, we only deal with peripheral/SDRAM address spaces * here. * * The SDRAM address space is consistently mapped starting at 0 and extends to * the size of the installed SDRAM. * * Peripherals are mapped further up at spots that vary per-SOC. */ struct bcm283x_memory_mapping { vm_paddr_t armc_start; vm_paddr_t armc_size; vm_paddr_t vcbus_start; }; -#ifdef SOC_BCM2835 static struct bcm283x_memory_mapping bcm2835_memmap[] = { { /* SDRAM */ .armc_start = 0x00000000, .armc_size = BCM2835_ARM_IO_BASE, .vcbus_start = BCM2835_VCBUS_SDRAM_BASE, }, { /* Peripherals */ .armc_start = BCM2835_ARM_IO_BASE, .armc_size = BCM28XX_ARM_IO_SIZE, .vcbus_start = BCM2835_VCBUS_IO_BASE, }, { 0, 0, 0 }, }; -#endif -#ifdef SOC_BCM2836 static struct bcm283x_memory_mapping bcm2836_memmap[] = { { /* SDRAM */ .armc_start = 0x00000000, .armc_size = BCM2836_ARM_IO_BASE, .vcbus_start = BCM2836_VCBUS_SDRAM_BASE, }, { /* Peripherals */ .armc_start = BCM2836_ARM_IO_BASE, .armc_size = BCM28XX_ARM_IO_SIZE, .vcbus_start = BCM2836_VCBUS_IO_BASE, }, { 0, 0, 0 }, }; -#endif -#ifdef SOC_BRCM_BCM2837 static struct bcm283x_memory_mapping bcm2837_memmap[] = { { /* SDRAM */ .armc_start = 0x00000000, .armc_size = BCM2837_ARM_IO_BASE, .vcbus_start = BCM2837_VCBUS_SDRAM_BASE, }, { /* Peripherals */ .armc_start = BCM2837_ARM_IO_BASE, .armc_size = BCM28XX_ARM_IO_SIZE, .vcbus_start = BCM2837_VCBUS_IO_BASE, }, { 0, 0, 0 }, }; -#endif -#ifdef SOC_BRCM_BCM2838 - /* * The BCM2838 supports up to 4GB of SDRAM, but unfortunately we can still only * map the first 1GB into the "legacy master view" (vcbus) address space. Thus, * peripherals can still only access the lower end of SDRAM. For this reason, * we also capture the main-peripheral busdma restriction below. */ static struct bcm283x_memory_mapping bcm2838_memmap[] = { { /* SDRAM */ .armc_start = 0x00000000, .armc_size = 0x40000000, .vcbus_start = BCM2838_VCBUS_SDRAM_BASE, }, { /* Main peripherals */ .armc_start = BCM2838_ARM_IO_BASE, .armc_size = BCM28XX_ARM_IO_SIZE, .vcbus_start = BCM2838_VCBUS_IO_BASE, }, { 0, 0, 0 }, }; -#endif static struct bcm283x_memory_soc_cfg { struct bcm283x_memory_mapping *memmap; const char *soc_compat; bus_addr_t busdma_lowaddr; } bcm283x_memory_configs[] = { -#ifdef SOC_BCM2835 /* Legacy */ { .memmap = bcm2835_memmap, .soc_compat = "raspberrypi,model-b", .busdma_lowaddr = BUS_SPACE_MAXADDR_32BIT, }, /* Modern */ { .memmap = bcm2835_memmap, .soc_compat = "brcm,bcm2835", .busdma_lowaddr = BUS_SPACE_MAXADDR_32BIT, }, -#endif -#ifdef SOC_BCM2836 /* Legacy */ { .memmap = bcm2836_memmap, .soc_compat = "brcm,bcm2709", .busdma_lowaddr = BUS_SPACE_MAXADDR_32BIT, }, /* Modern */ { .memmap = bcm2836_memmap, .soc_compat = "brcm,bcm2836", .busdma_lowaddr = BUS_SPACE_MAXADDR_32BIT, }, - -#endif -#ifdef SOC_BRCM_BCM2837 { .memmap = bcm2837_memmap, .soc_compat = "brcm,bcm2837", .busdma_lowaddr = BUS_SPACE_MAXADDR_32BIT, }, -#endif -#ifdef SOC_BRCM_BCM2838 { .memmap = bcm2838_memmap, .soc_compat = "brcm,bcm2711", .busdma_lowaddr = BCM2838_PERIPH_MAXADDR, }, { .memmap = bcm2838_memmap, .soc_compat = "brcm,bcm2838", .busdma_lowaddr = BCM2838_PERIPH_MAXADDR, }, -#endif }; static struct bcm283x_memory_soc_cfg *booted_soc_memcfg; static struct bcm283x_memory_soc_cfg * bcm283x_get_current_memcfg(void) { phandle_t root; int i; /* We'll cache it once we decide, because it won't change per-boot. */ if (booted_soc_memcfg != NULL) return (booted_soc_memcfg); KASSERT(nitems(bcm283x_memory_configs) != 0, ("No SOC memory configurations enabled!")); root = OF_finddevice("/"); for (i = 0; i < nitems(bcm283x_memory_configs); ++i) { booted_soc_memcfg = &bcm283x_memory_configs[i]; if (bootverbose) printf("Checking root against %s\n", booted_soc_memcfg->soc_compat); if (ofw_bus_node_is_compatible(root, booted_soc_memcfg->soc_compat)) return (booted_soc_memcfg); } /* * The kernel doesn't fit the board; we can't really make a reasonable * guess, as these SOC are different enough that something will blow up * later. */ panic("No suitable SOC memory configuration found."); } #define BCM283X_MEMMAP_ISTERM(ent) \ ((ent)->armc_start == 0 && (ent)->armc_size == 0 && \ (ent)->vcbus_start == 0) vm_paddr_t bcm283x_armc_to_vcbus(vm_paddr_t pa) { struct bcm283x_memory_soc_cfg *cfg; struct bcm283x_memory_mapping *map, *ment; /* Guaranteed not NULL if we haven't panicked yet. */ cfg = bcm283x_get_current_memcfg(); map = cfg->memmap; for (ment = map; !BCM283X_MEMMAP_ISTERM(ment); ++ment) { if (pa >= ment->armc_start && pa < ment->armc_start + ment->armc_size) { return (pa - ment->armc_start) + ment->vcbus_start; } } /* * Assume 1:1 mapping for anything else, but complain about it on * verbose boots. */ if (bootverbose) printf("bcm283x_vcbus: No armc -> vcbus mapping found: %jx\n", (uintmax_t)pa); return (pa); } vm_paddr_t bcm283x_vcbus_to_armc(vm_paddr_t vca) { struct bcm283x_memory_soc_cfg *cfg; struct bcm283x_memory_mapping *map, *ment; /* Guaranteed not NULL if we haven't panicked yet. */ cfg = bcm283x_get_current_memcfg(); map = cfg->memmap; for (ment = map; !BCM283X_MEMMAP_ISTERM(ment); ++ment) { if (vca >= ment->vcbus_start && vca < ment->vcbus_start + ment->armc_size) { return (vca - ment->vcbus_start) + ment->armc_start; } } /* * Assume 1:1 mapping for anything else, but complain about it on * verbose boots. */ if (bootverbose) printf("bcm283x_vcbus: No vcbus -> armc mapping found: %jx\n", (uintmax_t)vca); return (vca); } bus_addr_t bcm283x_dmabus_peripheral_lowaddr(void) { struct bcm283x_memory_soc_cfg *cfg; cfg = bcm283x_get_current_memcfg(); return (cfg->busdma_lowaddr); } Index: projects/clang1000-import/sys/arm/conf/GENERIC =================================================================== --- projects/clang1000-import/sys/arm/conf/GENERIC (revision 356919) +++ projects/clang1000-import/sys/arm/conf/GENERIC (revision 356920) @@ -1,294 +1,295 @@ # # GENERICV6 -- Generic(ish) kernel config. # # For more information on this file, please read the config(5) manual page, # and/or the handbook section on Kernel Configuration Files: # # https://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html # # The handbook is also available locally in /usr/share/doc/handbook # if you've installed the doc distribution, otherwise always see the # FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the # latest information. # # An exhaustive list of options and more detailed explanations of the # device lines is also present in the ../../conf/NOTES and NOTES files. # If you are in doubt as to the purpose or necessity of a line, check first # in NOTES. # # $FreeBSD$ ident GENERIC cpu CPU_CORTEXA cpu CPU_MV_PJ4B options SMP_ON_UP machine arm armv7 makeoptions CONF_CFLAGS="-march=armv7a" include "std.armv7" files "../allwinner/files.allwinner" files "../allwinner/files.allwinner_up" files "../allwinner/a10/files.a10" files "../allwinner/a13/files.a13" files "../allwinner/a20/files.a20" files "../allwinner/a31/files.a31" files "../allwinner/a33/files.a33" files "../allwinner/a83t/files.a83t" files "../allwinner/h3/files.h3" files "../broadcom/bcm2835/files.bcm2836" files "../broadcom/bcm2835/files.bcm283x" files "../freescale/imx/files.imx6" files "../mv/files.arm7" files "../nvidia/tegra124/files.tegra124" files "../qemu/files.qemu" files "../ti/files.ti" files "../ti/am335x/files.am335x" files "../ti/omap4/files.omap4" files "../xilinx/files.zynq7" options SOC_ALLWINNER_A10 options SOC_ALLWINNER_A13 options SOC_ALLWINNER_A20 options SOC_ALLWINNER_A31 options SOC_ALLWINNER_A31S options SOC_ALLWINNER_A33 options SOC_ALLWINNER_A83T options SOC_ALLWINNER_H2PLUS options SOC_ALLWINNER_H3 options SOC_BCM2836 options SOC_BRCM_BCM2837 options SOC_MV_ARMADA38X options SOC_MV_ARMADAXP options SOC_TI_AM335X options SOC_OMAP4 options SCHED_ULE # ULE scheduler options SMP # Enable multiple cores options PLATFORM options LINUX_BOOT_ABI # EXT_RESOURCES pseudo devices options EXT_RESOURCES device clk device phy device hwreset device nvmem device regulator device syscon # CPU frequency control device cpufreq # Interrupt controller device gic # PMU support (for CCNT). device pmu # ARM Generic Timer device generic_timer device mpcore_timer # MMC/SD/SDIO Card slot support device sdhci # SD controller device mmc # mmc/sd bus device mmcsd # mmc/sd flash cards # ATA controllers device ahci # AHCI-compatible SATA controllers #device ata # Legacy ATA/SATA controllers # PCI options NEW_PCIB device pci device pci_host_generic # PCI NICs device re # RealTek 8139C+/8169/8169S/8110S # VirtIO device virtio device virtio_mmio device virtio_pci device virtio_blk device vtnet # Console and misc device uart device uart_ns8250 device uart_snps device pl011 device pty device snp device md # Memory "disks" device firmware # firmware assist module device pl310 # PL310 L2 cache controller device psci # I2C support device iicbus device iic device twsi device rsb # Allwinner Reduced Serial Bus device p2wi # Allwinner Push-Pull Two Wire device axp209 # AXP209 Power Management Unit device axp81x # AXP813/818 Power Management Unit device bcm2835_bsc device fsliic # Freescale i2c/iic device icee # AT24Cxxx and compatible EEPROMs device sy8106a # SY8106A Buck Regulator device ti_i2c device am335x_pmic # AM335x Power Management IC (TPC65217) device am335x_rtc # RTC support (power management only) device twl # TI TWLX0X0/TPS659x0 Power Management device twl_vreg # twl voltage regulation device twl_clks # twl external clocks # i2c RTCs device ds1307 # Dallas DS1307 RTC and compatible device ds13rtc # All Dallas/Maxim DS13xx RTCs device ds1672 # Dallas DS1672 RTC device ds3231 # Dallas DS3231 RTC + temperature device nxprtc # NXP RTCs: PCA/PFC212x PCA/PCF85xx device s35390a # Seiko s3539x RTCs # GPIO device dwgpio # Synopsys DesignWare APB GPIO Controller device gpio device gpiobacklight device gpioled device gpioregulator # EVDEV support device evdev # input event device support options EVDEV_SUPPORT # evdev support in legacy drivers device uinput # install /dev/uinput cdev device aw_cir # SPI device spibus device spigen device bcm2835_spi device mv_spi device ti_spi +device zy7_qspi # Xilinx Zynq QSPI controller # ADC support device ti_adc # PWM device pwm # Watchdog support # If we don't enable the watchdog driver, the BealeBone could potentially # reboot automatically because the boot loader might have enabled the # watchdog. device ti_wdt device imxwdt # Watchdog. WARNING: can't be disabled!!! device aw_wdog # Allwinner Watchdog device scbus # SCSI bus (required for ATA/SCSI) device da # Direct Access (disks) device cd # CD device pass # Passthrough device (direct ATA/SCSI access) # USB support options USB_HOST_ALIGN=64 # Align usb buffers to cache line size. device usb device uhci device ohci device ehci device xhci device dwcotg # DWC OTG controller device musb device axe # USB-Ethernet device umass # Disks/Mass storage - Requires scbus and da device uhid # "Human Interface Devices" device ukbd # Allow keyboard like HIDs to control console # Device mode support device usb_template # Control of the gadget # Ethernet device loop device ether device vlan # 802.1Q VLAN support device bpf device mii device mdio device etherswitch device e6000sw # Ethernet NICs that use the common MII bus controller code. # NOTE: Be sure to keep the 'device miibus' line in order to use these NICs! device miibus device awg # 10/100/1000 integrated EMAC controller device cpsw # TI Common Platform Ethernet Switch (CPSW) device cgem # Zynq-7000 gig ethernet device device dwc # 10/100/1000 integrated GMAC controller device emac # 10/100 integrated EMAC controller device ffec # Freescale Fast Ethernet Controller device neta # Marvell 10/100/1000 Network controller device smsc # SMSC LAN91C111 # Sound support device sound # Framebuffer support device vt device kbdmux device ums device videomode device hdmi device vchiq # Pinmux device fdt_pinctrl # TI Programmable Realtime Unit support device ti_pruss # Mailbox support device ti_mbox # DMA controller device fslsdma device ti_sdma device a10_dmac device a31_dmac # Extensible Firmware Interface options EFI # Marvell Cryptographic Engine and Security Accelerator device cesa device crypto device cryptodev # RTC device imx6_snvs # IMX6 On-chip RTC device aw_rtc # Allwinner On-chip RTC # EFUSE device aw_sid # Allwinner Secure ID EFUSE # Thermal sensors device aw_thermal # Allwinner Thermal Sensor Controller # Flattened Device Tree options FDT # Configure using FDT/DTB data makeoptions MODULES_EXTRA+="dtb/allwinner" makeoptions MODULES_EXTRA+="dtb/am335x" makeoptions MODULES_EXTRA+="dtb/imx6" makeoptions MODULES_EXTRA+="dtb/nvidia" makeoptions MODULES_EXTRA+="dtb/omap4" makeoptions MODULES_EXTRA+="dtb/rpi" makeoptions MODULES_EXTRA+="dtb/zynq" # SOC-specific modules makeoptions MODULES_EXTRA+="allwinner" makeoptions MODULES_EXTRA+="arm_ti" makeoptions MODULES_EXTRA+="imx" Index: projects/clang1000-import/sys/arm/conf/ZEDBOARD =================================================================== --- projects/clang1000-import/sys/arm/conf/ZEDBOARD (revision 356919) +++ projects/clang1000-import/sys/arm/conf/ZEDBOARD (revision 356920) @@ -1,79 +1,83 @@ # # ZEDBOARD -- Custom configuration for the Xilinx Zynq-7000 based # ZedBoard (www.zedboard.org) and similar Zynq boards. # # For more information on this file, please read the config(5) manual page, # and/or the handbook section on Kernel Configuration Files: # # https://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html # # The handbook is also available locally in /usr/share/doc/handbook # if you've installed the doc distribution, otherwise always see the # FreeBSD World Wide Web server (https://www.FreeBSD.org/) for the # latest information. # # An exhaustive list of options and more detailed explanations of the # device lines is also present in the ../../conf/NOTES and NOTES files. # If you are in doubt as to the purpose or necessity of a line, check first # in NOTES. # # $FreeBSD$ ident ZEDBOARD include "std.armv7" include "../xilinx/std.zynq7" makeoptions MODULES_EXTRA="dtb/zynq" options SCHED_ULE # ULE scheduler options PLATFORM # Platform based SoC #options NFSSD # Network Filesystem Server options SMP # Enable multiple cores # NFS root from boopt/dhcp #options BOOTP #options BOOTP_NFSROOT #options BOOTP_COMPAT #options BOOTP_NFSV3 options ROOTDEVNAME=\"ufs:mmcsd0s2a\" # Interrupt controller device gic # Cache controller device pl310 # PL310 L2 cache controller # ARM MPCore timer device mpcore_timer device loop device ether device cgem # Zynq-7000 gig ethernet device device mii device e1000phy device rgephy # Zybo uses Realtek RTL8211E device pty device uart device gpio +device spibus +device mx25l +device zy7_qspi # Xilinx Zynq QSPI controller + device md device mmc # mmc/sd bus device mmcsd # mmc/sd flash cards device sdhci # generic sdhci device bpf # Berkeley packet filter # USB support device usb device ehci device umass device scbus # SCSI bus (required for ATA/SCSI) device da # Direct Access (disks) device axe # USB-Ethernet # Flattened Device Tree options FDT # Configure using FDT/DTB data #options FDT_DTB_STATIC #makeoptions FDT_DTS_FILE=zedboard.dts Index: projects/clang1000-import/sys/arm/linux/Makefile =================================================================== --- projects/clang1000-import/sys/arm/linux/Makefile (revision 356919) +++ projects/clang1000-import/sys/arm/linux/Makefile (revision 356920) @@ -1,28 +1,7 @@ # Makefile for syscall tables # # $FreeBSD$ -# Don't use an OBJDIR -.OBJDIR: ${.CURDIR} +GENERATED_PREFIX= linux_ -.include - -MAKESYSCALLS= ../../tools/makesyscalls.lua -SRCS= syscalls.conf \ - syscalls.master -GENERATED= linux_proto.h \ - linux_syscall.h \ - linux_syscalls.c \ - linux_sysent.c \ - linux_systrace_args.c - -all: - @echo "make sysent only" - -# We .ORDER these explicitly so that we only run MAKESYSCALLS once, rather than -# potentially once for each ${GENERATED} file. -.ORDER: ${GENERATED} -sysent: ${GENERATED} - -${GENERATED}: ${MAKESYSCALLS} ${SRCS} - ${LUA} ${MAKESYSCALLS} syscalls.master syscalls.conf +.include "../../conf/sysent.mk" Index: projects/clang1000-import/sys/arm/xilinx/files.zynq7 =================================================================== --- projects/clang1000-import/sys/arm/xilinx/files.zynq7 (revision 356919) +++ projects/clang1000-import/sys/arm/xilinx/files.zynq7 (revision 356920) @@ -1,16 +1,17 @@ # # files.zynq7 # # $FreeBSD$ arm/xilinx/zy7_machdep.c standard arm/xilinx/zy7_l2cache.c standard arm/xilinx/zy7_slcr.c standard arm/xilinx/zy7_devcfg.c standard arm/xilinx/zy7_mp.c optional smp dev/cadence/if_cgem.c optional cgem arm/xilinx/zy7_ehci.c optional ehci arm/xilinx/uart_dev_cdnc.c optional uart arm/xilinx/zy7_gpio.c optional gpio +arm/xilinx/zy7_qspi.c optional zy7_qspi Index: projects/clang1000-import/sys/arm/xilinx/zy7_qspi.c =================================================================== --- projects/clang1000-import/sys/arm/xilinx/zy7_qspi.c (nonexistent) +++ projects/clang1000-import/sys/arm/xilinx/zy7_qspi.c (revision 356920) @@ -0,0 +1,763 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2018 Thomas Skibo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +/* + * This is a driver for the Quad-SPI Flash Controller in the Xilinx + * Zynq-7000 SoC. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include + +#include "spibus_if.h" + +static struct ofw_compat_data compat_data[] = { + {"xlnx,zy7_qspi", 1}, + {"xlnx,zynq-qspi-1.0", 1}, + {NULL, 0} +}; + +struct zy7_qspi_softc { + device_t dev; + device_t child; + struct mtx sc_mtx; + struct resource *mem_res; + struct resource *irq_res; + void *intrhandle; + + uint32_t cfg_reg_shadow; + uint32_t lqspi_cfg_shadow; + uint32_t spi_clock; + uint32_t ref_clock; + unsigned int spi_clk_real_freq; + unsigned int rx_overflows; + unsigned int tx_underflows; + unsigned int interrupts; + unsigned int stray_ints; + struct spi_command *cmd; + int tx_bytes; /* tx_cmd_sz + tx_data_sz */ + int tx_bytes_sent; + int rx_bytes; /* rx_cmd_sz + rx_data_sz */ + int rx_bytes_rcvd; + int busy; + int is_dual; + int is_stacked; + int is_dio; +}; + +#define ZY7_QSPI_DEFAULT_SPI_CLOCK 50000000 + +#define QSPI_SC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) +#define QSPI_SC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) +#define QSPI_SC_LOCK_INIT(sc) \ + mtx_init(&(sc)->sc_mtx, device_get_nameunit((sc)->dev), NULL, MTX_DEF) +#define QSPI_SC_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) +#define QSPI_SC_ASSERT_LOCKED(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) + +#define RD4(sc, off) (bus_read_4((sc)->mem_res, (off))) +#define WR4(sc, off, val) (bus_write_4((sc)->mem_res, (off), (val))) + +/* + * QSPI device registers. + * Reference: Zynq-7000 All Programmable SoC Technical Reference Manual. + * (v1.12.2) July 1, 2018. Xilinx doc UG585. + */ +#define ZY7_QSPI_CONFIG_REG 0x0000 +#define ZY7_QSPI_CONFIG_IFMODE (1U << 31) +#define ZY7_QSPI_CONFIG_ENDIAN (1 << 26) +#define ZY7_QSPI_CONFIG_HOLDB_DR (1 << 19) +#define ZY7_QSPI_CONFIG_RSVD1 (1 << 17) /* must be 1 */ +#define ZY7_QSPI_CONFIG_MANSTRT (1 << 16) +#define ZY7_QSPI_CONFIG_MANSTRTEN (1 << 15) +#define ZY7_QSPI_CONFIG_SSFORCE (1 << 14) +#define ZY7_QSPI_CONFIG_PCS (1 << 10) +#define ZY7_QSPI_CONFIG_REF_CLK (1 << 8) +#define ZY7_QSPI_CONFIG_FIFO_WIDTH_MASK (3 << 6) +#define ZY7_QSPI_CONFIG_FIFO_WIDTH32 (3 << 6) +#define ZY7_QSPI_CONFIG_BAUD_RATE_DIV_MASK (7 << 3) +#define ZY7_QSPI_CONFIG_BAUD_RATE_DIV_SHIFT 3 +#define ZY7_QSPI_CONFIG_BAUD_RATE_DIV(x) ((x) << 3) /* divide by 2< 0) { + nvalid = MIN(4, nbytes); + data = 0xffffffff; + + /* + * A hardware bug forces us to wait until the tx fifo is + * empty before writing partial words. We'll come back + * next tx interrupt. + */ + if (nvalid < 4 && (RD4(sc, ZY7_QSPI_INTR_STAT_REG) & + ZY7_QSPI_INTR_TX_FIFO_NOT_FULL) == 0) + return; + + if (sc->tx_bytes_sent < sc->cmd->tx_cmd_sz) { + /* Writing command. */ + n = MIN(nvalid, sc->cmd->tx_cmd_sz - + sc->tx_bytes_sent); + memcpy(&data, (uint8_t *)sc->cmd->tx_cmd + + sc->tx_bytes_sent, n); + + if (nvalid > n) { + /* Writing start of data. */ + memcpy((uint8_t *)&data + n, + sc->cmd->tx_data, nvalid - n); + } + } else + /* Writing data. */ + memcpy(&data, (uint8_t *)sc->cmd->tx_data + + (sc->tx_bytes_sent - sc->cmd->tx_cmd_sz), nvalid); + + switch (nvalid) { + case 1: + WR4(sc, ZY7_QSPI_TXD1_REG, data); + break; + case 2: + WR4(sc, ZY7_QSPI_TXD2_REG, data); + break; + case 3: + WR4(sc, ZY7_QSPI_TXD3_REG, data); + break; + case 4: + WR4(sc, ZY7_QSPI_TXD0_REG, data); + break; + } + + sc->tx_bytes_sent += nvalid; + nbytes -= nvalid; + } +} + + +/* Read hardware fifo data into command response and data buffers. */ +static void +zy7_qspi_read_fifo(struct zy7_qspi_softc *sc) +{ + int n, nbytes; + uint32_t data; + + do { + data = RD4(sc, ZY7_QSPI_RX_DATA_REG); + nbytes = MIN(4, sc->rx_bytes - sc->rx_bytes_rcvd); + + /* + * Last word in non-word-multiple transfer is packed + * non-intuitively. + */ + if (nbytes < 4) + data >>= 8 * (4 - nbytes); + + if (sc->rx_bytes_rcvd < sc->cmd->rx_cmd_sz) { + /* Reading command. */ + n = MIN(nbytes, sc->cmd->rx_cmd_sz - + sc->rx_bytes_rcvd); + memcpy((uint8_t *)sc->cmd->rx_cmd + sc->rx_bytes_rcvd, + &data, n); + sc->rx_bytes_rcvd += n; + nbytes -= n; + data >>= 8 * n; + } + + if (nbytes > 0) { + /* Reading data. */ + memcpy((uint8_t *)sc->cmd->rx_data + + (sc->rx_bytes_rcvd - sc->cmd->rx_cmd_sz), + &data, nbytes); + sc->rx_bytes_rcvd += nbytes; + } + + } while (sc->rx_bytes_rcvd < sc->rx_bytes && + (RD4(sc, ZY7_QSPI_INTR_STAT_REG) & + ZY7_QSPI_INTR_RX_FIFO_NOT_EMPTY) != 0); +} + +/* End a transfer early by draining rx fifo and disabling interrupts. */ +static void +zy7_qspi_abort_transfer(struct zy7_qspi_softc *sc) +{ + /* Drain receive fifo. */ + while ((RD4(sc, ZY7_QSPI_INTR_STAT_REG) & + ZY7_QSPI_INTR_RX_FIFO_NOT_EMPTY) != 0) + (void)RD4(sc, ZY7_QSPI_RX_DATA_REG); + + /* Shut down interrupts. */ + WR4(sc, ZY7_QSPI_INTR_DIS_REG, + ZY7_QSPI_INTR_RX_OVERFLOW | + ZY7_QSPI_INTR_RX_FIFO_NOT_EMPTY | + ZY7_QSPI_INTR_TX_FIFO_NOT_FULL); +} + + +static void +zy7_qspi_intr(void *arg) +{ + struct zy7_qspi_softc *sc = (struct zy7_qspi_softc *)arg; + uint32_t istatus; + + QSPI_SC_LOCK(sc); + + sc->interrupts++; + + istatus = RD4(sc, ZY7_QSPI_INTR_STAT_REG); + + /* Stray interrupts can happen if a transfer gets interrupted. */ + if (!sc->busy) { + sc->stray_ints++; + QSPI_SC_UNLOCK(sc); + return; + } + + if ((istatus & ZY7_QSPI_INTR_RX_OVERFLOW) != 0) { + device_printf(sc->dev, "rx fifo overflow!\n"); + sc->rx_overflows++; + + /* Clear status bit. */ + WR4(sc, ZY7_QSPI_INTR_STAT_REG, + ZY7_QSPI_INTR_RX_OVERFLOW); + } + + /* Empty receive fifo before any more transmit data is sent. */ + if (sc->rx_bytes_rcvd < sc->rx_bytes && + (istatus & ZY7_QSPI_INTR_RX_FIFO_NOT_EMPTY) != 0) { + zy7_qspi_read_fifo(sc); + if (sc->rx_bytes_rcvd == sc->rx_bytes) + /* Disable receive interrupts. */ + WR4(sc, ZY7_QSPI_INTR_DIS_REG, + ZY7_QSPI_INTR_RX_FIFO_NOT_EMPTY | + ZY7_QSPI_INTR_RX_OVERFLOW); + } + + /* + * Transmit underflows aren't really a bug because a hardware + * bug forces us to allow the tx fifo to go empty between full + * and partial fifo writes. Why bother counting? + */ + if ((istatus & ZY7_QSPI_INTR_TX_FIFO_UNDERFLOW) != 0) { + sc->tx_underflows++; + + /* Clear status bit. */ + WR4(sc, ZY7_QSPI_INTR_STAT_REG, + ZY7_QSPI_INTR_TX_FIFO_UNDERFLOW); + } + + /* Fill transmit fifo. */ + if (sc->tx_bytes_sent < sc->tx_bytes && + (istatus & ZY7_QSPI_INTR_TX_FIFO_NOT_FULL) != 0) { + zy7_qspi_write_fifo(sc, MIN(240, sc->tx_bytes - + sc->tx_bytes_sent)); + + if (sc->tx_bytes_sent == sc->tx_bytes) { + /* + * Disable transmit FIFO interrupt, enable receive + * FIFO interrupt. + */ + WR4(sc, ZY7_QSPI_INTR_DIS_REG, + ZY7_QSPI_INTR_TX_FIFO_NOT_FULL); + WR4(sc, ZY7_QSPI_INTR_EN_REG, + ZY7_QSPI_INTR_RX_FIFO_NOT_EMPTY); + } + } + + /* Finished with transfer? */ + if (sc->tx_bytes_sent == sc->tx_bytes && + sc->rx_bytes_rcvd == sc->rx_bytes) { + + /* De-assert CS. */ + sc->cfg_reg_shadow |= ZY7_QSPI_CONFIG_PCS; + WR4(sc, ZY7_QSPI_CONFIG_REG, sc->cfg_reg_shadow); + + wakeup(sc->dev); + } + + QSPI_SC_UNLOCK(sc); +} + +/* Initialize hardware. */ +static int +zy7_qspi_init_hw(struct zy7_qspi_softc *sc) +{ + uint32_t baud_div; + + /* Configure LQSPI Config register. Disable linear mode. */ + sc->lqspi_cfg_shadow = RD4(sc, ZY7_QSPI_LQSPI_CFG_REG); + sc->lqspi_cfg_shadow &= ~(ZY7_QSPI_LQSPI_CFG_LINEAR | + ZY7_QSPI_LQSPI_CFG_TWO_MEM | + ZY7_QSPI_LQSPI_CFG_SEP_BUS); + if (sc->is_dual) { + sc->lqspi_cfg_shadow |= ZY7_QSPI_LQSPI_CFG_TWO_MEM; + if (sc->is_stacked) { + sc->lqspi_cfg_shadow &= + ~ZY7_QSPI_LQSPI_CFG_INST_CODE_MASK; + sc->lqspi_cfg_shadow |= + ZY7_QSPI_LQSPI_CFG_INST_CODE(sc->is_dio ? + CMD_READ_DUAL_IO : CMD_READ_QUAD_OUTPUT); + } else + sc->lqspi_cfg_shadow |= ZY7_QSPI_LQSPI_CFG_SEP_BUS; + } + WR4(sc, ZY7_QSPI_LQSPI_CFG_REG, sc->lqspi_cfg_shadow); + + /* Find best clock divider. */ + baud_div = 0; + while ((sc->ref_clock >> (baud_div + 1)) > sc->spi_clock && + baud_div < 8) + baud_div++; + if (baud_div >= 8) { + device_printf(sc->dev, "cannot configure clock divider: ref=%d" + " spi=%d.\n", sc->ref_clock, sc->spi_clock); + return (EINVAL); + } + sc->spi_clk_real_freq = sc->ref_clock >> (baud_div + 1); + + /* + * If divider is 2 (the max speed), use internal loopback master + * clock for read data. (See section 12.3.1 in ref man.) + */ + if (baud_div == 0) + WR4(sc, ZY7_QSPI_LPBK_DLY_ADJ_REG, + ZY7_QSPI_LPBK_DLY_ADJ_USE_LPBK | + ZY7_QSPI_LPBK_DLY_ADJ_DLY1(0) | + ZY7_QSPI_LPBK_DLY_ADJ_DLY0(0)); + else + WR4(sc, ZY7_QSPI_LPBK_DLY_ADJ_REG, 0); + + /* Set up configuration register. */ + sc->cfg_reg_shadow = + ZY7_QSPI_CONFIG_IFMODE | + ZY7_QSPI_CONFIG_HOLDB_DR | + ZY7_QSPI_CONFIG_RSVD1 | + ZY7_QSPI_CONFIG_SSFORCE | + ZY7_QSPI_CONFIG_PCS | + ZY7_QSPI_CONFIG_FIFO_WIDTH32 | + ZY7_QSPI_CONFIG_BAUD_RATE_DIV(baud_div) | + ZY7_QSPI_CONFIG_MODE_SEL; + WR4(sc, ZY7_QSPI_CONFIG_REG, sc->cfg_reg_shadow); + + /* + * Set thresholds. We must use 1 for tx threshold because there + * is no fifo empty flag and we need one to implement a bug + * workaround. + */ + WR4(sc, ZY7_QSPI_TX_THRESH_REG, 1); + WR4(sc, ZY7_QSPI_RX_THRESH_REG, 1); + + /* Clear and disable all interrupts. */ + WR4(sc, ZY7_QSPI_INTR_STAT_REG, ~0); + WR4(sc, ZY7_QSPI_INTR_DIS_REG, ~0); + + /* Enable SPI. */ + WR4(sc, ZY7_QSPI_EN_REG, ZY7_SPI_ENABLE); + + return (0); +} + + +static void +zy7_qspi_add_sysctls(device_t dev) +{ + struct zy7_qspi_softc *sc = device_get_softc(dev); + struct sysctl_ctx_list *ctx; + struct sysctl_oid_list *child; + + ctx = device_get_sysctl_ctx(dev); + child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); + + SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "spi_clk_real_freq", CTLFLAG_RD, + &sc->spi_clk_real_freq, 0, "SPI clock real frequency"); + + SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_overflows", CTLFLAG_RD, + &sc->rx_overflows, 0, "RX FIFO overflow events"); + + SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_underflows", CTLFLAG_RD, + &sc->tx_underflows, 0, "TX FIFO underflow events"); + + SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "interrupts", CTLFLAG_RD, + &sc->interrupts, 0, "interrupt calls"); + + SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "stray_ints", CTLFLAG_RD, + &sc->stray_ints, 0, "stray interrupts"); +} + + +static int +zy7_qspi_probe(device_t dev) +{ + + if (!ofw_bus_status_okay(dev)) + return (ENXIO); + + if (ofw_bus_search_compatible(dev, compat_data)->ocd_data == 0) + return (ENXIO); + + device_set_desc(dev, "Zynq Quad-SPI Flash Controller"); + + return (BUS_PROBE_DEFAULT); +} + + +static int +zy7_qspi_attach(device_t dev) +{ + struct zy7_qspi_softc *sc; + int rid, err; + phandle_t node; + pcell_t cell; + + sc = device_get_softc(dev); + sc->dev = dev; + + QSPI_SC_LOCK_INIT(sc); + + /* Get ref-clock, spi-clock, and other properties. */ + node = ofw_bus_get_node(dev); + if (OF_getprop(node, "ref-clock", &cell, sizeof(cell)) > 0) + sc->ref_clock = fdt32_to_cpu(cell); + else { + device_printf(dev, "must have ref-clock property\n"); + return (ENXIO); + } + if (OF_getprop(node, "spi-clock", &cell, sizeof(cell)) > 0) + sc->spi_clock = fdt32_to_cpu(cell); + else + sc->spi_clock = ZY7_QSPI_DEFAULT_SPI_CLOCK; + if (OF_getprop(node, "is-stacked", &cell, sizeof(cell)) > 0 && + fdt32_to_cpu(cell) != 0) { + sc->is_dual = 1; + sc->is_stacked = 1; + } else if (OF_getprop(node, "is-dual", &cell, sizeof(cell)) > 0 && + fdt32_to_cpu(cell) != 0) + sc->is_dual = 1; + if (OF_getprop(node, "is-dio", &cell, sizeof(cell)) > 0 && + fdt32_to_cpu(cell) != 0) + sc->is_dio = 1; + + /* Get memory resource. */ + rid = 0; + sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, + RF_ACTIVE); + if (sc->mem_res == NULL) { + device_printf(dev, "could not allocate memory resources.\n"); + zy7_qspi_detach(dev); + return (ENOMEM); + } + + /* Allocate IRQ. */ + rid = 0; + sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, + RF_ACTIVE); + if (sc->irq_res == NULL) { + device_printf(dev, "could not allocate IRQ resource.\n"); + zy7_qspi_detach(dev); + return (ENOMEM); + } + + /* Activate the interrupt. */ + err = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_MISC | INTR_MPSAFE, + NULL, zy7_qspi_intr, sc, &sc->intrhandle); + if (err) { + device_printf(dev, "could not setup IRQ.\n"); + zy7_qspi_detach(dev); + return (err); + } + + /* Configure the device. */ + err = zy7_qspi_init_hw(sc); + if (err) { + zy7_qspi_detach(dev); + return (err); + } + + sc->child = device_add_child(dev, "spibus", -1); + + zy7_qspi_add_sysctls(dev); + + /* Attach spibus driver as a child later when interrupts work. */ + config_intrhook_oneshot((ich_func_t)bus_generic_attach, dev); + + return (0); +} + +static int +zy7_qspi_detach(device_t dev) +{ + struct zy7_qspi_softc *sc = device_get_softc(dev); + + if (device_is_attached(dev)) + bus_generic_detach(dev); + + /* Delete child bus. */ + if (sc->child) + device_delete_child(dev, sc->child); + + /* Disable hardware. */ + if (sc->mem_res != NULL) { + /* Disable SPI. */ + WR4(sc, ZY7_QSPI_EN_REG, 0); + + /* Clear and disable all interrupts. */ + WR4(sc, ZY7_QSPI_INTR_STAT_REG, ~0); + WR4(sc, ZY7_QSPI_INTR_DIS_REG, ~0); + } + + /* Teardown and release interrupt. */ + if (sc->irq_res != NULL) { + if (sc->intrhandle) + bus_teardown_intr(dev, sc->irq_res, sc->intrhandle); + bus_release_resource(dev, SYS_RES_IRQ, + rman_get_rid(sc->irq_res), sc->irq_res); + } + + /* Release memory resource. */ + if (sc->mem_res != NULL) + bus_release_resource(dev, SYS_RES_MEMORY, + rman_get_rid(sc->mem_res), sc->mem_res); + + QSPI_SC_LOCK_DESTROY(sc); + + return (0); +} + + +static phandle_t +zy7_qspi_get_node(device_t bus, device_t dev) +{ + + return (ofw_bus_get_node(bus)); +} + + +static int +zy7_qspi_transfer(device_t dev, device_t child, struct spi_command *cmd) +{ + struct zy7_qspi_softc *sc = device_get_softc(dev); + int err = 0; + + KASSERT(cmd->tx_cmd_sz == cmd->rx_cmd_sz, + ("TX/RX command sizes should be equal")); + KASSERT(cmd->tx_data_sz == cmd->rx_data_sz, + ("TX/RX data sizes should be equal")); + + if (sc->is_dual && cmd->tx_data_sz % 2 != 0) { + device_printf(dev, "driver does not support odd byte data " + "transfers in dual mode. (sz=%d)\n", cmd->tx_data_sz); + return (EINVAL); + } + + QSPI_SC_LOCK(sc); + + /* Wait for controller available. */ + while (sc->busy != 0) { + err = mtx_sleep(dev, &sc->sc_mtx, 0, "zqspi0", 0); + if (err) { + QSPI_SC_UNLOCK(sc); + return (err); + } + } + + /* Start transfer. */ + sc->busy = 1; + sc->cmd = cmd; + sc->tx_bytes = sc->cmd->tx_cmd_sz + sc->cmd->tx_data_sz; + sc->tx_bytes_sent = 0; + sc->rx_bytes = sc->cmd->rx_cmd_sz + sc->cmd->rx_data_sz; + sc->rx_bytes_rcvd = 0; + + /* Enable interrupts. zy7_qspi_intr() will handle transfer. */ + WR4(sc, ZY7_QSPI_INTR_EN_REG, + ZY7_QSPI_INTR_TX_FIFO_NOT_FULL | + ZY7_QSPI_INTR_RX_OVERFLOW); + +#ifdef SPI_XFER_U_PAGE /* XXX: future support for stacked memories. */ + if (sc->is_stacked) { + if ((cmd->flags & SPI_XFER_U_PAGE) != 0) + sc->lqspi_cfg_shadow |= ZY7_QSPI_LQSPI_CFG_U_PAGE; + else + sc->lqspi_cfg_shadow &= ~ZY7_QSPI_LQSPI_CFG_U_PAGE; + WR4(sc, ZY7_QSPI_LQSPI_CFG_REG, sc->lqspi_cfg_shadow); + } +#endif + + /* Assert CS. */ + sc->cfg_reg_shadow &= ~ZY7_QSPI_CONFIG_PCS; + WR4(sc, ZY7_QSPI_CONFIG_REG, sc->cfg_reg_shadow); + + /* Wait for completion. */ + err = mtx_sleep(dev, &sc->sc_mtx, 0, "zqspi1", hz * 2); + if (err) + zy7_qspi_abort_transfer(sc); + + /* Release controller. */ + sc->busy = 0; + wakeup_one(dev); + + QSPI_SC_UNLOCK(sc); + + return (err); +} + +static device_method_t zy7_qspi_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, zy7_qspi_probe), + DEVMETHOD(device_attach, zy7_qspi_attach), + DEVMETHOD(device_detach, zy7_qspi_detach), + + /* SPI interface */ + DEVMETHOD(spibus_transfer, zy7_qspi_transfer), + + /* ofw_bus interface */ + DEVMETHOD(ofw_bus_get_node, zy7_qspi_get_node), + + DEVMETHOD_END +}; + + +static driver_t zy7_qspi_driver = { + "zy7_qspi", + zy7_qspi_methods, + sizeof(struct zy7_qspi_softc), +}; +static devclass_t zy7_qspi_devclass; + +DRIVER_MODULE(zy7_qspi, simplebus, zy7_qspi_driver, zy7_qspi_devclass, 0, 0); +DRIVER_MODULE(ofw_spibus, zy7_qspi, ofw_spibus_driver, ofw_spibus_devclass, 0, 0); +SIMPLEBUS_PNP_INFO(compat_data); +MODULE_DEPEND(zy7_qspi, ofw_spibus, 1, 1, 1); Property changes on: projects/clang1000-import/sys/arm/xilinx/zy7_qspi.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/clang1000-import/sys/arm64/linux/Makefile =================================================================== --- projects/clang1000-import/sys/arm64/linux/Makefile (revision 356919) +++ projects/clang1000-import/sys/arm64/linux/Makefile (revision 356920) @@ -1,28 +1,7 @@ # Makefile for syscall tables # # $FreeBSD$ -# Don't use an OBJDIR -.OBJDIR: ${.CURDIR} +GENERATED_PREFIX= linux_ -.include - -MAKESYSCALLS= ../../tools/makesyscalls.lua -SRCS= syscalls.conf \ - syscalls.master -GENERATED= linux_proto.h \ - linux_syscall.h \ - linux_syscalls.c \ - linux_sysent.c \ - linux_systrace_args.c - -all: - @echo "make sysent only" - -# We .ORDER these explicitly so that we only run MAKESYSCALLS once, rather than -# potentially once for each ${GENERATED} file. -.ORDER: ${GENERATED} -sysent: ${GENERATED} - -${GENERATED}: ${MAKESYSCALLS} ${SRCS} - ${LUA} ${MAKESYSCALLS} syscalls.master syscalls.conf +.include "../../conf/sysent.mk" Index: projects/clang1000-import/sys/arm64/rockchip/rk805.c =================================================================== --- projects/clang1000-import/sys/arm64/rockchip/rk805.c (revision 356919) +++ projects/clang1000-import/sys/arm64/rockchip/rk805.c (revision 356920) @@ -1,695 +1,717 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018 Emmanuel Vadot * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include "regdev_if.h" MALLOC_DEFINE(M_RK805_REG, "RK805 regulator", "RK805 power regulator"); /* #define dprintf(sc, format, arg...) device_printf(sc->base_dev, "%s: " format, __func__, arg) */ #define dprintf(sc, format, arg...) enum rk_pmic_type { RK805 = 1, RK808, }; static struct ofw_compat_data compat_data[] = { {"rockchip,rk805", RK805}, {"rockchip,rk808", RK808}, {NULL, 0} }; struct rk805_regdef { intptr_t id; char *name; uint8_t enable_reg; uint8_t enable_mask; uint8_t voltage_reg; uint8_t voltage_mask; int voltage_min; int voltage_max; int voltage_step; int voltage_nstep; }; struct rk805_reg_sc { struct regnode *regnode; device_t base_dev; struct rk805_regdef *def; phandle_t xref; struct regnode_std_param *param; }; struct rk805_softc { device_t dev; struct mtx mtx; struct resource * res[1]; void * intrcookie; struct intr_config_hook intr_hook; enum rk_pmic_type type; struct rk805_reg_sc **regs; int nregs; }; +static int rk805_regnode_set_voltage(struct regnode *regnode, int min_uvolt, + int max_uvolt, int *udelay); + static struct rk805_regdef rk805_regdefs[] = { { .id = RK805_DCDC1, .name = "DCDC_REG1", .enable_reg = RK805_DCDC_EN, .enable_mask = 0x11, .voltage_reg = RK805_DCDC1_ON_VSEL, .voltage_mask = 0x3F, .voltage_min = 712500, .voltage_max = 1450000, .voltage_step = 12500, .voltage_nstep = 64, }, { .id = RK805_DCDC2, .name = "DCDC_REG2", .enable_reg = RK805_DCDC_EN, .enable_mask = 0x22, .voltage_reg = RK805_DCDC2_ON_VSEL, .voltage_mask = 0x3F, .voltage_min = 712500, .voltage_max = 1450000, .voltage_step = 12500, .voltage_nstep = 64, }, { .id = RK805_DCDC3, .name = "DCDC_REG3", .enable_reg = RK805_DCDC_EN, .enable_mask = 0x44, }, { .id = RK805_DCDC4, .name = "DCDC_REG4", .enable_reg = RK805_DCDC_EN, .enable_mask = 0x88, .voltage_reg = RK805_DCDC4_ON_VSEL, .voltage_mask = 0x3F, .voltage_min = 800000, .voltage_max = 3500000, .voltage_step = 100000, .voltage_nstep = 28, }, { .id = RK805_LDO1, .name = "LDO_REG1", .enable_reg = RK805_LDO_EN, .enable_mask = 0x11, .voltage_reg = RK805_LDO1_ON_VSEL, .voltage_mask = 0x1F, .voltage_min = 800000, .voltage_max = 3400000, .voltage_step = 100000, .voltage_nstep = 27, }, { .id = RK805_LDO2, .name = "LDO_REG2", .enable_reg = RK805_LDO_EN, .enable_mask = 0x22, .voltage_reg = RK805_LDO2_ON_VSEL, .voltage_mask = 0x1F, .voltage_min = 800000, .voltage_max = 3400000, .voltage_step = 100000, .voltage_nstep = 27, }, { .id = RK805_LDO3, .name = "LDO_REG3", .enable_reg = RK805_LDO_EN, .enable_mask = 0x44, .voltage_reg = RK805_LDO3_ON_VSEL, .voltage_mask = 0x1F, .voltage_min = 800000, .voltage_max = 3400000, .voltage_step = 100000, .voltage_nstep = 27, }, }; static struct rk805_regdef rk808_regdefs[] = { { .id = RK805_DCDC1, .name = "DCDC_REG1", .enable_reg = RK805_DCDC_EN, .enable_mask = 0x1, .voltage_reg = RK805_DCDC1_ON_VSEL, .voltage_mask = 0x3F, .voltage_min = 712500, .voltage_max = 1500000, .voltage_step = 12500, .voltage_nstep = 64, }, { .id = RK805_DCDC2, .name = "DCDC_REG2", .enable_reg = RK805_DCDC_EN, .enable_mask = 0x2, .voltage_reg = RK805_DCDC2_ON_VSEL, .voltage_mask = 0x3F, .voltage_min = 712500, .voltage_max = 1500000, .voltage_step = 12500, .voltage_nstep = 64, }, { /* BUCK3 voltage is calculated based on external resistor */ .id = RK805_DCDC3, .name = "DCDC_REG3", .enable_reg = RK805_DCDC_EN, .enable_mask = 0x4, }, { .id = RK805_DCDC4, .name = "DCDC_REG4", .enable_reg = RK805_DCDC_EN, .enable_mask = 0x8, .voltage_reg = RK805_DCDC4_ON_VSEL, .voltage_mask = 0xF, .voltage_min = 1800000, .voltage_max = 3300000, .voltage_step = 100000, .voltage_nstep = 16, }, { .id = RK808_LDO1, .name = "LDO_REG1", .enable_reg = RK808_LDO_EN, .enable_mask = 0x1, .voltage_reg = RK805_LDO1_ON_VSEL, .voltage_mask = 0x1F, .voltage_min = 1800000, .voltage_max = 3400000, .voltage_step = 100000, .voltage_nstep = 17, }, { .id = RK808_LDO2, .name = "LDO_REG2", .enable_reg = RK808_LDO_EN, .enable_mask = 0x2, .voltage_reg = RK805_LDO2_ON_VSEL, .voltage_mask = 0x1F, .voltage_min = 1800000, .voltage_max = 3400000, .voltage_step = 100000, .voltage_nstep = 17, }, { .id = RK808_LDO3, .name = "LDO_REG3", .enable_reg = RK808_LDO_EN, .enable_mask = 0x4, .voltage_reg = RK805_LDO3_ON_VSEL, .voltage_mask = 0xF, .voltage_min = 800000, .voltage_max = 2500000, .voltage_step = 100000, .voltage_nstep = 18, }, { .id = RK808_LDO4, .name = "LDO_REG4", .enable_reg = RK808_LDO_EN, .enable_mask = 0x8, .voltage_reg = RK808_LDO4_ON_VSEL, .voltage_mask = 0x1F, .voltage_min = 1800000, .voltage_max = 3400000, .voltage_step = 100000, .voltage_nstep = 17, }, { .id = RK808_LDO5, .name = "LDO_REG5", .enable_reg = RK808_LDO_EN, .enable_mask = 0x10, .voltage_reg = RK808_LDO5_ON_VSEL, .voltage_mask = 0x1F, .voltage_min = 1800000, .voltage_max = 3400000, .voltage_step = 100000, .voltage_nstep = 17, }, { .id = RK808_LDO6, .name = "LDO_REG6", .enable_reg = RK808_LDO_EN, .enable_mask = 0x20, .voltage_reg = RK808_LDO6_ON_VSEL, .voltage_mask = 0x1F, .voltage_min = 800000, .voltage_max = 2500000, .voltage_step = 100000, .voltage_nstep = 18, }, { .id = RK808_LDO7, .name = "LDO_REG7", .enable_reg = RK808_LDO_EN, .enable_mask = 0x40, .voltage_reg = RK808_LDO7_ON_VSEL, .voltage_mask = 0x1F, .voltage_min = 800000, .voltage_max = 2500000, .voltage_step = 100000, .voltage_nstep = 18, }, { .id = RK808_LDO8, .name = "LDO_REG8", .enable_reg = RK808_LDO_EN, .enable_mask = 0x80, .voltage_reg = RK808_LDO8_ON_VSEL, .voltage_mask = 0x1F, .voltage_min = 1800000, .voltage_max = 3400000, .voltage_step = 100000, .voltage_nstep = 17, }, { .id = RK808_SWITCH1, .name = "SWITCH_REG1", .enable_reg = RK805_DCDC_EN, .enable_mask = 0x20, .voltage_min = 3000000, .voltage_max = 3000000, }, { .id = RK808_SWITCH2, .name = "SWITCH_REG2", .enable_reg = RK805_DCDC_EN, .enable_mask = 0x40, .voltage_min = 3000000, .voltage_max = 3000000, }, }; static int rk805_read(device_t dev, uint8_t reg, uint8_t *data, uint8_t size) { int err; err = iicdev_readfrom(dev, reg, data, size, IIC_INTRWAIT); return (err); } static int rk805_write(device_t dev, uint8_t reg, uint8_t data) { return (iicdev_writeto(dev, reg, &data, 1, IIC_INTRWAIT)); } static int rk805_regnode_init(struct regnode *regnode) { - return (0); + struct rk805_reg_sc *sc; + struct regnode_std_param *param; + int rv, udelay; + + sc = regnode_get_softc(regnode); + param = regnode_get_stdparam(regnode); + if (param->min_uvolt == 0) + return (0); + + /* + * Set the regulator at the correct voltage + * Do not enable it, this is will be done either by a + * consumer or by regnode_set_constraint if boot_on is true + */ + rv = rk805_regnode_set_voltage(regnode, param->min_uvolt, + param->max_uvolt, &udelay); + if (rv != 0) + DELAY(udelay); + + return (rv); } static int rk805_regnode_enable(struct regnode *regnode, bool enable, int *udelay) { struct rk805_reg_sc *sc; uint8_t val; sc = regnode_get_softc(regnode); dprintf(sc, "%sabling regulator %s\n", enable ? "En" : "Dis", sc->def->name); rk805_read(sc->base_dev, sc->def->enable_reg, &val, 1); if (enable) val |= sc->def->enable_mask; else val &= ~sc->def->enable_mask; rk805_write(sc->base_dev, sc->def->enable_reg, val); *udelay = 0; return (0); } static void rk805_regnode_reg_to_voltage(struct rk805_reg_sc *sc, uint8_t val, int *uv) { if (val < sc->def->voltage_nstep) *uv = sc->def->voltage_min + val * sc->def->voltage_step; else *uv = sc->def->voltage_min + (sc->def->voltage_nstep * sc->def->voltage_step); } static int rk805_regnode_voltage_to_reg(struct rk805_reg_sc *sc, int min_uvolt, int max_uvolt, uint8_t *val) { uint8_t nval; int nstep, uvolt; nval = 0; uvolt = sc->def->voltage_min; for (nstep = 0; nstep < sc->def->voltage_nstep && uvolt < min_uvolt; nstep++) { ++nval; uvolt += sc->def->voltage_step; } if (uvolt > max_uvolt) return (EINVAL); *val = nval; return (0); } static int rk805_regnode_status(struct regnode *regnode, int *status) { struct rk805_reg_sc *sc; uint8_t val; sc = regnode_get_softc(regnode); *status = 0; rk805_read(sc->base_dev, sc->def->enable_reg, &val, 1); if (val & sc->def->enable_mask) *status = REGULATOR_STATUS_ENABLED; return (0); } static int rk805_regnode_set_voltage(struct regnode *regnode, int min_uvolt, int max_uvolt, int *udelay) { struct rk805_reg_sc *sc; uint8_t val; int uvolt; sc = regnode_get_softc(regnode); if (!sc->def->voltage_step) return (ENXIO); dprintf(sc, "Setting %s to %d<->%d uvolts\n", sc->def->name, min_uvolt, max_uvolt); rk805_read(sc->base_dev, sc->def->voltage_reg, &val, 1); if (rk805_regnode_voltage_to_reg(sc, min_uvolt, max_uvolt, &val) != 0) return (ERANGE); rk805_write(sc->base_dev, sc->def->voltage_reg, val); rk805_read(sc->base_dev, sc->def->voltage_reg, &val, 1); *udelay = 0; rk805_regnode_reg_to_voltage(sc, val, &uvolt); dprintf(sc, "Regulator %s set to %d uvolt\n", sc->def->name, uvolt); return (0); } static int rk805_regnode_get_voltage(struct regnode *regnode, int *uvolt) { struct rk805_reg_sc *sc; uint8_t val; sc = regnode_get_softc(regnode); if (sc->def->voltage_min == sc->def->voltage_max) { *uvolt = sc->def->voltage_min; return (0); } if (!sc->def->voltage_step) return (ENXIO); rk805_read(sc->base_dev, sc->def->voltage_reg, &val, 1); rk805_regnode_reg_to_voltage(sc, val & sc->def->voltage_mask, uvolt); dprintf(sc, "Regulator %s is at %d uvolt\n", sc->def->name, *uvolt); return (0); } static regnode_method_t rk805_regnode_methods[] = { /* Regulator interface */ REGNODEMETHOD(regnode_init, rk805_regnode_init), REGNODEMETHOD(regnode_enable, rk805_regnode_enable), REGNODEMETHOD(regnode_status, rk805_regnode_status), REGNODEMETHOD(regnode_set_voltage, rk805_regnode_set_voltage), REGNODEMETHOD(regnode_get_voltage, rk805_regnode_get_voltage), REGNODEMETHOD(regnode_check_voltage, regnode_method_check_voltage), REGNODEMETHOD_END }; DEFINE_CLASS_1(rk805_regnode, rk805_regnode_class, rk805_regnode_methods, sizeof(struct rk805_reg_sc), regnode_class); static struct rk805_reg_sc * rk805_reg_attach(device_t dev, phandle_t node, struct rk805_regdef *def) { struct rk805_reg_sc *reg_sc; struct regnode_init_def initdef; struct regnode *regnode; memset(&initdef, 0, sizeof(initdef)); if (regulator_parse_ofw_stdparam(dev, node, &initdef) != 0) { device_printf(dev, "cannot create regulator\n"); return (NULL); } if (initdef.std_param.min_uvolt == 0) initdef.std_param.min_uvolt = def->voltage_min; if (initdef.std_param.max_uvolt == 0) initdef.std_param.max_uvolt = def->voltage_max; initdef.id = def->id; initdef.ofw_node = node; regnode = regnode_create(dev, &rk805_regnode_class, &initdef); if (regnode == NULL) { device_printf(dev, "cannot create regulator\n"); return (NULL); } reg_sc = regnode_get_softc(regnode); reg_sc->regnode = regnode; reg_sc->base_dev = dev; reg_sc->def = def; reg_sc->xref = OF_xref_from_node(node); reg_sc->param = regnode_get_stdparam(regnode); regnode_register(regnode); return (reg_sc); } static int rk805_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (ofw_bus_search_compatible(dev, compat_data)->ocd_data == 0) return (ENXIO); device_set_desc(dev, "RockChip RK805 PMIC"); return (BUS_PROBE_DEFAULT); } static void rk805_start(void *pdev) { struct rk805_softc *sc; device_t dev; uint8_t data[2]; int err; dev = pdev; sc = device_get_softc(dev); sc->dev = dev; /* No version register in RK808 */ if (bootverbose && sc->type == RK805) { err = rk805_read(dev, RK805_CHIP_NAME, data, 1); if (err != 0) { device_printf(dev, "Cannot read chip name reg\n"); return; } err = rk805_read(dev, RK805_CHIP_VER, data + 1, 1); if (err != 0) { device_printf(dev, "Cannot read chip version reg\n"); return; } device_printf(dev, "Chip Name: %x\n", data[0] << 4 | ((data[1] >> 4) & 0xf)); device_printf(dev, "Chip Version: %x\n", data[1] & 0xf); } config_intrhook_disestablish(&sc->intr_hook); } static int rk805_attach(device_t dev) { struct rk805_softc *sc; struct rk805_reg_sc *reg; struct rk805_regdef *regdefs; phandle_t rnode, child; int i; sc = device_get_softc(dev); sc->intr_hook.ich_func = rk805_start; sc->intr_hook.ich_arg = dev; if (config_intrhook_establish(&sc->intr_hook) != 0) return (ENOMEM); sc->type = ofw_bus_search_compatible(dev, compat_data)->ocd_data; switch (sc->type) { case RK805: regdefs = rk805_regdefs; sc->nregs = nitems(rk805_regdefs); break; case RK808: regdefs = rk808_regdefs; sc->nregs = nitems(rk808_regdefs); break; default: device_printf(dev, "Unknown type %d\n", sc->type); return (ENXIO); } sc->regs = malloc(sizeof(struct rk805_reg_sc *) * sc->nregs, M_RK805_REG, M_WAITOK | M_ZERO); rnode = ofw_bus_find_child(ofw_bus_get_node(dev), "regulators"); if (rnode > 0) { for (i = 0; i < sc->nregs; i++) { child = ofw_bus_find_child(rnode, regdefs[i].name); if (child == 0) continue; reg = rk805_reg_attach(dev, child, ®defs[i]); if (reg == NULL) { device_printf(dev, "cannot attach regulator %s\n", regdefs[i].name); continue; } sc->regs[i] = reg; if (bootverbose) device_printf(dev, "Regulator %s attached\n", regdefs[i].name); } } return (0); } static int rk805_detach(device_t dev) { /* We cannot detach regulators */ return (EBUSY); } static int rk805_map(device_t dev, phandle_t xref, int ncells, pcell_t *cells, intptr_t *id) { struct rk805_softc *sc; int i; sc = device_get_softc(dev); for (i = 0; i < sc->nregs; i++) { if (sc->regs[i]->xref == xref) { *id = sc->regs[i]->def->id; return (0); } } return (ERANGE); } static device_method_t rk805_methods[] = { DEVMETHOD(device_probe, rk805_probe), DEVMETHOD(device_attach, rk805_attach), DEVMETHOD(device_detach, rk805_detach), /* regdev interface */ DEVMETHOD(regdev_map, rk805_map), DEVMETHOD_END }; static driver_t rk805_driver = { "rk805_pmu", rk805_methods, sizeof(struct rk805_softc), }; static devclass_t rk805_devclass; EARLY_DRIVER_MODULE(rk805, iicbus, rk805_driver, rk805_devclass, 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_LAST); MODULE_DEPEND(rk805, iicbus, IICBUS_MINVER, IICBUS_PREFVER, IICBUS_MAXVER); MODULE_VERSION(rk805, 1); Index: projects/clang1000-import/sys/compat/cloudabi32/Makefile =================================================================== --- projects/clang1000-import/sys/compat/cloudabi32/Makefile (revision 356919) +++ projects/clang1000-import/sys/compat/cloudabi32/Makefile (revision 356920) @@ -1,27 +1,6 @@ # $FreeBSD$ -# Don't use an OBJDIR -.OBJDIR: ${.CURDIR} +SYSENT_FILE= ${SYSDIR}/contrib/cloudabi/syscalls32.master +GENERATED_PREFIX= cloudabi32_ -.include - -MAKESYSCALLS= ../../tools/makesyscalls.lua -SRCS= syscalls.conf \ - ../../contrib/cloudabi/syscalls32.master -GENERATED= cloudabi32_proto.h \ - cloudabi32_syscall.h \ - cloudabi32_syscalls.c \ - cloudabi32_sysent.c \ - cloudabi32_systrace_args.c - -all: - @echo "make sysent only" - -# We .ORDER these explicitly so that we only run MAKESYSCALLS once, rather than -# potentially once for each ${GENERATED} file. -.ORDER: ${GENERATED} -sysent: ${GENERATED} - -${GENERATED}: ${MAKESYSCALLS} ${SRCS} - ${LUA} ${MAKESYSCALLS} \ - ../../contrib/cloudabi/syscalls32.master syscalls.conf +.include "../../conf/sysent.mk" Index: projects/clang1000-import/sys/compat/cloudabi64/Makefile =================================================================== --- projects/clang1000-import/sys/compat/cloudabi64/Makefile (revision 356919) +++ projects/clang1000-import/sys/compat/cloudabi64/Makefile (revision 356920) @@ -1,27 +1,6 @@ # $FreeBSD$ -.include +SYSENT_FILE= ${SYSDIR}/contrib/cloudabi/syscalls64.master +GENERATED_PREFIX= cloudabi64_ -# Don't use an OBJDIR -.OBJDIR: ${.CURDIR} - -MAKESYSCALLS= ../../tools/makesyscalls.lua -SRCS= syscalls.conf \ - ../../contrib/cloudabi/syscalls64.master -GENERATED= cloudabi64_proto.h \ - cloudabi64_syscall.h \ - cloudabi64_syscalls.c \ - cloudabi64_sysent.c \ - cloudabi64_systrace_args.c - -all: - @echo "make sysent only" - -# We .ORDER these explicitly so that we only run MAKESYSCALLS once, rather than -# potentially once for each ${GENERATED} file. -.ORDER: ${GENERATED} -sysent: ${GENERATED} - -${GENERATED}: ${MAKESYSCALLS} ${SRCS} - ${LUA} ${MAKESYSCALLS} \ - ../../contrib/cloudabi/syscalls64.master syscalls.conf +.include "../../conf/sysent.mk" Index: projects/clang1000-import/sys/compat/freebsd32/Makefile =================================================================== --- projects/clang1000-import/sys/compat/freebsd32/Makefile (revision 356919) +++ projects/clang1000-import/sys/compat/freebsd32/Makefile (revision 356920) @@ -1,29 +1,7 @@ # Makefile for syscall tables # # $FreeBSD$ -# Don't use an OBJDIR -.OBJDIR: ${.CURDIR} +GENERATED_PREFIX= freebsd32_ -.include - -MAKESYSCALLS= ../../tools/makesyscalls.lua -SRCS= ../../kern/capabilities.conf \ - syscalls.conf \ - syscalls.master -GENERATED= freebsd32_proto.h \ - freebsd32_syscall.h \ - freebsd32_syscalls.c \ - freebsd32_sysent.c \ - freebsd32_systrace_args.c - -all: - @echo "make sysent only" - -# We .ORDER these explicitly so that we only run MAKESYSCALLS once, rather than -# potentially once for each ${GENERATED} file. -.ORDER: ${GENERATED} -sysent: ${GENERATED} - -${GENERATED}: ${MAKESYSCALLS} ${SRCS} - ${LUA} ${MAKESYSCALLS} syscalls.master syscalls.conf +.include "../../conf/sysent.mk" Index: projects/clang1000-import/sys/compat/linux/linux_file.c =================================================================== --- projects/clang1000-import/sys/compat/linux/linux_file.c (revision 356919) +++ projects/clang1000-import/sys/compat/linux/linux_file.c (revision 356920) @@ -1,1608 +1,1613 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include #include static int linux_common_open(struct thread *, int, char *, int, int); static int linux_getdents_error(struct thread *, int, int); #ifdef LINUX_LEGACY_SYSCALLS int linux_creat(struct thread *td, struct linux_creat_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE, O_WRONLY | O_CREAT | O_TRUNC, args->mode); LFREEPATH(path); return (error); } #endif static int linux_common_open(struct thread *td, int dirfd, char *path, int l_flags, int mode) { struct proc *p = td->td_proc; struct file *fp; int fd; int bsd_flags, error; bsd_flags = 0; switch (l_flags & LINUX_O_ACCMODE) { case LINUX_O_WRONLY: bsd_flags |= O_WRONLY; break; case LINUX_O_RDWR: bsd_flags |= O_RDWR; break; default: bsd_flags |= O_RDONLY; } if (l_flags & LINUX_O_NDELAY) bsd_flags |= O_NONBLOCK; if (l_flags & LINUX_O_APPEND) bsd_flags |= O_APPEND; if (l_flags & LINUX_O_SYNC) bsd_flags |= O_FSYNC; if (l_flags & LINUX_O_CLOEXEC) bsd_flags |= O_CLOEXEC; if (l_flags & LINUX_O_NONBLOCK) bsd_flags |= O_NONBLOCK; if (l_flags & LINUX_FASYNC) bsd_flags |= O_ASYNC; if (l_flags & LINUX_O_CREAT) bsd_flags |= O_CREAT; if (l_flags & LINUX_O_TRUNC) bsd_flags |= O_TRUNC; if (l_flags & LINUX_O_EXCL) bsd_flags |= O_EXCL; if (l_flags & LINUX_O_NOCTTY) bsd_flags |= O_NOCTTY; if (l_flags & LINUX_O_DIRECT) bsd_flags |= O_DIRECT; if (l_flags & LINUX_O_NOFOLLOW) bsd_flags |= O_NOFOLLOW; if (l_flags & LINUX_O_DIRECTORY) bsd_flags |= O_DIRECTORY; /* XXX LINUX_O_NOATIME: unable to be easily implemented. */ error = kern_openat(td, dirfd, path, UIO_SYSSPACE, bsd_flags, mode); if (error != 0) { if (error == EMLINK) error = ELOOP; goto done; } if (p->p_flag & P_CONTROLT) goto done; if (bsd_flags & O_NOCTTY) goto done; /* * XXX In between kern_openat() and fget(), another process * having the same filedesc could use that fd without * checking below. */ fd = td->td_retval[0]; if (fget(td, fd, &cap_ioctl_rights, &fp) == 0) { if (fp->f_type != DTYPE_VNODE) { fdrop(fp, td); goto done; } sx_slock(&proctree_lock); PROC_LOCK(p); if (SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) { PROC_UNLOCK(p); sx_sunlock(&proctree_lock); /* XXXPJD: Verify if TIOCSCTTY is allowed. */ (void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0, td->td_ucred, td); } else { PROC_UNLOCK(p); sx_sunlock(&proctree_lock); } fdrop(fp, td); } done: LFREEPATH(path); return (error); } int linux_openat(struct thread *td, struct linux_openat_args *args) { char *path; int dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; if (args->flags & LINUX_O_CREAT) LCONVPATH_AT(td, args->filename, &path, 1, dfd); else LCONVPATH_AT(td, args->filename, &path, 0, dfd); return (linux_common_open(td, dfd, path, args->flags, args->mode)); } #ifdef LINUX_LEGACY_SYSCALLS int linux_open(struct thread *td, struct linux_open_args *args) { char *path; if (args->flags & LINUX_O_CREAT) LCONVPATHCREAT(td, args->path, &path); else LCONVPATHEXIST(td, args->path, &path); return (linux_common_open(td, AT_FDCWD, path, args->flags, args->mode)); } #endif int linux_lseek(struct thread *td, struct linux_lseek_args *args) { return (kern_lseek(td, args->fdes, args->off, args->whence)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_llseek(struct thread *td, struct linux_llseek_args *args) { int error; off_t off; off = (args->olow) | (((off_t) args->ohigh) << 32); error = kern_lseek(td, args->fd, off, args->whence); if (error != 0) return (error); error = copyout(td->td_retval, args->res, sizeof(off_t)); if (error != 0) return (error); td->td_retval[0] = 0; return (0); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ /* * Note that linux_getdents(2) and linux_getdents64(2) have the same * arguments. They only differ in the definition of struct dirent they * operate on. * Note that linux_readdir(2) is a special case of linux_getdents(2) * where count is always equals 1, meaning that the buffer is one * dirent-structure in size and that the code can't handle more anyway. * Note that linux_readdir(2) can't be implemented by means of linux_getdents(2) * as in case when the *dent buffer size is equal to 1 linux_getdents(2) will * trash user stack. */ static int linux_getdents_error(struct thread *td, int fd, int err) { struct vnode *vp; struct file *fp; int error; /* Linux return ENOTDIR in case when fd is not a directory. */ error = getvnode(td, fd, &cap_read_rights, &fp); if (error != 0) return (error); vp = fp->f_vnode; if (vp->v_type != VDIR) { fdrop(fp, td); return (ENOTDIR); } fdrop(fp, td); return (err); } struct l_dirent { l_ulong d_ino; l_off_t d_off; l_ushort d_reclen; char d_name[LINUX_NAME_MAX + 1]; }; struct l_dirent64 { uint64_t d_ino; int64_t d_off; l_ushort d_reclen; u_char d_type; char d_name[LINUX_NAME_MAX + 1]; }; /* * Linux uses the last byte in the dirent buffer to store d_type, * at least glibc-2.7 requires it. That is why l_dirent is padded with 2 bytes. */ #define LINUX_RECLEN(namlen) \ roundup(offsetof(struct l_dirent, d_name) + (namlen) + 2, sizeof(l_ulong)) #define LINUX_RECLEN64(namlen) \ roundup(offsetof(struct l_dirent64, d_name) + (namlen) + 1, \ sizeof(uint64_t)) #ifdef LINUX_LEGACY_SYSCALLS int linux_getdents(struct thread *td, struct linux_getdents_args *args) { struct dirent *bdp; caddr_t inp, buf; /* BSD-format */ int len, reclen; /* BSD-format */ caddr_t outp; /* Linux-format */ int resid, linuxreclen; /* Linux-format */ caddr_t lbuf; /* Linux-format */ off_t base; struct l_dirent *linux_dirent; int buflen, error; size_t retval; buflen = min(args->count, MAXBSIZE); buf = malloc(buflen, M_TEMP, M_WAITOK); error = kern_getdirentries(td, args->fd, buf, buflen, &base, NULL, UIO_SYSSPACE); if (error != 0) { error = linux_getdents_error(td, args->fd, error); goto out1; } lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO); len = td->td_retval[0]; inp = buf; outp = (caddr_t)args->dent; resid = args->count; retval = 0; while (len > 0) { bdp = (struct dirent *) inp; reclen = bdp->d_reclen; linuxreclen = LINUX_RECLEN(bdp->d_namlen); /* * No more space in the user supplied dirent buffer. * Return EINVAL. */ if (resid < linuxreclen) { error = EINVAL; goto out; } linux_dirent = (struct l_dirent*)lbuf; linux_dirent->d_ino = bdp->d_fileno; linux_dirent->d_off = base + reclen; linux_dirent->d_reclen = linuxreclen; /* * Copy d_type to last byte of l_dirent buffer */ lbuf[linuxreclen - 1] = bdp->d_type; strlcpy(linux_dirent->d_name, bdp->d_name, linuxreclen - offsetof(struct l_dirent, d_name)-1); error = copyout(linux_dirent, outp, linuxreclen); if (error != 0) goto out; inp += reclen; base += reclen; len -= reclen; retval += linuxreclen; outp += linuxreclen; resid -= linuxreclen; } td->td_retval[0] = retval; out: free(lbuf, M_TEMP); out1: free(buf, M_TEMP); return (error); } #endif int linux_getdents64(struct thread *td, struct linux_getdents64_args *args) { struct dirent *bdp; caddr_t inp, buf; /* BSD-format */ int len, reclen; /* BSD-format */ caddr_t outp; /* Linux-format */ int resid, linuxreclen; /* Linux-format */ caddr_t lbuf; /* Linux-format */ off_t base; struct l_dirent64 *linux_dirent64; int buflen, error; size_t retval; buflen = min(args->count, MAXBSIZE); buf = malloc(buflen, M_TEMP, M_WAITOK); error = kern_getdirentries(td, args->fd, buf, buflen, &base, NULL, UIO_SYSSPACE); if (error != 0) { error = linux_getdents_error(td, args->fd, error); goto out1; } lbuf = malloc(LINUX_RECLEN64(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO); len = td->td_retval[0]; inp = buf; outp = (caddr_t)args->dirent; resid = args->count; retval = 0; while (len > 0) { bdp = (struct dirent *) inp; reclen = bdp->d_reclen; linuxreclen = LINUX_RECLEN64(bdp->d_namlen); /* * No more space in the user supplied dirent buffer. * Return EINVAL. */ if (resid < linuxreclen) { error = EINVAL; goto out; } linux_dirent64 = (struct l_dirent64*)lbuf; linux_dirent64->d_ino = bdp->d_fileno; linux_dirent64->d_off = base + reclen; linux_dirent64->d_reclen = linuxreclen; linux_dirent64->d_type = bdp->d_type; strlcpy(linux_dirent64->d_name, bdp->d_name, linuxreclen - offsetof(struct l_dirent64, d_name)); error = copyout(linux_dirent64, outp, linuxreclen); if (error != 0) goto out; inp += reclen; base += reclen; len -= reclen; retval += linuxreclen; outp += linuxreclen; resid -= linuxreclen; } td->td_retval[0] = retval; out: free(lbuf, M_TEMP); out1: free(buf, M_TEMP); return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_readdir(struct thread *td, struct linux_readdir_args *args) { struct dirent *bdp; caddr_t buf; /* BSD-format */ int linuxreclen; /* Linux-format */ caddr_t lbuf; /* Linux-format */ off_t base; struct l_dirent *linux_dirent; int buflen, error; buflen = LINUX_RECLEN(LINUX_NAME_MAX); buf = malloc(buflen, M_TEMP, M_WAITOK); error = kern_getdirentries(td, args->fd, buf, buflen, &base, NULL, UIO_SYSSPACE); if (error != 0) { error = linux_getdents_error(td, args->fd, error); goto out; } if (td->td_retval[0] == 0) goto out; lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO); bdp = (struct dirent *) buf; linuxreclen = LINUX_RECLEN(bdp->d_namlen); linux_dirent = (struct l_dirent*)lbuf; linux_dirent->d_ino = bdp->d_fileno; linux_dirent->d_off = linuxreclen; linux_dirent->d_reclen = bdp->d_namlen; strlcpy(linux_dirent->d_name, bdp->d_name, linuxreclen - offsetof(struct l_dirent, d_name)); error = copyout(linux_dirent, args->dent, linuxreclen); if (error == 0) td->td_retval[0] = linuxreclen; free(lbuf, M_TEMP); out: free(buf, M_TEMP); return (error); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ /* * These exist mainly for hooks for doing /compat/linux translation. */ #ifdef LINUX_LEGACY_SYSCALLS int linux_access(struct thread *td, struct linux_access_args *args) { char *path; int error; /* Linux convention. */ if (args->amode & ~(F_OK | X_OK | W_OK | R_OK)) return (EINVAL); LCONVPATHEXIST(td, args->path, &path); error = kern_accessat(td, AT_FDCWD, path, UIO_SYSSPACE, 0, args->amode); LFREEPATH(path); return (error); } #endif int linux_faccessat(struct thread *td, struct linux_faccessat_args *args) { char *path; int error, dfd; /* Linux convention. */ if (args->amode & ~(F_OK | X_OK | W_OK | R_OK)) return (EINVAL); dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->filename, &path, dfd); error = kern_accessat(td, dfd, path, UIO_SYSSPACE, 0, args->amode); LFREEPATH(path); return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_unlink(struct thread *td, struct linux_unlink_args *args) { char *path; int error; struct stat st; LCONVPATHEXIST(td, args->path, &path); error = kern_funlinkat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0, 0); if (error == EPERM) { /* Introduce POSIX noncompliant behaviour of Linux */ if (kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &st, NULL) == 0) { if (S_ISDIR(st.st_mode)) error = EISDIR; } } LFREEPATH(path); return (error); } #endif int linux_unlinkat(struct thread *td, struct linux_unlinkat_args *args) { char *path; int error, dfd; struct stat st; if (args->flag & ~LINUX_AT_REMOVEDIR) return (EINVAL); dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); if (args->flag & LINUX_AT_REMOVEDIR) error = kern_frmdirat(td, dfd, path, FD_NONE, UIO_SYSSPACE, 0); else error = kern_funlinkat(td, dfd, path, FD_NONE, UIO_SYSSPACE, 0, 0); if (error == EPERM && !(args->flag & LINUX_AT_REMOVEDIR)) { /* Introduce POSIX noncompliant behaviour of Linux */ if (kern_statat(td, AT_SYMLINK_NOFOLLOW, dfd, path, UIO_SYSSPACE, &st, NULL) == 0 && S_ISDIR(st.st_mode)) error = EISDIR; } LFREEPATH(path); return (error); } int linux_chdir(struct thread *td, struct linux_chdir_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); error = kern_chdir(td, path, UIO_SYSSPACE); LFREEPATH(path); return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_chmod(struct thread *td, struct linux_chmod_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); error = kern_fchmodat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode, 0); LFREEPATH(path); return (error); } #endif int linux_fchmodat(struct thread *td, struct linux_fchmodat_args *args) { char *path; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->filename, &path, dfd); error = kern_fchmodat(td, dfd, path, UIO_SYSSPACE, args->mode, 0); LFREEPATH(path); return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_mkdir(struct thread *td, struct linux_mkdir_args *args) { char *path; int error; LCONVPATHCREAT(td, args->path, &path); error = kern_mkdirat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode); LFREEPATH(path); return (error); } #endif int linux_mkdirat(struct thread *td, struct linux_mkdirat_args *args) { char *path; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHCREAT_AT(td, args->pathname, &path, dfd); error = kern_mkdirat(td, dfd, path, UIO_SYSSPACE, args->mode); LFREEPATH(path); return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_rmdir(struct thread *td, struct linux_rmdir_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); error = kern_frmdirat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0); LFREEPATH(path); return (error); } int linux_rename(struct thread *td, struct linux_rename_args *args) { char *from, *to; int error; LCONVPATHEXIST(td, args->from, &from); /* Expand LCONVPATHCREATE so that `from' can be freed on errors */ error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD); if (to == NULL) { LFREEPATH(from); return (error); } error = kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, UIO_SYSSPACE); LFREEPATH(from); LFREEPATH(to); return (error); } #endif int linux_renameat(struct thread *td, struct linux_renameat_args *args) { struct linux_renameat2_args renameat2_args = { .olddfd = args->olddfd, .oldname = args->oldname, .newdfd = args->newdfd, .newname = args->newname, .flags = 0 }; return (linux_renameat2(td, &renameat2_args)); } int linux_renameat2(struct thread *td, struct linux_renameat2_args *args) { char *from, *to; int error, olddfd, newdfd; if (args->flags != 0) { if (args->flags & ~(LINUX_RENAME_EXCHANGE | LINUX_RENAME_NOREPLACE | LINUX_RENAME_WHITEOUT)) return (EINVAL); if (args->flags & LINUX_RENAME_EXCHANGE && args->flags & (LINUX_RENAME_NOREPLACE | LINUX_RENAME_WHITEOUT)) return (EINVAL); linux_msg(td, "renameat2 unsupported flags 0x%x", args->flags); return (EINVAL); } olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd; newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd; LCONVPATHEXIST_AT(td, args->oldname, &from, olddfd); /* Expand LCONVPATHCREATE so that `from' can be freed on errors */ error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd); if (to == NULL) { LFREEPATH(from); return (error); } error = kern_renameat(td, olddfd, from, newdfd, to, UIO_SYSSPACE); LFREEPATH(from); LFREEPATH(to); return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_symlink(struct thread *td, struct linux_symlink_args *args) { char *path, *to; int error; LCONVPATHEXIST(td, args->path, &path); /* Expand LCONVPATHCREATE so that `path' can be freed on errors */ error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD); if (to == NULL) { LFREEPATH(path); return (error); } error = kern_symlinkat(td, path, AT_FDCWD, to, UIO_SYSSPACE); LFREEPATH(path); LFREEPATH(to); return (error); } #endif int linux_symlinkat(struct thread *td, struct linux_symlinkat_args *args) { char *path, *to; int error, dfd; dfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd; LCONVPATHEXIST(td, args->oldname, &path); /* Expand LCONVPATHCREATE so that `path' can be freed on errors */ error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, dfd); if (to == NULL) { LFREEPATH(path); return (error); } error = kern_symlinkat(td, path, dfd, to, UIO_SYSSPACE); LFREEPATH(path); LFREEPATH(to); return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_readlink(struct thread *td, struct linux_readlink_args *args) { char *name; int error; LCONVPATHEXIST(td, args->name, &name); error = kern_readlinkat(td, AT_FDCWD, name, UIO_SYSSPACE, args->buf, UIO_USERSPACE, args->count); LFREEPATH(name); return (error); } #endif int linux_readlinkat(struct thread *td, struct linux_readlinkat_args *args) { char *name; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->path, &name, dfd); error = kern_readlinkat(td, dfd, name, UIO_SYSSPACE, args->buf, UIO_USERSPACE, args->bufsiz); LFREEPATH(name); return (error); } int linux_truncate(struct thread *td, struct linux_truncate_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); error = kern_truncate(td, path, UIO_SYSSPACE, args->length); LFREEPATH(path); return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_truncate64(struct thread *td, struct linux_truncate64_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); error = kern_truncate(td, path, UIO_SYSSPACE, args->length); LFREEPATH(path); return (error); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_ftruncate(struct thread *td, struct linux_ftruncate_args *args) { return (kern_ftruncate(td, args->fd, args->length)); } #ifdef LINUX_LEGACY_SYSCALLS int linux_link(struct thread *td, struct linux_link_args *args) { char *path, *to; int error; LCONVPATHEXIST(td, args->path, &path); /* Expand LCONVPATHCREATE so that `path' can be freed on errors */ error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD); if (to == NULL) { LFREEPATH(path); return (error); } error = kern_linkat(td, AT_FDCWD, AT_FDCWD, path, to, UIO_SYSSPACE, FOLLOW); LFREEPATH(path); LFREEPATH(to); return (error); } #endif int linux_linkat(struct thread *td, struct linux_linkat_args *args) { char *path, *to; int error, olddfd, newdfd, follow; if (args->flag & ~LINUX_AT_SYMLINK_FOLLOW) return (EINVAL); olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd; newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd; LCONVPATHEXIST_AT(td, args->oldname, &path, olddfd); /* Expand LCONVPATHCREATE so that `path' can be freed on errors */ error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd); if (to == NULL) { LFREEPATH(path); return (error); } follow = (args->flag & LINUX_AT_SYMLINK_FOLLOW) == 0 ? NOFOLLOW : FOLLOW; error = kern_linkat(td, olddfd, newdfd, path, to, UIO_SYSSPACE, follow); LFREEPATH(path); LFREEPATH(to); return (error); } int linux_fdatasync(struct thread *td, struct linux_fdatasync_args *uap) { return (kern_fsync(td, uap->fd, false)); } int linux_sync_file_range(struct thread *td, struct linux_sync_file_range_args *uap) { if (uap->offset < 0 || uap->nbytes < 0 || (uap->flags & ~(LINUX_SYNC_FILE_RANGE_WAIT_BEFORE | LINUX_SYNC_FILE_RANGE_WRITE | LINUX_SYNC_FILE_RANGE_WAIT_AFTER)) != 0) { return (EINVAL); } return (kern_fsync(td, uap->fd, false)); } int linux_pread(struct thread *td, struct linux_pread_args *uap) { struct vnode *vp; int error; error = kern_pread(td, uap->fd, uap->buf, uap->nbyte, uap->offset); if (error == 0) { /* This seems to violate POSIX but Linux does it. */ error = fgetvp(td, uap->fd, &cap_pread_rights, &vp); if (error != 0) return (error); if (vp->v_type == VDIR) { vrele(vp); return (EISDIR); } vrele(vp); } return (error); } int linux_pwrite(struct thread *td, struct linux_pwrite_args *uap) { return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, uap->offset)); } int linux_preadv(struct thread *td, struct linux_preadv_args *uap) { struct uio *auio; int error; off_t offset; /* * According http://man7.org/linux/man-pages/man2/preadv.2.html#NOTES * pos_l and pos_h, respectively, contain the * low order and high order 32 bits of offset. */ offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) << (sizeof(offset) * 4)) | uap->pos_l; if (offset < 0) return (EINVAL); #ifdef COMPAT_LINUX32 error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio); #else error = copyinuio(uap->vec, uap->vlen, &auio); #endif if (error != 0) return (error); error = kern_preadv(td, uap->fd, auio, offset); free(auio, M_IOV); return (error); } int linux_pwritev(struct thread *td, struct linux_pwritev_args *uap) { struct uio *auio; int error; off_t offset; /* * According http://man7.org/linux/man-pages/man2/pwritev.2.html#NOTES * pos_l and pos_h, respectively, contain the * low order and high order 32 bits of offset. */ offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) << (sizeof(offset) * 4)) | uap->pos_l; if (offset < 0) return (EINVAL); #ifdef COMPAT_LINUX32 error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio); #else error = copyinuio(uap->vec, uap->vlen, &auio); #endif if (error != 0) return (error); error = kern_pwritev(td, uap->fd, auio, offset); free(auio, M_IOV); return (error); } int linux_mount(struct thread *td, struct linux_mount_args *args) { char fstypename[MFSNAMELEN]; char *mntonname, *mntfromname; int error, fsflags; mntonname = malloc(MNAMELEN, M_TEMP, M_WAITOK); mntfromname = malloc(MNAMELEN, M_TEMP, M_WAITOK); error = copyinstr(args->filesystemtype, fstypename, MFSNAMELEN - 1, NULL); if (error != 0) goto out; if (args->specialfile != NULL) { error = copyinstr(args->specialfile, mntfromname, MNAMELEN - 1, NULL); if (error != 0) goto out; } else { mntfromname[0] = '\0'; } error = copyinstr(args->dir, mntonname, MNAMELEN - 1, NULL); if (error != 0) goto out; if (strcmp(fstypename, "ext2") == 0) { strcpy(fstypename, "ext2fs"); } else if (strcmp(fstypename, "proc") == 0) { strcpy(fstypename, "linprocfs"); } else if (strcmp(fstypename, "vfat") == 0) { strcpy(fstypename, "msdosfs"); } fsflags = 0; /* * Linux SYNC flag is not included; the closest equivalent * FreeBSD has is !ASYNC, which is our default. */ if (args->rwflag & LINUX_MS_RDONLY) fsflags |= MNT_RDONLY; if (args->rwflag & LINUX_MS_NOSUID) fsflags |= MNT_NOSUID; if (args->rwflag & LINUX_MS_NOEXEC) fsflags |= MNT_NOEXEC; if (args->rwflag & LINUX_MS_REMOUNT) fsflags |= MNT_UPDATE; error = kernel_vmount(fsflags, "fstype", fstypename, "fspath", mntonname, "from", mntfromname, NULL); out: free(mntonname, M_TEMP); free(mntfromname, M_TEMP); return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_oldumount(struct thread *td, struct linux_oldumount_args *args) { struct linux_umount_args args2; args2.path = args->path; args2.flags = 0; return (linux_umount(td, &args2)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ #ifdef LINUX_LEGACY_SYSCALLS int linux_umount(struct thread *td, struct linux_umount_args *args) { struct unmount_args bsd; + int flags; + flags = 0; + if ((args->flags & LINUX_MNT_FORCE) != 0) + flags |= MNT_FORCE; + bsd.path = args->path; - bsd.flags = args->flags; /* XXX correct? */ + bsd.flags = flags; return (sys_unmount(td, &bsd)); } #endif /* * fcntl family of syscalls */ struct l_flock { l_short l_type; l_short l_whence; l_off_t l_start; l_off_t l_len; l_pid_t l_pid; } #if defined(__amd64__) && defined(COMPAT_LINUX32) __packed #endif ; static void linux_to_bsd_flock(struct l_flock *linux_flock, struct flock *bsd_flock) { switch (linux_flock->l_type) { case LINUX_F_RDLCK: bsd_flock->l_type = F_RDLCK; break; case LINUX_F_WRLCK: bsd_flock->l_type = F_WRLCK; break; case LINUX_F_UNLCK: bsd_flock->l_type = F_UNLCK; break; default: bsd_flock->l_type = -1; break; } bsd_flock->l_whence = linux_flock->l_whence; bsd_flock->l_start = (off_t)linux_flock->l_start; bsd_flock->l_len = (off_t)linux_flock->l_len; bsd_flock->l_pid = (pid_t)linux_flock->l_pid; bsd_flock->l_sysid = 0; } static void bsd_to_linux_flock(struct flock *bsd_flock, struct l_flock *linux_flock) { switch (bsd_flock->l_type) { case F_RDLCK: linux_flock->l_type = LINUX_F_RDLCK; break; case F_WRLCK: linux_flock->l_type = LINUX_F_WRLCK; break; case F_UNLCK: linux_flock->l_type = LINUX_F_UNLCK; break; } linux_flock->l_whence = bsd_flock->l_whence; linux_flock->l_start = (l_off_t)bsd_flock->l_start; linux_flock->l_len = (l_off_t)bsd_flock->l_len; linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid; } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) struct l_flock64 { l_short l_type; l_short l_whence; l_loff_t l_start; l_loff_t l_len; l_pid_t l_pid; } #if defined(__amd64__) && defined(COMPAT_LINUX32) __packed #endif ; static void linux_to_bsd_flock64(struct l_flock64 *linux_flock, struct flock *bsd_flock) { switch (linux_flock->l_type) { case LINUX_F_RDLCK: bsd_flock->l_type = F_RDLCK; break; case LINUX_F_WRLCK: bsd_flock->l_type = F_WRLCK; break; case LINUX_F_UNLCK: bsd_flock->l_type = F_UNLCK; break; default: bsd_flock->l_type = -1; break; } bsd_flock->l_whence = linux_flock->l_whence; bsd_flock->l_start = (off_t)linux_flock->l_start; bsd_flock->l_len = (off_t)linux_flock->l_len; bsd_flock->l_pid = (pid_t)linux_flock->l_pid; bsd_flock->l_sysid = 0; } static void bsd_to_linux_flock64(struct flock *bsd_flock, struct l_flock64 *linux_flock) { switch (bsd_flock->l_type) { case F_RDLCK: linux_flock->l_type = LINUX_F_RDLCK; break; case F_WRLCK: linux_flock->l_type = LINUX_F_WRLCK; break; case F_UNLCK: linux_flock->l_type = LINUX_F_UNLCK; break; } linux_flock->l_whence = bsd_flock->l_whence; linux_flock->l_start = (l_loff_t)bsd_flock->l_start; linux_flock->l_len = (l_loff_t)bsd_flock->l_len; linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid; } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ static int fcntl_common(struct thread *td, struct linux_fcntl_args *args) { struct l_flock linux_flock; struct flock bsd_flock; struct file *fp; long arg; int error, result; switch (args->cmd) { case LINUX_F_DUPFD: return (kern_fcntl(td, args->fd, F_DUPFD, args->arg)); case LINUX_F_GETFD: return (kern_fcntl(td, args->fd, F_GETFD, 0)); case LINUX_F_SETFD: return (kern_fcntl(td, args->fd, F_SETFD, args->arg)); case LINUX_F_GETFL: error = kern_fcntl(td, args->fd, F_GETFL, 0); result = td->td_retval[0]; td->td_retval[0] = 0; if (result & O_RDONLY) td->td_retval[0] |= LINUX_O_RDONLY; if (result & O_WRONLY) td->td_retval[0] |= LINUX_O_WRONLY; if (result & O_RDWR) td->td_retval[0] |= LINUX_O_RDWR; if (result & O_NDELAY) td->td_retval[0] |= LINUX_O_NONBLOCK; if (result & O_APPEND) td->td_retval[0] |= LINUX_O_APPEND; if (result & O_FSYNC) td->td_retval[0] |= LINUX_O_SYNC; if (result & O_ASYNC) td->td_retval[0] |= LINUX_FASYNC; #ifdef LINUX_O_NOFOLLOW if (result & O_NOFOLLOW) td->td_retval[0] |= LINUX_O_NOFOLLOW; #endif #ifdef LINUX_O_DIRECT if (result & O_DIRECT) td->td_retval[0] |= LINUX_O_DIRECT; #endif return (error); case LINUX_F_SETFL: arg = 0; if (args->arg & LINUX_O_NDELAY) arg |= O_NONBLOCK; if (args->arg & LINUX_O_APPEND) arg |= O_APPEND; if (args->arg & LINUX_O_SYNC) arg |= O_FSYNC; if (args->arg & LINUX_FASYNC) arg |= O_ASYNC; #ifdef LINUX_O_NOFOLLOW if (args->arg & LINUX_O_NOFOLLOW) arg |= O_NOFOLLOW; #endif #ifdef LINUX_O_DIRECT if (args->arg & LINUX_O_DIRECT) arg |= O_DIRECT; #endif return (kern_fcntl(td, args->fd, F_SETFL, arg)); case LINUX_F_GETLK: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock(&linux_flock, &bsd_flock); error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock); if (error) return (error); bsd_to_linux_flock(&bsd_flock, &linux_flock); return (copyout(&linux_flock, (void *)args->arg, sizeof(linux_flock))); case LINUX_F_SETLK: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock(&linux_flock, &bsd_flock); return (kern_fcntl(td, args->fd, F_SETLK, (intptr_t)&bsd_flock)); case LINUX_F_SETLKW: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock(&linux_flock, &bsd_flock); return (kern_fcntl(td, args->fd, F_SETLKW, (intptr_t)&bsd_flock)); case LINUX_F_GETOWN: return (kern_fcntl(td, args->fd, F_GETOWN, 0)); case LINUX_F_SETOWN: /* * XXX some Linux applications depend on F_SETOWN having no * significant effect for pipes (SIGIO is not delivered for * pipes under Linux-2.2.35 at least). */ error = fget(td, args->fd, &cap_fcntl_rights, &fp); if (error) return (error); if (fp->f_type == DTYPE_PIPE) { fdrop(fp, td); return (EINVAL); } fdrop(fp, td); return (kern_fcntl(td, args->fd, F_SETOWN, args->arg)); case LINUX_F_DUPFD_CLOEXEC: return (kern_fcntl(td, args->fd, F_DUPFD_CLOEXEC, args->arg)); } return (EINVAL); } int linux_fcntl(struct thread *td, struct linux_fcntl_args *args) { return (fcntl_common(td, args)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args) { struct l_flock64 linux_flock; struct flock bsd_flock; struct linux_fcntl_args fcntl_args; int error; switch (args->cmd) { case LINUX_F_GETLK64: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock64(&linux_flock, &bsd_flock); error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock); if (error) return (error); bsd_to_linux_flock64(&bsd_flock, &linux_flock); return (copyout(&linux_flock, (void *)args->arg, sizeof(linux_flock))); case LINUX_F_SETLK64: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock64(&linux_flock, &bsd_flock); return (kern_fcntl(td, args->fd, F_SETLK, (intptr_t)&bsd_flock)); case LINUX_F_SETLKW64: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock64(&linux_flock, &bsd_flock); return (kern_fcntl(td, args->fd, F_SETLKW, (intptr_t)&bsd_flock)); } fcntl_args.fd = args->fd; fcntl_args.cmd = args->cmd; fcntl_args.arg = args->arg; return (fcntl_common(td, &fcntl_args)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ #ifdef LINUX_LEGACY_SYSCALLS int linux_chown(struct thread *td, struct linux_chown_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid, args->gid, 0); LFREEPATH(path); return (error); } #endif int linux_fchownat(struct thread *td, struct linux_fchownat_args *args) { char *path; int error, dfd, flag; if (args->flag & ~LINUX_AT_SYMLINK_NOFOLLOW) return (EINVAL); dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->filename, &path, dfd); flag = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) == 0 ? 0 : AT_SYMLINK_NOFOLLOW; error = kern_fchownat(td, dfd, path, UIO_SYSSPACE, args->uid, args->gid, flag); LFREEPATH(path); return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_lchown(struct thread *td, struct linux_lchown_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid, args->gid, AT_SYMLINK_NOFOLLOW); LFREEPATH(path); return (error); } #endif static int convert_fadvice(int advice) { switch (advice) { case LINUX_POSIX_FADV_NORMAL: return (POSIX_FADV_NORMAL); case LINUX_POSIX_FADV_RANDOM: return (POSIX_FADV_RANDOM); case LINUX_POSIX_FADV_SEQUENTIAL: return (POSIX_FADV_SEQUENTIAL); case LINUX_POSIX_FADV_WILLNEED: return (POSIX_FADV_WILLNEED); case LINUX_POSIX_FADV_DONTNEED: return (POSIX_FADV_DONTNEED); case LINUX_POSIX_FADV_NOREUSE: return (POSIX_FADV_NOREUSE); default: return (-1); } } int linux_fadvise64(struct thread *td, struct linux_fadvise64_args *args) { int advice; advice = convert_fadvice(args->advice); if (advice == -1) return (EINVAL); return (kern_posix_fadvise(td, args->fd, args->offset, args->len, advice)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_fadvise64_64(struct thread *td, struct linux_fadvise64_64_args *args) { int advice; advice = convert_fadvice(args->advice); if (advice == -1) return (EINVAL); return (kern_posix_fadvise(td, args->fd, args->offset, args->len, advice)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ #ifdef LINUX_LEGACY_SYSCALLS int linux_pipe(struct thread *td, struct linux_pipe_args *args) { int fildes[2]; int error; error = kern_pipe(td, fildes, 0, NULL, NULL); if (error != 0) return (error); error = copyout(fildes, args->pipefds, sizeof(fildes)); if (error != 0) { (void)kern_close(td, fildes[0]); (void)kern_close(td, fildes[1]); } return (error); } #endif int linux_pipe2(struct thread *td, struct linux_pipe2_args *args) { int fildes[2]; int error, flags; if ((args->flags & ~(LINUX_O_NONBLOCK | LINUX_O_CLOEXEC)) != 0) return (EINVAL); flags = 0; if ((args->flags & LINUX_O_NONBLOCK) != 0) flags |= O_NONBLOCK; if ((args->flags & LINUX_O_CLOEXEC) != 0) flags |= O_CLOEXEC; error = kern_pipe(td, fildes, flags, NULL, NULL); if (error != 0) return (error); error = copyout(fildes, args->pipefds, sizeof(fildes)); if (error != 0) { (void)kern_close(td, fildes[0]); (void)kern_close(td, fildes[1]); } return (error); } int linux_dup3(struct thread *td, struct linux_dup3_args *args) { int cmd; intptr_t newfd; if (args->oldfd == args->newfd) return (EINVAL); if ((args->flags & ~LINUX_O_CLOEXEC) != 0) return (EINVAL); if (args->flags & LINUX_O_CLOEXEC) cmd = F_DUP2FD_CLOEXEC; else cmd = F_DUP2FD; newfd = args->newfd; return (kern_fcntl(td, args->oldfd, cmd, newfd)); } int linux_fallocate(struct thread *td, struct linux_fallocate_args *args) { /* * We emulate only posix_fallocate system call for which * mode should be 0. */ if (args->mode != 0) return (ENOSYS); return (kern_posix_fallocate(td, args->fd, args->offset, args->len)); } int linux_copy_file_range(struct thread *td, struct linux_copy_file_range_args *args) { l_loff_t inoff, outoff, *inoffp, *outoffp; int error, flags; /* * copy_file_range(2) on Linux doesn't define any flags (yet), so is * the native implementation. Enforce it. */ if (args->flags != 0) { linux_msg(td, "copy_file_range unsupported flags 0x%x", args->flags); return (EINVAL); } flags = 0; inoffp = outoffp = NULL; if (args->off_in != NULL) { error = copyin(args->off_in, &inoff, sizeof(l_loff_t)); if (error != 0) return (error); inoffp = &inoff; } if (args->off_out != NULL) { error = copyin(args->off_out, &outoff, sizeof(l_loff_t)); if (error != 0) return (error); outoffp = &outoff; } error = kern_copy_file_range(td, args->fd_in, inoffp, args->fd_out, outoffp, args->len, flags); if (error == 0 && args->off_in != NULL) error = copyout(inoffp, args->off_in, sizeof(l_loff_t)); if (error == 0 && args->off_out != NULL) error = copyout(outoffp, args->off_out, sizeof(l_loff_t)); return (error); } Index: projects/clang1000-import/sys/compat/linux/linux_file.h =================================================================== --- projects/clang1000-import/sys/compat/linux/linux_file.h (revision 356919) +++ projects/clang1000-import/sys/compat/linux/linux_file.h (revision 356920) @@ -1,144 +1,149 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2007 Roman Divacky * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _LINUX_FILE_H_ #define _LINUX_FILE_H_ #define LINUX_AT_FDCWD -100 #define LINUX_AT_SYMLINK_NOFOLLOW 0x100 #define LINUX_AT_EACCESS 0x200 #define LINUX_AT_REMOVEDIR 0x200 #define LINUX_AT_SYMLINK_FOLLOW 0x400 /* * posix_fadvise advice */ #define LINUX_POSIX_FADV_NORMAL 0 #define LINUX_POSIX_FADV_RANDOM 1 #define LINUX_POSIX_FADV_SEQUENTIAL 2 #define LINUX_POSIX_FADV_WILLNEED 3 #define LINUX_POSIX_FADV_DONTNEED 4 #define LINUX_POSIX_FADV_NOREUSE 5 /* * mount flags */ #define LINUX_MS_RDONLY 0x0001 #define LINUX_MS_NOSUID 0x0002 #define LINUX_MS_NODEV 0x0004 #define LINUX_MS_NOEXEC 0x0008 #define LINUX_MS_REMOUNT 0x0020 /* + * umount2 flags + */ +#define LINUX_MNT_FORCE 0x0001 + +/* * common open/fcntl flags */ #define LINUX_O_RDONLY 00000000 #define LINUX_O_WRONLY 00000001 #define LINUX_O_RDWR 00000002 #define LINUX_O_ACCMODE 00000003 #define LINUX_O_CREAT 00000100 #define LINUX_O_EXCL 00000200 #define LINUX_O_NOCTTY 00000400 #define LINUX_O_TRUNC 00001000 #define LINUX_O_APPEND 00002000 #define LINUX_O_NONBLOCK 00004000 #define LINUX_O_NDELAY LINUX_O_NONBLOCK #define LINUX_O_SYNC 00010000 #define LINUX_FASYNC 00020000 #define LINUX_O_DIRECT 00040000 /* Direct disk access hint */ #define LINUX_O_LARGEFILE 00100000 #define LINUX_O_DIRECTORY 00200000 /* Must be a directory */ #define LINUX_O_NOFOLLOW 00400000 /* Do not follow links */ #define LINUX_O_NOATIME 01000000 #define LINUX_O_CLOEXEC 02000000 #define LINUX_F_DUPFD 0 #define LINUX_F_GETFD 1 #define LINUX_F_SETFD 2 #define LINUX_F_GETFL 3 #define LINUX_F_SETFL 4 #ifndef LINUX_F_GETLK #define LINUX_F_GETLK 5 #define LINUX_F_SETLK 6 #define LINUX_F_SETLKW 7 #endif #ifndef LINUX_F_SETOWN #define LINUX_F_SETOWN 8 #define LINUX_F_GETOWN 9 #endif #ifndef LINUX_F_SETSIG #define LINUX_F_SETSIG 10 #define LINUX_F_GETSIG 11 #endif #ifndef LINUX_F_SETOWN_EX #define LINUX_F_SETOWN_EX 15 #define LINUX_F_GETOWN_EX 16 #define LINUX_F_GETOWNER_UIDS 17 #endif #define LINUX_F_SPECIFIC_BASE 1024 #define LINUX_F_SETLEASE (LINUX_F_SPECIFIC_BASE + 0) #define LINUX_F_GETLEASE (LINUX_F_SPECIFIC_BASE + 1) #define LINUX_F_CANCELLK (LINUX_F_SPECIFIC_BASE + 5) #define LINUX_F_DUPFD_CLOEXEC (LINUX_F_SPECIFIC_BASE + 6) #define LINUX_F_NOTIFY (LINUX_F_SPECIFIC_BASE + 2) #define LINUX_F_SETPIPE_SZ (LINUX_F_SPECIFIC_BASE + 7) #define LINUX_F_GETPIPE_SZ (LINUX_F_SPECIFIC_BASE + 8) #define LINUX_F_GETLKP 36 #define LINUX_F_SETLKP 37 #define LINUX_F_SETLKPW 38 #define LINUX_F_OWNER_TID 0 #define LINUX_F_OWNER_PID 1 #define LINUX_F_OWNER_PGRP 2 #ifndef LINUX_F_RDLCK #define LINUX_F_RDLCK 0 #define LINUX_F_WRLCK 1 #define LINUX_F_UNLCK 2 #endif /* * renameat2 flags */ #define LINUX_RENAME_NOREPLACE 0x00000001 #define LINUX_RENAME_EXCHANGE 0x00000002 #define LINUX_RENAME_WHITEOUT 0x00000004 /* * sync_file_range flags */ #define LINUX_SYNC_FILE_RANGE_WAIT_BEFORE 1 #define LINUX_SYNC_FILE_RANGE_WRITE 2 #define LINUX_SYNC_FILE_RANGE_WAIT_AFTER 4 #endif /* !_LINUX_FILE_H_ */ Index: projects/clang1000-import/sys/conf/files.powerpc =================================================================== --- projects/clang1000-import/sys/conf/files.powerpc (revision 356919) +++ projects/clang1000-import/sys/conf/files.powerpc (revision 356920) @@ -1,278 +1,279 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. # # $FreeBSD$ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and # dependency lines other than the first are silently ignored. # # font.h optional sc \ compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \ no-obj no-implicit-rule before-depend \ clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8" # # There is only an asm version on ppc64. cddl/compat/opensolaris/kern/opensolaris_atomic.c optional zfs powerpc | dtrace powerpc | zfs powerpcspe | dtrace powerpcspe compile-with "${ZFS_C}" cddl/dev/dtrace/powerpc/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/powerpc/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/powerpc/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" crypto/blowfish/bf_enc.c optional crypto | ipsec | ipsec_support crypto/des/des_enc.c optional crypto | ipsec | ipsec_support | netsmb dev/adb/adb_bus.c optional adb dev/adb/adb_kbd.c optional adb dev/adb/adb_mouse.c optional adb dev/adb/adb_hb_if.m optional adb dev/adb/adb_if.m optional adb dev/adb/adb_buttons.c optional adb dev/agp/agp_apple.c optional agp powermac dev/fb/fb.c optional sc dev/hwpmc/hwpmc_e500.c optional hwpmc dev/hwpmc/hwpmc_mpc7xxx.c optional hwpmc dev/hwpmc/hwpmc_powerpc.c optional hwpmc dev/hwpmc/hwpmc_ppc970.c optional hwpmc dev/iicbus/ad7417.c optional ad7417 powermac dev/iicbus/adm1030.c optional powermac windtunnel | adm1030 powermac dev/iicbus/adt746x.c optional adt746x powermac dev/iicbus/ds1631.c optional ds1631 powermac dev/iicbus/ds1775.c optional ds1775 powermac dev/iicbus/max6690.c optional max6690 powermac dev/iicbus/ofw_iicbus.c optional iicbus aim dev/ipmi/ipmi.c optional ipmi dev/ipmi/ipmi_opal.c optional powernv ipmi # Most ofw stuff below is brought in by conf/files for options FDT, but # we always want it, even on non-FDT platforms. dev/fdt/simplebus.c standard dev/ofw/openfirm.c standard dev/ofw/openfirmio.c standard dev/ofw/ofw_bus_if.m standard dev/ofw/ofw_cpu.c standard dev/ofw/ofw_if.m standard dev/ofw/ofw_bus_subr.c standard dev/ofw/ofw_console.c optional aim dev/ofw/ofw_disk.c optional ofwd aim dev/ofw/ofwbus.c standard dev/ofw/ofwpci.c optional pci dev/ofw/ofw_standard.c optional aim powerpc dev/ofw/ofw_subr.c standard dev/powermac_nvram/powermac_nvram.c optional powermac_nvram powermac dev/quicc/quicc_bfe_fdt.c optional quicc mpc85xx dev/random/darn.c optional powerpc64 !random_loadable dev/scc/scc_bfe_macio.c optional scc powermac dev/sdhci/fsl_sdhci.c optional mpc85xx sdhci dev/sec/sec.c optional sec mpc85xx dev/sound/macio/aoa.c optional snd_davbus | snd_ai2s powermac dev/sound/macio/davbus.c optional snd_davbus powermac dev/sound/macio/i2s.c optional snd_ai2s powermac dev/sound/macio/onyx.c optional snd_ai2s iicbus powermac dev/sound/macio/snapper.c optional snd_ai2s iicbus powermac dev/sound/macio/tumbler.c optional snd_ai2s iicbus powermac dev/syscons/scgfbrndr.c optional sc dev/tsec/if_tsec.c optional tsec dev/tsec/if_tsec_fdt.c optional tsec dev/uart/uart_cpu_powerpc.c optional uart dev/usb/controller/ehci_fsl.c optional ehci mpc85xx dev/vt/hw/ofwfb/ofwfb.c optional vt aim kern/kern_clocksource.c standard kern/subr_atomic64.c optional powerpc | powerpcspe kern/subr_dummy_vdso_tc.c standard kern/syscalls.c optional ktr kern/subr_sfbuf.c standard libkern/ashldi3.c optional powerpc | powerpcspe libkern/ashrdi3.c optional powerpc | powerpcspe libkern/bcmp.c standard libkern/bcopy.c standard libkern/cmpdi2.c optional powerpc | powerpcspe libkern/divdi3.c optional powerpc | powerpcspe libkern/ffs.c standard libkern/ffsl.c standard libkern/ffsll.c standard libkern/flsll.c standard libkern/lshrdi3.c optional powerpc | powerpcspe libkern/memcmp.c standard libkern/memset.c standard libkern/moddi3.c optional powerpc | powerpcspe libkern/qdivrem.c optional powerpc | powerpcspe libkern/ucmpdi2.c optional powerpc | powerpcspe libkern/udivdi3.c optional powerpc | powerpcspe libkern/umoddi3.c optional powerpc | powerpcspe powerpc/aim/locore.S optional aim no-obj powerpc/aim/aim_machdep.c optional aim powerpc/aim/mmu_oea.c optional aim powerpc powerpc/aim/mmu_oea64.c optional aim powerpc/aim/moea64_if.m optional aim powerpc/aim/moea64_native.c optional aim powerpc/aim/mp_cpudep.c optional aim powerpc/aim/slb.c optional aim powerpc64 powerpc/amigaone/platform_amigaone.c optional amigaone +powerpc/amigaone/cpld_x5000.c optional powerpc amigaone | powerpc64 amigaone powerpc/booke/locore.S optional booke no-obj powerpc/booke/booke_machdep.c optional booke powerpc/booke/machdep_e500.c optional booke_e500 powerpc/booke/mp_cpudep.c optional booke smp powerpc/booke/platform_bare.c optional booke powerpc/booke/pmap.c optional booke powerpc/booke/spe.c optional powerpcspe powerpc/cpufreq/dfs.c optional cpufreq powerpc/cpufreq/mpc85xx_jog.c optional cpufreq mpc85xx powerpc/cpufreq/pcr.c optional cpufreq aim powerpc/cpufreq/pmcr.c optional cpufreq aim powerpc64 powerpc/cpufreq/pmufreq.c optional cpufreq aim pmu powerpc/fpu/fpu_add.c optional fpu_emu | powerpcspe powerpc/fpu/fpu_compare.c optional fpu_emu | powerpcspe powerpc/fpu/fpu_div.c optional fpu_emu | powerpcspe powerpc/fpu/fpu_emu.c optional fpu_emu powerpc/fpu/fpu_explode.c optional fpu_emu | powerpcspe powerpc/fpu/fpu_implode.c optional fpu_emu | powerpcspe powerpc/fpu/fpu_mul.c optional fpu_emu | powerpcspe powerpc/fpu/fpu_sqrt.c optional fpu_emu powerpc/fpu/fpu_subr.c optional fpu_emu | powerpcspe powerpc/mambo/mambocall.S optional mambo powerpc/mambo/mambo.c optional mambo powerpc/mambo/mambo_console.c optional mambo powerpc/mambo/mambo_disk.c optional mambo powerpc/mikrotik/platform_rb.c optional mikrotik powerpc/mikrotik/rb_led.c optional mikrotik powerpc/mpc85xx/atpic.c optional mpc85xx isa powerpc/mpc85xx/ds1553_bus_fdt.c optional ds1553 powerpc/mpc85xx/ds1553_core.c optional ds1553 powerpc/mpc85xx/fsl_diu.c optional mpc85xx diu powerpc/mpc85xx/fsl_espi.c optional mpc85xx spibus powerpc/mpc85xx/fsl_sata.c optional mpc85xx ata powerpc/mpc85xx/i2c.c optional mpc85xx iicbus powerpc/mpc85xx/isa.c optional mpc85xx isa powerpc/mpc85xx/lbc.c optional mpc85xx powerpc/mpc85xx/mpc85xx.c optional mpc85xx powerpc/mpc85xx/mpc85xx_cache.c optional mpc85xx powerpc/mpc85xx/mpc85xx_gpio.c optional mpc85xx gpio powerpc/mpc85xx/platform_mpc85xx.c optional mpc85xx powerpc/mpc85xx/pci_mpc85xx.c optional pci mpc85xx powerpc/mpc85xx/pci_mpc85xx_pcib.c optional pci mpc85xx powerpc/mpc85xx/qoriq_gpio.c optional mpc85xx gpio powerpc/ofw/ofw_machdep.c standard powerpc/ofw/ofw_pcibus.c optional pci powerpc/ofw/ofw_pcib_pci.c optional pci powerpc/ofw/ofw_real.c optional aim powerpc/ofw/ofw_syscons.c optional sc aim powerpc/ofw/ofwcall32.S optional aim powerpc powerpc/ofw/ofwcall64.S optional aim powerpc64 powerpc/ofw/openpic_ofw.c standard powerpc/ofw/rtas.c optional aim powerpc/ofw/ofw_initrd.c optional md_root_mem powerpc64 powerpc/powermac/ata_kauai.c optional powermac ata | powermac atamacio powerpc/powermac/ata_macio.c optional powermac ata | powermac atamacio powerpc/powermac/ata_dbdma.c optional powermac ata | powermac atamacio powerpc/powermac/atibl.c optional powermac atibl powerpc/powermac/cuda.c optional powermac cuda powerpc/powermac/cpcht.c optional powermac pci powerpc/powermac/dbdma.c optional powermac pci powerpc/powermac/fcu.c optional powermac fcu powerpc/powermac/grackle.c optional powermac pci powerpc/powermac/hrowpic.c optional powermac pci powerpc/powermac/kiic.c optional powermac kiic powerpc/powermac/macgpio.c optional powermac pci powerpc/powermac/macio.c optional powermac pci powerpc/powermac/nvbl.c optional powermac nvbl powerpc/powermac/platform_powermac.c optional powermac powerpc/powermac/powermac_thermal.c optional powermac powerpc/powermac/pswitch.c optional powermac pswitch powerpc/powermac/pmu.c optional powermac pmu powerpc/powermac/smu.c optional powermac smu powerpc/powermac/smusat.c optional powermac smu powerpc/powermac/uninorth.c optional powermac powerpc/powermac/uninorthpci.c optional powermac pci powerpc/powermac/vcoregpio.c optional powermac powerpc/powernv/opal.c optional powernv powerpc/powernv/opal_async.c optional powernv powerpc/powernv/opal_console.c optional powernv powerpc/powernv/opal_dbg.c optional powernv gdb powerpc/powernv/opal_dev.c optional powernv powerpc/powernv/opal_flash.c optional powernv opalflash powerpc/powernv/opal_hmi.c optional powernv powerpc/powernv/opal_i2c.c optional iicbus fdt powernv powerpc/powernv/opal_i2cm.c optional iicbus fdt powernv powerpc/powernv/opal_nvram.c optional powernv nvram powerpc/powernv/opal_pci.c optional powernv pci powerpc/powernv/opal_sensor.c optional powernv powerpc/powernv/opalcall.S optional powernv powerpc/powernv/platform_powernv.c optional powernv powerpc/powernv/powernv_centaur.c optional powernv powerpc/powernv/powernv_xscom.c optional powernv powerpc/powernv/xive.c optional powernv powerpc/powerpc/altivec.c optional powerpc | powerpc64 powerpc/powerpc/autoconf.c standard powerpc/powerpc/bus_machdep.c standard powerpc/powerpc/busdma_machdep.c standard powerpc/powerpc/clock.c standard powerpc/powerpc/copyinout.c standard powerpc/powerpc/copystr.c standard powerpc/powerpc/cpu.c standard powerpc/powerpc/cpu_subr64.S optional powerpc64 powerpc/powerpc/db_disasm.c optional ddb powerpc/powerpc/db_hwwatch.c optional ddb powerpc/powerpc/db_interface.c optional ddb powerpc/powerpc/db_trace.c optional ddb powerpc/powerpc/dump_machdep.c standard powerpc/powerpc/elf32_machdep.c optional powerpc | powerpcspe | compat_freebsd32 powerpc/powerpc/elf64_machdep.c optional powerpc64 powerpc/powerpc/exec_machdep.c standard powerpc/powerpc/fpu.c standard powerpc/powerpc/gdb_machdep.c optional gdb powerpc/powerpc/in_cksum.c optional inet | inet6 powerpc/powerpc/interrupt.c standard powerpc/powerpc/intr_machdep.c standard powerpc/powerpc/iommu_if.m standard powerpc/powerpc/machdep.c standard powerpc/powerpc/mem.c optional mem powerpc/powerpc/minidump_machdep.c optional powerpc64 powerpc/powerpc/mmu_if.m standard powerpc/powerpc/mp_machdep.c optional smp powerpc/powerpc/nexus.c standard powerpc/powerpc/openpic.c standard powerpc/powerpc/pic_if.m standard powerpc/powerpc/pmap_dispatch.c standard powerpc/powerpc/platform.c standard powerpc/powerpc/platform_if.m standard powerpc/powerpc/ptrace_machdep.c standard powerpc/powerpc/sc_machdep.c optional sc powerpc/powerpc/setjmp.S standard powerpc/powerpc/sigcode32.S optional powerpc | powerpcspe | compat_freebsd32 powerpc/powerpc/sigcode64.S optional powerpc64 powerpc/powerpc/swtch32.S optional powerpc | powerpcspe powerpc/powerpc/swtch64.S optional powerpc64 powerpc/powerpc/stack_machdep.c optional ddb | stack powerpc/powerpc/syncicache.c standard powerpc/powerpc/sys_machdep.c standard powerpc/powerpc/trap.c standard powerpc/powerpc/uio_machdep.c standard powerpc/powerpc/uma_machdep.c standard powerpc/powerpc/vm_machdep.c standard powerpc/ps3/ehci_ps3.c optional ps3 ehci powerpc/ps3/ohci_ps3.c optional ps3 ohci powerpc/ps3/if_glc.c optional ps3 glc powerpc/ps3/mmu_ps3.c optional ps3 powerpc/ps3/platform_ps3.c optional ps3 powerpc/ps3/ps3bus.c optional ps3 powerpc/ps3/ps3cdrom.c optional ps3 scbus powerpc/ps3/ps3disk.c optional ps3 powerpc/ps3/ps3pic.c optional ps3 powerpc/ps3/ps3_syscons.c optional ps3 vt powerpc/ps3/ps3-hvcall.S optional ps3 powerpc/pseries/phyp-hvcall.S optional pseries powerpc64 powerpc/pseries/mmu_phyp.c optional pseries powerpc64 powerpc/pseries/phyp_console.c optional pseries powerpc64 uart powerpc/pseries/phyp_dbg.c optional pseries powerpc64 gdb powerpc/pseries/phyp_llan.c optional llan powerpc/pseries/phyp_vscsi.c optional pseries powerpc64 scbus powerpc/pseries/platform_chrp.c optional pseries powerpc/pseries/plpar_iommu.c optional pseries powerpc64 powerpc/pseries/plpar_pcibus.c optional pseries powerpc64 pci powerpc/pseries/rtas_dev.c optional pseries powerpc/pseries/rtas_pci.c optional pseries pci powerpc/pseries/vdevice.c optional pseries powerpc64 powerpc/pseries/xics.c optional pseries powerpc64 powerpc/psim/iobus.c optional psim powerpc/psim/ata_iobus.c optional ata psim powerpc/psim/openpic_iobus.c optional psim powerpc/psim/uart_iobus.c optional uart psim Index: projects/clang1000-import/sys/conf/sysent.mk =================================================================== --- projects/clang1000-import/sys/conf/sysent.mk (nonexistent) +++ projects/clang1000-import/sys/conf/sysent.mk (revision 356920) @@ -0,0 +1,35 @@ +# $FreeBSD$ + +# Don't use an OBJDIR +.OBJDIR: ${.CURDIR} + +.include +.include + +COMMON_GENERATED= proto.h \ + syscall.h \ + syscalls.c \ + sysent.c \ + systrace_args.c + +GENERATED_PREFIX?= +GENERATED?= ${COMMON_GENERATED:S/^/${GENERATED_PREFIX}/} +SYSENT_FILE?= syscalls.master +SYSENT_CONF?= syscalls.conf + +# Including Makefile should override SYSENT_FILE and SYSENT_CONF as needed, +# and set GENERATED. +SRCS+= ${SYSENT_FILE} +SRCS+= ${SYSENT_CONF} +MAKESYSCALLS= ${SYSDIR}/tools/makesyscalls.lua + +all: + @echo "make sysent only" + +# We .ORDER these explicitly so that we only run MAKESYSCALLS once, rather than +# potentially once for each ${GENERATED} file. +.ORDER: ${GENERATED} +sysent: ${GENERATED} + +${GENERATED}: ${MAKESYSCALLS} ${SRCS} + ${LUA} ${MAKESYSCALLS} ${SYSENT_FILE} ${SYSENT_CONF} Property changes on: projects/clang1000-import/sys/conf/sysent.mk ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/clang1000-import/sys/dev/e1000/e1000_api.c =================================================================== --- projects/clang1000-import/sys/dev/e1000/e1000_api.c (revision 356919) +++ projects/clang1000-import/sys/dev/e1000/e1000_api.c (revision 356920) @@ -1,1388 +1,1389 @@ /****************************************************************************** SPDX-License-Identifier: BSD-3-Clause Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "e1000_api.h" /** * e1000_init_mac_params - Initialize MAC function pointers * @hw: pointer to the HW structure * * This function initializes the function pointers for the MAC * set of functions. Called by drivers or by e1000_setup_init_funcs. **/ s32 e1000_init_mac_params(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; if (hw->mac.ops.init_params) { ret_val = hw->mac.ops.init_params(hw); if (ret_val) { DEBUGOUT("MAC Initialization Error\n"); goto out; } } else { DEBUGOUT("mac.init_mac_params was NULL\n"); ret_val = -E1000_ERR_CONFIG; } out: return ret_val; } /** * e1000_init_nvm_params - Initialize NVM function pointers * @hw: pointer to the HW structure * * This function initializes the function pointers for the NVM * set of functions. Called by drivers or by e1000_setup_init_funcs. **/ s32 e1000_init_nvm_params(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; if (hw->nvm.ops.init_params) { ret_val = hw->nvm.ops.init_params(hw); if (ret_val) { DEBUGOUT("NVM Initialization Error\n"); goto out; } } else { DEBUGOUT("nvm.init_nvm_params was NULL\n"); ret_val = -E1000_ERR_CONFIG; } out: return ret_val; } /** * e1000_init_phy_params - Initialize PHY function pointers * @hw: pointer to the HW structure * * This function initializes the function pointers for the PHY * set of functions. Called by drivers or by e1000_setup_init_funcs. **/ s32 e1000_init_phy_params(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; if (hw->phy.ops.init_params) { ret_val = hw->phy.ops.init_params(hw); if (ret_val) { DEBUGOUT("PHY Initialization Error\n"); goto out; } } else { DEBUGOUT("phy.init_phy_params was NULL\n"); ret_val = -E1000_ERR_CONFIG; } out: return ret_val; } /** * e1000_init_mbx_params - Initialize mailbox function pointers * @hw: pointer to the HW structure * * This function initializes the function pointers for the PHY * set of functions. Called by drivers or by e1000_setup_init_funcs. **/ s32 e1000_init_mbx_params(struct e1000_hw *hw) { s32 ret_val = E1000_SUCCESS; if (hw->mbx.ops.init_params) { ret_val = hw->mbx.ops.init_params(hw); if (ret_val) { DEBUGOUT("Mailbox Initialization Error\n"); goto out; } } else { DEBUGOUT("mbx.init_mbx_params was NULL\n"); ret_val = -E1000_ERR_CONFIG; } out: return ret_val; } /** * e1000_set_mac_type - Sets MAC type * @hw: pointer to the HW structure * * This function sets the mac type of the adapter based on the * device ID stored in the hw structure. * MUST BE FIRST FUNCTION CALLED (explicitly or through * e1000_setup_init_funcs()). **/ s32 e1000_set_mac_type(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; s32 ret_val = E1000_SUCCESS; DEBUGFUNC("e1000_set_mac_type"); switch (hw->device_id) { case E1000_DEV_ID_82542: mac->type = e1000_82542; break; case E1000_DEV_ID_82543GC_FIBER: case E1000_DEV_ID_82543GC_COPPER: mac->type = e1000_82543; break; case E1000_DEV_ID_82544EI_COPPER: case E1000_DEV_ID_82544EI_FIBER: case E1000_DEV_ID_82544GC_COPPER: case E1000_DEV_ID_82544GC_LOM: mac->type = e1000_82544; break; case E1000_DEV_ID_82540EM: case E1000_DEV_ID_82540EM_LOM: case E1000_DEV_ID_82540EP: case E1000_DEV_ID_82540EP_LOM: case E1000_DEV_ID_82540EP_LP: mac->type = e1000_82540; break; case E1000_DEV_ID_82545EM_COPPER: case E1000_DEV_ID_82545EM_FIBER: mac->type = e1000_82545; break; case E1000_DEV_ID_82545GM_COPPER: case E1000_DEV_ID_82545GM_FIBER: case E1000_DEV_ID_82545GM_SERDES: mac->type = e1000_82545_rev_3; break; case E1000_DEV_ID_82546EB_COPPER: case E1000_DEV_ID_82546EB_FIBER: case E1000_DEV_ID_82546EB_QUAD_COPPER: mac->type = e1000_82546; break; case E1000_DEV_ID_82546GB_COPPER: case E1000_DEV_ID_82546GB_FIBER: case E1000_DEV_ID_82546GB_SERDES: case E1000_DEV_ID_82546GB_PCIE: case E1000_DEV_ID_82546GB_QUAD_COPPER: case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: mac->type = e1000_82546_rev_3; break; case E1000_DEV_ID_82541EI: case E1000_DEV_ID_82541EI_MOBILE: case E1000_DEV_ID_82541ER_LOM: mac->type = e1000_82541; break; case E1000_DEV_ID_82541ER: case E1000_DEV_ID_82541GI: case E1000_DEV_ID_82541GI_LF: case E1000_DEV_ID_82541GI_MOBILE: mac->type = e1000_82541_rev_2; break; case E1000_DEV_ID_82547EI: case E1000_DEV_ID_82547EI_MOBILE: mac->type = e1000_82547; break; case E1000_DEV_ID_82547GI: mac->type = e1000_82547_rev_2; break; case E1000_DEV_ID_82571EB_COPPER: case E1000_DEV_ID_82571EB_FIBER: case E1000_DEV_ID_82571EB_SERDES: case E1000_DEV_ID_82571EB_SERDES_DUAL: case E1000_DEV_ID_82571EB_SERDES_QUAD: case E1000_DEV_ID_82571EB_QUAD_COPPER: case E1000_DEV_ID_82571PT_QUAD_COPPER: case E1000_DEV_ID_82571EB_QUAD_FIBER: case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: mac->type = e1000_82571; break; case E1000_DEV_ID_82572EI: case E1000_DEV_ID_82572EI_COPPER: case E1000_DEV_ID_82572EI_FIBER: case E1000_DEV_ID_82572EI_SERDES: mac->type = e1000_82572; break; case E1000_DEV_ID_82573E: case E1000_DEV_ID_82573E_IAMT: case E1000_DEV_ID_82573L: mac->type = e1000_82573; break; case E1000_DEV_ID_82574L: case E1000_DEV_ID_82574LA: mac->type = e1000_82574; break; case E1000_DEV_ID_82583V: mac->type = e1000_82583; break; case E1000_DEV_ID_80003ES2LAN_COPPER_DPT: case E1000_DEV_ID_80003ES2LAN_SERDES_DPT: case E1000_DEV_ID_80003ES2LAN_COPPER_SPT: case E1000_DEV_ID_80003ES2LAN_SERDES_SPT: mac->type = e1000_80003es2lan; break; case E1000_DEV_ID_ICH8_IFE: case E1000_DEV_ID_ICH8_IFE_GT: case E1000_DEV_ID_ICH8_IFE_G: case E1000_DEV_ID_ICH8_IGP_M: case E1000_DEV_ID_ICH8_IGP_M_AMT: case E1000_DEV_ID_ICH8_IGP_AMT: case E1000_DEV_ID_ICH8_IGP_C: case E1000_DEV_ID_ICH8_82567V_3: mac->type = e1000_ich8lan; break; case E1000_DEV_ID_ICH9_IFE: case E1000_DEV_ID_ICH9_IFE_GT: case E1000_DEV_ID_ICH9_IFE_G: case E1000_DEV_ID_ICH9_IGP_M: case E1000_DEV_ID_ICH9_IGP_M_AMT: case E1000_DEV_ID_ICH9_IGP_M_V: case E1000_DEV_ID_ICH9_IGP_AMT: case E1000_DEV_ID_ICH9_BM: case E1000_DEV_ID_ICH9_IGP_C: case E1000_DEV_ID_ICH10_R_BM_LM: case E1000_DEV_ID_ICH10_R_BM_LF: case E1000_DEV_ID_ICH10_R_BM_V: mac->type = e1000_ich9lan; break; case E1000_DEV_ID_ICH10_D_BM_LM: case E1000_DEV_ID_ICH10_D_BM_LF: case E1000_DEV_ID_ICH10_D_BM_V: mac->type = e1000_ich10lan; break; case E1000_DEV_ID_PCH_D_HV_DM: case E1000_DEV_ID_PCH_D_HV_DC: case E1000_DEV_ID_PCH_M_HV_LM: case E1000_DEV_ID_PCH_M_HV_LC: mac->type = e1000_pchlan; break; case E1000_DEV_ID_PCH2_LV_LM: case E1000_DEV_ID_PCH2_LV_V: mac->type = e1000_pch2lan; break; case E1000_DEV_ID_PCH_LPT_I217_LM: case E1000_DEV_ID_PCH_LPT_I217_V: case E1000_DEV_ID_PCH_LPTLP_I218_LM: case E1000_DEV_ID_PCH_LPTLP_I218_V: case E1000_DEV_ID_PCH_I218_LM2: case E1000_DEV_ID_PCH_I218_V2: case E1000_DEV_ID_PCH_I218_LM3: case E1000_DEV_ID_PCH_I218_V3: mac->type = e1000_pch_lpt; break; case E1000_DEV_ID_PCH_SPT_I219_LM: case E1000_DEV_ID_PCH_SPT_I219_V: case E1000_DEV_ID_PCH_SPT_I219_LM2: case E1000_DEV_ID_PCH_SPT_I219_V2: case E1000_DEV_ID_PCH_LBG_I219_LM3: case E1000_DEV_ID_PCH_SPT_I219_LM4: case E1000_DEV_ID_PCH_SPT_I219_V4: case E1000_DEV_ID_PCH_SPT_I219_LM5: case E1000_DEV_ID_PCH_SPT_I219_V5: mac->type = e1000_pch_spt; break; case E1000_DEV_ID_PCH_CNP_I219_LM6: case E1000_DEV_ID_PCH_CNP_I219_V6: case E1000_DEV_ID_PCH_CNP_I219_LM7: case E1000_DEV_ID_PCH_CNP_I219_V7: case E1000_DEV_ID_PCH_ICP_I219_LM8: case E1000_DEV_ID_PCH_ICP_I219_V8: case E1000_DEV_ID_PCH_ICP_I219_LM9: case E1000_DEV_ID_PCH_ICP_I219_V9: + case E1000_DEV_ID_PCH_ICP_I219_V10: mac->type = e1000_pch_cnp; break; case E1000_DEV_ID_82575EB_COPPER: case E1000_DEV_ID_82575EB_FIBER_SERDES: case E1000_DEV_ID_82575GB_QUAD_COPPER: mac->type = e1000_82575; break; case E1000_DEV_ID_82576: case E1000_DEV_ID_82576_FIBER: case E1000_DEV_ID_82576_SERDES: case E1000_DEV_ID_82576_QUAD_COPPER: case E1000_DEV_ID_82576_QUAD_COPPER_ET2: case E1000_DEV_ID_82576_NS: case E1000_DEV_ID_82576_NS_SERDES: case E1000_DEV_ID_82576_SERDES_QUAD: mac->type = e1000_82576; break; case E1000_DEV_ID_82580_COPPER: case E1000_DEV_ID_82580_FIBER: case E1000_DEV_ID_82580_SERDES: case E1000_DEV_ID_82580_SGMII: case E1000_DEV_ID_82580_COPPER_DUAL: case E1000_DEV_ID_82580_QUAD_FIBER: case E1000_DEV_ID_DH89XXCC_SGMII: case E1000_DEV_ID_DH89XXCC_SERDES: case E1000_DEV_ID_DH89XXCC_BACKPLANE: case E1000_DEV_ID_DH89XXCC_SFP: mac->type = e1000_82580; break; case E1000_DEV_ID_I350_COPPER: case E1000_DEV_ID_I350_FIBER: case E1000_DEV_ID_I350_SERDES: case E1000_DEV_ID_I350_SGMII: case E1000_DEV_ID_I350_DA4: mac->type = e1000_i350; break; case E1000_DEV_ID_I210_COPPER_FLASHLESS: case E1000_DEV_ID_I210_SERDES_FLASHLESS: case E1000_DEV_ID_I210_COPPER: case E1000_DEV_ID_I210_COPPER_OEM1: case E1000_DEV_ID_I210_COPPER_IT: case E1000_DEV_ID_I210_FIBER: case E1000_DEV_ID_I210_SERDES: case E1000_DEV_ID_I210_SGMII: mac->type = e1000_i210; break; case E1000_DEV_ID_I211_COPPER: mac->type = e1000_i211; break; case E1000_DEV_ID_82576_VF: case E1000_DEV_ID_82576_VF_HV: mac->type = e1000_vfadapt; break; case E1000_DEV_ID_I350_VF: case E1000_DEV_ID_I350_VF_HV: mac->type = e1000_vfadapt_i350; break; case E1000_DEV_ID_I354_BACKPLANE_1GBPS: case E1000_DEV_ID_I354_SGMII: case E1000_DEV_ID_I354_BACKPLANE_2_5GBPS: mac->type = e1000_i354; break; default: /* Should never have loaded on this device */ ret_val = -E1000_ERR_MAC_INIT; break; } return ret_val; } /** * e1000_setup_init_funcs - Initializes function pointers * @hw: pointer to the HW structure * @init_device: TRUE will initialize the rest of the function pointers * getting the device ready for use. FALSE will only set * MAC type and the function pointers for the other init * functions. Passing FALSE will not generate any hardware * reads or writes. * * This function must be called by a driver in order to use the rest * of the 'shared' code files. Called by drivers only. **/ s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device) { s32 ret_val; /* Can't do much good without knowing the MAC type. */ ret_val = e1000_set_mac_type(hw); if (ret_val) { DEBUGOUT("ERROR: MAC type could not be set properly.\n"); goto out; } if (!hw->hw_addr) { DEBUGOUT("ERROR: Registers not mapped\n"); ret_val = -E1000_ERR_CONFIG; goto out; } /* * Init function pointers to generic implementations. We do this first * allowing a driver module to override it afterward. */ e1000_init_mac_ops_generic(hw); e1000_init_phy_ops_generic(hw); e1000_init_nvm_ops_generic(hw); e1000_init_mbx_ops_generic(hw); /* * Set up the init function pointers. These are functions within the * adapter family file that sets up function pointers for the rest of * the functions in that family. */ switch (hw->mac.type) { case e1000_82542: e1000_init_function_pointers_82542(hw); break; case e1000_82543: case e1000_82544: e1000_init_function_pointers_82543(hw); break; case e1000_82540: case e1000_82545: case e1000_82545_rev_3: case e1000_82546: case e1000_82546_rev_3: e1000_init_function_pointers_82540(hw); break; case e1000_82541: case e1000_82541_rev_2: case e1000_82547: case e1000_82547_rev_2: e1000_init_function_pointers_82541(hw); break; case e1000_82571: case e1000_82572: case e1000_82573: case e1000_82574: case e1000_82583: e1000_init_function_pointers_82571(hw); break; case e1000_80003es2lan: e1000_init_function_pointers_80003es2lan(hw); break; case e1000_ich8lan: case e1000_ich9lan: case e1000_ich10lan: case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: e1000_init_function_pointers_ich8lan(hw); break; case e1000_82575: case e1000_82576: case e1000_82580: case e1000_i350: case e1000_i354: e1000_init_function_pointers_82575(hw); break; case e1000_i210: case e1000_i211: e1000_init_function_pointers_i210(hw); break; case e1000_vfadapt: e1000_init_function_pointers_vf(hw); break; case e1000_vfadapt_i350: e1000_init_function_pointers_vf(hw); break; default: DEBUGOUT("Hardware not supported\n"); ret_val = -E1000_ERR_CONFIG; break; } /* * Initialize the rest of the function pointers. These require some * register reads/writes in some cases. */ if (!(ret_val) && init_device) { ret_val = e1000_init_mac_params(hw); if (ret_val) goto out; ret_val = e1000_init_nvm_params(hw); if (ret_val) goto out; ret_val = e1000_init_phy_params(hw); if (ret_val) goto out; ret_val = e1000_init_mbx_params(hw); if (ret_val) goto out; } out: return ret_val; } /** * e1000_get_bus_info - Obtain bus information for adapter * @hw: pointer to the HW structure * * This will obtain information about the HW bus for which the * adapter is attached and stores it in the hw structure. This is a * function pointer entry point called by drivers. **/ s32 e1000_get_bus_info(struct e1000_hw *hw) { if (hw->mac.ops.get_bus_info) return hw->mac.ops.get_bus_info(hw); return E1000_SUCCESS; } /** * e1000_clear_vfta - Clear VLAN filter table * @hw: pointer to the HW structure * * This clears the VLAN filter table on the adapter. This is a function * pointer entry point called by drivers. **/ void e1000_clear_vfta(struct e1000_hw *hw) { if (hw->mac.ops.clear_vfta) hw->mac.ops.clear_vfta(hw); } /** * e1000_write_vfta - Write value to VLAN filter table * @hw: pointer to the HW structure * @offset: the 32-bit offset in which to write the value to. * @value: the 32-bit value to write at location offset. * * This writes a 32-bit value to a 32-bit offset in the VLAN filter * table. This is a function pointer entry point called by drivers. **/ void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value) { if (hw->mac.ops.write_vfta) hw->mac.ops.write_vfta(hw, offset, value); } /** * e1000_update_mc_addr_list - Update Multicast addresses * @hw: pointer to the HW structure * @mc_addr_list: array of multicast addresses to program * @mc_addr_count: number of multicast addresses to program * * Updates the Multicast Table Array. * The caller must have a packed mc_addr_list of multicast addresses. **/ void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list, u32 mc_addr_count) { if (hw->mac.ops.update_mc_addr_list) hw->mac.ops.update_mc_addr_list(hw, mc_addr_list, mc_addr_count); } /** * e1000_force_mac_fc - Force MAC flow control * @hw: pointer to the HW structure * * Force the MAC's flow control settings. Currently no func pointer exists * and all implementations are handled in the generic version of this * function. **/ s32 e1000_force_mac_fc(struct e1000_hw *hw) { return e1000_force_mac_fc_generic(hw); } /** * e1000_check_for_link - Check/Store link connection * @hw: pointer to the HW structure * * This checks the link condition of the adapter and stores the * results in the hw->mac structure. This is a function pointer entry * point called by drivers. **/ s32 e1000_check_for_link(struct e1000_hw *hw) { if (hw->mac.ops.check_for_link) return hw->mac.ops.check_for_link(hw); return -E1000_ERR_CONFIG; } /** * e1000_check_mng_mode - Check management mode * @hw: pointer to the HW structure * * This checks if the adapter has manageability enabled. * This is a function pointer entry point called by drivers. **/ bool e1000_check_mng_mode(struct e1000_hw *hw) { if (hw->mac.ops.check_mng_mode) return hw->mac.ops.check_mng_mode(hw); return FALSE; } /** * e1000_mng_write_dhcp_info - Writes DHCP info to host interface * @hw: pointer to the HW structure * @buffer: pointer to the host interface * @length: size of the buffer * * Writes the DHCP information to the host interface. **/ s32 e1000_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length) { return e1000_mng_write_dhcp_info_generic(hw, buffer, length); } /** * e1000_reset_hw - Reset hardware * @hw: pointer to the HW structure * * This resets the hardware into a known state. This is a function pointer * entry point called by drivers. **/ s32 e1000_reset_hw(struct e1000_hw *hw) { if (hw->mac.ops.reset_hw) return hw->mac.ops.reset_hw(hw); return -E1000_ERR_CONFIG; } /** * e1000_init_hw - Initialize hardware * @hw: pointer to the HW structure * * This inits the hardware readying it for operation. This is a function * pointer entry point called by drivers. **/ s32 e1000_init_hw(struct e1000_hw *hw) { if (hw->mac.ops.init_hw) return hw->mac.ops.init_hw(hw); return -E1000_ERR_CONFIG; } /** * e1000_setup_link - Configures link and flow control * @hw: pointer to the HW structure * * This configures link and flow control settings for the adapter. This * is a function pointer entry point called by drivers. While modules can * also call this, they probably call their own version of this function. **/ s32 e1000_setup_link(struct e1000_hw *hw) { if (hw->mac.ops.setup_link) return hw->mac.ops.setup_link(hw); return -E1000_ERR_CONFIG; } /** * e1000_get_speed_and_duplex - Returns current speed and duplex * @hw: pointer to the HW structure * @speed: pointer to a 16-bit value to store the speed * @duplex: pointer to a 16-bit value to store the duplex. * * This returns the speed and duplex of the adapter in the two 'out' * variables passed in. This is a function pointer entry point called * by drivers. **/ s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex) { if (hw->mac.ops.get_link_up_info) return hw->mac.ops.get_link_up_info(hw, speed, duplex); return -E1000_ERR_CONFIG; } /** * e1000_setup_led - Configures SW controllable LED * @hw: pointer to the HW structure * * This prepares the SW controllable LED for use and saves the current state * of the LED so it can be later restored. This is a function pointer entry * point called by drivers. **/ s32 e1000_setup_led(struct e1000_hw *hw) { if (hw->mac.ops.setup_led) return hw->mac.ops.setup_led(hw); return E1000_SUCCESS; } /** * e1000_cleanup_led - Restores SW controllable LED * @hw: pointer to the HW structure * * This restores the SW controllable LED to the value saved off by * e1000_setup_led. This is a function pointer entry point called by drivers. **/ s32 e1000_cleanup_led(struct e1000_hw *hw) { if (hw->mac.ops.cleanup_led) return hw->mac.ops.cleanup_led(hw); return E1000_SUCCESS; } /** * e1000_blink_led - Blink SW controllable LED * @hw: pointer to the HW structure * * This starts the adapter LED blinking. Request the LED to be setup first * and cleaned up after. This is a function pointer entry point called by * drivers. **/ s32 e1000_blink_led(struct e1000_hw *hw) { if (hw->mac.ops.blink_led) return hw->mac.ops.blink_led(hw); return E1000_SUCCESS; } /** * e1000_id_led_init - store LED configurations in SW * @hw: pointer to the HW structure * * Initializes the LED config in SW. This is a function pointer entry point * called by drivers. **/ s32 e1000_id_led_init(struct e1000_hw *hw) { if (hw->mac.ops.id_led_init) return hw->mac.ops.id_led_init(hw); return E1000_SUCCESS; } /** * e1000_led_on - Turn on SW controllable LED * @hw: pointer to the HW structure * * Turns the SW defined LED on. This is a function pointer entry point * called by drivers. **/ s32 e1000_led_on(struct e1000_hw *hw) { if (hw->mac.ops.led_on) return hw->mac.ops.led_on(hw); return E1000_SUCCESS; } /** * e1000_led_off - Turn off SW controllable LED * @hw: pointer to the HW structure * * Turns the SW defined LED off. This is a function pointer entry point * called by drivers. **/ s32 e1000_led_off(struct e1000_hw *hw) { if (hw->mac.ops.led_off) return hw->mac.ops.led_off(hw); return E1000_SUCCESS; } /** * e1000_reset_adaptive - Reset adaptive IFS * @hw: pointer to the HW structure * * Resets the adaptive IFS. Currently no func pointer exists and all * implementations are handled in the generic version of this function. **/ void e1000_reset_adaptive(struct e1000_hw *hw) { e1000_reset_adaptive_generic(hw); } /** * e1000_update_adaptive - Update adaptive IFS * @hw: pointer to the HW structure * * Updates adapter IFS. Currently no func pointer exists and all * implementations are handled in the generic version of this function. **/ void e1000_update_adaptive(struct e1000_hw *hw) { e1000_update_adaptive_generic(hw); } /** * e1000_disable_pcie_master - Disable PCI-Express master access * @hw: pointer to the HW structure * * Disables PCI-Express master access and verifies there are no pending * requests. Currently no func pointer exists and all implementations are * handled in the generic version of this function. **/ s32 e1000_disable_pcie_master(struct e1000_hw *hw) { return e1000_disable_pcie_master_generic(hw); } /** * e1000_config_collision_dist - Configure collision distance * @hw: pointer to the HW structure * * Configures the collision distance to the default value and is used * during link setup. **/ void e1000_config_collision_dist(struct e1000_hw *hw) { if (hw->mac.ops.config_collision_dist) hw->mac.ops.config_collision_dist(hw); } /** * e1000_rar_set - Sets a receive address register * @hw: pointer to the HW structure * @addr: address to set the RAR to * @index: the RAR to set * * Sets a Receive Address Register (RAR) to the specified address. **/ int e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index) { if (hw->mac.ops.rar_set) return hw->mac.ops.rar_set(hw, addr, index); return E1000_SUCCESS; } /** * e1000_validate_mdi_setting - Ensures valid MDI/MDIX SW state * @hw: pointer to the HW structure * * Ensures that the MDI/MDIX SW state is valid. **/ s32 e1000_validate_mdi_setting(struct e1000_hw *hw) { if (hw->mac.ops.validate_mdi_setting) return hw->mac.ops.validate_mdi_setting(hw); return E1000_SUCCESS; } /** * e1000_hash_mc_addr - Determines address location in multicast table * @hw: pointer to the HW structure * @mc_addr: Multicast address to hash. * * This hashes an address to determine its location in the multicast * table. Currently no func pointer exists and all implementations * are handled in the generic version of this function. **/ u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) { return e1000_hash_mc_addr_generic(hw, mc_addr); } /** * e1000_enable_tx_pkt_filtering - Enable packet filtering on TX * @hw: pointer to the HW structure * * Enables packet filtering on transmit packets if manageability is enabled * and host interface is enabled. * Currently no func pointer exists and all implementations are handled in the * generic version of this function. **/ bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw) { return e1000_enable_tx_pkt_filtering_generic(hw); } /** * e1000_mng_host_if_write - Writes to the manageability host interface * @hw: pointer to the HW structure * @buffer: pointer to the host interface buffer * @length: size of the buffer * @offset: location in the buffer to write to * @sum: sum of the data (not checksum) * * This function writes the buffer content at the offset given on the host if. * It also does alignment considerations to do the writes in most efficient * way. Also fills up the sum of the buffer in *buffer parameter. **/ s32 e1000_mng_host_if_write(struct e1000_hw *hw, u8 *buffer, u16 length, u16 offset, u8 *sum) { return e1000_mng_host_if_write_generic(hw, buffer, length, offset, sum); } /** * e1000_mng_write_cmd_header - Writes manageability command header * @hw: pointer to the HW structure * @hdr: pointer to the host interface command header * * Writes the command header after does the checksum calculation. **/ s32 e1000_mng_write_cmd_header(struct e1000_hw *hw, struct e1000_host_mng_command_header *hdr) { return e1000_mng_write_cmd_header_generic(hw, hdr); } /** * e1000_mng_enable_host_if - Checks host interface is enabled * @hw: pointer to the HW structure * * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND * * This function checks whether the HOST IF is enabled for command operation * and also checks whether the previous command is completed. It busy waits * in case of previous command is not completed. **/ s32 e1000_mng_enable_host_if(struct e1000_hw *hw) { return e1000_mng_enable_host_if_generic(hw); } /** * e1000_set_obff_timer - Set Optimized Buffer Flush/Fill timer * @hw: pointer to the HW structure * @itr: u32 indicating itr value * * Set the OBFF timer based on the given interrupt rate. **/ s32 e1000_set_obff_timer(struct e1000_hw *hw, u32 itr) { if (hw->mac.ops.set_obff_timer) return hw->mac.ops.set_obff_timer(hw, itr); return E1000_SUCCESS; } /** * e1000_check_reset_block - Verifies PHY can be reset * @hw: pointer to the HW structure * * Checks if the PHY is in a state that can be reset or if manageability * has it tied up. This is a function pointer entry point called by drivers. **/ s32 e1000_check_reset_block(struct e1000_hw *hw) { if (hw->phy.ops.check_reset_block) return hw->phy.ops.check_reset_block(hw); return E1000_SUCCESS; } /** * e1000_read_phy_reg - Reads PHY register * @hw: pointer to the HW structure * @offset: the register to read * @data: the buffer to store the 16-bit read. * * Reads the PHY register and returns the value in data. * This is a function pointer entry point called by drivers. **/ s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data) { if (hw->phy.ops.read_reg) return hw->phy.ops.read_reg(hw, offset, data); return E1000_SUCCESS; } /** * e1000_write_phy_reg - Writes PHY register * @hw: pointer to the HW structure * @offset: the register to write * @data: the value to write. * * Writes the PHY register at offset with the value in data. * This is a function pointer entry point called by drivers. **/ s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data) { if (hw->phy.ops.write_reg) return hw->phy.ops.write_reg(hw, offset, data); return E1000_SUCCESS; } /** * e1000_release_phy - Generic release PHY * @hw: pointer to the HW structure * * Return if silicon family does not require a semaphore when accessing the * PHY. **/ void e1000_release_phy(struct e1000_hw *hw) { if (hw->phy.ops.release) hw->phy.ops.release(hw); } /** * e1000_acquire_phy - Generic acquire PHY * @hw: pointer to the HW structure * * Return success if silicon family does not require a semaphore when * accessing the PHY. **/ s32 e1000_acquire_phy(struct e1000_hw *hw) { if (hw->phy.ops.acquire) return hw->phy.ops.acquire(hw); return E1000_SUCCESS; } /** * e1000_cfg_on_link_up - Configure PHY upon link up * @hw: pointer to the HW structure **/ s32 e1000_cfg_on_link_up(struct e1000_hw *hw) { if (hw->phy.ops.cfg_on_link_up) return hw->phy.ops.cfg_on_link_up(hw); return E1000_SUCCESS; } /** * e1000_read_kmrn_reg - Reads register using Kumeran interface * @hw: pointer to the HW structure * @offset: the register to read * @data: the location to store the 16-bit value read. * * Reads a register out of the Kumeran interface. Currently no func pointer * exists and all implementations are handled in the generic version of * this function. **/ s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data) { return e1000_read_kmrn_reg_generic(hw, offset, data); } /** * e1000_write_kmrn_reg - Writes register using Kumeran interface * @hw: pointer to the HW structure * @offset: the register to write * @data: the value to write. * * Writes a register to the Kumeran interface. Currently no func pointer * exists and all implementations are handled in the generic version of * this function. **/ s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data) { return e1000_write_kmrn_reg_generic(hw, offset, data); } /** * e1000_get_cable_length - Retrieves cable length estimation * @hw: pointer to the HW structure * * This function estimates the cable length and stores them in * hw->phy.min_length and hw->phy.max_length. This is a function pointer * entry point called by drivers. **/ s32 e1000_get_cable_length(struct e1000_hw *hw) { if (hw->phy.ops.get_cable_length) return hw->phy.ops.get_cable_length(hw); return E1000_SUCCESS; } /** * e1000_get_phy_info - Retrieves PHY information from registers * @hw: pointer to the HW structure * * This function gets some information from various PHY registers and * populates hw->phy values with it. This is a function pointer entry * point called by drivers. **/ s32 e1000_get_phy_info(struct e1000_hw *hw) { if (hw->phy.ops.get_info) return hw->phy.ops.get_info(hw); return E1000_SUCCESS; } /** * e1000_phy_hw_reset - Hard PHY reset * @hw: pointer to the HW structure * * Performs a hard PHY reset. This is a function pointer entry point called * by drivers. **/ s32 e1000_phy_hw_reset(struct e1000_hw *hw) { if (hw->phy.ops.reset) return hw->phy.ops.reset(hw); return E1000_SUCCESS; } /** * e1000_phy_commit - Soft PHY reset * @hw: pointer to the HW structure * * Performs a soft PHY reset on those that apply. This is a function pointer * entry point called by drivers. **/ s32 e1000_phy_commit(struct e1000_hw *hw) { if (hw->phy.ops.commit) return hw->phy.ops.commit(hw); return E1000_SUCCESS; } /** * e1000_set_d0_lplu_state - Sets low power link up state for D0 * @hw: pointer to the HW structure * @active: boolean used to enable/disable lplu * * Success returns 0, Failure returns 1 * * The low power link up (lplu) state is set to the power management level D0 * and SmartSpeed is disabled when active is TRUE, else clear lplu for D0 * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU * is used during Dx states where the power conservation is most important. * During driver activity, SmartSpeed should be enabled so performance is * maintained. This is a function pointer entry point called by drivers. **/ s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active) { if (hw->phy.ops.set_d0_lplu_state) return hw->phy.ops.set_d0_lplu_state(hw, active); return E1000_SUCCESS; } /** * e1000_set_d3_lplu_state - Sets low power link up state for D3 * @hw: pointer to the HW structure * @active: boolean used to enable/disable lplu * * Success returns 0, Failure returns 1 * * The low power link up (lplu) state is set to the power management level D3 * and SmartSpeed is disabled when active is TRUE, else clear lplu for D3 * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU * is used during Dx states where the power conservation is most important. * During driver activity, SmartSpeed should be enabled so performance is * maintained. This is a function pointer entry point called by drivers. **/ s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active) { if (hw->phy.ops.set_d3_lplu_state) return hw->phy.ops.set_d3_lplu_state(hw, active); return E1000_SUCCESS; } /** * e1000_read_mac_addr - Reads MAC address * @hw: pointer to the HW structure * * Reads the MAC address out of the adapter and stores it in the HW structure. * Currently no func pointer exists and all implementations are handled in the * generic version of this function. **/ s32 e1000_read_mac_addr(struct e1000_hw *hw) { if (hw->mac.ops.read_mac_addr) return hw->mac.ops.read_mac_addr(hw); return e1000_read_mac_addr_generic(hw); } /** * e1000_read_pba_string - Read device part number string * @hw: pointer to the HW structure * @pba_num: pointer to device part number * @pba_num_size: size of part number buffer * * Reads the product board assembly (PBA) number from the EEPROM and stores * the value in pba_num. * Currently no func pointer exists and all implementations are handled in the * generic version of this function. **/ s32 e1000_read_pba_string(struct e1000_hw *hw, u8 *pba_num, u32 pba_num_size) { return e1000_read_pba_string_generic(hw, pba_num, pba_num_size); } /** * e1000_read_pba_length - Read device part number string length * @hw: pointer to the HW structure * @pba_num_size: size of part number buffer * * Reads the product board assembly (PBA) number length from the EEPROM and * stores the value in pba_num. * Currently no func pointer exists and all implementations are handled in the * generic version of this function. **/ s32 e1000_read_pba_length(struct e1000_hw *hw, u32 *pba_num_size) { return e1000_read_pba_length_generic(hw, pba_num_size); } /** * e1000_validate_nvm_checksum - Verifies NVM (EEPROM) checksum * @hw: pointer to the HW structure * * Validates the NVM checksum is correct. This is a function pointer entry * point called by drivers. **/ s32 e1000_validate_nvm_checksum(struct e1000_hw *hw) { if (hw->nvm.ops.validate) return hw->nvm.ops.validate(hw); return -E1000_ERR_CONFIG; } /** * e1000_update_nvm_checksum - Updates NVM (EEPROM) checksum * @hw: pointer to the HW structure * * Updates the NVM checksum. Currently no func pointer exists and all * implementations are handled in the generic version of this function. **/ s32 e1000_update_nvm_checksum(struct e1000_hw *hw) { if (hw->nvm.ops.update) return hw->nvm.ops.update(hw); return -E1000_ERR_CONFIG; } /** * e1000_reload_nvm - Reloads EEPROM * @hw: pointer to the HW structure * * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the * extended control register. **/ void e1000_reload_nvm(struct e1000_hw *hw) { if (hw->nvm.ops.reload) hw->nvm.ops.reload(hw); } /** * e1000_read_nvm - Reads NVM (EEPROM) * @hw: pointer to the HW structure * @offset: the word offset to read * @words: number of 16-bit words to read * @data: pointer to the properly sized buffer for the data. * * Reads 16-bit chunks of data from the NVM (EEPROM). This is a function * pointer entry point called by drivers. **/ s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { if (hw->nvm.ops.read) return hw->nvm.ops.read(hw, offset, words, data); return -E1000_ERR_CONFIG; } /** * e1000_write_nvm - Writes to NVM (EEPROM) * @hw: pointer to the HW structure * @offset: the word offset to read * @words: number of 16-bit words to write * @data: pointer to the properly sized buffer for the data. * * Writes 16-bit chunks of data to the NVM (EEPROM). This is a function * pointer entry point called by drivers. **/ s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) { if (hw->nvm.ops.write) return hw->nvm.ops.write(hw, offset, words, data); return E1000_SUCCESS; } /** * e1000_write_8bit_ctrl_reg - Writes 8bit Control register * @hw: pointer to the HW structure * @reg: 32bit register offset * @offset: the register to write * @data: the value to write. * * Writes the PHY register at offset with the value in data. * This is a function pointer entry point called by drivers. **/ s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, u32 offset, u8 data) { return e1000_write_8bit_ctrl_reg_generic(hw, reg, offset, data); } /** * e1000_power_up_phy - Restores link in case of PHY power down * @hw: pointer to the HW structure * * The phy may be powered down to save power, to turn off link when the * driver is unloaded, or wake on lan is not enabled (among others). **/ void e1000_power_up_phy(struct e1000_hw *hw) { if (hw->phy.ops.power_up) hw->phy.ops.power_up(hw); e1000_setup_link(hw); } /** * e1000_power_down_phy - Power down PHY * @hw: pointer to the HW structure * * The phy may be powered down to save power, to turn off link when the * driver is unloaded, or wake on lan is not enabled (among others). **/ void e1000_power_down_phy(struct e1000_hw *hw) { if (hw->phy.ops.power_down) hw->phy.ops.power_down(hw); } /** * e1000_power_up_fiber_serdes_link - Power up serdes link * @hw: pointer to the HW structure * * Power on the optics and PCS. **/ void e1000_power_up_fiber_serdes_link(struct e1000_hw *hw) { if (hw->mac.ops.power_up_serdes) hw->mac.ops.power_up_serdes(hw); } /** * e1000_shutdown_fiber_serdes_link - Remove link during power down * @hw: pointer to the HW structure * * Shutdown the optics and PCS on driver unload. **/ void e1000_shutdown_fiber_serdes_link(struct e1000_hw *hw) { if (hw->mac.ops.shutdown_serdes) hw->mac.ops.shutdown_serdes(hw); } Index: projects/clang1000-import/sys/dev/e1000/e1000_hw.h =================================================================== --- projects/clang1000-import/sys/dev/e1000/e1000_hw.h (revision 356919) +++ projects/clang1000-import/sys/dev/e1000/e1000_hw.h (revision 356920) @@ -1,1047 +1,1048 @@ /****************************************************************************** SPDX-License-Identifier: BSD-3-Clause Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef _E1000_HW_H_ #define _E1000_HW_H_ #include "e1000_osdep.h" #include "e1000_regs.h" #include "e1000_defines.h" struct e1000_hw; #define E1000_DEV_ID_82542 0x1000 #define E1000_DEV_ID_82543GC_FIBER 0x1001 #define E1000_DEV_ID_82543GC_COPPER 0x1004 #define E1000_DEV_ID_82544EI_COPPER 0x1008 #define E1000_DEV_ID_82544EI_FIBER 0x1009 #define E1000_DEV_ID_82544GC_COPPER 0x100C #define E1000_DEV_ID_82544GC_LOM 0x100D #define E1000_DEV_ID_82540EM 0x100E #define E1000_DEV_ID_82540EM_LOM 0x1015 #define E1000_DEV_ID_82540EP_LOM 0x1016 #define E1000_DEV_ID_82540EP 0x1017 #define E1000_DEV_ID_82540EP_LP 0x101E #define E1000_DEV_ID_82545EM_COPPER 0x100F #define E1000_DEV_ID_82545EM_FIBER 0x1011 #define E1000_DEV_ID_82545GM_COPPER 0x1026 #define E1000_DEV_ID_82545GM_FIBER 0x1027 #define E1000_DEV_ID_82545GM_SERDES 0x1028 #define E1000_DEV_ID_82546EB_COPPER 0x1010 #define E1000_DEV_ID_82546EB_FIBER 0x1012 #define E1000_DEV_ID_82546EB_QUAD_COPPER 0x101D #define E1000_DEV_ID_82546GB_COPPER 0x1079 #define E1000_DEV_ID_82546GB_FIBER 0x107A #define E1000_DEV_ID_82546GB_SERDES 0x107B #define E1000_DEV_ID_82546GB_PCIE 0x108A #define E1000_DEV_ID_82546GB_QUAD_COPPER 0x1099 #define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5 #define E1000_DEV_ID_82541EI 0x1013 #define E1000_DEV_ID_82541EI_MOBILE 0x1018 #define E1000_DEV_ID_82541ER_LOM 0x1014 #define E1000_DEV_ID_82541ER 0x1078 #define E1000_DEV_ID_82541GI 0x1076 #define E1000_DEV_ID_82541GI_LF 0x107C #define E1000_DEV_ID_82541GI_MOBILE 0x1077 #define E1000_DEV_ID_82547EI 0x1019 #define E1000_DEV_ID_82547EI_MOBILE 0x101A #define E1000_DEV_ID_82547GI 0x1075 #define E1000_DEV_ID_82571EB_COPPER 0x105E #define E1000_DEV_ID_82571EB_FIBER 0x105F #define E1000_DEV_ID_82571EB_SERDES 0x1060 #define E1000_DEV_ID_82571EB_SERDES_DUAL 0x10D9 #define E1000_DEV_ID_82571EB_SERDES_QUAD 0x10DA #define E1000_DEV_ID_82571EB_QUAD_COPPER 0x10A4 #define E1000_DEV_ID_82571PT_QUAD_COPPER 0x10D5 #define E1000_DEV_ID_82571EB_QUAD_FIBER 0x10A5 #define E1000_DEV_ID_82571EB_QUAD_COPPER_LP 0x10BC #define E1000_DEV_ID_82572EI_COPPER 0x107D #define E1000_DEV_ID_82572EI_FIBER 0x107E #define E1000_DEV_ID_82572EI_SERDES 0x107F #define E1000_DEV_ID_82572EI 0x10B9 #define E1000_DEV_ID_82573E 0x108B #define E1000_DEV_ID_82573E_IAMT 0x108C #define E1000_DEV_ID_82573L 0x109A #define E1000_DEV_ID_82574L 0x10D3 #define E1000_DEV_ID_82574LA 0x10F6 #define E1000_DEV_ID_82583V 0x150C #define E1000_DEV_ID_80003ES2LAN_COPPER_DPT 0x1096 #define E1000_DEV_ID_80003ES2LAN_SERDES_DPT 0x1098 #define E1000_DEV_ID_80003ES2LAN_COPPER_SPT 0x10BA #define E1000_DEV_ID_80003ES2LAN_SERDES_SPT 0x10BB #define E1000_DEV_ID_ICH8_82567V_3 0x1501 #define E1000_DEV_ID_ICH8_IGP_M_AMT 0x1049 #define E1000_DEV_ID_ICH8_IGP_AMT 0x104A #define E1000_DEV_ID_ICH8_IGP_C 0x104B #define E1000_DEV_ID_ICH8_IFE 0x104C #define E1000_DEV_ID_ICH8_IFE_GT 0x10C4 #define E1000_DEV_ID_ICH8_IFE_G 0x10C5 #define E1000_DEV_ID_ICH8_IGP_M 0x104D #define E1000_DEV_ID_ICH9_IGP_M 0x10BF #define E1000_DEV_ID_ICH9_IGP_M_AMT 0x10F5 #define E1000_DEV_ID_ICH9_IGP_M_V 0x10CB #define E1000_DEV_ID_ICH9_IGP_AMT 0x10BD #define E1000_DEV_ID_ICH9_BM 0x10E5 #define E1000_DEV_ID_ICH9_IGP_C 0x294C #define E1000_DEV_ID_ICH9_IFE 0x10C0 #define E1000_DEV_ID_ICH9_IFE_GT 0x10C3 #define E1000_DEV_ID_ICH9_IFE_G 0x10C2 #define E1000_DEV_ID_ICH10_R_BM_LM 0x10CC #define E1000_DEV_ID_ICH10_R_BM_LF 0x10CD #define E1000_DEV_ID_ICH10_R_BM_V 0x10CE #define E1000_DEV_ID_ICH10_D_BM_LM 0x10DE #define E1000_DEV_ID_ICH10_D_BM_LF 0x10DF #define E1000_DEV_ID_ICH10_D_BM_V 0x1525 #define E1000_DEV_ID_PCH_M_HV_LM 0x10EA #define E1000_DEV_ID_PCH_M_HV_LC 0x10EB #define E1000_DEV_ID_PCH_D_HV_DM 0x10EF #define E1000_DEV_ID_PCH_D_HV_DC 0x10F0 #define E1000_DEV_ID_PCH2_LV_LM 0x1502 #define E1000_DEV_ID_PCH2_LV_V 0x1503 #define E1000_DEV_ID_PCH_LPT_I217_LM 0x153A #define E1000_DEV_ID_PCH_LPT_I217_V 0x153B #define E1000_DEV_ID_PCH_LPTLP_I218_LM 0x155A #define E1000_DEV_ID_PCH_LPTLP_I218_V 0x1559 #define E1000_DEV_ID_PCH_I218_LM2 0x15A0 #define E1000_DEV_ID_PCH_I218_V2 0x15A1 #define E1000_DEV_ID_PCH_I218_LM3 0x15A2 /* Wildcat Point PCH */ #define E1000_DEV_ID_PCH_I218_V3 0x15A3 /* Wildcat Point PCH */ #define E1000_DEV_ID_PCH_SPT_I219_LM 0x156F /* Sunrise Point PCH */ #define E1000_DEV_ID_PCH_SPT_I219_V 0x1570 /* Sunrise Point PCH */ #define E1000_DEV_ID_PCH_SPT_I219_LM2 0x15B7 /* Sunrise Point-H PCH */ #define E1000_DEV_ID_PCH_SPT_I219_V2 0x15B8 /* Sunrise Point-H PCH */ #define E1000_DEV_ID_PCH_LBG_I219_LM3 0x15B9 /* LEWISBURG PCH */ #define E1000_DEV_ID_PCH_SPT_I219_LM4 0x15D7 #define E1000_DEV_ID_PCH_SPT_I219_V4 0x15D8 #define E1000_DEV_ID_PCH_SPT_I219_LM5 0x15E3 #define E1000_DEV_ID_PCH_SPT_I219_V5 0x15D6 #define E1000_DEV_ID_PCH_CNP_I219_LM6 0x15BD #define E1000_DEV_ID_PCH_CNP_I219_V6 0x15BE #define E1000_DEV_ID_PCH_CNP_I219_LM7 0x15BB #define E1000_DEV_ID_PCH_CNP_I219_V7 0x15BC #define E1000_DEV_ID_PCH_ICP_I219_LM8 0x15DF #define E1000_DEV_ID_PCH_ICP_I219_V8 0x15E0 #define E1000_DEV_ID_PCH_ICP_I219_LM9 0x15E1 #define E1000_DEV_ID_PCH_ICP_I219_V9 0x15E2 +#define E1000_DEV_ID_PCH_ICP_I219_V10 0x0D4F #define E1000_DEV_ID_82576 0x10C9 #define E1000_DEV_ID_82576_FIBER 0x10E6 #define E1000_DEV_ID_82576_SERDES 0x10E7 #define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8 #define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526 #define E1000_DEV_ID_82576_NS 0x150A #define E1000_DEV_ID_82576_NS_SERDES 0x1518 #define E1000_DEV_ID_82576_SERDES_QUAD 0x150D #define E1000_DEV_ID_82576_VF 0x10CA #define E1000_DEV_ID_82576_VF_HV 0x152D #define E1000_DEV_ID_I350_VF 0x1520 #define E1000_DEV_ID_I350_VF_HV 0x152F #define E1000_DEV_ID_82575EB_COPPER 0x10A7 #define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9 #define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6 #define E1000_DEV_ID_82580_COPPER 0x150E #define E1000_DEV_ID_82580_FIBER 0x150F #define E1000_DEV_ID_82580_SERDES 0x1510 #define E1000_DEV_ID_82580_SGMII 0x1511 #define E1000_DEV_ID_82580_COPPER_DUAL 0x1516 #define E1000_DEV_ID_82580_QUAD_FIBER 0x1527 #define E1000_DEV_ID_I350_COPPER 0x1521 #define E1000_DEV_ID_I350_FIBER 0x1522 #define E1000_DEV_ID_I350_SERDES 0x1523 #define E1000_DEV_ID_I350_SGMII 0x1524 #define E1000_DEV_ID_I350_DA4 0x1546 #define E1000_DEV_ID_I210_COPPER 0x1533 #define E1000_DEV_ID_I210_COPPER_OEM1 0x1534 #define E1000_DEV_ID_I210_COPPER_IT 0x1535 #define E1000_DEV_ID_I210_FIBER 0x1536 #define E1000_DEV_ID_I210_SERDES 0x1537 #define E1000_DEV_ID_I210_SGMII 0x1538 #define E1000_DEV_ID_I210_COPPER_FLASHLESS 0x157B #define E1000_DEV_ID_I210_SERDES_FLASHLESS 0x157C #define E1000_DEV_ID_I211_COPPER 0x1539 #define E1000_DEV_ID_I354_BACKPLANE_1GBPS 0x1F40 #define E1000_DEV_ID_I354_SGMII 0x1F41 #define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS 0x1F45 #define E1000_DEV_ID_DH89XXCC_SGMII 0x0438 #define E1000_DEV_ID_DH89XXCC_SERDES 0x043A #define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C #define E1000_DEV_ID_DH89XXCC_SFP 0x0440 #define E1000_REVISION_0 0 #define E1000_REVISION_1 1 #define E1000_REVISION_2 2 #define E1000_REVISION_3 3 #define E1000_REVISION_4 4 #define E1000_FUNC_0 0 #define E1000_FUNC_1 1 #define E1000_FUNC_2 2 #define E1000_FUNC_3 3 #define E1000_ALT_MAC_ADDRESS_OFFSET_LAN0 0 #define E1000_ALT_MAC_ADDRESS_OFFSET_LAN1 3 #define E1000_ALT_MAC_ADDRESS_OFFSET_LAN2 6 #define E1000_ALT_MAC_ADDRESS_OFFSET_LAN3 9 enum e1000_mac_type { e1000_undefined = 0, e1000_82542, e1000_82543, e1000_82544, e1000_82540, e1000_82545, e1000_82545_rev_3, e1000_82546, e1000_82546_rev_3, e1000_82541, e1000_82541_rev_2, e1000_82547, e1000_82547_rev_2, e1000_82571, e1000_82572, e1000_82573, e1000_82574, e1000_82583, e1000_80003es2lan, e1000_ich8lan, e1000_ich9lan, e1000_ich10lan, e1000_pchlan, e1000_pch2lan, e1000_pch_lpt, e1000_pch_spt, e1000_pch_cnp, e1000_82575, e1000_82576, e1000_82580, e1000_i350, e1000_i354, e1000_i210, e1000_i211, e1000_vfadapt, e1000_vfadapt_i350, e1000_num_macs /* List is 1-based, so subtract 1 for TRUE count. */ }; enum e1000_media_type { e1000_media_type_unknown = 0, e1000_media_type_copper = 1, e1000_media_type_fiber = 2, e1000_media_type_internal_serdes = 3, e1000_num_media_types }; enum e1000_nvm_type { e1000_nvm_unknown = 0, e1000_nvm_none, e1000_nvm_eeprom_spi, e1000_nvm_eeprom_microwire, e1000_nvm_flash_hw, e1000_nvm_invm, e1000_nvm_flash_sw }; enum e1000_nvm_override { e1000_nvm_override_none = 0, e1000_nvm_override_spi_small, e1000_nvm_override_spi_large, e1000_nvm_override_microwire_small, e1000_nvm_override_microwire_large }; enum e1000_phy_type { e1000_phy_unknown = 0, e1000_phy_none, e1000_phy_m88, e1000_phy_igp, e1000_phy_igp_2, e1000_phy_gg82563, e1000_phy_igp_3, e1000_phy_ife, e1000_phy_bm, e1000_phy_82578, e1000_phy_82577, e1000_phy_82579, e1000_phy_i217, e1000_phy_82580, e1000_phy_vf, e1000_phy_i210, }; enum e1000_bus_type { e1000_bus_type_unknown = 0, e1000_bus_type_pci, e1000_bus_type_pcix, e1000_bus_type_pci_express, e1000_bus_type_reserved }; enum e1000_bus_speed { e1000_bus_speed_unknown = 0, e1000_bus_speed_33, e1000_bus_speed_66, e1000_bus_speed_100, e1000_bus_speed_120, e1000_bus_speed_133, e1000_bus_speed_2500, e1000_bus_speed_5000, e1000_bus_speed_reserved }; enum e1000_bus_width { e1000_bus_width_unknown = 0, e1000_bus_width_pcie_x1, e1000_bus_width_pcie_x2, e1000_bus_width_pcie_x4 = 4, e1000_bus_width_pcie_x8 = 8, e1000_bus_width_32, e1000_bus_width_64, e1000_bus_width_reserved }; enum e1000_1000t_rx_status { e1000_1000t_rx_status_not_ok = 0, e1000_1000t_rx_status_ok, e1000_1000t_rx_status_undefined = 0xFF }; enum e1000_rev_polarity { e1000_rev_polarity_normal = 0, e1000_rev_polarity_reversed, e1000_rev_polarity_undefined = 0xFF }; enum e1000_fc_mode { e1000_fc_none = 0, e1000_fc_rx_pause, e1000_fc_tx_pause, e1000_fc_full, e1000_fc_default = 0xFF }; enum e1000_ffe_config { e1000_ffe_config_enabled = 0, e1000_ffe_config_active, e1000_ffe_config_blocked }; enum e1000_dsp_config { e1000_dsp_config_disabled = 0, e1000_dsp_config_enabled, e1000_dsp_config_activated, e1000_dsp_config_undefined = 0xFF }; enum e1000_ms_type { e1000_ms_hw_default = 0, e1000_ms_force_master, e1000_ms_force_slave, e1000_ms_auto }; enum e1000_smart_speed { e1000_smart_speed_default = 0, e1000_smart_speed_on, e1000_smart_speed_off }; enum e1000_serdes_link_state { e1000_serdes_link_down = 0, e1000_serdes_link_autoneg_progress, e1000_serdes_link_autoneg_complete, e1000_serdes_link_forced_up }; #define __le16 u16 #define __le32 u32 #define __le64 u64 /* Receive Descriptor */ struct e1000_rx_desc { __le64 buffer_addr; /* Address of the descriptor's data buffer */ __le16 length; /* Length of data DMAed into data buffer */ __le16 csum; /* Packet checksum */ u8 status; /* Descriptor status */ u8 errors; /* Descriptor Errors */ __le16 special; }; /* Receive Descriptor - Extended */ union e1000_rx_desc_extended { struct { __le64 buffer_addr; __le64 reserved; } read; struct { struct { __le32 mrq; /* Multiple Rx Queues */ union { __le32 rss; /* RSS Hash */ struct { __le16 ip_id; /* IP id */ __le16 csum; /* Packet Checksum */ } csum_ip; } hi_dword; } lower; struct { __le32 status_error; /* ext status/error */ __le16 length; __le16 vlan; /* VLAN tag */ } upper; } wb; /* writeback */ }; #define MAX_PS_BUFFERS 4 /* Number of packet split data buffers (not including the header buffer) */ #define PS_PAGE_BUFFERS (MAX_PS_BUFFERS - 1) /* Receive Descriptor - Packet Split */ union e1000_rx_desc_packet_split { struct { /* one buffer for protocol header(s), three data buffers */ __le64 buffer_addr[MAX_PS_BUFFERS]; } read; struct { struct { __le32 mrq; /* Multiple Rx Queues */ union { __le32 rss; /* RSS Hash */ struct { __le16 ip_id; /* IP id */ __le16 csum; /* Packet Checksum */ } csum_ip; } hi_dword; } lower; struct { __le32 status_error; /* ext status/error */ __le16 length0; /* length of buffer 0 */ __le16 vlan; /* VLAN tag */ } middle; struct { __le16 header_status; /* length of buffers 1-3 */ __le16 length[PS_PAGE_BUFFERS]; } upper; __le64 reserved; } wb; /* writeback */ }; /* Transmit Descriptor */ struct e1000_tx_desc { __le64 buffer_addr; /* Address of the descriptor's data buffer */ union { __le32 data; struct { __le16 length; /* Data buffer length */ u8 cso; /* Checksum offset */ u8 cmd; /* Descriptor control */ } flags; } lower; union { __le32 data; struct { u8 status; /* Descriptor status */ u8 css; /* Checksum start */ __le16 special; } fields; } upper; }; /* Offload Context Descriptor */ struct e1000_context_desc { union { __le32 ip_config; struct { u8 ipcss; /* IP checksum start */ u8 ipcso; /* IP checksum offset */ __le16 ipcse; /* IP checksum end */ } ip_fields; } lower_setup; union { __le32 tcp_config; struct { u8 tucss; /* TCP checksum start */ u8 tucso; /* TCP checksum offset */ __le16 tucse; /* TCP checksum end */ } tcp_fields; } upper_setup; __le32 cmd_and_length; union { __le32 data; struct { u8 status; /* Descriptor status */ u8 hdr_len; /* Header length */ __le16 mss; /* Maximum segment size */ } fields; } tcp_seg_setup; }; /* Offload data descriptor */ struct e1000_data_desc { __le64 buffer_addr; /* Address of the descriptor's buffer address */ union { __le32 data; struct { __le16 length; /* Data buffer length */ u8 typ_len_ext; u8 cmd; } flags; } lower; union { __le32 data; struct { u8 status; /* Descriptor status */ u8 popts; /* Packet Options */ __le16 special; } fields; } upper; }; /* Statistics counters collected by the MAC */ struct e1000_hw_stats { u64 crcerrs; u64 algnerrc; u64 symerrs; u64 rxerrc; u64 mpc; u64 scc; u64 ecol; u64 mcc; u64 latecol; u64 colc; u64 dc; u64 tncrs; u64 sec; u64 cexterr; u64 rlec; u64 xonrxc; u64 xontxc; u64 xoffrxc; u64 xofftxc; u64 fcruc; u64 prc64; u64 prc127; u64 prc255; u64 prc511; u64 prc1023; u64 prc1522; u64 gprc; u64 bprc; u64 mprc; u64 gptc; u64 gorc; u64 gotc; u64 rnbc; u64 ruc; u64 rfc; u64 roc; u64 rjc; u64 mgprc; u64 mgpdc; u64 mgptc; u64 tor; u64 tot; u64 tpr; u64 tpt; u64 ptc64; u64 ptc127; u64 ptc255; u64 ptc511; u64 ptc1023; u64 ptc1522; u64 mptc; u64 bptc; u64 tsctc; u64 tsctfc; u64 iac; u64 icrxptc; u64 icrxatc; u64 ictxptc; u64 ictxatc; u64 ictxqec; u64 ictxqmtc; u64 icrxdmtc; u64 icrxoc; u64 cbtmpc; u64 htdpmc; u64 cbrdpc; u64 cbrmpc; u64 rpthc; u64 hgptc; u64 htcbdpc; u64 hgorc; u64 hgotc; u64 lenerrs; u64 scvpc; u64 hrmpc; u64 doosync; u64 o2bgptc; u64 o2bspc; u64 b2ospc; u64 b2ogprc; }; struct e1000_vf_stats { u64 base_gprc; u64 base_gptc; u64 base_gorc; u64 base_gotc; u64 base_mprc; u64 base_gotlbc; u64 base_gptlbc; u64 base_gorlbc; u64 base_gprlbc; u32 last_gprc; u32 last_gptc; u32 last_gorc; u32 last_gotc; u32 last_mprc; u32 last_gotlbc; u32 last_gptlbc; u32 last_gorlbc; u32 last_gprlbc; u64 gprc; u64 gptc; u64 gorc; u64 gotc; u64 mprc; u64 gotlbc; u64 gptlbc; u64 gorlbc; u64 gprlbc; }; struct e1000_phy_stats { u32 idle_errors; u32 receive_errors; }; struct e1000_host_mng_dhcp_cookie { u32 signature; u8 status; u8 reserved0; u16 vlan_id; u32 reserved1; u16 reserved2; u8 reserved3; u8 checksum; }; /* Host Interface "Rev 1" */ struct e1000_host_command_header { u8 command_id; u8 command_length; u8 command_options; u8 checksum; }; #define E1000_HI_MAX_DATA_LENGTH 252 struct e1000_host_command_info { struct e1000_host_command_header command_header; u8 command_data[E1000_HI_MAX_DATA_LENGTH]; }; /* Host Interface "Rev 2" */ struct e1000_host_mng_command_header { u8 command_id; u8 checksum; u16 reserved1; u16 reserved2; u16 command_length; }; #define E1000_HI_MAX_MNG_DATA_LENGTH 0x6F8 struct e1000_host_mng_command_info { struct e1000_host_mng_command_header command_header; u8 command_data[E1000_HI_MAX_MNG_DATA_LENGTH]; }; #include "e1000_mac.h" #include "e1000_phy.h" #include "e1000_nvm.h" #include "e1000_manage.h" #include "e1000_mbx.h" /* Function pointers for the MAC. */ struct e1000_mac_operations { s32 (*init_params)(struct e1000_hw *); s32 (*id_led_init)(struct e1000_hw *); s32 (*blink_led)(struct e1000_hw *); bool (*check_mng_mode)(struct e1000_hw *); s32 (*check_for_link)(struct e1000_hw *); s32 (*cleanup_led)(struct e1000_hw *); void (*clear_hw_cntrs)(struct e1000_hw *); void (*clear_vfta)(struct e1000_hw *); s32 (*get_bus_info)(struct e1000_hw *); void (*set_lan_id)(struct e1000_hw *); s32 (*get_link_up_info)(struct e1000_hw *, u16 *, u16 *); s32 (*led_on)(struct e1000_hw *); s32 (*led_off)(struct e1000_hw *); void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32); s32 (*reset_hw)(struct e1000_hw *); s32 (*init_hw)(struct e1000_hw *); void (*shutdown_serdes)(struct e1000_hw *); void (*power_up_serdes)(struct e1000_hw *); s32 (*setup_link)(struct e1000_hw *); s32 (*setup_physical_interface)(struct e1000_hw *); s32 (*setup_led)(struct e1000_hw *); void (*write_vfta)(struct e1000_hw *, u32, u32); void (*config_collision_dist)(struct e1000_hw *); int (*rar_set)(struct e1000_hw *, u8*, u32); s32 (*read_mac_addr)(struct e1000_hw *); s32 (*validate_mdi_setting)(struct e1000_hw *); s32 (*set_obff_timer)(struct e1000_hw *, u32); s32 (*acquire_swfw_sync)(struct e1000_hw *, u16); void (*release_swfw_sync)(struct e1000_hw *, u16); }; /* When to use various PHY register access functions: * * Func Caller * Function Does Does When to use * ~~~~~~~~~~~~ ~~~~~ ~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * X_reg L,P,A n/a for simple PHY reg accesses * X_reg_locked P,A L for multiple accesses of different regs * on different pages * X_reg_page A L,P for multiple accesses of different regs * on the same page * * Where X=[read|write], L=locking, P=sets page, A=register access * */ struct e1000_phy_operations { s32 (*init_params)(struct e1000_hw *); s32 (*acquire)(struct e1000_hw *); s32 (*cfg_on_link_up)(struct e1000_hw *); s32 (*check_polarity)(struct e1000_hw *); s32 (*check_reset_block)(struct e1000_hw *); s32 (*commit)(struct e1000_hw *); s32 (*force_speed_duplex)(struct e1000_hw *); s32 (*get_cfg_done)(struct e1000_hw *hw); s32 (*get_cable_length)(struct e1000_hw *); s32 (*get_info)(struct e1000_hw *); s32 (*set_page)(struct e1000_hw *, u16); s32 (*read_reg)(struct e1000_hw *, u32, u16 *); s32 (*read_reg_locked)(struct e1000_hw *, u32, u16 *); s32 (*read_reg_page)(struct e1000_hw *, u32, u16 *); void (*release)(struct e1000_hw *); s32 (*reset)(struct e1000_hw *); s32 (*set_d0_lplu_state)(struct e1000_hw *, bool); s32 (*set_d3_lplu_state)(struct e1000_hw *, bool); s32 (*write_reg)(struct e1000_hw *, u32, u16); s32 (*write_reg_locked)(struct e1000_hw *, u32, u16); s32 (*write_reg_page)(struct e1000_hw *, u32, u16); void (*power_up)(struct e1000_hw *); void (*power_down)(struct e1000_hw *); s32 (*read_i2c_byte)(struct e1000_hw *, u8, u8, u8 *); s32 (*write_i2c_byte)(struct e1000_hw *, u8, u8, u8); }; /* Function pointers for the NVM. */ struct e1000_nvm_operations { s32 (*init_params)(struct e1000_hw *); s32 (*acquire)(struct e1000_hw *); s32 (*read)(struct e1000_hw *, u16, u16, u16 *); void (*release)(struct e1000_hw *); void (*reload)(struct e1000_hw *); s32 (*update)(struct e1000_hw *); s32 (*valid_led_default)(struct e1000_hw *, u16 *); s32 (*validate)(struct e1000_hw *); s32 (*write)(struct e1000_hw *, u16, u16, u16 *); }; struct e1000_mac_info { struct e1000_mac_operations ops; u8 addr[ETHER_ADDR_LEN]; u8 perm_addr[ETHER_ADDR_LEN]; enum e1000_mac_type type; u32 collision_delta; u32 ledctl_default; u32 ledctl_mode1; u32 ledctl_mode2; u32 mc_filter_type; u32 tx_packet_delta; u32 txcw; u16 current_ifs_val; u16 ifs_max_val; u16 ifs_min_val; u16 ifs_ratio; u16 ifs_step_size; u16 mta_reg_count; u16 uta_reg_count; /* Maximum size of the MTA register table in all supported adapters */ #define MAX_MTA_REG 128 u32 mta_shadow[MAX_MTA_REG]; u16 rar_entry_count; u8 forced_speed_duplex; bool adaptive_ifs; bool has_fwsm; bool arc_subsystem_valid; bool asf_firmware_present; bool autoneg; bool autoneg_failed; bool get_link_status; bool in_ifs_mode; bool report_tx_early; enum e1000_serdes_link_state serdes_link_state; bool serdes_has_link; bool tx_pkt_filtering; u32 max_frame_size; }; struct e1000_phy_info { struct e1000_phy_operations ops; enum e1000_phy_type type; enum e1000_1000t_rx_status local_rx; enum e1000_1000t_rx_status remote_rx; enum e1000_ms_type ms_type; enum e1000_ms_type original_ms_type; enum e1000_rev_polarity cable_polarity; enum e1000_smart_speed smart_speed; u32 addr; u32 id; u32 reset_delay_us; /* in usec */ u32 revision; enum e1000_media_type media_type; u16 autoneg_advertised; u16 autoneg_mask; u16 cable_length; u16 max_cable_length; u16 min_cable_length; u8 mdix; bool disable_polarity_correction; bool is_mdix; bool polarity_correction; bool speed_downgraded; bool autoneg_wait_to_complete; }; struct e1000_nvm_info { struct e1000_nvm_operations ops; enum e1000_nvm_type type; enum e1000_nvm_override override; u32 flash_bank_size; u32 flash_base_addr; u16 word_size; u16 delay_usec; u16 address_bits; u16 opcode_bits; u16 page_size; }; struct e1000_bus_info { enum e1000_bus_type type; enum e1000_bus_speed speed; enum e1000_bus_width width; u16 func; u16 pci_cmd_word; }; struct e1000_fc_info { u32 high_water; /* Flow control high-water mark */ u32 low_water; /* Flow control low-water mark */ u16 pause_time; /* Flow control pause timer */ u16 refresh_time; /* Flow control refresh timer */ bool send_xon; /* Flow control send XON */ bool strict_ieee; /* Strict IEEE mode */ enum e1000_fc_mode current_mode; /* FC mode in effect */ enum e1000_fc_mode requested_mode; /* FC mode requested by caller */ }; struct e1000_mbx_operations { s32 (*init_params)(struct e1000_hw *hw); s32 (*read)(struct e1000_hw *, u32 *, u16, u16); s32 (*write)(struct e1000_hw *, u32 *, u16, u16); s32 (*read_posted)(struct e1000_hw *, u32 *, u16, u16); s32 (*write_posted)(struct e1000_hw *, u32 *, u16, u16); s32 (*check_for_msg)(struct e1000_hw *, u16); s32 (*check_for_ack)(struct e1000_hw *, u16); s32 (*check_for_rst)(struct e1000_hw *, u16); }; struct e1000_mbx_stats { u32 msgs_tx; u32 msgs_rx; u32 acks; u32 reqs; u32 rsts; }; struct e1000_mbx_info { struct e1000_mbx_operations ops; struct e1000_mbx_stats stats; u32 timeout; u32 usec_delay; u16 size; }; struct e1000_dev_spec_82541 { enum e1000_dsp_config dsp_config; enum e1000_ffe_config ffe_config; u16 spd_default; bool phy_init_script; }; struct e1000_dev_spec_82542 { bool dma_fairness; }; struct e1000_dev_spec_82543 { u32 tbi_compatibility; bool dma_fairness; bool init_phy_disabled; }; struct e1000_dev_spec_82571 { bool laa_is_present; u32 smb_counter; }; struct e1000_dev_spec_80003es2lan { bool mdic_wa_enable; }; struct e1000_shadow_ram { u16 value; bool modified; }; #define E1000_SHADOW_RAM_WORDS 2048 /* I218 PHY Ultra Low Power (ULP) states */ enum e1000_ulp_state { e1000_ulp_state_unknown, e1000_ulp_state_off, e1000_ulp_state_on, }; struct e1000_dev_spec_ich8lan { bool kmrn_lock_loss_workaround_enabled; struct e1000_shadow_ram shadow_ram[E1000_SHADOW_RAM_WORDS]; bool nvm_k1_enabled; bool disable_k1_off; bool eee_disable; u16 eee_lp_ability; enum e1000_ulp_state ulp_state; bool ulp_capability_disabled; bool during_suspend_flow; bool during_dpg_exit; }; struct e1000_dev_spec_82575 { bool sgmii_active; bool global_device_reset; bool eee_disable; bool module_plugged; bool clear_semaphore_once; u32 mtu; struct sfp_e1000_flags eth_flags; u8 media_port; bool media_changed; }; struct e1000_dev_spec_vf { u32 vf_number; u32 v2p_mailbox; }; struct e1000_hw { void *back; u8 *hw_addr; u8 *flash_address; unsigned long io_base; struct e1000_mac_info mac; struct e1000_fc_info fc; struct e1000_phy_info phy; struct e1000_nvm_info nvm; struct e1000_bus_info bus; struct e1000_mbx_info mbx; struct e1000_host_mng_dhcp_cookie mng_cookie; union { struct e1000_dev_spec_82541 _82541; struct e1000_dev_spec_82542 _82542; struct e1000_dev_spec_82543 _82543; struct e1000_dev_spec_82571 _82571; struct e1000_dev_spec_80003es2lan _80003es2lan; struct e1000_dev_spec_ich8lan ich8lan; struct e1000_dev_spec_82575 _82575; struct e1000_dev_spec_vf vf; } dev_spec; u16 device_id; u16 subsystem_vendor_id; u16 subsystem_device_id; u16 vendor_id; u8 revision_id; }; #include "e1000_82541.h" #include "e1000_82543.h" #include "e1000_82571.h" #include "e1000_80003es2lan.h" #include "e1000_ich8lan.h" #include "e1000_82575.h" #include "e1000_i210.h" /* These functions must be implemented by drivers */ void e1000_pci_clear_mwi(struct e1000_hw *hw); void e1000_pci_set_mwi(struct e1000_hw *hw); s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); s32 e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); void e1000_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); void e1000_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); #endif Index: projects/clang1000-import/sys/dev/e1000/if_em.c =================================================================== --- projects/clang1000-import/sys/dev/e1000/if_em.c (revision 356919) +++ projects/clang1000-import/sys/dev/e1000/if_em.c (revision 356920) @@ -1,4609 +1,4610 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2016 Nicole Graziano * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* $FreeBSD$ */ #include "if_em.h" #include #include #define em_mac_min e1000_82547 #define igb_mac_min e1000_82575 /********************************************************************* * Driver version: *********************************************************************/ char em_driver_version[] = "7.6.1-k"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into e1000_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static pci_vendor_info_t em_vendor_info_array[] = { /* Intel(R) PRO/1000 Network Connection - Legacy em*/ PVID(0x8086, E1000_DEV_ID_82540EM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82540EM_LOM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82540EP, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82540EP_LOM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82540EP_LP, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541EI, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541ER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541ER_LOM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541EI_MOBILE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541GI, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541GI_LF, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82541GI_MOBILE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82542, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82543GC_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82543GC_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82544EI_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82544EI_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82544GC_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82544GC_LOM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545EM_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545EM_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545GM_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545GM_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82545GM_SERDES, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546EB_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546EB_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_SERDES, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_PCIE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82547EI, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82547EI_MOBILE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82547GI, "Intel(R) PRO/1000 Network Connection"), /* Intel(R) PRO/1000 Network Connection - em */ PVID(0x8086, E1000_DEV_ID_82571EB_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_SERDES, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82572EI, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82572EI_COPPER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82572EI_FIBER, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82572EI_SERDES, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82573E, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82573E_IAMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82573L, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82583V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_AMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_C, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IFE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IFE_GT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IFE_G, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_IGP_M, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH8_82567V_3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_AMT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_C, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IGP_M_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IFE, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IFE_GT, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_IFE_G, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH9_BM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82574L, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_82574LA, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_LF, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_R_BM_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_LF, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_ICH10_D_BM_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_M_HV_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_M_HV_LC, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_D_HV_DM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_D_HV_DC, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH2_LV_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH2_LV_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LPT_I217_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_I218_LM2, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_I218_V2, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_I218_LM3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_I218_V3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM4, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V4, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_LM5, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_SPT_I219_V5, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_CNP_I219_LM6, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_CNP_I219_V6, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_CNP_I219_LM7, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_CNP_I219_V7, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_LM8, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_V8, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_LM9, "Intel(R) PRO/1000 Network Connection"), PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_V9, "Intel(R) PRO/1000 Network Connection"), + PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_V10, "Intel(R) PRO/1000 Network Connection"), /* required last entry */ PVID_END }; static pci_vendor_info_t igb_vendor_info_array[] = { /* Intel(R) PRO/1000 Network Connection - igb */ PVID(0x8086, E1000_DEV_ID_82575EB_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_NS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_NS_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_SERDES_QUAD, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_QUAD_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82576_VF, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_COPPER_DUAL, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_82580_QUAD_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_SFP, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I350_VF, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_COPPER_IT, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_COPPER_OEM1, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_COPPER_FLASHLESS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_SERDES_FLASHLESS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_FIBER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_SERDES, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I210_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I211_COPPER, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I354_BACKPLANE_1GBPS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, "Intel(R) PRO/1000 PCI-Express Network Driver"), PVID(0x8086, E1000_DEV_ID_I354_SGMII, "Intel(R) PRO/1000 PCI-Express Network Driver"), /* required last entry */ PVID_END }; /********************************************************************* * Function prototypes *********************************************************************/ static void *em_register(device_t dev); static void *igb_register(device_t dev); static int em_if_attach_pre(if_ctx_t ctx); static int em_if_attach_post(if_ctx_t ctx); static int em_if_detach(if_ctx_t ctx); static int em_if_shutdown(if_ctx_t ctx); static int em_if_suspend(if_ctx_t ctx); static int em_if_resume(if_ctx_t ctx); static int em_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets); static int em_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets); static void em_if_queues_free(if_ctx_t ctx); static uint64_t em_if_get_counter(if_ctx_t, ift_counter); static void em_if_init(if_ctx_t ctx); static void em_if_stop(if_ctx_t ctx); static void em_if_media_status(if_ctx_t, struct ifmediareq *); static int em_if_media_change(if_ctx_t ctx); static int em_if_mtu_set(if_ctx_t ctx, uint32_t mtu); static void em_if_timer(if_ctx_t ctx, uint16_t qid); static void em_if_vlan_register(if_ctx_t ctx, u16 vtag); static void em_if_vlan_unregister(if_ctx_t ctx, u16 vtag); static void em_if_watchdog_reset(if_ctx_t ctx); static void em_identify_hardware(if_ctx_t ctx); static int em_allocate_pci_resources(if_ctx_t ctx); static void em_free_pci_resources(if_ctx_t ctx); static void em_reset(if_ctx_t ctx); static int em_setup_interface(if_ctx_t ctx); static int em_setup_msix(if_ctx_t ctx); static void em_initialize_transmit_unit(if_ctx_t ctx); static void em_initialize_receive_unit(if_ctx_t ctx); static void em_if_intr_enable(if_ctx_t ctx); static void em_if_intr_disable(if_ctx_t ctx); static void igb_if_intr_enable(if_ctx_t ctx); static void igb_if_intr_disable(if_ctx_t ctx); static int em_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); static int em_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid); static int igb_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid); static int igb_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid); static void em_if_multi_set(if_ctx_t ctx); static void em_if_update_admin_status(if_ctx_t ctx); static void em_if_debug(if_ctx_t ctx); static void em_update_stats_counters(struct adapter *); static void em_add_hw_stats(struct adapter *adapter); static int em_if_set_promisc(if_ctx_t ctx, int flags); static void em_setup_vlan_hw_support(struct adapter *); static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); static void em_print_nvm_info(struct adapter *); static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS); static int em_get_rs(SYSCTL_HANDLER_ARGS); static void em_print_debug_info(struct adapter *); static int em_is_valid_ether_addr(u8 *); static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS); static void em_add_int_delay_sysctl(struct adapter *, const char *, const char *, struct em_int_delay_info *, int, int); /* Management and WOL Support */ static void em_init_manageability(struct adapter *); static void em_release_manageability(struct adapter *); static void em_get_hw_control(struct adapter *); static void em_release_hw_control(struct adapter *); static void em_get_wakeup(if_ctx_t ctx); static void em_enable_wakeup(if_ctx_t ctx); static int em_enable_phy_wakeup(struct adapter *); static void em_disable_aspm(struct adapter *); int em_intr(void *arg); static void em_disable_promisc(if_ctx_t ctx); /* MSI-X handlers */ static int em_if_msix_intr_assign(if_ctx_t, int); static int em_msix_link(void *); static void em_handle_link(void *context); static void em_enable_vectors_82574(if_ctx_t); static int em_set_flowcntl(SYSCTL_HANDLER_ARGS); static int em_sysctl_eee(SYSCTL_HANDLER_ARGS); static void em_if_led_func(if_ctx_t ctx, int onoff); static int em_get_regs(SYSCTL_HANDLER_ARGS); static void lem_smartspeed(struct adapter *adapter); static void igb_configure_queues(struct adapter *adapter); /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t em_methods[] = { /* Device interface */ DEVMETHOD(device_register, em_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; static device_method_t igb_methods[] = { /* Device interface */ DEVMETHOD(device_register, igb_register), DEVMETHOD(device_probe, iflib_device_probe), DEVMETHOD(device_attach, iflib_device_attach), DEVMETHOD(device_detach, iflib_device_detach), DEVMETHOD(device_shutdown, iflib_device_shutdown), DEVMETHOD(device_suspend, iflib_device_suspend), DEVMETHOD(device_resume, iflib_device_resume), DEVMETHOD_END }; static driver_t em_driver = { "em", em_methods, sizeof(struct adapter), }; static devclass_t em_devclass; DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0); MODULE_DEPEND(em, pci, 1, 1, 1); MODULE_DEPEND(em, ether, 1, 1, 1); MODULE_DEPEND(em, iflib, 1, 1, 1); IFLIB_PNP_INFO(pci, em, em_vendor_info_array); static driver_t igb_driver = { "igb", igb_methods, sizeof(struct adapter), }; static devclass_t igb_devclass; DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0); MODULE_DEPEND(igb, pci, 1, 1, 1); MODULE_DEPEND(igb, ether, 1, 1, 1); MODULE_DEPEND(igb, iflib, 1, 1, 1); IFLIB_PNP_INFO(pci, igb, igb_vendor_info_array); static device_method_t em_if_methods[] = { DEVMETHOD(ifdi_attach_pre, em_if_attach_pre), DEVMETHOD(ifdi_attach_post, em_if_attach_post), DEVMETHOD(ifdi_detach, em_if_detach), DEVMETHOD(ifdi_shutdown, em_if_shutdown), DEVMETHOD(ifdi_suspend, em_if_suspend), DEVMETHOD(ifdi_resume, em_if_resume), DEVMETHOD(ifdi_init, em_if_init), DEVMETHOD(ifdi_stop, em_if_stop), DEVMETHOD(ifdi_msix_intr_assign, em_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, em_if_intr_enable), DEVMETHOD(ifdi_intr_disable, em_if_intr_disable), DEVMETHOD(ifdi_tx_queues_alloc, em_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, em_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, em_if_queues_free), DEVMETHOD(ifdi_update_admin_status, em_if_update_admin_status), DEVMETHOD(ifdi_multi_set, em_if_multi_set), DEVMETHOD(ifdi_media_status, em_if_media_status), DEVMETHOD(ifdi_media_change, em_if_media_change), DEVMETHOD(ifdi_mtu_set, em_if_mtu_set), DEVMETHOD(ifdi_promisc_set, em_if_set_promisc), DEVMETHOD(ifdi_timer, em_if_timer), DEVMETHOD(ifdi_watchdog_reset, em_if_watchdog_reset), DEVMETHOD(ifdi_vlan_register, em_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, em_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, em_if_get_counter), DEVMETHOD(ifdi_led_func, em_if_led_func), DEVMETHOD(ifdi_rx_queue_intr_enable, em_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, em_if_tx_queue_intr_enable), DEVMETHOD(ifdi_debug, em_if_debug), DEVMETHOD_END }; static driver_t em_if_driver = { "em_if", em_if_methods, sizeof(struct adapter) }; static device_method_t igb_if_methods[] = { DEVMETHOD(ifdi_attach_pre, em_if_attach_pre), DEVMETHOD(ifdi_attach_post, em_if_attach_post), DEVMETHOD(ifdi_detach, em_if_detach), DEVMETHOD(ifdi_shutdown, em_if_shutdown), DEVMETHOD(ifdi_suspend, em_if_suspend), DEVMETHOD(ifdi_resume, em_if_resume), DEVMETHOD(ifdi_init, em_if_init), DEVMETHOD(ifdi_stop, em_if_stop), DEVMETHOD(ifdi_msix_intr_assign, em_if_msix_intr_assign), DEVMETHOD(ifdi_intr_enable, igb_if_intr_enable), DEVMETHOD(ifdi_intr_disable, igb_if_intr_disable), DEVMETHOD(ifdi_tx_queues_alloc, em_if_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, em_if_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, em_if_queues_free), DEVMETHOD(ifdi_update_admin_status, em_if_update_admin_status), DEVMETHOD(ifdi_multi_set, em_if_multi_set), DEVMETHOD(ifdi_media_status, em_if_media_status), DEVMETHOD(ifdi_media_change, em_if_media_change), DEVMETHOD(ifdi_mtu_set, em_if_mtu_set), DEVMETHOD(ifdi_promisc_set, em_if_set_promisc), DEVMETHOD(ifdi_timer, em_if_timer), DEVMETHOD(ifdi_watchdog_reset, em_if_watchdog_reset), DEVMETHOD(ifdi_vlan_register, em_if_vlan_register), DEVMETHOD(ifdi_vlan_unregister, em_if_vlan_unregister), DEVMETHOD(ifdi_get_counter, em_if_get_counter), DEVMETHOD(ifdi_led_func, em_if_led_func), DEVMETHOD(ifdi_rx_queue_intr_enable, igb_if_rx_queue_intr_enable), DEVMETHOD(ifdi_tx_queue_intr_enable, igb_if_tx_queue_intr_enable), DEVMETHOD(ifdi_debug, em_if_debug), DEVMETHOD_END }; static driver_t igb_if_driver = { "igb_if", igb_if_methods, sizeof(struct adapter) }; /********************************************************************* * Tunable default values. *********************************************************************/ #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) #define MAX_INTS_PER_SEC 8000 #define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256)) /* Allow common code without TSO */ #ifndef CSUM_TSO #define CSUM_TSO 0 #endif static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters"); static int em_disable_crc_stripping = 0; SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN, &em_disable_crc_stripping, 0, "Disable CRC Stripping"); static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt, 0, "Default transmit interrupt delay in usecs"); SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt, 0, "Default receive interrupt delay in usecs"); static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN, &em_tx_abs_int_delay_dflt, 0, "Default transmit interrupt delay limit in usecs"); SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN, &em_rx_abs_int_delay_dflt, 0, "Default receive interrupt delay limit in usecs"); static int em_smart_pwr_down = FALSE; SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down, 0, "Set to true to leave smart power down enabled on newer adapters"); /* Controls whether promiscuous also shows bad packets */ static int em_debug_sbp = TRUE; SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0, "Show bad packets in promiscuous mode"); /* How many packets rxeof tries to clean at a time */ static int em_rx_process_limit = 100; SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &em_rx_process_limit, 0, "Maximum number of received packets to process " "at a time, -1 means unlimited"); /* Energy efficient ethernet - default to OFF */ static int eee_setting = 1; SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0, "Enable Energy Efficient Ethernet"); /* ** Tuneable Interrupt rate */ static int em_max_interrupt_rate = 8000; SYSCTL_INT(_hw_em, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, &em_max_interrupt_rate, 0, "Maximum interrupts per second"); /* Global used in WOL setup with multiport cards */ static int global_quad_port_a = 0; extern struct if_txrx igb_txrx; extern struct if_txrx em_txrx; extern struct if_txrx lem_txrx; static struct if_shared_ctx em_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = EM_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tx_maxsegsize = PAGE_SIZE, .isc_tso_maxsize = EM_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = EM_TSO_SEG_SIZE, .isc_rx_maxsize = MJUM9BYTES, .isc_rx_nsegments = 1, .isc_rx_maxsegsize = MJUM9BYTES, .isc_nfl = 1, .isc_nrxqs = 1, .isc_ntxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = em_vendor_info_array, .isc_driver_version = em_driver_version, .isc_driver = &em_if_driver, .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP | IFLIB_NEED_ZERO_CSUM, .isc_nrxd_min = {EM_MIN_RXD}, .isc_ntxd_min = {EM_MIN_TXD}, .isc_nrxd_max = {EM_MAX_RXD}, .isc_ntxd_max = {EM_MAX_TXD}, .isc_nrxd_default = {EM_DEFAULT_RXD}, .isc_ntxd_default = {EM_DEFAULT_TXD}, }; if_shared_ctx_t em_sctx = &em_sctx_init; static struct if_shared_ctx igb_sctx_init = { .isc_magic = IFLIB_MAGIC, .isc_q_align = PAGE_SIZE, .isc_tx_maxsize = EM_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tx_maxsegsize = PAGE_SIZE, .isc_tso_maxsize = EM_TSO_SIZE + sizeof(struct ether_vlan_header), .isc_tso_maxsegsize = EM_TSO_SEG_SIZE, .isc_rx_maxsize = MJUM9BYTES, .isc_rx_nsegments = 1, .isc_rx_maxsegsize = MJUM9BYTES, .isc_nfl = 1, .isc_nrxqs = 1, .isc_ntxqs = 1, .isc_admin_intrcnt = 1, .isc_vendor_info = igb_vendor_info_array, .isc_driver_version = em_driver_version, .isc_driver = &igb_if_driver, .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP | IFLIB_NEED_ZERO_CSUM, .isc_nrxd_min = {EM_MIN_RXD}, .isc_ntxd_min = {EM_MIN_TXD}, .isc_nrxd_max = {IGB_MAX_RXD}, .isc_ntxd_max = {IGB_MAX_TXD}, .isc_nrxd_default = {EM_DEFAULT_RXD}, .isc_ntxd_default = {EM_DEFAULT_TXD}, }; if_shared_ctx_t igb_sctx = &igb_sctx_init; /***************************************************************** * * Dump Registers * ****************************************************************/ #define IGB_REGS_LEN 739 static int em_get_regs(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct e1000_hw *hw = &adapter->hw; struct sbuf *sb; u32 *regs_buff; int rc; regs_buff = malloc(sizeof(u32) * IGB_REGS_LEN, M_DEVBUF, M_WAITOK); memset(regs_buff, 0, IGB_REGS_LEN * sizeof(u32)); rc = sysctl_wire_old_buffer(req, 0); MPASS(rc == 0); if (rc != 0) { free(regs_buff, M_DEVBUF); return (rc); } sb = sbuf_new_for_sysctl(NULL, NULL, 32*400, req); MPASS(sb != NULL); if (sb == NULL) { free(regs_buff, M_DEVBUF); return (ENOMEM); } /* General Registers */ regs_buff[0] = E1000_READ_REG(hw, E1000_CTRL); regs_buff[1] = E1000_READ_REG(hw, E1000_STATUS); regs_buff[2] = E1000_READ_REG(hw, E1000_CTRL_EXT); regs_buff[3] = E1000_READ_REG(hw, E1000_ICR); regs_buff[4] = E1000_READ_REG(hw, E1000_RCTL); regs_buff[5] = E1000_READ_REG(hw, E1000_RDLEN(0)); regs_buff[6] = E1000_READ_REG(hw, E1000_RDH(0)); regs_buff[7] = E1000_READ_REG(hw, E1000_RDT(0)); regs_buff[8] = E1000_READ_REG(hw, E1000_RXDCTL(0)); regs_buff[9] = E1000_READ_REG(hw, E1000_RDBAL(0)); regs_buff[10] = E1000_READ_REG(hw, E1000_RDBAH(0)); regs_buff[11] = E1000_READ_REG(hw, E1000_TCTL); regs_buff[12] = E1000_READ_REG(hw, E1000_TDBAL(0)); regs_buff[13] = E1000_READ_REG(hw, E1000_TDBAH(0)); regs_buff[14] = E1000_READ_REG(hw, E1000_TDLEN(0)); regs_buff[15] = E1000_READ_REG(hw, E1000_TDH(0)); regs_buff[16] = E1000_READ_REG(hw, E1000_TDT(0)); regs_buff[17] = E1000_READ_REG(hw, E1000_TXDCTL(0)); regs_buff[18] = E1000_READ_REG(hw, E1000_TDFH); regs_buff[19] = E1000_READ_REG(hw, E1000_TDFT); regs_buff[20] = E1000_READ_REG(hw, E1000_TDFHS); regs_buff[21] = E1000_READ_REG(hw, E1000_TDFPC); sbuf_printf(sb, "General Registers\n"); sbuf_printf(sb, "\tCTRL\t %08x\n", regs_buff[0]); sbuf_printf(sb, "\tSTATUS\t %08x\n", regs_buff[1]); sbuf_printf(sb, "\tCTRL_EXIT\t %08x\n\n", regs_buff[2]); sbuf_printf(sb, "Interrupt Registers\n"); sbuf_printf(sb, "\tICR\t %08x\n\n", regs_buff[3]); sbuf_printf(sb, "RX Registers\n"); sbuf_printf(sb, "\tRCTL\t %08x\n", regs_buff[4]); sbuf_printf(sb, "\tRDLEN\t %08x\n", regs_buff[5]); sbuf_printf(sb, "\tRDH\t %08x\n", regs_buff[6]); sbuf_printf(sb, "\tRDT\t %08x\n", regs_buff[7]); sbuf_printf(sb, "\tRXDCTL\t %08x\n", regs_buff[8]); sbuf_printf(sb, "\tRDBAL\t %08x\n", regs_buff[9]); sbuf_printf(sb, "\tRDBAH\t %08x\n\n", regs_buff[10]); sbuf_printf(sb, "TX Registers\n"); sbuf_printf(sb, "\tTCTL\t %08x\n", regs_buff[11]); sbuf_printf(sb, "\tTDBAL\t %08x\n", regs_buff[12]); sbuf_printf(sb, "\tTDBAH\t %08x\n", regs_buff[13]); sbuf_printf(sb, "\tTDLEN\t %08x\n", regs_buff[14]); sbuf_printf(sb, "\tTDH\t %08x\n", regs_buff[15]); sbuf_printf(sb, "\tTDT\t %08x\n", regs_buff[16]); sbuf_printf(sb, "\tTXDCTL\t %08x\n", regs_buff[17]); sbuf_printf(sb, "\tTDFH\t %08x\n", regs_buff[18]); sbuf_printf(sb, "\tTDFT\t %08x\n", regs_buff[19]); sbuf_printf(sb, "\tTDFHS\t %08x\n", regs_buff[20]); sbuf_printf(sb, "\tTDFPC\t %08x\n\n", regs_buff[21]); free(regs_buff, M_DEVBUF); #ifdef DUMP_DESCS { if_softc_ctx_t scctx = adapter->shared; struct rx_ring *rxr = &rx_que->rxr; struct tx_ring *txr = &tx_que->txr; int ntxd = scctx->isc_ntxd[0]; int nrxd = scctx->isc_nrxd[0]; int j; for (j = 0; j < nrxd; j++) { u32 staterr = le32toh(rxr->rx_base[j].wb.upper.status_error); u32 length = le32toh(rxr->rx_base[j].wb.upper.length); sbuf_printf(sb, "\tReceive Descriptor Address %d: %08" PRIx64 " Error:%d Length:%d\n", j, rxr->rx_base[j].read.buffer_addr, staterr, length); } for (j = 0; j < min(ntxd, 256); j++) { unsigned int *ptr = (unsigned int *)&txr->tx_base[j]; sbuf_printf(sb, "\tTXD[%03d] [0]: %08x [1]: %08x [2]: %08x [3]: %08x eop: %d DD=%d\n", j, ptr[0], ptr[1], ptr[2], ptr[3], buf->eop, buf->eop != -1 ? txr->tx_base[buf->eop].upper.fields.status & E1000_TXD_STAT_DD : 0); } } #endif rc = sbuf_finish(sb); sbuf_delete(sb); return(rc); } static void * em_register(device_t dev) { return (em_sctx); } static void * igb_register(device_t dev) { return (igb_sctx); } static int em_set_num_queues(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); int maxqueues; /* Sanity check based on HW */ switch (adapter->hw.mac.type) { case e1000_82576: case e1000_82580: case e1000_i350: case e1000_i354: maxqueues = 8; break; case e1000_i210: case e1000_82575: maxqueues = 4; break; case e1000_i211: case e1000_82574: maxqueues = 2; break; default: maxqueues = 1; break; } return (maxqueues); } #define LEM_CAPS \ IFCAP_HWCSUM | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | \ IFCAP_VLAN_HWCSUM | IFCAP_WOL | IFCAP_VLAN_HWFILTER #define EM_CAPS \ IFCAP_HWCSUM | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | \ IFCAP_VLAN_HWCSUM | IFCAP_WOL | IFCAP_VLAN_HWFILTER | IFCAP_TSO4 | \ IFCAP_LRO | IFCAP_VLAN_HWTSO #define IGB_CAPS \ IFCAP_HWCSUM | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | \ IFCAP_VLAN_HWCSUM | IFCAP_WOL | IFCAP_VLAN_HWFILTER | IFCAP_TSO4 | \ IFCAP_LRO | IFCAP_VLAN_HWTSO | IFCAP_JUMBO_MTU | IFCAP_HWCSUM_IPV6 |\ IFCAP_TSO6 /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure *********************************************************************/ static int em_if_attach_pre(if_ctx_t ctx) { struct adapter *adapter; if_softc_ctx_t scctx; device_t dev; struct e1000_hw *hw; int error = 0; INIT_DEBUGOUT("em_if_attach_pre: begin"); dev = iflib_get_dev(ctx); adapter = iflib_get_softc(ctx); adapter->ctx = adapter->osdep.ctx = ctx; adapter->dev = adapter->osdep.dev = dev; scctx = adapter->shared = iflib_get_softc_ctx(ctx); adapter->media = iflib_get_media(ctx); hw = &adapter->hw; adapter->tx_process_limit = scctx->isc_ntxd[0]; /* SYSCTL stuff */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_nvm_info, "I", "NVM Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_debug_info, "I", "Debug Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_set_flowcntl, "I", "Flow Control"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "reg_dump", CTLTYPE_STRING | CTLFLAG_RD, adapter, 0, em_get_regs, "A", "Dump Registers"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "rs_dump", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, em_get_rs, "I", "Dump RS indexes"); /* Determine hardware and mac info */ em_identify_hardware(ctx); scctx->isc_tx_nsegments = EM_MAX_SCATTER; scctx->isc_nrxqsets_max = scctx->isc_ntxqsets_max = em_set_num_queues(ctx); if (bootverbose) device_printf(dev, "attach_pre capping queues at %d\n", scctx->isc_ntxqsets_max); if (adapter->hw.mac.type >= igb_mac_min) { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0] * sizeof(union e1000_adv_tx_desc), EM_DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union e1000_adv_rx_desc), EM_DBA_ALIGN); scctx->isc_txd_size[0] = sizeof(union e1000_adv_tx_desc); scctx->isc_rxd_size[0] = sizeof(union e1000_adv_rx_desc); scctx->isc_txrx = &igb_txrx; scctx->isc_tx_tso_segments_max = EM_MAX_SCATTER; scctx->isc_tx_tso_size_max = EM_TSO_SIZE; scctx->isc_tx_tso_segsize_max = EM_TSO_SEG_SIZE; scctx->isc_capabilities = scctx->isc_capenable = IGB_CAPS; scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_TSO | CSUM_IP6_TCP | CSUM_IP6_UDP; if (adapter->hw.mac.type != e1000_82575) scctx->isc_tx_csum_flags |= CSUM_SCTP | CSUM_IP6_SCTP; /* ** Some new devices, as with ixgbe, now may ** use a different BAR, so we need to keep ** track of which is used. */ scctx->isc_msix_bar = PCIR_BAR(EM_MSIX_BAR); if (pci_read_config(dev, scctx->isc_msix_bar, 4) == 0) scctx->isc_msix_bar += 4; } else if (adapter->hw.mac.type >= em_mac_min) { scctx->isc_txqsizes[0] = roundup2(scctx->isc_ntxd[0]* sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2(scctx->isc_nrxd[0] * sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); scctx->isc_txd_size[0] = sizeof(struct e1000_tx_desc); scctx->isc_rxd_size[0] = sizeof(union e1000_rx_desc_extended); scctx->isc_txrx = &em_txrx; scctx->isc_tx_tso_segments_max = EM_MAX_SCATTER; scctx->isc_tx_tso_size_max = EM_TSO_SIZE; scctx->isc_tx_tso_segsize_max = EM_TSO_SEG_SIZE; scctx->isc_capabilities = scctx->isc_capenable = EM_CAPS; /* * For EM-class devices, don't enable IFCAP_{TSO4,VLAN_HWTSO} * by default as we don't have workarounds for all associated * silicon errata. E. g., with several MACs such as 82573E, * TSO only works at Gigabit speed and otherwise can cause the * hardware to hang (which also would be next to impossible to * work around given that already queued TSO-using descriptors * would need to be flushed and vlan(4) reconfigured at runtime * in case of a link speed change). Moreover, MACs like 82579 * still can hang at Gigabit even with all publicly documented * TSO workarounds implemented. Generally, the penality of * these workarounds is rather high and may involve copying * mbuf data around so advantages of TSO lapse. Still, TSO may * work for a few MACs of this class - at least when sticking * with Gigabit - in which case users may enable TSO manually. */ scctx->isc_capenable &= ~(IFCAP_TSO4 | IFCAP_VLAN_HWTSO); scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP | CSUM_IP_TSO; /* * We support MSI-X with 82574 only, but indicate to iflib(4) * that it shall give MSI at least a try with other devices. */ if (adapter->hw.mac.type == e1000_82574) { scctx->isc_msix_bar = PCIR_BAR(EM_MSIX_BAR); } else { scctx->isc_msix_bar = -1; scctx->isc_disable_msix = 1; } } else { scctx->isc_txqsizes[0] = roundup2((scctx->isc_ntxd[0] + 1) * sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); scctx->isc_rxqsizes[0] = roundup2((scctx->isc_nrxd[0] + 1) * sizeof(struct e1000_rx_desc), EM_DBA_ALIGN); scctx->isc_txd_size[0] = sizeof(struct e1000_tx_desc); scctx->isc_rxd_size[0] = sizeof(struct e1000_rx_desc); scctx->isc_tx_csum_flags = CSUM_TCP | CSUM_UDP; scctx->isc_txrx = &lem_txrx; scctx->isc_capabilities = scctx->isc_capenable = LEM_CAPS; if (adapter->hw.mac.type < e1000_82543) scctx->isc_capenable &= ~(IFCAP_HWCSUM|IFCAP_VLAN_HWCSUM); /* INTx only */ scctx->isc_msix_bar = 0; } /* Setup PCI resources */ if (em_allocate_pci_resources(ctx)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci; } /* ** For ICH8 and family we need to ** map the flash memory, and this ** must happen after the MAC is ** identified */ if ((hw->mac.type == e1000_ich8lan) || (hw->mac.type == e1000_ich9lan) || (hw->mac.type == e1000_ich10lan) || (hw->mac.type == e1000_pchlan) || (hw->mac.type == e1000_pch2lan) || (hw->mac.type == e1000_pch_lpt)) { int rid = EM_BAR_TYPE_FLASH; adapter->flash = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->flash == NULL) { device_printf(dev, "Mapping of Flash failed\n"); error = ENXIO; goto err_pci; } /* This is used in the shared code */ hw->flash_address = (u8 *)adapter->flash; adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash); adapter->osdep.flash_bus_space_handle = rman_get_bushandle(adapter->flash); } /* ** In the new SPT device flash is not a ** separate BAR, rather it is also in BAR0, ** so use the same tag and an offset handle for the ** FLASH read/write macros in the shared code. */ else if (hw->mac.type >= e1000_pch_spt) { adapter->osdep.flash_bus_space_tag = adapter->osdep.mem_bus_space_tag; adapter->osdep.flash_bus_space_handle = adapter->osdep.mem_bus_space_handle + E1000_FLASH_BASE_ADDR; } /* Do Shared Code initialization */ error = e1000_setup_init_funcs(hw, TRUE); if (error) { device_printf(dev, "Setup of Shared code failed, error %d\n", error); error = ENXIO; goto err_pci; } em_setup_msix(ctx); e1000_get_bus_info(hw); /* Set up some sysctls for the tunable interrupt delays */ em_add_int_delay_sysctl(adapter, "rx_int_delay", "receive interrupt delay in usecs", &adapter->rx_int_delay, E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_int_delay", "transmit interrupt delay in usecs", &adapter->tx_int_delay, E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "rx_abs_int_delay", "receive interrupt delay limit in usecs", &adapter->rx_abs_int_delay, E1000_REGISTER(hw, E1000_RADV), em_rx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_abs_int_delay", "transmit interrupt delay limit in usecs", &adapter->tx_abs_int_delay, E1000_REGISTER(hw, E1000_TADV), em_tx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "itr", "interrupt delay limit in usecs/4", &adapter->tx_itr, E1000_REGISTER(hw, E1000_ITR), DEFAULT_ITR); hw->mac.autoneg = DO_AUTO_NEG; hw->phy.autoneg_wait_to_complete = FALSE; hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; if (adapter->hw.mac.type < em_mac_min) { e1000_init_script_state_82541(&adapter->hw, TRUE); e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE); } /* Copper options */ if (hw->phy.media_type == e1000_media_type_copper) { hw->phy.mdix = AUTO_ALL_MODES; hw->phy.disable_polarity_correction = FALSE; hw->phy.ms_type = EM_MASTER_SLAVE; } /* * Set the frame limits assuming * standard ethernet sized frames. */ scctx->isc_max_frame_size = adapter->hw.mac.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; /* * This controls when hardware reports transmit completion * status. */ hw->mac.report_tx_early = 1; /* Allocate multicast array memory. */ adapter->mta = malloc(sizeof(u8) * ETHER_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_late; } /* Check SOL/IDER usage */ if (e1000_check_reset_block(hw)) device_printf(dev, "PHY reset is blocked" " due to SOL/IDER session.\n"); /* Sysctl for setting Energy Efficient Ethernet */ hw->dev_spec.ich8lan.eee_disable = eee_setting; SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_eee, "I", "Disable Energy Efficient Ethernet"); /* ** Start from a known state, this is ** important in reading the nvm and ** mac from that. */ e1000_reset_hw(hw); /* Make sure we have a good EEPROM before we read from it */ if (e1000_validate_nvm_checksum(hw) < 0) { /* ** Some PCI-E parts fail the first check due to ** the link being in sleep state, call it again, ** if it fails a second time its a real issue. */ if (e1000_validate_nvm_checksum(hw) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } } /* Copy the permanent MAC address out of the EEPROM */ if (e1000_read_mac_addr(hw) < 0) { device_printf(dev, "EEPROM read error while reading MAC" " address\n"); error = EIO; goto err_late; } if (!em_is_valid_ether_addr(hw->mac.addr)) { device_printf(dev, "Invalid MAC address\n"); error = EIO; goto err_late; } /* Disable ULP support */ e1000_disable_ulp_lpt_lp(hw, TRUE); /* * Get Wake-on-Lan and Management info for later use */ em_get_wakeup(ctx); /* Enable only WOL MAGIC by default */ scctx->isc_capenable &= ~IFCAP_WOL; if (adapter->wol != 0) scctx->isc_capenable |= IFCAP_WOL_MAGIC; iflib_set_mac(ctx, hw->mac.addr); return (0); err_late: em_release_hw_control(adapter); err_pci: em_free_pci_resources(ctx); free(adapter->mta, M_DEVBUF); return (error); } static int em_if_attach_post(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; int error = 0; /* Setup OS specific network interface */ error = em_setup_interface(ctx); if (error != 0) { goto err_late; } em_reset(ctx); /* Initialize statistics */ em_update_stats_counters(adapter); hw->mac.get_link_status = 1; em_if_update_admin_status(ctx); em_add_hw_stats(adapter); /* Non-AMT based hardware can now take control from firmware */ if (adapter->has_manage && !adapter->has_amt) em_get_hw_control(adapter); INIT_DEBUGOUT("em_if_attach_post: end"); return (error); err_late: em_release_hw_control(adapter); em_free_pci_resources(ctx); em_if_queues_free(ctx); free(adapter->mta, M_DEVBUF); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. * * return 0 on success, positive on failure *********************************************************************/ static int em_if_detach(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); INIT_DEBUGOUT("em_if_detach: begin"); e1000_phy_hw_reset(&adapter->hw); em_release_manageability(adapter); em_release_hw_control(adapter); em_free_pci_resources(ctx); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int em_if_shutdown(if_ctx_t ctx) { return em_if_suspend(ctx); } /* * Suspend/resume device methods. */ static int em_if_suspend(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); em_release_manageability(adapter); em_release_hw_control(adapter); em_enable_wakeup(ctx); return (0); } static int em_if_resume(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if (adapter->hw.mac.type == e1000_pch2lan) e1000_resume_workarounds_pchlan(&adapter->hw); em_if_init(ctx); em_init_manageability(adapter); return(0); } static int em_if_mtu_set(if_ctx_t ctx, uint32_t mtu) { int max_frame_size; struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = iflib_get_softc_ctx(ctx); IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); switch (adapter->hw.mac.type) { case e1000_82571: case e1000_82572: case e1000_ich9lan: case e1000_ich10lan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: case e1000_82574: case e1000_82583: case e1000_80003es2lan: /* 9K Jumbo Frame size */ max_frame_size = 9234; break; case e1000_pchlan: max_frame_size = 4096; break; case e1000_82542: case e1000_ich8lan: /* Adapters that do not support jumbo frames */ max_frame_size = ETHER_MAX_LEN; break; default: if (adapter->hw.mac.type >= igb_mac_min) max_frame_size = 9234; else /* lem */ max_frame_size = MAX_JUMBO_FRAME_SIZE; } if (mtu > max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN) { return (EINVAL); } scctx->isc_max_frame_size = adapter->hw.mac.max_frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; return (0); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * **********************************************************************/ static void em_if_init(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; struct ifnet *ifp = iflib_get_ifp(ctx); struct em_tx_queue *tx_que; int i; INIT_DEBUGOUT("em_if_init: begin"); /* Get the latest mac address, User can use a LAA */ bcopy(if_getlladdr(ifp), adapter->hw.mac.addr, ETHER_ADDR_LEN); /* Put the address into the Receive Address Array */ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); /* * With the 82571 adapter, RAR[0] may be overwritten * when the other port is reset, we make a duplicate * in RAR[14] for that eventuality, this assures * the interface continues to function. */ if (adapter->hw.mac.type == e1000_82571) { e1000_set_laa_state_82571(&adapter->hw, TRUE); e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, E1000_RAR_ENTRIES - 1); } /* Initialize the hardware */ em_reset(ctx); em_if_update_admin_status(ctx); for (i = 0, tx_que = adapter->tx_queues; i < adapter->tx_num_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; txr->tx_rs_cidx = txr->tx_rs_pidx; /* Initialize the last processed descriptor to be the end of * the ring, rather than the start, so that we avoid an * off-by-one error when calculating how many descriptors are * done in the credits_update function. */ txr->tx_cidx_processed = scctx->isc_ntxd[0] - 1; } /* Setup VLAN support, basic and offload if available */ E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); /* Clear bad data from Rx FIFOs */ if (adapter->hw.mac.type >= igb_mac_min) e1000_rx_fifo_flush_82575(&adapter->hw); /* Configure for OS presence */ em_init_manageability(adapter); /* Prepare transmit descriptors and buffers */ em_initialize_transmit_unit(ctx); /* Setup Multicast table */ em_if_multi_set(ctx); adapter->rx_mbuf_sz = iflib_get_rx_mbuf_sz(ctx); em_initialize_receive_unit(ctx); /* Use real VLAN Filter support? */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) { if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) /* Use real VLAN Filter support */ em_setup_vlan_hw_support(adapter); else { u32 ctrl; ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= E1000_CTRL_VME; E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); } } /* Don't lose promiscuous settings */ em_if_set_promisc(ctx, IFF_PROMISC); e1000_clear_hw_cntrs_base_generic(&adapter->hw); /* MSI-X configuration for 82574 */ if (adapter->hw.mac.type == e1000_82574) { int tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp); /* Set the IVAR - interrupt vector routing. */ E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars); } else if (adapter->intr_type == IFLIB_INTR_MSIX) /* Set up queue routing */ igb_configure_queues(adapter); /* this clears any pending interrupts */ E1000_READ_REG(&adapter->hw, E1000_ICR); E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC); /* AMT based hardware can now take control from firmware */ if (adapter->has_manage && adapter->has_amt) em_get_hw_control(adapter); /* Set Energy Efficient Ethernet */ if (adapter->hw.mac.type >= igb_mac_min && adapter->hw.phy.media_type == e1000_media_type_copper) { if (adapter->hw.mac.type == e1000_i354) e1000_set_eee_i354(&adapter->hw, TRUE, TRUE); else e1000_set_eee_i350(&adapter->hw, TRUE, TRUE); } } /********************************************************************* * * Fast Legacy/MSI Combined Interrupt Service routine * *********************************************************************/ int em_intr(void *arg) { struct adapter *adapter = arg; if_ctx_t ctx = adapter->ctx; u32 reg_icr; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); /* Hot eject? */ if (reg_icr == 0xffffffff) return FILTER_STRAY; /* Definitely not our interrupt. */ if (reg_icr == 0x0) return FILTER_STRAY; /* * Starting with the 82571 chip, bit 31 should be used to * determine whether the interrupt belongs to us. */ if (adapter->hw.mac.type >= e1000_82571 && (reg_icr & E1000_ICR_INT_ASSERTED) == 0) return FILTER_STRAY; /* * Only MSI-X interrupts have one-shot behavior by taking advantage * of the EIAC register. Thus, explicitly disable interrupts. This * also works around the MSI message reordering errata on certain * systems. */ IFDI_INTR_DISABLE(ctx); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) em_handle_link(ctx); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; return (FILTER_SCHEDULE_THREAD); } static int em_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *rxq = &adapter->rx_queues[rxqid]; E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxq->eims); return (0); } static int em_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct adapter *adapter = iflib_get_softc(ctx); struct em_tx_queue *txq = &adapter->tx_queues[txqid]; E1000_WRITE_REG(&adapter->hw, E1000_IMS, txq->eims); return (0); } static int igb_if_rx_queue_intr_enable(if_ctx_t ctx, uint16_t rxqid) { struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *rxq = &adapter->rx_queues[rxqid]; E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxq->eims); return (0); } static int igb_if_tx_queue_intr_enable(if_ctx_t ctx, uint16_t txqid) { struct adapter *adapter = iflib_get_softc(ctx); struct em_tx_queue *txq = &adapter->tx_queues[txqid]; E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txq->eims); return (0); } /********************************************************************* * * MSI-X RX Interrupt Service routine * **********************************************************************/ static int em_msix_que(void *arg) { struct em_rx_queue *que = arg; ++que->irqs; return (FILTER_SCHEDULE_THREAD); } /********************************************************************* * * MSI-X Link Fast Interrupt Service routine * **********************************************************************/ static int em_msix_link(void *arg) { struct adapter *adapter = arg; u32 reg_icr; ++adapter->link_irq; MPASS(adapter->hw.back != NULL); reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { em_handle_link(adapter->ctx); } else if (adapter->hw.mac.type == e1000_82574) { /* Only re-arm 82574 if em_if_update_admin_status() won't. */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); } if (adapter->hw.mac.type == e1000_82574) { /* * Because we must read the ICR for this interrupt it may * clear other causes using autoclear, for this reason we * simply create a soft interrupt for all these vectors. */ if (reg_icr) E1000_WRITE_REG(&adapter->hw, E1000_ICS, adapter->ims); } else { /* Re-arm unconditionally */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC); E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask); } return (FILTER_HANDLED); } static void em_handle_link(void *context) { if_ctx_t ctx = context; struct adapter *adapter = iflib_get_softc(ctx); adapter->hw.mac.get_link_status = 1; iflib_admin_intr_deferred(ctx); } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ static void em_if_media_status(if_ctx_t ctx, struct ifmediareq *ifmr) { struct adapter *adapter = iflib_get_softc(ctx); u_char fiber_type = IFM_1000_SX; INIT_DEBUGOUT("em_if_media_status: begin"); iflib_admin_intr_deferred(ctx); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { return; } ifmr->ifm_status |= IFM_ACTIVE; if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { if (adapter->hw.mac.type == e1000_82545) fiber_type = IFM_1000_LX; ifmr->ifm_active |= fiber_type | IFM_FDX; } else { switch (adapter->link_speed) { case 10: ifmr->ifm_active |= IFM_10_T; break; case 100: ifmr->ifm_active |= IFM_100_TX; break; case 1000: ifmr->ifm_active |= IFM_1000_T; break; } if (adapter->link_duplex == FULL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; } } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediopt option with ifconfig. * **********************************************************************/ static int em_if_media_change(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifmedia *ifm = iflib_get_media(ctx); INIT_DEBUGOUT("em_if_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; break; case IFM_1000_LX: case IFM_1000_SX: case IFM_1000_T: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; break; case IFM_100_TX: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; break; case IFM_10_T: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; break; default: device_printf(adapter->dev, "Unsupported media type\n"); } em_if_init(ctx); return (0); } static int em_if_set_promisc(if_ctx_t ctx, int flags) { struct adapter *adapter = iflib_get_softc(ctx); u32 reg_rctl; em_disable_promisc(ctx); reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (flags & IFF_PROMISC) { reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); /* Turn this on if you want to see bad packets */ if (em_debug_sbp) reg_rctl |= E1000_RCTL_SBP; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else if (flags & IFF_ALLMULTI) { reg_rctl |= E1000_RCTL_MPE; reg_rctl &= ~E1000_RCTL_UPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } return (0); } static void em_disable_promisc(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); u32 reg_rctl; int mcnt = 0; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= (~E1000_RCTL_UPE); if (if_getflags(ifp) & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else mcnt = if_llmaddr_count(ifp); /* Don't disable if in MAX groups */ if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) reg_rctl &= (~E1000_RCTL_MPE); reg_rctl &= (~E1000_RCTL_SBP); E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } static u_int em_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) { u8 *mta = arg; if (cnt == MAX_NUM_MULTICAST_ADDRESSES) return (1); bcopy(LLADDR(sdl), &mta[cnt * ETHER_ADDR_LEN], ETHER_ADDR_LEN); return (1); } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ static void em_if_multi_set(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); u32 reg_rctl = 0; u8 *mta; /* Multicast array memory */ int mcnt = 0; IOCTL_DEBUGOUT("em_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(u8) * ETHER_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_clear_mwi(&adapter->hw); reg_rctl |= E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); } mcnt = if_foreach_llmaddr(ifp, em_copy_maddr, mta); if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= ~E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_set_mwi(&adapter->hw); } } /********************************************************************* * Timer routine * * This routine schedules em_if_update_admin_status() to check for * link status and to gather statistics as well as to perform some * controller-specific hardware patting. * **********************************************************************/ static void em_if_timer(if_ctx_t ctx, uint16_t qid) { if (qid != 0) return; iflib_admin_intr_deferred(ctx); } static void em_if_update_admin_status(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; device_t dev = iflib_get_dev(ctx); u32 link_check, thstat, ctrl; link_check = thstat = ctrl = 0; /* Get the cached link value or read phy for real */ switch (hw->phy.media_type) { case e1000_media_type_copper: if (hw->mac.get_link_status) { if (hw->mac.type == e1000_pch_spt) msec_delay(50); /* Do the work to read phy */ e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; if (link_check) /* ESB2 fix */ e1000_cfg_on_link_up(hw); } else { link_check = TRUE; } break; case e1000_media_type_fiber: e1000_check_for_link(hw); link_check = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU); break; case e1000_media_type_internal_serdes: e1000_check_for_link(hw); link_check = adapter->hw.mac.serdes_has_link; break; /* VF device is type_unknown */ case e1000_media_type_unknown: e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; /* FALLTHROUGH */ default: break; } /* Check for thermal downshift or shutdown */ if (hw->mac.type == e1000_i350) { thstat = E1000_READ_REG(hw, E1000_THSTAT); ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT); } /* Now check for a transition */ if (link_check && (adapter->link_active == 0)) { e1000_get_speed_and_duplex(hw, &adapter->link_speed, &adapter->link_duplex); /* Check if we must disable SPEED_MODE bit on PCI-E */ if ((adapter->link_speed != SPEED_1000) && ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572))) { int tarc0; tarc0 = E1000_READ_REG(hw, E1000_TARC(0)); tarc0 &= ~TARC_SPEED_MODE_BIT; E1000_WRITE_REG(hw, E1000_TARC(0), tarc0); } if (bootverbose) device_printf(dev, "Link is up %d Mbps %s\n", adapter->link_speed, ((adapter->link_duplex == FULL_DUPLEX) ? "Full Duplex" : "Half Duplex")); adapter->link_active = 1; adapter->smartspeed = 0; if ((ctrl & E1000_CTRL_EXT_LINK_MODE_MASK) == E1000_CTRL_EXT_LINK_MODE_GMII && (thstat & E1000_THSTAT_LINK_THROTTLE)) device_printf(dev, "Link: thermal downshift\n"); /* Delay Link Up for Phy update */ if (((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) && (hw->phy.id == I210_I_PHY_ID)) msec_delay(I210_LINK_DELAY); /* Reset if the media type changed. */ if ((hw->dev_spec._82575.media_changed) && (adapter->hw.mac.type >= igb_mac_min)) { hw->dev_spec._82575.media_changed = false; adapter->flags |= IGB_MEDIA_RESET; em_reset(ctx); } iflib_link_state_change(ctx, LINK_STATE_UP, IF_Mbps(adapter->link_speed)); } else if (!link_check && (adapter->link_active == 1)) { adapter->link_speed = 0; adapter->link_duplex = 0; adapter->link_active = 0; iflib_link_state_change(ctx, LINK_STATE_DOWN, 0); } em_update_stats_counters(adapter); /* Reset LAA into RAR[0] on 82571 */ if (hw->mac.type == e1000_82571 && e1000_get_laa_state_82571(hw)) e1000_rar_set(hw, hw->mac.addr, 0); if (hw->mac.type < em_mac_min) lem_smartspeed(adapter); else if (hw->mac.type == e1000_82574 && adapter->intr_type == IFLIB_INTR_MSIX) E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); } static void em_if_watchdog_reset(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); /* * Just count the event; iflib(4) will already trigger a * sufficient reset of the controller. */ adapter->watchdog_events++; } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC. * **********************************************************************/ static void em_if_stop(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); INIT_DEBUGOUT("em_if_stop: begin"); e1000_reset_hw(&adapter->hw); if (adapter->hw.mac.type >= e1000_82544) E1000_WRITE_REG(&adapter->hw, E1000_WUFC, 0); e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } /********************************************************************* * * Determine hardware revision. * **********************************************************************/ static void em_identify_hardware(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct adapter *adapter = iflib_get_softc(ctx); /* Make sure our PCI config space has the necessary stuff set */ adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); /* Save off the information about this board */ adapter->hw.vendor_id = pci_get_vendor(dev); adapter->hw.device_id = pci_get_device(dev); adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* Do Shared Code Init and Setup */ if (e1000_set_mac_type(&adapter->hw)) { device_printf(dev, "Setup init failure\n"); return; } } static int em_allocate_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); int rid, val; rid = PCIR_BAR(0); adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->memory == NULL) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->memory); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->memory); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; /* Only older adapters use IO mapping */ if (adapter->hw.mac.type < em_mac_min && adapter->hw.mac.type > e1000_82543) { /* Figure our where our IO BAR is ? */ for (rid = PCIR_BAR(0); rid < PCIR_CIS;) { val = pci_read_config(dev, rid, 4); if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) { break; } rid += 4; /* check for 64bit BAR */ if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT) rid += 4; } if (rid >= PCIR_CIS) { device_printf(dev, "Unable to locate IO BAR\n"); return (ENXIO); } adapter->ioport = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid, RF_ACTIVE); if (adapter->ioport == NULL) { device_printf(dev, "Unable to allocate bus resource: " "ioport\n"); return (ENXIO); } adapter->hw.io_base = 0; adapter->osdep.io_bus_space_tag = rman_get_bustag(adapter->ioport); adapter->osdep.io_bus_space_handle = rman_get_bushandle(adapter->ioport); } adapter->hw.back = &adapter->osdep; return (0); } /********************************************************************* * * Set up the MSI-X Interrupt handlers * **********************************************************************/ static int em_if_msix_intr_assign(if_ctx_t ctx, int msix) { struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *rx_que = adapter->rx_queues; struct em_tx_queue *tx_que = adapter->tx_queues; int error, rid, i, vector = 0, rx_vectors; char buf[16]; /* First set up ring resources */ for (i = 0; i < adapter->rx_num_queues; i++, rx_que++, vector++) { rid = vector + 1; snprintf(buf, sizeof(buf), "rxq%d", i); error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RXTX, em_msix_que, rx_que, rx_que->me, buf); if (error) { device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error); adapter->rx_num_queues = i + 1; goto fail; } rx_que->msix = vector; /* * Set the bit to enable interrupt * in E1000_IMS -- bits 20 and 21 * are for RX0 and RX1, note this has * NOTHING to do with the MSI-X vector */ if (adapter->hw.mac.type == e1000_82574) { rx_que->eims = 1 << (20 + i); adapter->ims |= rx_que->eims; adapter->ivars |= (8 | rx_que->msix) << (i * 4); } else if (adapter->hw.mac.type == e1000_82575) rx_que->eims = E1000_EICR_TX_QUEUE0 << vector; else rx_que->eims = 1 << vector; } rx_vectors = vector; vector = 0; for (i = 0; i < adapter->tx_num_queues; i++, tx_que++, vector++) { snprintf(buf, sizeof(buf), "txq%d", i); tx_que = &adapter->tx_queues[i]; iflib_softirq_alloc_generic(ctx, &adapter->rx_queues[i % adapter->rx_num_queues].que_irq, IFLIB_INTR_TX, tx_que, tx_que->me, buf); tx_que->msix = (vector % adapter->rx_num_queues); /* * Set the bit to enable interrupt * in E1000_IMS -- bits 22 and 23 * are for TX0 and TX1, note this has * NOTHING to do with the MSI-X vector */ if (adapter->hw.mac.type == e1000_82574) { tx_que->eims = 1 << (22 + i); adapter->ims |= tx_que->eims; adapter->ivars |= (8 | tx_que->msix) << (8 + (i * 4)); } else if (adapter->hw.mac.type == e1000_82575) { tx_que->eims = E1000_EICR_TX_QUEUE0 << i; } else { tx_que->eims = 1 << i; } } /* Link interrupt */ rid = rx_vectors + 1; error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, em_msix_link, adapter, 0, "aq"); if (error) { device_printf(iflib_get_dev(ctx), "Failed to register admin handler"); goto fail; } adapter->linkvec = rx_vectors; if (adapter->hw.mac.type < igb_mac_min) { adapter->ivars |= (8 | rx_vectors) << 16; adapter->ivars |= 0x80000000; } return (0); fail: iflib_irq_free(ctx, &adapter->irq); rx_que = adapter->rx_queues; for (int i = 0; i < adapter->rx_num_queues; i++, rx_que++) iflib_irq_free(ctx, &rx_que->que_irq); return (error); } static void igb_configure_queues(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct em_rx_queue *rx_que; struct em_tx_queue *tx_que; u32 tmp, ivar = 0, newitr = 0; /* First turn on RSS capability */ if (adapter->hw.mac.type != e1000_82575) E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME | E1000_GPIE_PBA | E1000_GPIE_NSICR); /* Turn on MSI-X */ switch (adapter->hw.mac.type) { case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: /* RX entries */ for (int i = 0; i < adapter->rx_num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); rx_que = &adapter->rx_queues[i]; if (i & 1) { ivar &= 0xFF00FFFF; ivar |= (rx_que->msix | E1000_IVAR_VALID) << 16; } else { ivar &= 0xFFFFFF00; ivar |= rx_que->msix | E1000_IVAR_VALID; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); } /* TX entries */ for (int i = 0; i < adapter->tx_num_queues; i++) { u32 index = i >> 1; ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); tx_que = &adapter->tx_queues[i]; if (i & 1) { ivar &= 0x00FFFFFF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 24; } else { ivar &= 0xFFFF00FF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 8; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= tx_que->eims; } /* And for the link interrupt */ ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; adapter->link_mask = 1 << adapter->linkvec; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82576: /* RX entries */ for (int i = 0; i < adapter->rx_num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); rx_que = &adapter->rx_queues[i]; if (i < 8) { ivar &= 0xFFFFFF00; ivar |= rx_que->msix | E1000_IVAR_VALID; } else { ivar &= 0xFF00FFFF; ivar |= (rx_que->msix | E1000_IVAR_VALID) << 16; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= rx_que->eims; } /* TX entries */ for (int i = 0; i < adapter->tx_num_queues; i++) { u32 index = i & 0x7; /* Each IVAR has two entries */ ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); tx_que = &adapter->tx_queues[i]; if (i < 8) { ivar &= 0xFFFF00FF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 8; } else { ivar &= 0x00FFFFFF; ivar |= (tx_que->msix | E1000_IVAR_VALID) << 24; } E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); adapter->que_mask |= tx_que->eims; } /* And for the link interrupt */ ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; adapter->link_mask = 1 << adapter->linkvec; E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); break; case e1000_82575: /* enable MSI-X support*/ tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; /* Auto-Mask interrupts upon ICR read. */ tmp |= E1000_CTRL_EXT_EIAME; tmp |= E1000_CTRL_EXT_IRCA; E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); /* Queues */ for (int i = 0; i < adapter->rx_num_queues; i++) { rx_que = &adapter->rx_queues[i]; tmp = E1000_EICR_RX_QUEUE0 << i; tmp |= E1000_EICR_TX_QUEUE0 << i; rx_que->eims = tmp; E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), i, rx_que->eims); adapter->que_mask |= rx_que->eims; } /* Link */ E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec), E1000_EIMS_OTHER); adapter->link_mask |= E1000_EIMS_OTHER; default: break; } /* Set the starting interrupt rate */ if (em_max_interrupt_rate > 0) newitr = (4000000 / em_max_interrupt_rate) & 0x7FFC; if (hw->mac.type == e1000_82575) newitr |= newitr << 16; else newitr |= E1000_EITR_CNT_IGNR; for (int i = 0; i < adapter->rx_num_queues; i++) { rx_que = &adapter->rx_queues[i]; E1000_WRITE_REG(hw, E1000_EITR(rx_que->msix), newitr); } return; } static void em_free_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *que = adapter->rx_queues; device_t dev = iflib_get_dev(ctx); /* Release all MSI-X queue resources */ if (adapter->intr_type == IFLIB_INTR_MSIX) iflib_irq_free(ctx, &adapter->irq); for (int i = 0; i < adapter->rx_num_queues; i++, que++) { iflib_irq_free(ctx, &que->que_irq); } if (adapter->memory != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(adapter->memory), adapter->memory); adapter->memory = NULL; } if (adapter->flash != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(adapter->flash), adapter->flash); adapter->flash = NULL; } if (adapter->ioport != NULL) { bus_release_resource(dev, SYS_RES_IOPORT, rman_get_rid(adapter->ioport), adapter->ioport); adapter->ioport = NULL; } } /* Set up MSI or MSI-X */ static int em_setup_msix(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if (adapter->hw.mac.type == e1000_82574) { em_enable_vectors_82574(ctx); } return (0); } /********************************************************************* * * Workaround for SmartSpeed on 82541 and 82547 controllers * **********************************************************************/ static void lem_smartspeed(struct adapter *adapter) { u16 phy_tmp; if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) || adapter->hw.mac.autoneg == 0 || (adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0) return; if (adapter->smartspeed == 0) { /* If Master/Slave config fault is asserted twice, * we assume back-to-back */ e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT)) return; e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp); if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) { e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp); if(phy_tmp & CR_1000T_MS_ENABLE) { phy_tmp &= ~CR_1000T_MS_ENABLE; e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp); adapter->smartspeed++; if(adapter->hw.mac.autoneg && !e1000_copper_link_autoneg(&adapter->hw) && !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) { phy_tmp |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp); } } } return; } else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) { /* If still no link, perhaps using 2/3 pair cable */ e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp); phy_tmp |= CR_1000T_MS_ENABLE; e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp); if(adapter->hw.mac.autoneg && !e1000_copper_link_autoneg(&adapter->hw) && !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) { phy_tmp |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp); } } /* Restart process after EM_SMARTSPEED_MAX iterations */ if(adapter->smartspeed++ == EM_SMARTSPEED_MAX) adapter->smartspeed = 0; } /********************************************************************* * * Initialize the DMA Coalescing feature * **********************************************************************/ static void igb_init_dmac(struct adapter *adapter, u32 pba) { device_t dev = adapter->dev; struct e1000_hw *hw = &adapter->hw; u32 dmac, reg = ~E1000_DMACR_DMAC_EN; u16 hwm; u16 max_frame_size; if (hw->mac.type == e1000_i211) return; max_frame_size = adapter->shared->isc_max_frame_size; if (hw->mac.type > e1000_82580) { if (adapter->dmac == 0) { /* Disabling it */ E1000_WRITE_REG(hw, E1000_DMACR, reg); return; } else device_printf(dev, "DMA Coalescing enabled\n"); /* Set starting threshold */ E1000_WRITE_REG(hw, E1000_DMCTXTH, 0); hwm = 64 * pba - max_frame_size / 16; if (hwm < 64 * (pba - 6)) hwm = 64 * (pba - 6); reg = E1000_READ_REG(hw, E1000_FCRTC); reg &= ~E1000_FCRTC_RTH_COAL_MASK; reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) & E1000_FCRTC_RTH_COAL_MASK); E1000_WRITE_REG(hw, E1000_FCRTC, reg); dmac = pba - max_frame_size / 512; if (dmac < pba - 10) dmac = pba - 10; reg = E1000_READ_REG(hw, E1000_DMACR); reg &= ~E1000_DMACR_DMACTHR_MASK; reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT) & E1000_DMACR_DMACTHR_MASK); /* transition to L0x or L1 if available..*/ reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); /* Check if status is 2.5Gb backplane connection * before configuration of watchdog timer, which is * in msec values in 12.8usec intervals * watchdog timer= msec values in 32usec intervals * for non 2.5Gb connection */ if (hw->mac.type == e1000_i354) { int status = E1000_READ_REG(hw, E1000_STATUS); if ((status & E1000_STATUS_2P5_SKU) && (!(status & E1000_STATUS_2P5_SKU_OVER))) reg |= ((adapter->dmac * 5) >> 6); else reg |= (adapter->dmac >> 5); } else { reg |= (adapter->dmac >> 5); } E1000_WRITE_REG(hw, E1000_DMACR, reg); E1000_WRITE_REG(hw, E1000_DMCRTRH, 0); /* Set the interval before transition */ reg = E1000_READ_REG(hw, E1000_DMCTLX); if (hw->mac.type == e1000_i350) reg |= IGB_DMCTLX_DCFLUSH_DIS; /* ** in 2.5Gb connection, TTLX unit is 0.4 usec ** which is 0x4*2 = 0xA. But delay is still 4 usec */ if (hw->mac.type == e1000_i354) { int status = E1000_READ_REG(hw, E1000_STATUS); if ((status & E1000_STATUS_2P5_SKU) && (!(status & E1000_STATUS_2P5_SKU_OVER))) reg |= 0xA; else reg |= 0x4; } else { reg |= 0x4; } E1000_WRITE_REG(hw, E1000_DMCTLX, reg); /* free space in tx packet buffer to wake from DMA coal */ E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE - (2 * max_frame_size)) >> 6); /* make low power state decision controlled by DMA coal */ reg = E1000_READ_REG(hw, E1000_PCIEMISC); reg &= ~E1000_PCIEMISC_LX_DECISION; E1000_WRITE_REG(hw, E1000_PCIEMISC, reg); } else if (hw->mac.type == e1000_82580) { u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC); E1000_WRITE_REG(hw, E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION); E1000_WRITE_REG(hw, E1000_DMACR, 0); } } /********************************************************************* * * Initialize the hardware to a configuration as specified by the * adapter structure. * **********************************************************************/ static void em_reset(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); struct e1000_hw *hw = &adapter->hw; u16 rx_buffer_size; u32 pba; INIT_DEBUGOUT("em_reset: begin"); /* Let the firmware know the OS is in control */ em_get_hw_control(adapter); /* Set up smart power down as default off on newer adapters. */ if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 || hw->mac.type == e1000_82572)) { u16 phy_tmp = 0; /* Speed up time to link by disabling smart power down. */ e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp); phy_tmp &= ~IGP02E1000_PM_SPD; e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp); } /* * Packet Buffer Allocation (PBA) * Writing PBA sets the receive portion of the buffer * the remainder is used for the transmit buffer. */ switch (hw->mac.type) { /* Total Packet Buffer on these is 48K */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */ break; case e1000_82573: /* 82573: Total Packet Buffer is 32K */ pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */ break; case e1000_82574: case e1000_82583: pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */ break; case e1000_ich8lan: pba = E1000_PBA_8K; break; case e1000_ich9lan: case e1000_ich10lan: /* Boost Receive side for jumbo frames */ if (adapter->hw.mac.max_frame_size > 4096) pba = E1000_PBA_14K; else pba = E1000_PBA_10K; break; case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: pba = E1000_PBA_26K; break; case e1000_82575: pba = E1000_PBA_32K; break; case e1000_82576: case e1000_vfadapt: pba = E1000_READ_REG(hw, E1000_RXPBS); pba &= E1000_RXPBS_SIZE_MASK_82576; break; case e1000_82580: case e1000_i350: case e1000_i354: case e1000_vfadapt_i350: pba = E1000_READ_REG(hw, E1000_RXPBS); pba = e1000_rxpbs_adjust_82580(pba); break; case e1000_i210: case e1000_i211: pba = E1000_PBA_34K; break; default: if (adapter->hw.mac.max_frame_size > 8192) pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ else pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ } /* Special needs in case of Jumbo frames */ if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) { u32 tx_space, min_tx, min_rx; pba = E1000_READ_REG(hw, E1000_PBA); tx_space = pba >> 16; pba &= 0xffff; min_tx = (adapter->hw.mac.max_frame_size + sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2; min_tx = roundup2(min_tx, 1024); min_tx >>= 10; min_rx = adapter->hw.mac.max_frame_size; min_rx = roundup2(min_rx, 1024); min_rx >>= 10; if (tx_space < min_tx && ((min_tx - tx_space) < pba)) { pba = pba - (min_tx - tx_space); /* * if short on rx space, rx wins * and must trump tx adjustment */ if (pba < min_rx) pba = min_rx; } E1000_WRITE_REG(hw, E1000_PBA, pba); } if (hw->mac.type < igb_mac_min) E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); INIT_DEBUGOUT1("em_reset: pba=%dK",pba); /* * These parameters control the automatic generation (Tx) and * response (Rx) to Ethernet PAUSE frames. * - High water mark should allow for at least two frames to be * received after sending an XOFF. * - Low water mark works best when it is very near the high water mark. * This allows the receiver to restart by sending XON when it has * drained a bit. Here we use an arbitrary value of 1500 which will * restart after one full frame is pulled from the buffer. There * could be several smaller frames in the buffer and if so they will * not trigger the XON until their total number reduces the buffer * by 1500. * - The pause time is fairly large at 1000 x 512ns = 512 usec. */ rx_buffer_size = (pba & 0xffff) << 10; hw->fc.high_water = rx_buffer_size - roundup2(adapter->hw.mac.max_frame_size, 1024); hw->fc.low_water = hw->fc.high_water - 1500; if (adapter->fc) /* locally set flow control value? */ hw->fc.requested_mode = adapter->fc; else hw->fc.requested_mode = e1000_fc_full; if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; else hw->fc.pause_time = EM_FC_PAUSE_TIME; hw->fc.send_xon = TRUE; /* Device specific overrides/settings */ switch (hw->mac.type) { case e1000_pchlan: /* Workaround: no TX flow ctrl for PCH */ hw->fc.requested_mode = e1000_fc_rx_pause; hw->fc.pause_time = 0xFFFF; /* override */ if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x3500; hw->fc.low_water = 0x1500; } else { hw->fc.high_water = 0x5000; hw->fc.low_water = 0x3000; } hw->fc.refresh_time = 0x1000; break; case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: hw->fc.high_water = 0x5C20; hw->fc.low_water = 0x5048; hw->fc.pause_time = 0x0650; hw->fc.refresh_time = 0x0400; /* Jumbos need adjusted PBA */ if (if_getmtu(ifp) > ETHERMTU) E1000_WRITE_REG(hw, E1000_PBA, 12); else E1000_WRITE_REG(hw, E1000_PBA, 26); break; case e1000_82575: case e1000_82576: /* 8-byte granularity */ hw->fc.low_water = hw->fc.high_water - 8; break; case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: /* 16-byte granularity */ hw->fc.low_water = hw->fc.high_water - 16; break; case e1000_ich9lan: case e1000_ich10lan: if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x2800; hw->fc.low_water = hw->fc.high_water - 8; break; } /* FALLTHROUGH */ default: if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; break; } /* Issue a global reset */ e1000_reset_hw(hw); if (adapter->hw.mac.type >= igb_mac_min) { E1000_WRITE_REG(hw, E1000_WUC, 0); } else { E1000_WRITE_REG(hw, E1000_WUFC, 0); em_disable_aspm(adapter); } if (adapter->flags & IGB_MEDIA_RESET) { e1000_setup_init_funcs(hw, TRUE); e1000_get_bus_info(hw); adapter->flags &= ~IGB_MEDIA_RESET; } /* and a re-init */ if (e1000_init_hw(hw) < 0) { device_printf(dev, "Hardware Initialization Failed\n"); return; } if (adapter->hw.mac.type >= igb_mac_min) igb_init_dmac(adapter, pba); E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN); e1000_get_phy_info(hw); e1000_check_for_link(hw); } /* * Initialise the RSS mapping for NICs that support multiple transmit/ * receive rings. */ #define RSSKEYLEN 10 static void em_initialize_rss_mapping(struct adapter *adapter) { uint8_t rss_key[4 * RSSKEYLEN]; uint32_t reta = 0; struct e1000_hw *hw = &adapter->hw; int i; /* * Configure RSS key */ arc4rand(rss_key, sizeof(rss_key), 0); for (i = 0; i < RSSKEYLEN; ++i) { uint32_t rssrk = 0; rssrk = EM_RSSRK_VAL(rss_key, i); E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk); } /* * Configure RSS redirect table in following fashion: * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] */ for (i = 0; i < sizeof(reta); ++i) { uint32_t q; q = (i % adapter->rx_num_queues) << 7; reta |= q << (8 * i); } for (i = 0; i < 32; ++i) E1000_WRITE_REG(hw, E1000_RETA(i), reta); E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | E1000_MRQC_RSS_FIELD_IPV4_TCP | E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX | E1000_MRQC_RSS_FIELD_IPV6_EX | E1000_MRQC_RSS_FIELD_IPV6); } static void igb_initialize_rss_mapping(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; int i; int queue_id; u32 reta; u32 rss_key[10], mrqc, shift = 0; /* XXX? */ if (adapter->hw.mac.type == e1000_82575) shift = 6; /* * The redirection table controls which destination * queue each bucket redirects traffic to. * Each DWORD represents four queues, with the LSB * being the first queue in the DWORD. * * This just allocates buckets to queues using round-robin * allocation. * * NOTE: It Just Happens to line up with the default * RSS allocation method. */ /* Warning FM follows */ reta = 0; for (i = 0; i < 128; i++) { #ifdef RSS queue_id = rss_get_indirection_to_bucket(i); /* * If we have more queues than buckets, we'll * end up mapping buckets to a subset of the * queues. * * If we have more buckets than queues, we'll * end up instead assigning multiple buckets * to queues. * * Both are suboptimal, but we need to handle * the case so we don't go out of bounds * indexing arrays and such. */ queue_id = queue_id % adapter->rx_num_queues; #else queue_id = (i % adapter->rx_num_queues); #endif /* Adjust if required */ queue_id = queue_id << shift; /* * The low 8 bits are for hash value (n+0); * The next 8 bits are for hash value (n+1), etc. */ reta = reta >> 8; reta = reta | ( ((uint32_t) queue_id) << 24); if ((i & 3) == 3) { E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta); reta = 0; } } /* Now fill in hash table */ /* * MRQC: Multiple Receive Queues Command * Set queuing to RSS control, number depends on the device. */ mrqc = E1000_MRQC_ENABLE_RSS_8Q; #ifdef RSS /* XXX ew typecasting */ rss_getkey((uint8_t *) &rss_key); #else arc4rand(&rss_key, sizeof(rss_key), 0); #endif for (i = 0; i < 10; i++) E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key[i]); /* * Configure the RSS fields to hash upon. */ mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV4_TCP); mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 | E1000_MRQC_RSS_FIELD_IPV6_TCP); mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP | E1000_MRQC_RSS_FIELD_IPV6_UDP); mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX); E1000_WRITE_REG(hw, E1000_MRQC, mrqc); } /********************************************************************* * * Setup networking device structure and register interface media. * **********************************************************************/ static int em_setup_interface(if_ctx_t ctx) { struct ifnet *ifp = iflib_get_ifp(ctx); struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; INIT_DEBUGOUT("em_setup_interface: begin"); /* Single Queue */ if (adapter->tx_num_queues == 1) { if_setsendqlen(ifp, scctx->isc_ntxd[0] - 1); if_setsendqready(ifp); } /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { u_char fiber_type = IFM_1000_SX; /* default type */ if (adapter->hw.mac.type == e1000_82545) fiber_type = IFM_1000_LX; ifmedia_add(adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | fiber_type, 0, NULL); } else { ifmedia_add(adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); if (adapter->hw.phy.type != e1000_phy_ife) { ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } } ifmedia_add(adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(adapter->media, IFM_ETHER | IFM_AUTO); return (0); } static int em_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; int error = E1000_SUCCESS; struct em_tx_queue *que; int i, j; MPASS(adapter->tx_num_queues > 0); MPASS(adapter->tx_num_queues == ntxqsets); /* First allocate the top level queue structs */ if (!(adapter->tx_queues = (struct em_tx_queue *) malloc(sizeof(struct em_tx_queue) * adapter->tx_num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate queue memory\n"); return(ENOMEM); } for (i = 0, que = adapter->tx_queues; i < adapter->tx_num_queues; i++, que++) { /* Set up some basics */ struct tx_ring *txr = &que->txr; txr->adapter = que->adapter = adapter; que->me = txr->me = i; /* Allocate report status array */ if (!(txr->tx_rsq = (qidx_t *) malloc(sizeof(qidx_t) * scctx->isc_ntxd[0], M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "failed to allocate rs_idxs memory\n"); error = ENOMEM; goto fail; } for (j = 0; j < scctx->isc_ntxd[0]; j++) txr->tx_rsq[j] = QIDX_INVALID; /* get the virtual and physical address of the hardware queues */ txr->tx_base = (struct e1000_tx_desc *)vaddrs[i*ntxqs]; txr->tx_paddr = paddrs[i*ntxqs]; } if (bootverbose) device_printf(iflib_get_dev(ctx), "allocated for %d tx_queues\n", adapter->tx_num_queues); return (0); fail: em_if_queues_free(ctx); return (error); } static int em_if_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct adapter *adapter = iflib_get_softc(ctx); int error = E1000_SUCCESS; struct em_rx_queue *que; int i; MPASS(adapter->rx_num_queues > 0); MPASS(adapter->rx_num_queues == nrxqsets); /* First allocate the top level queue structs */ if (!(adapter->rx_queues = (struct em_rx_queue *) malloc(sizeof(struct em_rx_queue) * adapter->rx_num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(iflib_get_dev(ctx), "Unable to allocate queue memory\n"); error = ENOMEM; goto fail; } for (i = 0, que = adapter->rx_queues; i < nrxqsets; i++, que++) { /* Set up some basics */ struct rx_ring *rxr = &que->rxr; rxr->adapter = que->adapter = adapter; rxr->que = que; que->me = rxr->me = i; /* get the virtual and physical address of the hardware queues */ rxr->rx_base = (union e1000_rx_desc_extended *)vaddrs[i*nrxqs]; rxr->rx_paddr = paddrs[i*nrxqs]; } if (bootverbose) device_printf(iflib_get_dev(ctx), "allocated for %d rx_queues\n", adapter->rx_num_queues); return (0); fail: em_if_queues_free(ctx); return (error); } static void em_if_queues_free(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct em_tx_queue *tx_que = adapter->tx_queues; struct em_rx_queue *rx_que = adapter->rx_queues; if (tx_que != NULL) { for (int i = 0; i < adapter->tx_num_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; if (txr->tx_rsq == NULL) break; free(txr->tx_rsq, M_DEVBUF); txr->tx_rsq = NULL; } free(adapter->tx_queues, M_DEVBUF); adapter->tx_queues = NULL; } if (rx_que != NULL) { free(adapter->rx_queues, M_DEVBUF); adapter->rx_queues = NULL; } em_release_hw_control(adapter); if (adapter->mta != NULL) { free(adapter->mta, M_DEVBUF); } } /********************************************************************* * * Enable transmit unit. * **********************************************************************/ static void em_initialize_transmit_unit(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; struct em_tx_queue *que; struct tx_ring *txr; struct e1000_hw *hw = &adapter->hw; u32 tctl, txdctl = 0, tarc, tipg = 0; INIT_DEBUGOUT("em_initialize_transmit_unit: begin"); for (int i = 0; i < adapter->tx_num_queues; i++, txr++) { u64 bus_addr; caddr_t offp, endp; que = &adapter->tx_queues[i]; txr = &que->txr; bus_addr = txr->tx_paddr; /* Clear checksum offload context. */ offp = (caddr_t)&txr->csum_flags; endp = (caddr_t)(txr + 1); bzero(offp, endp - offp); /* Base and Len of TX Ring */ E1000_WRITE_REG(hw, E1000_TDLEN(i), scctx->isc_ntxd[0] * sizeof(struct e1000_tx_desc)); E1000_WRITE_REG(hw, E1000_TDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_TDBAL(i), (u32)bus_addr); /* Init the HEAD/TAIL indices */ E1000_WRITE_REG(hw, E1000_TDT(i), 0); E1000_WRITE_REG(hw, E1000_TDH(i), 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)), E1000_READ_REG(&adapter->hw, E1000_TDLEN(i))); txdctl = 0; /* clear txdctl */ txdctl |= 0x1f; /* PTHRESH */ txdctl |= 1 << 8; /* HTHRESH */ txdctl |= 1 << 16;/* WTHRESH */ txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */ txdctl |= E1000_TXDCTL_GRAN; txdctl |= 1 << 25; /* LWTHRESH */ E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); } /* Set the default values for the Tx Inter Packet Gap timer */ switch (adapter->hw.mac.type) { case e1000_80003es2lan: tipg = DEFAULT_82543_TIPG_IPGR1; tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; case e1000_82542: tipg = DEFAULT_82542_TIPG_IPGT; tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; default: if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) tipg = DEFAULT_82543_TIPG_IPGT_FIBER; else tipg = DEFAULT_82543_TIPG_IPGT_COPPER; tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; } E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg); E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value); if(adapter->hw.mac.type >= e1000_82540) E1000_WRITE_REG(&adapter->hw, E1000_TADV, adapter->tx_abs_int_delay.value); if ((adapter->hw.mac.type == e1000_82571) || (adapter->hw.mac.type == e1000_82572)) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= TARC_SPEED_MODE_BIT; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } else if (adapter->hw.mac.type == e1000_80003es2lan) { /* errata: program both queues to unweighted RR */ tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } else if (adapter->hw.mac.type == e1000_82574) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= TARC_ERRATA_BIT; if ( adapter->tx_num_queues > 1) { tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX); E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } else E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } if (adapter->tx_int_delay.value > 0) adapter->txd_cmd |= E1000_TXD_CMD_IDE; /* Program the Transmit Control Register */ tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); tctl &= ~E1000_TCTL_CT; tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); if (adapter->hw.mac.type >= e1000_82571) tctl |= E1000_TCTL_MULR; /* This write will effectively turn on the transmit unit. */ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); /* SPT and KBL errata workarounds */ if (hw->mac.type == e1000_pch_spt) { u32 reg; reg = E1000_READ_REG(hw, E1000_IOSFPC); reg |= E1000_RCTL_RDMTS_HEX; E1000_WRITE_REG(hw, E1000_IOSFPC, reg); /* i218-i219 Specification Update 1.5.4.5 */ reg = E1000_READ_REG(hw, E1000_TARC(0)); reg &= ~E1000_TARC0_CB_MULTIQ_3_REQ; reg |= E1000_TARC0_CB_MULTIQ_2_REQ; E1000_WRITE_REG(hw, E1000_TARC(0), reg); } } /********************************************************************* * * Enable receive unit. * **********************************************************************/ static void em_initialize_receive_unit(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); if_softc_ctx_t scctx = adapter->shared; struct ifnet *ifp = iflib_get_ifp(ctx); struct e1000_hw *hw = &adapter->hw; struct em_rx_queue *que; int i; u32 rctl, rxcsum, rfctl; INIT_DEBUGOUT("em_initialize_receive_units: begin"); /* * Make sure receives are disabled while setting * up the descriptor ring */ rctl = E1000_READ_REG(hw, E1000_RCTL); /* Do not disable if ever enabled on this hardware */ if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583)) E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); /* Setup the Receive Control Register */ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* Do not store bad packets */ rctl &= ~E1000_RCTL_SBP; /* Enable Long Packet receive */ if (if_getmtu(ifp) > ETHERMTU) rctl |= E1000_RCTL_LPE; else rctl &= ~E1000_RCTL_LPE; /* Strip the CRC */ if (!em_disable_crc_stripping) rctl |= E1000_RCTL_SECRC; if (adapter->hw.mac.type >= e1000_82540) { E1000_WRITE_REG(&adapter->hw, E1000_RADV, adapter->rx_abs_int_delay.value); /* * Set the interrupt throttling rate. Value is calculated * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */ E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR); } E1000_WRITE_REG(&adapter->hw, E1000_RDTR, adapter->rx_int_delay.value); /* Use extended rx descriptor formats */ rfctl = E1000_READ_REG(hw, E1000_RFCTL); rfctl |= E1000_RFCTL_EXTEN; /* * When using MSI-X interrupts we need to throttle * using the EITR register (82574 only) */ if (hw->mac.type == e1000_82574) { for (int i = 0; i < 4; i++) E1000_WRITE_REG(hw, E1000_EITR_82574(i), DEFAULT_ITR); /* Disable accelerated acknowledge */ rfctl |= E1000_RFCTL_ACK_DIS; } E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); if (if_getcapenable(ifp) & IFCAP_RXCSUM && adapter->hw.mac.type >= e1000_82543) { if (adapter->tx_num_queues > 1) { if (adapter->hw.mac.type >= igb_mac_min) { rxcsum |= E1000_RXCSUM_PCSD; if (hw->mac.type != e1000_82575) rxcsum |= E1000_RXCSUM_CRCOFL; } else rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL | E1000_RXCSUM_PCSD; } else { if (adapter->hw.mac.type >= igb_mac_min) rxcsum |= E1000_RXCSUM_IPPCSE; else rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL; if (adapter->hw.mac.type > e1000_82575) rxcsum |= E1000_RXCSUM_CRCOFL; } } else rxcsum &= ~E1000_RXCSUM_TUOFL; E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); if (adapter->rx_num_queues > 1) { if (adapter->hw.mac.type >= igb_mac_min) igb_initialize_rss_mapping(adapter); else em_initialize_rss_mapping(adapter); } /* * XXX TEMPORARY WORKAROUND: on some systems with 82573 * long latencies are observed, like Lenovo X60. This * change eliminates the problem, but since having positive * values in RDTR is a known source of problems on other * platforms another solution is being sought. */ if (hw->mac.type == e1000_82573) E1000_WRITE_REG(hw, E1000_RDTR, 0x20); for (i = 0, que = adapter->rx_queues; i < adapter->rx_num_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; /* Setup the Base and Length of the Rx Descriptor Ring */ u64 bus_addr = rxr->rx_paddr; #if 0 u32 rdt = adapter->rx_num_queues -1; /* default */ #endif E1000_WRITE_REG(hw, E1000_RDLEN(i), scctx->isc_nrxd[0] * sizeof(union e1000_rx_desc_extended)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr); /* Setup the Head and Tail Descriptor Pointers */ E1000_WRITE_REG(hw, E1000_RDH(i), 0); E1000_WRITE_REG(hw, E1000_RDT(i), 0); } /* * Set PTHRESH for improved jumbo performance * According to 10.2.5.11 of Intel 82574 Datasheet, * RXDCTL(1) is written whenever RXDCTL(0) is written. * Only write to RXDCTL(1) if there is a need for different * settings. */ if (((adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_pch2lan) || (adapter->hw.mac.type == e1000_ich10lan)) && (if_getmtu(ifp) > ETHERMTU)) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3); } else if (adapter->hw.mac.type == e1000_82574) { for (int i = 0; i < adapter->rx_num_queues; i++) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); rxdctl |= 0x20; /* PTHRESH */ rxdctl |= 4 << 8; /* HTHRESH */ rxdctl |= 4 << 16;/* WTHRESH */ rxdctl |= 1 << 24; /* Switch to granularity */ E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } } else if (adapter->hw.mac.type >= igb_mac_min) { u32 psize, srrctl = 0; if (if_getmtu(ifp) > ETHERMTU) { /* Set maximum packet len */ if (adapter->rx_mbuf_sz <= 4096) { srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; } else if (adapter->rx_mbuf_sz > 4096) { srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; } psize = scctx->isc_max_frame_size; /* are we on a vlan? */ if (ifp->if_vlantrunk != NULL) psize += VLAN_TAG_SIZE; E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); } else { srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; rctl |= E1000_RCTL_SZ_2048; } /* * If TX flow control is disabled and there's >1 queue defined, * enable DROP. * * This drops frames rather than hanging the RX MAC for all queues. */ if ((adapter->rx_num_queues > 1) && (adapter->fc == e1000_fc_none || adapter->fc == e1000_fc_rx_pause)) { srrctl |= E1000_SRRCTL_DROP_EN; } /* Setup the Base and Length of the Rx Descriptor Rings */ for (i = 0, que = adapter->rx_queues; i < adapter->rx_num_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; u64 bus_addr = rxr->rx_paddr; u32 rxdctl; #ifdef notyet /* Configure for header split? -- ignore for now */ rxr->hdr_split = igb_header_split; #else srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; #endif E1000_WRITE_REG(hw, E1000_RDLEN(i), scctx->isc_nrxd[0] * sizeof(struct e1000_rx_desc)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (uint32_t)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr); E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); /* Enable this Queue */ rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; rxdctl &= 0xFFF00000; rxdctl |= IGB_RX_PTHRESH; rxdctl |= IGB_RX_HTHRESH << 8; rxdctl |= IGB_RX_WTHRESH << 16; E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } } else if (adapter->hw.mac.type >= e1000_pch2lan) { if (if_getmtu(ifp) > ETHERMTU) e1000_lv_jumbo_workaround_ich8lan(hw, TRUE); else e1000_lv_jumbo_workaround_ich8lan(hw, FALSE); } /* Make sure VLAN Filters are off */ rctl &= ~E1000_RCTL_VFE; if (adapter->hw.mac.type < igb_mac_min) { if (adapter->rx_mbuf_sz == MCLBYTES) rctl |= E1000_RCTL_SZ_2048; else if (adapter->rx_mbuf_sz == MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; /* ensure we clear use DTYPE of 00 here */ rctl &= ~0x00000C00; } /* Write out the settings */ E1000_WRITE_REG(hw, E1000_RCTL, rctl); return; } static void em_if_vlan_register(if_ctx_t ctx, u16 vtag) { struct adapter *adapter = iflib_get_softc(ctx); u32 index, bit; index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; } static void em_if_vlan_unregister(if_ctx_t ctx, u16 vtag) { struct adapter *adapter = iflib_get_softc(ctx); u32 index, bit; index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; } static void em_setup_vlan_hw_support(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 reg; /* * We get here thru init_locked, meaning * a soft reset, this has already cleared * the VFTA and other state, so if there * have been no vlan's registered do nothing. */ if (adapter->num_vlans == 0) return; /* * A soft reset zero's out the VFTA, so * we need to repopulate it now. */ for (int i = 0; i < EM_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, i, adapter->shadow_vfta[i]); reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_VME; E1000_WRITE_REG(hw, E1000_CTRL, reg); /* Enable the Filter Table */ reg = E1000_READ_REG(hw, E1000_RCTL); reg &= ~E1000_RCTL_CFIEN; reg |= E1000_RCTL_VFE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } static void em_if_intr_enable(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; u32 ims_mask = IMS_ENABLE_MASK; if (hw->mac.type == e1000_82574) { E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK); ims_mask |= adapter->ims; } E1000_WRITE_REG(hw, E1000_IMS, ims_mask); } static void em_if_intr_disable(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; if (hw->mac.type == e1000_82574) E1000_WRITE_REG(hw, EM_EIAC, 0); E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); } static void igb_if_intr_enable(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; u32 mask; if (__predict_true(adapter->intr_type == IFLIB_INTR_MSIX)) { mask = (adapter->que_mask | adapter->link_mask); E1000_WRITE_REG(hw, E1000_EIAC, mask); E1000_WRITE_REG(hw, E1000_EIAM, mask); E1000_WRITE_REG(hw, E1000_EIMS, mask); E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_LSC); } else E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK); E1000_WRITE_FLUSH(hw); } static void igb_if_intr_disable(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; if (__predict_true(adapter->intr_type == IFLIB_INTR_MSIX)) { E1000_WRITE_REG(hw, E1000_EIMC, 0xffffffff); E1000_WRITE_REG(hw, E1000_EIAC, 0); } E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); E1000_WRITE_FLUSH(hw); } /* * Bit of a misnomer, what this really means is * to enable OS management of the system... aka * to disable special hardware management features */ static void em_init_manageability(struct adapter *adapter) { /* A shared code workaround */ #define E1000_82542_MANC2H E1000_MANC2H if (adapter->has_manage) { int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* disable hardware interception of ARP */ manc &= ~(E1000_MANC_ARP_EN); /* enable receiving management packets to the host */ manc |= E1000_MANC_EN_MNG2HOST; #define E1000_MNG2HOST_PORT_623 (1 << 5) #define E1000_MNG2HOST_PORT_664 (1 << 6) manc2h |= E1000_MNG2HOST_PORT_623; manc2h |= E1000_MNG2HOST_PORT_664; E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * Give control back to hardware management * controller if there is one. */ static void em_release_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* re-enable hardware interception of ARP */ manc |= E1000_MANC_ARP_EN; manc &= ~E1000_MANC_EN_MNG2HOST; E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means * that the driver is loaded. For AMT version type f/w * this means that the network i/f is open. */ static void em_get_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (adapter->vf_ifp) return; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm | E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); } /* * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that * the driver is no longer loaded. For AMT versions of the * f/w this means that the network i/f is closed. */ static void em_release_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (!adapter->has_manage) return; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm & ~E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); return; } static int em_is_valid_ether_addr(u8 *addr) { char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { return (FALSE); } return (TRUE); } /* ** Parse the interface capabilities with regard ** to both system management and wake-on-lan for ** later use. */ static void em_get_wakeup(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); u16 eeprom_data = 0, device_id, apme_mask; adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); apme_mask = EM_EEPROM_APME; switch (adapter->hw.mac.type) { case e1000_82542: case e1000_82543: break; case e1000_82544: e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL2_REG, 1, &eeprom_data); apme_mask = EM_82544_APME; break; case e1000_82546: case e1000_82546_rev_3: if (adapter->hw.bus.func == 1) { e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); break; } else e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; case e1000_82573: case e1000_82583: adapter->has_amt = TRUE; /* FALLTHROUGH */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: if (adapter->hw.bus.func == 1) { e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); break; } else e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; case e1000_ich8lan: case e1000_ich9lan: case e1000_ich10lan: case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_82575: /* listing all igb devices */ case e1000_82576: case e1000_82580: case e1000_i350: case e1000_i354: case e1000_i210: case e1000_i211: case e1000_vfadapt: case e1000_vfadapt_i350: apme_mask = E1000_WUC_APME; adapter->has_amt = TRUE; eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC); break; default: e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; } if (eeprom_data & apme_mask) adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC); /* * We have the eeprom settings, now apply the special cases * where the eeprom may be wrong or the board won't support * wake on lan on a particular port */ device_id = pci_get_device(dev); switch (device_id) { case E1000_DEV_ID_82546GB_PCIE: adapter->wol = 0; break; case E1000_DEV_ID_82546EB_FIBER: case E1000_DEV_ID_82546GB_FIBER: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_FUNC_1) adapter->wol = 0; break; case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3: /* if quad port adapter, disable WoL on all but port A */ if (global_quad_port_a != 0) adapter->wol = 0; /* Reset for multiple quad port adapters */ if (++global_quad_port_a == 4) global_quad_port_a = 0; break; case E1000_DEV_ID_82571EB_FIBER: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_FUNC_1) adapter->wol = 0; break; case E1000_DEV_ID_82571EB_QUAD_COPPER: case E1000_DEV_ID_82571EB_QUAD_FIBER: case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: /* if quad port adapter, disable WoL on all but port A */ if (global_quad_port_a != 0) adapter->wol = 0; /* Reset for multiple quad port adapters */ if (++global_quad_port_a == 4) global_quad_port_a = 0; break; } return; } /* * Enable PCI Wake On Lan capability */ static void em_enable_wakeup(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); device_t dev = iflib_get_dev(ctx); if_t ifp = iflib_get_ifp(ctx); int error = 0; u32 pmc, ctrl, ctrl_ext, rctl; u16 status; if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0) return; /* * Determine type of Wakeup: note that wol * is set with all bits on by default. */ if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0) adapter->wol &= ~E1000_WUFC_MAG; if ((if_getcapenable(ifp) & IFCAP_WOL_UCAST) == 0) adapter->wol &= ~E1000_WUFC_EX; if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0) adapter->wol &= ~E1000_WUFC_MC; else { rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); } if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC))) goto pme; /* Advertise the wakeup capability */ ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3); E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); /* Keep the laser running on Fiber adapters */ if (adapter->hw.phy.media_type == e1000_media_type_fiber || adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext); } if ((adapter->hw.mac.type == e1000_ich8lan) || (adapter->hw.mac.type == e1000_pchlan) || (adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_ich10lan)) e1000_suspend_workarounds_ich8lan(&adapter->hw); if ( adapter->hw.mac.type >= e1000_pchlan) { error = em_enable_phy_wakeup(adapter); if (error) goto pme; } else { /* Enable wakeup by the MAC */ E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); } if (adapter->hw.phy.type == e1000_phy_igp_3) e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); pme: status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2); status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if (!error && (if_getcapenable(ifp) & IFCAP_WOL)) status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2); return; } /* * WOL in the newer chipset interfaces (pchlan) * require thing to be copied into the phy */ static int em_enable_phy_wakeup(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 mreg, ret = 0; u16 preg; /* copy MAC RARs to PHY RARs */ e1000_copy_rx_addrs_to_phy_ich8lan(hw); /* copy MAC MTA to PHY MTA */ for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) { mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i); e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF)); e1000_write_phy_reg(hw, BM_MTA(i) + 1, (u16)((mreg >> 16) & 0xFFFF)); } /* configure PHY Rx Control register */ e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg); mreg = E1000_READ_REG(hw, E1000_RCTL); if (mreg & E1000_RCTL_UPE) preg |= BM_RCTL_UPE; if (mreg & E1000_RCTL_MPE) preg |= BM_RCTL_MPE; preg &= ~(BM_RCTL_MO_MASK); if (mreg & E1000_RCTL_MO_3) preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) << BM_RCTL_MO_SHIFT); if (mreg & E1000_RCTL_BAM) preg |= BM_RCTL_BAM; if (mreg & E1000_RCTL_PMCF) preg |= BM_RCTL_PMCF; mreg = E1000_READ_REG(hw, E1000_CTRL); if (mreg & E1000_CTRL_RFCE) preg |= BM_RCTL_RFCE; e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg); /* enable PHY wakeup in MAC register */ E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN | E1000_WUC_APME); E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol); /* configure and enable PHY wakeup in PHY registers */ e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol); e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN); /* activate PHY wakeup */ ret = hw->phy.ops.acquire(hw); if (ret) { printf("Could not acquire PHY\n"); return ret; } e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT)); ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg); if (ret) { printf("Could not read PHY page 769\n"); goto out; } preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT; ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg); if (ret) printf("Could not set PHY Host Wakeup bit\n"); out: hw->phy.ops.release(hw); return ret; } static void em_if_led_func(if_ctx_t ctx, int onoff) { struct adapter *adapter = iflib_get_softc(ctx); if (onoff) { e1000_setup_led(&adapter->hw); e1000_led_on(&adapter->hw); } else { e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } } /* * Disable the L0S and L1 LINK states */ static void em_disable_aspm(struct adapter *adapter) { int base, reg; u16 link_cap,link_ctrl; device_t dev = adapter->dev; switch (adapter->hw.mac.type) { case e1000_82573: case e1000_82574: case e1000_82583: break; default: return; } if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0) return; reg = base + PCIER_LINK_CAP; link_cap = pci_read_config(dev, reg, 2); if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0) return; reg = base + PCIER_LINK_CTL; link_ctrl = pci_read_config(dev, reg, 2); link_ctrl &= ~PCIEM_LINK_CTL_ASPMC; pci_write_config(dev, reg, link_ctrl, 2); return; } /********************************************************************** * * Update the board statistics counters. * **********************************************************************/ static void em_update_stats_counters(struct adapter *adapter) { u64 prev_xoffrxc = adapter->stats.xoffrxc; if(adapter->hw.phy.media_type == e1000_media_type_copper || (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) { adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS); adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC); } adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS); adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC); adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC); adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL); adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC); adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL); adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC); adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC); adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC); adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC); adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC); adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); /* ** For watchdog management we need to know if we have been ** paused during the last interval, so capture that here. */ if (adapter->stats.xoffrxc != prev_xoffrxc) adapter->shared->isc_pause_frames = 1; adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC); adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC); adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64); adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127); adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255); adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511); adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023); adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522); adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC); adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC); adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC); adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC); /* For the 64-bit byte counters the low dword must be read first. */ /* Both registers clear on the read of the high dword */ adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32); adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32); adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC); adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC); adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC); adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC); adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC); adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH); adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH); adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR); adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT); adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64); adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127); adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255); adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511); adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023); adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522); adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC); adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC); /* Interrupt Counts */ adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC); adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC); adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC); adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC); adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC); adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC); adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC); adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC); adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC); if (adapter->hw.mac.type >= e1000_82543) { adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, E1000_ALGNERRC); adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, E1000_RXERRC); adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, E1000_TNCRS); adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, E1000_CEXTERR); adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, E1000_TSCTC); adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, E1000_TSCTFC); } } static uint64_t em_if_get_counter(if_ctx_t ctx, ift_counter cnt) { struct adapter *adapter = iflib_get_softc(ctx); struct ifnet *ifp = iflib_get_ifp(ctx); switch (cnt) { case IFCOUNTER_COLLISIONS: return (adapter->stats.colc); case IFCOUNTER_IERRORS: return (adapter->dropped_pkts + adapter->stats.rxerrc + adapter->stats.crcerrs + adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc + adapter->stats.mpc + adapter->stats.cexterr); case IFCOUNTER_OERRORS: return (adapter->stats.ecol + adapter->stats.latecol + adapter->watchdog_events); default: return (if_get_counter_default(ifp, cnt)); } } /* Export a single 32-bit register via a read-only sysctl. */ static int em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; u_int val; adapter = oidp->oid_arg1; val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); return (sysctl_handle_int(oidp, &val, 0, req)); } /* * Add sysctl variables, one per statistic, to the system. */ static void em_add_hw_stats(struct adapter *adapter) { device_t dev = iflib_get_dev(adapter->ctx); struct em_tx_queue *tx_que = adapter->tx_queues; struct em_rx_queue *rx_que = adapter->rx_queues; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct e1000_hw_stats *stats = &adapter->stats; struct sysctl_oid *stat_node, *queue_node, *int_node; struct sysctl_oid_list *stat_list, *queue_list, *int_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSI-X IRQ Handled"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", CTLFLAG_RD, &adapter->rx_overruns, "RX overruns"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL, em_sysctl_reg_handler, "IU", "Device Control Register"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL, em_sysctl_reg_handler, "IU", "Receiver Control Register"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", CTLFLAG_RD, &adapter->hw.fc.high_water, 0, "Flow Control High Watermark"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", CTLFLAG_RD, &adapter->hw.fc.low_water, 0, "Flow Control Low Watermark"); for (int i = 0; i < adapter->tx_num_queues; i++, tx_que++) { struct tx_ring *txr = &tx_que->txr; snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "TX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq", CTLFLAG_RD, &txr->tx_irq, "Queue MSI-X Transmit Interrupts"); } for (int j = 0; j < adapter->rx_num_queues; j++, rx_que++) { struct rx_ring *rxr = &rx_que->rxr; snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", j); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "RX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq", CTLFLAG_RD, &rxr->rx_irq, "Queue MSI-X Receive Interrupts"); } /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll", CTLFLAG_RD, &stats->ecol, "Excessive collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll", CTLFLAG_RD, &stats->scc, "Single collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll", CTLFLAG_RD, &stats->mcc, "Multiple collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll", CTLFLAG_RD, &stats->latecol, "Late collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count", CTLFLAG_RD, &stats->colc, "Collision Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors", CTLFLAG_RD, &adapter->stats.symerrs, "Symbol Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors", CTLFLAG_RD, &adapter->stats.sec, "Sequence Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count", CTLFLAG_RD, &adapter->stats.dc, "Defer Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets", CTLFLAG_RD, &adapter->stats.mpc, "Missed Packets"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", CTLFLAG_RD, &adapter->stats.rnbc, "Receive No Buffers"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize", CTLFLAG_RD, &adapter->stats.ruc, "Receive Undersize"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &adapter->stats.rfc, "Fragmented Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize", CTLFLAG_RD, &adapter->stats.roc, "Oversized Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber", CTLFLAG_RD, &adapter->stats.rjc, "Recevied Jabber"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs", CTLFLAG_RD, &adapter->stats.rxerrc, "Receive Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &adapter->stats.crcerrs, "CRC errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs", CTLFLAG_RD, &adapter->stats.algnerrc, "Alignment Errors"); /* On 82575 these are collision counts */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", CTLFLAG_RD, &adapter->stats.cexterr, "Collision/Carrier extension errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &adapter->stats.xonrxc, "XON Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &adapter->stats.xontxc, "XON Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &adapter->stats.xoffrxc, "XOFF Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &adapter->stats.xofftxc, "XOFF Transmitted"); /* Packet Reception Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", CTLFLAG_RD, &adapter->stats.tpr, "Total Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &adapter->stats.gprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.bprc, "Broadcast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.mprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &adapter->stats.prc64, "64 byte frames received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &adapter->stats.prc127, "65-127 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &adapter->stats.prc255, "128-255 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &adapter->stats.prc511, "256-511 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &adapter->stats.prc1023, "512-1023 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &adapter->stats.gorc, "Good Octets Received"); /* Packet Transmission Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &adapter->stats.gotc, "Good Octets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &adapter->stats.tpt, "Total Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &adapter->stats.gptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &adapter->stats.bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &adapter->stats.mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &adapter->stats.ptc64, "64 byte frames transmitted "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &adapter->stats.ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &adapter->stats.ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &adapter->stats.ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &adapter->stats.ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.ptc1522, "1024-1522 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd", CTLFLAG_RD, &adapter->stats.tsctc, "TSO Contexts Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", CTLFLAG_RD, &adapter->stats.tsctfc, "TSO Contexts Failed"); /* Interrupt Stats */ int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", CTLFLAG_RD, NULL, "Interrupt Statistics"); int_list = SYSCTL_CHILDREN(int_node); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts", CTLFLAG_RD, &adapter->stats.iac, "Interrupt Assertion Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", CTLFLAG_RD, &adapter->stats.icrxptc, "Interrupt Cause Rx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", CTLFLAG_RD, &adapter->stats.icrxatc, "Interrupt Cause Rx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", CTLFLAG_RD, &adapter->stats.ictxptc, "Interrupt Cause Tx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", CTLFLAG_RD, &adapter->stats.ictxatc, "Interrupt Cause Tx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", CTLFLAG_RD, &adapter->stats.ictxqec, "Interrupt Cause Tx Queue Empty Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", CTLFLAG_RD, &adapter->stats.ictxqmtc, "Interrupt Cause Tx Queue Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", CTLFLAG_RD, &adapter->stats.icrxdmtc, "Interrupt Cause Rx Desc Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun", CTLFLAG_RD, &adapter->stats.icrxoc, "Interrupt Cause Receiver Overrun Count"); } /********************************************************************** * * This routine provides a way to dump out the adapter eeprom, * often a useful debug/service tool. This only dumps the first * 32 words, stuff that matters is in that extent. * **********************************************************************/ static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); /* * This value will cause a hex dump of the * first 32 16-bit words of the EEPROM to * the screen. */ if (result == 1) em_print_nvm_info(adapter); return (error); } static void em_print_nvm_info(struct adapter *adapter) { u16 eeprom_data; int i, j, row = 0; /* Its a bit crude, but it gets the job done */ printf("\nInterface EEPROM Dump:\n"); printf("Offset\n0x0000 "); for (i = 0, j = 0; i < 32; i++, j++) { if (j == 8) { /* Make the offset block */ j = 0; ++row; printf("\n0x00%x0 ",row); } e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); printf("%04x ", eeprom_data); } printf("\n"); } static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { struct em_int_delay_info *info; struct adapter *adapter; u32 regval; int error, usecs, ticks; info = (struct em_int_delay_info *) arg1; usecs = info->value; error = sysctl_handle_int(oidp, &usecs, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535)) return (EINVAL); info->value = usecs; ticks = EM_USECS_TO_TICKS(usecs); if (info->offset == E1000_ITR) /* units are 256ns here */ ticks *= 4; adapter = info->adapter; regval = E1000_READ_OFFSET(&adapter->hw, info->offset); regval = (regval & ~0xffff) | (ticks & 0xffff); /* Handle a few special cases. */ switch (info->offset) { case E1000_RDTR: break; case E1000_TIDV: if (ticks == 0) { adapter->txd_cmd &= ~E1000_TXD_CMD_IDE; /* Don't write 0 into the TIDV register. */ regval++; } else adapter->txd_cmd |= E1000_TXD_CMD_IDE; break; } E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); return (0); } static void em_add_int_delay_sysctl(struct adapter *adapter, const char *name, const char *description, struct em_int_delay_info *info, int offset, int value) { info->adapter = adapter; info->offset = offset; info->value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, em_sysctl_int_delay, "I", description); } /* * Set flow control using sysctl: * Flow control values: * 0 - off * 1 - rx pause * 2 - tx pause * 3 - full */ static int em_set_flowcntl(SYSCTL_HANDLER_ARGS) { int error; static int input = 3; /* default is full */ struct adapter *adapter = (struct adapter *) arg1; error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (input == adapter->fc) /* no change? */ return (error); switch (input) { case e1000_fc_rx_pause: case e1000_fc_tx_pause: case e1000_fc_full: case e1000_fc_none: adapter->hw.fc.requested_mode = input; adapter->fc = input; break; default: /* Do nothing */ return (error); } adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; e1000_force_mac_fc(&adapter->hw); return (error); } /* * Manage Energy Efficient Ethernet: * Control values: * 0/1 - enabled/disabled */ static int em_sysctl_eee(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error, value; value = adapter->hw.dev_spec.ich8lan.eee_disable; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0); em_if_init(adapter->ctx); return (0); } static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if (result == 1) { adapter = (struct adapter *) arg1; em_print_debug_info(adapter); } return (error); } static int em_get_rs(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error; int result; result = 0; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr || result != 1) return (error); em_dump_rs(adapter); return (error); } static void em_if_debug(if_ctx_t ctx) { em_dump_rs(iflib_get_softc(ctx)); } /* * This routine is meant to be fluid, add whatever is * needed for debugging a problem. -jfv */ static void em_print_debug_info(struct adapter *adapter) { device_t dev = iflib_get_dev(adapter->ctx); struct ifnet *ifp = iflib_get_ifp(adapter->ctx); struct tx_ring *txr = &adapter->tx_queues->txr; struct rx_ring *rxr = &adapter->rx_queues->rxr; if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) printf("Interface is RUNNING "); else printf("Interface is NOT RUNNING\n"); if (if_getdrvflags(ifp) & IFF_DRV_OACTIVE) printf("and INACTIVE\n"); else printf("and ACTIVE\n"); for (int i = 0; i < adapter->tx_num_queues; i++, txr++) { device_printf(dev, "TX Queue %d ------\n", i); device_printf(dev, "hw tdh = %d, hw tdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_TDH(i)), E1000_READ_REG(&adapter->hw, E1000_TDT(i))); } for (int j=0; j < adapter->rx_num_queues; j++, rxr++) { device_printf(dev, "RX Queue %d ------\n", j); device_printf(dev, "hw rdh = %d, hw rdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_RDH(j)), E1000_READ_REG(&adapter->hw, E1000_RDT(j))); } } /* * 82574 only: * Write a new value to the EEPROM increasing the number of MSI-X * vectors from 3 to 5, for proper multiqueue support. */ static void em_enable_vectors_82574(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); struct e1000_hw *hw = &adapter->hw; device_t dev = iflib_get_dev(ctx); u16 edata; e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); if (bootverbose) device_printf(dev, "EM_NVM_PCIE_CTRL = %#06x\n", edata); if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) { device_printf(dev, "Writing to eeprom: increasing " "reported MSI-X vectors from 3 to 5...\n"); edata &= ~(EM_NVM_MSIX_N_MASK); edata |= 4 << EM_NVM_MSIX_N_SHIFT; e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); e1000_update_nvm_checksum(hw); device_printf(dev, "Writing to eeprom: done\n"); } } Index: projects/clang1000-import/sys/dev/flash/mx25lreg.h =================================================================== --- projects/clang1000-import/sys/dev/flash/mx25lreg.h (revision 356919) +++ projects/clang1000-import/sys/dev/flash/mx25lreg.h (revision 356920) @@ -1,71 +1,73 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009, Oleksandr Tymoshenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __MX25LREG_H__ #define __MX25LREG_H__ /* * Commands */ #define CMD_WRITE_ENABLE 0x06 #define CMD_WRITE_DISABLE 0x04 #define CMD_READ_IDENT 0x9F #define CMD_READ_STATUS 0x05 #define CMD_WRITE_STATUS 0x01 #define CMD_READ 0x03 #define CMD_FAST_READ 0x0B +#define CMD_READ_DUAL_IO 0xBB +#define CMD_READ_QUAD_OUTPUT 0x6B #define CMD_PAGE_PROGRAM 0x02 #define CMD_SECTOR_ERASE 0xD8 #define CMD_BULK_ERASE 0xC7 #define CMD_BLOCK_4K_ERASE 0x20 #define CMD_BLOCK_32K_ERASE 0x52 #define CMD_ENTER_4B_MODE 0xB7 #define CMD_EXIT_4B_MODE 0xE9 /* Quad 4B-addressing operations. */ #define CMD_QUAD_SECTOR_ERASE 0xDC #define CMD_QUAD_PAGE_PROGRAM 0x34 #define CMD_READ_4B_QUAD_OUTPUT 0x6C /* * Status register flags */ #define STATUS_SRWD (1 << 7) #define STATUS_BP2 (1 << 4) #define STATUS_BP1 (1 << 3) #define STATUS_BP0 (1 << 2) #define STATUS_WEL (1 << 1) #define STATUS_WIP (1 << 0) #define FLASH_PAGE_SIZE 256 #endif /* __MX25LREG_H__ */ Index: projects/clang1000-import/sys/dev/md/md.c =================================================================== --- projects/clang1000-import/sys/dev/md/md.c (revision 356919) +++ projects/clang1000-import/sys/dev/md/md.c (revision 356920) @@ -1,2169 +1,2176 @@ /*- * SPDX-License-Identifier: (Beerware AND BSD-3-Clause) * * ---------------------------------------------------------------------------- * "THE BEER-WARE LICENSE" (Revision 42): * wrote this file. As long as you retain this notice you * can do whatever you want with this stuff. If we meet some day, and you think * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp * ---------------------------------------------------------------------------- * * $FreeBSD$ * */ /*- * The following functions are based in the vn(4) driver: mdstart_swap(), * mdstart_vnode(), mdcreate_swap(), mdcreate_vnode() and mddestroy(), * and as such under the following copyright: * * Copyright (c) 1988 University of Utah. * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah Hdr: vn.c 1.13 94/04/02 * * from: @(#)vn.c 8.6 (Berkeley) 4/1/94 * From: src/sys/dev/vn/vn.c,v 1.122 2000/12/16 16:06:03 */ #include "opt_rootdevname.h" #include "opt_geom.h" #include "opt_md.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MD_MODVER 1 #define MD_SHUTDOWN 0x10000 /* Tell worker thread to terminate. */ #define MD_EXITING 0x20000 /* Worker thread is exiting. */ #define MD_PROVIDERGONE 0x40000 /* Safe to free the softc */ #ifndef MD_NSECT #define MD_NSECT (10000 * 2) #endif struct md_req { unsigned md_unit; /* unit number */ enum md_types md_type; /* type of disk */ off_t md_mediasize; /* size of disk in bytes */ unsigned md_sectorsize; /* sectorsize */ unsigned md_options; /* options */ int md_fwheads; /* firmware heads */ int md_fwsectors; /* firmware sectors */ char *md_file; /* pathname of file to mount */ enum uio_seg md_file_seg; /* location of md_file */ char *md_label; /* label of the device (userspace) */ int *md_units; /* pointer to units array (kernel) */ size_t md_units_nitems; /* items in md_units array */ }; #ifdef COMPAT_FREEBSD32 struct md_ioctl32 { unsigned md_version; unsigned md_unit; enum md_types md_type; uint32_t md_file; off_t md_mediasize; unsigned md_sectorsize; unsigned md_options; uint64_t md_base; int md_fwheads; int md_fwsectors; uint32_t md_label; int md_pad[MDNPAD]; } __attribute__((__packed__)); CTASSERT((sizeof(struct md_ioctl32)) == 436); #define MDIOCATTACH_32 _IOC_NEWTYPE(MDIOCATTACH, struct md_ioctl32) #define MDIOCDETACH_32 _IOC_NEWTYPE(MDIOCDETACH, struct md_ioctl32) #define MDIOCQUERY_32 _IOC_NEWTYPE(MDIOCQUERY, struct md_ioctl32) #define MDIOCRESIZE_32 _IOC_NEWTYPE(MDIOCRESIZE, struct md_ioctl32) #endif /* COMPAT_FREEBSD32 */ static MALLOC_DEFINE(M_MD, "md_disk", "Memory Disk"); static MALLOC_DEFINE(M_MDSECT, "md_sectors", "Memory Disk Sectors"); static int md_debug; SYSCTL_INT(_debug, OID_AUTO, mddebug, CTLFLAG_RW, &md_debug, 0, "Enable md(4) debug messages"); static int md_malloc_wait; SYSCTL_INT(_vm, OID_AUTO, md_malloc_wait, CTLFLAG_RW, &md_malloc_wait, 0, "Allow malloc to wait for memory allocations"); #if defined(MD_ROOT) && !defined(MD_ROOT_FSTYPE) #define MD_ROOT_FSTYPE "ufs" #endif #if defined(MD_ROOT) /* * Preloaded image gets put here. */ #if defined(MD_ROOT_SIZE) /* * We put the mfs_root symbol into the oldmfs section of the kernel object file. * Applications that patch the object with the image can determine * the size looking at the oldmfs section size within the kernel. */ u_char mfs_root[MD_ROOT_SIZE*1024] __attribute__ ((section ("oldmfs"))); const int mfs_root_size = sizeof(mfs_root); #elif defined(MD_ROOT_MEM) /* MD region already mapped in the memory */ u_char *mfs_root; int mfs_root_size; #else extern volatile u_char __weak_symbol mfs_root; extern volatile u_char __weak_symbol mfs_root_end; __GLOBL(mfs_root); __GLOBL(mfs_root_end); #define mfs_root_size ((uintptr_t)(&mfs_root_end - &mfs_root)) #endif #endif static g_init_t g_md_init; static g_fini_t g_md_fini; static g_start_t g_md_start; static g_access_t g_md_access; static void g_md_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp); static g_provgone_t g_md_providergone; static struct cdev *status_dev = NULL; static struct sx md_sx; static struct unrhdr *md_uh; static d_ioctl_t mdctlioctl; static struct cdevsw mdctl_cdevsw = { .d_version = D_VERSION, .d_ioctl = mdctlioctl, .d_name = MD_NAME, }; struct g_class g_md_class = { .name = "MD", .version = G_VERSION, .init = g_md_init, .fini = g_md_fini, .start = g_md_start, .access = g_md_access, .dumpconf = g_md_dumpconf, .providergone = g_md_providergone, }; DECLARE_GEOM_CLASS(g_md_class, g_md); static LIST_HEAD(, md_s) md_softc_list = LIST_HEAD_INITIALIZER(md_softc_list); #define NINDIR (PAGE_SIZE / sizeof(uintptr_t)) #define NMASK (NINDIR-1) static int nshift; static uma_zone_t md_pbuf_zone; struct indir { uintptr_t *array; u_int total; u_int used; u_int shift; }; struct md_s { int unit; LIST_ENTRY(md_s) list; struct bio_queue_head bio_queue; struct mtx queue_mtx; struct cdev *dev; enum md_types type; off_t mediasize; unsigned sectorsize; unsigned opencount; unsigned fwheads; unsigned fwsectors; char ident[32]; unsigned flags; char name[20]; struct proc *procp; struct g_geom *gp; struct g_provider *pp; int (*start)(struct md_s *sc, struct bio *bp); struct devstat *devstat; /* MD_MALLOC related fields */ struct indir *indir; uma_zone_t uma; /* MD_PRELOAD related fields */ u_char *pl_ptr; size_t pl_len; /* MD_VNODE related fields */ struct vnode *vnode; char file[PATH_MAX]; char label[PATH_MAX]; struct ucred *cred; /* MD_SWAP related fields */ vm_object_t object; }; static struct indir * new_indir(u_int shift) { struct indir *ip; ip = malloc(sizeof *ip, M_MD, (md_malloc_wait ? M_WAITOK : M_NOWAIT) | M_ZERO); if (ip == NULL) return (NULL); ip->array = malloc(sizeof(uintptr_t) * NINDIR, M_MDSECT, (md_malloc_wait ? M_WAITOK : M_NOWAIT) | M_ZERO); if (ip->array == NULL) { free(ip, M_MD); return (NULL); } ip->total = NINDIR; ip->shift = shift; return (ip); } static void del_indir(struct indir *ip) { free(ip->array, M_MDSECT); free(ip, M_MD); } static void destroy_indir(struct md_s *sc, struct indir *ip) { int i; for (i = 0; i < NINDIR; i++) { if (!ip->array[i]) continue; if (ip->shift) destroy_indir(sc, (struct indir*)(ip->array[i])); else if (ip->array[i] > 255) uma_zfree(sc->uma, (void *)(ip->array[i])); } del_indir(ip); } /* * This function does the math and allocates the top level "indir" structure * for a device of "size" sectors. */ static struct indir * dimension(off_t size) { off_t rcnt; struct indir *ip; int layer; rcnt = size; layer = 0; while (rcnt > NINDIR) { rcnt /= NINDIR; layer++; } /* * XXX: the top layer is probably not fully populated, so we allocate * too much space for ip->array in here. */ ip = malloc(sizeof *ip, M_MD, M_WAITOK | M_ZERO); ip->array = malloc(sizeof(uintptr_t) * NINDIR, M_MDSECT, M_WAITOK | M_ZERO); ip->total = NINDIR; ip->shift = layer * nshift; return (ip); } /* * Read a given sector */ static uintptr_t s_read(struct indir *ip, off_t offset) { struct indir *cip; int idx; uintptr_t up; if (md_debug > 1) printf("s_read(%jd)\n", (intmax_t)offset); up = 0; for (cip = ip; cip != NULL;) { if (cip->shift) { idx = (offset >> cip->shift) & NMASK; up = cip->array[idx]; cip = (struct indir *)up; continue; } idx = offset & NMASK; return (cip->array[idx]); } return (0); } /* * Write a given sector, prune the tree if the value is 0 */ static int s_write(struct indir *ip, off_t offset, uintptr_t ptr) { struct indir *cip, *lip[10]; int idx, li; uintptr_t up; if (md_debug > 1) printf("s_write(%jd, %p)\n", (intmax_t)offset, (void *)ptr); up = 0; li = 0; cip = ip; for (;;) { lip[li++] = cip; if (cip->shift) { idx = (offset >> cip->shift) & NMASK; up = cip->array[idx]; if (up != 0) { cip = (struct indir *)up; continue; } /* Allocate branch */ cip->array[idx] = (uintptr_t)new_indir(cip->shift - nshift); if (cip->array[idx] == 0) return (ENOSPC); cip->used++; up = cip->array[idx]; cip = (struct indir *)up; continue; } /* leafnode */ idx = offset & NMASK; up = cip->array[idx]; if (up != 0) cip->used--; cip->array[idx] = ptr; if (ptr != 0) cip->used++; break; } if (cip->used != 0 || li == 1) return (0); li--; while (cip->used == 0 && cip != ip) { li--; idx = (offset >> lip[li]->shift) & NMASK; up = lip[li]->array[idx]; KASSERT(up == (uintptr_t)cip, ("md screwed up")); del_indir(cip); lip[li]->array[idx] = 0; lip[li]->used--; cip = lip[li]; } return (0); } static int g_md_access(struct g_provider *pp, int r, int w, int e) { struct md_s *sc; sc = pp->geom->softc; if (sc == NULL) { if (r <= 0 && w <= 0 && e <= 0) return (0); return (ENXIO); } r += pp->acr; w += pp->acw; e += pp->ace; if ((sc->flags & MD_READONLY) != 0 && w > 0) return (EROFS); if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) { sc->opencount = 1; } else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) { sc->opencount = 0; } return (0); } static void g_md_start(struct bio *bp) { struct md_s *sc; sc = bp->bio_to->geom->softc; if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE)) { devstat_start_transaction_bio(sc->devstat, bp); } mtx_lock(&sc->queue_mtx); bioq_disksort(&sc->bio_queue, bp); wakeup(sc); mtx_unlock(&sc->queue_mtx); } #define MD_MALLOC_MOVE_ZERO 1 #define MD_MALLOC_MOVE_FILL 2 #define MD_MALLOC_MOVE_READ 3 #define MD_MALLOC_MOVE_WRITE 4 #define MD_MALLOC_MOVE_CMP 5 static int md_malloc_move_ma(vm_page_t **mp, int *ma_offs, unsigned sectorsize, void *ptr, u_char fill, int op) { struct sf_buf *sf; vm_page_t m, *mp1; char *p, first; off_t *uc; unsigned n; int error, i, ma_offs1, sz, first_read; m = NULL; error = 0; sf = NULL; /* if (op == MD_MALLOC_MOVE_CMP) { gcc */ first = 0; first_read = 0; uc = ptr; mp1 = *mp; ma_offs1 = *ma_offs; /* } */ sched_pin(); for (n = sectorsize; n != 0; n -= sz) { sz = imin(PAGE_SIZE - *ma_offs, n); if (m != **mp) { if (sf != NULL) sf_buf_free(sf); m = **mp; sf = sf_buf_alloc(m, SFB_CPUPRIVATE | (md_malloc_wait ? 0 : SFB_NOWAIT)); if (sf == NULL) { error = ENOMEM; break; } } p = (char *)sf_buf_kva(sf) + *ma_offs; switch (op) { case MD_MALLOC_MOVE_ZERO: bzero(p, sz); break; case MD_MALLOC_MOVE_FILL: memset(p, fill, sz); break; case MD_MALLOC_MOVE_READ: bcopy(ptr, p, sz); cpu_flush_dcache(p, sz); break; case MD_MALLOC_MOVE_WRITE: bcopy(p, ptr, sz); break; case MD_MALLOC_MOVE_CMP: for (i = 0; i < sz; i++, p++) { if (!first_read) { *uc = (u_char)*p; first = *p; first_read = 1; } else if (*p != first) { error = EDOOFUS; break; } } break; default: KASSERT(0, ("md_malloc_move_ma unknown op %d\n", op)); break; } if (error != 0) break; *ma_offs += sz; *ma_offs %= PAGE_SIZE; if (*ma_offs == 0) (*mp)++; ptr = (char *)ptr + sz; } if (sf != NULL) sf_buf_free(sf); sched_unpin(); if (op == MD_MALLOC_MOVE_CMP && error != 0) { *mp = mp1; *ma_offs = ma_offs1; } return (error); } static int md_malloc_move_vlist(bus_dma_segment_t **pvlist, int *pma_offs, unsigned len, void *ptr, u_char fill, int op) { bus_dma_segment_t *vlist; uint8_t *p, *end, first; off_t *uc; int ma_offs, seg_len; vlist = *pvlist; ma_offs = *pma_offs; uc = ptr; for (; len != 0; len -= seg_len) { seg_len = imin(vlist->ds_len - ma_offs, len); p = (uint8_t *)(uintptr_t)vlist->ds_addr + ma_offs; switch (op) { case MD_MALLOC_MOVE_ZERO: bzero(p, seg_len); break; case MD_MALLOC_MOVE_FILL: memset(p, fill, seg_len); break; case MD_MALLOC_MOVE_READ: bcopy(ptr, p, seg_len); cpu_flush_dcache(p, seg_len); break; case MD_MALLOC_MOVE_WRITE: bcopy(p, ptr, seg_len); break; case MD_MALLOC_MOVE_CMP: end = p + seg_len; first = *uc = *p; /* Confirm all following bytes match the first */ while (++p < end) { if (*p != first) return (EDOOFUS); } break; default: KASSERT(0, ("md_malloc_move_vlist unknown op %d\n", op)); break; } ma_offs += seg_len; if (ma_offs == vlist->ds_len) { ma_offs = 0; vlist++; } ptr = (uint8_t *)ptr + seg_len; } *pvlist = vlist; *pma_offs = ma_offs; return (0); } static int mdstart_malloc(struct md_s *sc, struct bio *bp) { u_char *dst; vm_page_t *m; bus_dma_segment_t *vlist; int i, error, error1, ma_offs, notmapped; off_t secno, nsec, uc; uintptr_t sp, osp; switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; default: return (EOPNOTSUPP); } notmapped = (bp->bio_flags & BIO_UNMAPPED) != 0; vlist = (bp->bio_flags & BIO_VLIST) != 0 ? (bus_dma_segment_t *)bp->bio_data : NULL; if (notmapped) { m = bp->bio_ma; ma_offs = bp->bio_ma_offset; dst = NULL; KASSERT(vlist == NULL, ("vlists cannot be unmapped")); } else if (vlist != NULL) { ma_offs = bp->bio_ma_offset; dst = NULL; } else { dst = bp->bio_data; } nsec = bp->bio_length / sc->sectorsize; secno = bp->bio_offset / sc->sectorsize; error = 0; while (nsec--) { osp = s_read(sc->indir, secno); if (bp->bio_cmd == BIO_DELETE) { if (osp != 0) error = s_write(sc->indir, secno, 0); } else if (bp->bio_cmd == BIO_READ) { if (osp == 0) { if (notmapped) { error = md_malloc_move_ma(&m, &ma_offs, sc->sectorsize, NULL, 0, MD_MALLOC_MOVE_ZERO); } else if (vlist != NULL) { error = md_malloc_move_vlist(&vlist, &ma_offs, sc->sectorsize, NULL, 0, MD_MALLOC_MOVE_ZERO); } else bzero(dst, sc->sectorsize); } else if (osp <= 255) { if (notmapped) { error = md_malloc_move_ma(&m, &ma_offs, sc->sectorsize, NULL, osp, MD_MALLOC_MOVE_FILL); } else if (vlist != NULL) { error = md_malloc_move_vlist(&vlist, &ma_offs, sc->sectorsize, NULL, osp, MD_MALLOC_MOVE_FILL); } else memset(dst, osp, sc->sectorsize); } else { if (notmapped) { error = md_malloc_move_ma(&m, &ma_offs, sc->sectorsize, (void *)osp, 0, MD_MALLOC_MOVE_READ); } else if (vlist != NULL) { error = md_malloc_move_vlist(&vlist, &ma_offs, sc->sectorsize, (void *)osp, 0, MD_MALLOC_MOVE_READ); } else { bcopy((void *)osp, dst, sc->sectorsize); cpu_flush_dcache(dst, sc->sectorsize); } } osp = 0; } else if (bp->bio_cmd == BIO_WRITE) { if (sc->flags & MD_COMPRESS) { if (notmapped) { error1 = md_malloc_move_ma(&m, &ma_offs, sc->sectorsize, &uc, 0, MD_MALLOC_MOVE_CMP); i = error1 == 0 ? sc->sectorsize : 0; } else if (vlist != NULL) { error1 = md_malloc_move_vlist(&vlist, &ma_offs, sc->sectorsize, &uc, 0, MD_MALLOC_MOVE_CMP); i = error1 == 0 ? sc->sectorsize : 0; } else { uc = dst[0]; for (i = 1; i < sc->sectorsize; i++) { if (dst[i] != uc) break; } } } else { i = 0; uc = 0; } if (i == sc->sectorsize) { if (osp != uc) error = s_write(sc->indir, secno, uc); } else { if (osp <= 255) { sp = (uintptr_t)uma_zalloc(sc->uma, md_malloc_wait ? M_WAITOK : M_NOWAIT); if (sp == 0) { error = ENOSPC; break; } if (notmapped) { error = md_malloc_move_ma(&m, &ma_offs, sc->sectorsize, (void *)sp, 0, MD_MALLOC_MOVE_WRITE); } else if (vlist != NULL) { error = md_malloc_move_vlist( &vlist, &ma_offs, sc->sectorsize, (void *)sp, 0, MD_MALLOC_MOVE_WRITE); } else { bcopy(dst, (void *)sp, sc->sectorsize); } error = s_write(sc->indir, secno, sp); } else { if (notmapped) { error = md_malloc_move_ma(&m, &ma_offs, sc->sectorsize, (void *)osp, 0, MD_MALLOC_MOVE_WRITE); } else if (vlist != NULL) { error = md_malloc_move_vlist( &vlist, &ma_offs, sc->sectorsize, (void *)osp, 0, MD_MALLOC_MOVE_WRITE); } else { bcopy(dst, (void *)osp, sc->sectorsize); } osp = 0; } } } else { error = EOPNOTSUPP; } if (osp > 255) uma_zfree(sc->uma, (void*)osp); if (error != 0) break; secno++; if (!notmapped && vlist == NULL) dst += sc->sectorsize; } bp->bio_resid = 0; return (error); } static void mdcopyto_vlist(void *src, bus_dma_segment_t *vlist, off_t offset, off_t len) { off_t seg_len; while (offset >= vlist->ds_len) { offset -= vlist->ds_len; vlist++; } while (len != 0) { seg_len = omin(len, vlist->ds_len - offset); bcopy(src, (void *)(uintptr_t)(vlist->ds_addr + offset), seg_len); offset = 0; src = (uint8_t *)src + seg_len; len -= seg_len; vlist++; } } static void mdcopyfrom_vlist(bus_dma_segment_t *vlist, off_t offset, void *dst, off_t len) { off_t seg_len; while (offset >= vlist->ds_len) { offset -= vlist->ds_len; vlist++; } while (len != 0) { seg_len = omin(len, vlist->ds_len - offset); bcopy((void *)(uintptr_t)(vlist->ds_addr + offset), dst, seg_len); offset = 0; dst = (uint8_t *)dst + seg_len; len -= seg_len; vlist++; } } static int mdstart_preload(struct md_s *sc, struct bio *bp) { uint8_t *p; p = sc->pl_ptr + bp->bio_offset; switch (bp->bio_cmd) { case BIO_READ: if ((bp->bio_flags & BIO_VLIST) != 0) { mdcopyto_vlist(p, (bus_dma_segment_t *)bp->bio_data, bp->bio_ma_offset, bp->bio_length); } else { bcopy(p, bp->bio_data, bp->bio_length); } cpu_flush_dcache(bp->bio_data, bp->bio_length); break; case BIO_WRITE: if ((bp->bio_flags & BIO_VLIST) != 0) { mdcopyfrom_vlist((bus_dma_segment_t *)bp->bio_data, bp->bio_ma_offset, p, bp->bio_length); } else { bcopy(bp->bio_data, p, bp->bio_length); } break; } bp->bio_resid = 0; return (0); } static int mdstart_vnode(struct md_s *sc, struct bio *bp) { int error; struct uio auio; struct iovec aiov; struct iovec *piov; struct mount *mp; struct vnode *vp; struct buf *pb; bus_dma_segment_t *vlist; struct thread *td; off_t iolen, iostart, len, zerosize; int ma_offs, npages; switch (bp->bio_cmd) { case BIO_READ: auio.uio_rw = UIO_READ; break; case BIO_WRITE: case BIO_DELETE: auio.uio_rw = UIO_WRITE; break; case BIO_FLUSH: break; default: return (EOPNOTSUPP); } td = curthread; vp = sc->vnode; pb = NULL; piov = NULL; ma_offs = bp->bio_ma_offset; len = bp->bio_length; /* * VNODE I/O * * If an error occurs, we set BIO_ERROR but we do not set * B_INVAL because (for a write anyway), the buffer is * still valid. */ if (bp->bio_cmd == BIO_FLUSH) { (void) vn_start_write(vp, &mp, V_WAIT); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = VOP_FSYNC(vp, MNT_WAIT, td); VOP_UNLOCK(vp); vn_finished_write(mp); return (error); } auio.uio_offset = (vm_ooffset_t)bp->bio_offset; auio.uio_resid = bp->bio_length; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; if (bp->bio_cmd == BIO_DELETE) { /* * Emulate BIO_DELETE by writing zeros. */ zerosize = ZERO_REGION_SIZE - (ZERO_REGION_SIZE % sc->sectorsize); auio.uio_iovcnt = howmany(bp->bio_length, zerosize); piov = malloc(sizeof(*piov) * auio.uio_iovcnt, M_MD, M_WAITOK); auio.uio_iov = piov; while (len > 0) { piov->iov_base = __DECONST(void *, zero_region); piov->iov_len = len; if (len > zerosize) piov->iov_len = zerosize; len -= piov->iov_len; piov++; } piov = auio.uio_iov; } else if ((bp->bio_flags & BIO_VLIST) != 0) { piov = malloc(sizeof(*piov) * bp->bio_ma_n, M_MD, M_WAITOK); auio.uio_iov = piov; vlist = (bus_dma_segment_t *)bp->bio_data; while (len > 0) { piov->iov_base = (void *)(uintptr_t)(vlist->ds_addr + ma_offs); piov->iov_len = vlist->ds_len - ma_offs; if (piov->iov_len > len) piov->iov_len = len; len -= piov->iov_len; ma_offs = 0; vlist++; piov++; } auio.uio_iovcnt = piov - auio.uio_iov; piov = auio.uio_iov; } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) { pb = uma_zalloc(md_pbuf_zone, M_WAITOK); bp->bio_resid = len; unmapped_step: npages = atop(min(MAXPHYS, round_page(len + (ma_offs & PAGE_MASK)))); iolen = min(ptoa(npages) - (ma_offs & PAGE_MASK), len); KASSERT(iolen > 0, ("zero iolen")); pmap_qenter((vm_offset_t)pb->b_data, &bp->bio_ma[atop(ma_offs)], npages); aiov.iov_base = (void *)((vm_offset_t)pb->b_data + (ma_offs & PAGE_MASK)); aiov.iov_len = iolen; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_resid = iolen; } else { aiov.iov_base = bp->bio_data; aiov.iov_len = bp->bio_length; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; } iostart = auio.uio_offset; if (auio.uio_rw == UIO_READ) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = VOP_READ(vp, &auio, 0, sc->cred); VOP_UNLOCK(vp); } else { (void) vn_start_write(vp, &mp, V_WAIT); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = VOP_WRITE(vp, &auio, sc->flags & MD_ASYNC ? 0 : IO_SYNC, sc->cred); VOP_UNLOCK(vp); vn_finished_write(mp); if (error == 0) sc->flags &= ~MD_VERIFY; } /* When MD_CACHE is set, try to avoid double-caching the data. */ if (error == 0 && (sc->flags & MD_CACHE) == 0) VOP_ADVISE(vp, iostart, auio.uio_offset - 1, POSIX_FADV_DONTNEED); if (pb != NULL) { pmap_qremove((vm_offset_t)pb->b_data, npages); if (error == 0) { len -= iolen; bp->bio_resid -= iolen; ma_offs += iolen; if (len > 0) goto unmapped_step; } uma_zfree(md_pbuf_zone, pb); } free(piov, M_MD); if (pb == NULL) bp->bio_resid = auio.uio_resid; return (error); } static int mdstart_swap(struct md_s *sc, struct bio *bp) { vm_page_t m; u_char *p; vm_pindex_t i, lastp; bus_dma_segment_t *vlist; int rv, ma_offs, offs, len, lastend; switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; default: return (EOPNOTSUPP); } p = bp->bio_data; ma_offs = (bp->bio_flags & (BIO_UNMAPPED|BIO_VLIST)) != 0 ? bp->bio_ma_offset : 0; vlist = (bp->bio_flags & BIO_VLIST) != 0 ? (bus_dma_segment_t *)bp->bio_data : NULL; /* * offs is the offset at which to start operating on the * next (ie, first) page. lastp is the last page on * which we're going to operate. lastend is the ending * position within that last page (ie, PAGE_SIZE if * we're operating on complete aligned pages). */ offs = bp->bio_offset % PAGE_SIZE; lastp = (bp->bio_offset + bp->bio_length - 1) / PAGE_SIZE; lastend = (bp->bio_offset + bp->bio_length - 1) % PAGE_SIZE + 1; rv = VM_PAGER_OK; - VM_OBJECT_WLOCK(sc->object); vm_object_pip_add(sc->object, 1); for (i = bp->bio_offset / PAGE_SIZE; i <= lastp; i++) { len = ((i == lastp) ? lastend : PAGE_SIZE) - offs; + VM_OBJECT_WLOCK(sc->object); m = vm_page_grab(sc->object, i, VM_ALLOC_SYSTEM); + VM_OBJECT_WUNLOCK(sc->object); if (bp->bio_cmd == BIO_READ) { if (vm_page_all_valid(m)) rv = VM_PAGER_OK; else rv = vm_pager_get_pages(sc->object, &m, 1, NULL, NULL); if (rv == VM_PAGER_ERROR) { + VM_OBJECT_WLOCK(sc->object); vm_page_free(m); + VM_OBJECT_WUNLOCK(sc->object); break; } else if (rv == VM_PAGER_FAIL) { /* * Pager does not have the page. Zero * the allocated page, and mark it as * valid. Do not set dirty, the page * can be recreated if thrown out. */ pmap_zero_page(m); vm_page_valid(m); } if ((bp->bio_flags & BIO_UNMAPPED) != 0) { pmap_copy_pages(&m, offs, bp->bio_ma, ma_offs, len); } else if ((bp->bio_flags & BIO_VLIST) != 0) { physcopyout_vlist(VM_PAGE_TO_PHYS(m) + offs, vlist, ma_offs, len); cpu_flush_dcache(p, len); } else { physcopyout(VM_PAGE_TO_PHYS(m) + offs, p, len); cpu_flush_dcache(p, len); } } else if (bp->bio_cmd == BIO_WRITE) { if (len == PAGE_SIZE || vm_page_all_valid(m)) rv = VM_PAGER_OK; else rv = vm_pager_get_pages(sc->object, &m, 1, NULL, NULL); if (rv == VM_PAGER_ERROR) { + VM_OBJECT_WLOCK(sc->object); vm_page_free(m); + VM_OBJECT_WUNLOCK(sc->object); break; } else if (rv == VM_PAGER_FAIL) pmap_zero_page(m); if ((bp->bio_flags & BIO_UNMAPPED) != 0) { pmap_copy_pages(bp->bio_ma, ma_offs, &m, offs, len); } else if ((bp->bio_flags & BIO_VLIST) != 0) { physcopyin_vlist(vlist, ma_offs, VM_PAGE_TO_PHYS(m) + offs, len); } else { physcopyin(p, VM_PAGE_TO_PHYS(m) + offs, len); } vm_page_valid(m); vm_page_set_dirty(m); } else if (bp->bio_cmd == BIO_DELETE) { if (len == PAGE_SIZE || vm_page_all_valid(m)) rv = VM_PAGER_OK; else rv = vm_pager_get_pages(sc->object, &m, 1, NULL, NULL); + VM_OBJECT_WLOCK(sc->object); if (rv == VM_PAGER_ERROR) { vm_page_free(m); + VM_OBJECT_WUNLOCK(sc->object); break; } else if (rv == VM_PAGER_FAIL) { vm_page_free(m); m = NULL; } else { /* Page is valid. */ if (len != PAGE_SIZE) { pmap_zero_page_area(m, offs, len); vm_page_set_dirty(m); } else { vm_pager_page_unswapped(m); vm_page_free(m); m = NULL; } } + VM_OBJECT_WUNLOCK(sc->object); } if (m != NULL) { vm_page_xunbusy(m); /* * The page may be deactivated prior to setting * PGA_REFERENCED, but in this case it will be * reactivated by the page daemon. */ if (vm_page_active(m)) vm_page_reference(m); else vm_page_activate(m); } /* Actions on further pages start at offset 0 */ p += PAGE_SIZE - offs; offs = 0; ma_offs += len; } vm_object_pip_wakeup(sc->object); - VM_OBJECT_WUNLOCK(sc->object); return (rv != VM_PAGER_ERROR ? 0 : ENOSPC); } static int mdstart_null(struct md_s *sc, struct bio *bp) { switch (bp->bio_cmd) { case BIO_READ: bzero(bp->bio_data, bp->bio_length); cpu_flush_dcache(bp->bio_data, bp->bio_length); break; case BIO_WRITE: break; } bp->bio_resid = 0; return (0); } static void md_kthread(void *arg) { struct md_s *sc; struct bio *bp; int error; sc = arg; thread_lock(curthread); sched_prio(curthread, PRIBIO); thread_unlock(curthread); if (sc->type == MD_VNODE) curthread->td_pflags |= TDP_NORUNNINGBUF; for (;;) { mtx_lock(&sc->queue_mtx); if (sc->flags & MD_SHUTDOWN) { sc->flags |= MD_EXITING; mtx_unlock(&sc->queue_mtx); kproc_exit(0); } bp = bioq_takefirst(&sc->bio_queue); if (!bp) { msleep(sc, &sc->queue_mtx, PRIBIO | PDROP, "mdwait", 0); continue; } mtx_unlock(&sc->queue_mtx); if (bp->bio_cmd == BIO_GETATTR) { int isv = ((sc->flags & MD_VERIFY) != 0); if ((sc->fwsectors && sc->fwheads && (g_handleattr_int(bp, "GEOM::fwsectors", sc->fwsectors) || g_handleattr_int(bp, "GEOM::fwheads", sc->fwheads))) || g_handleattr_int(bp, "GEOM::candelete", 1)) error = -1; else if (sc->ident[0] != '\0' && g_handleattr_str(bp, "GEOM::ident", sc->ident)) error = -1; else if (g_handleattr_int(bp, "MNT::verified", isv)) error = -1; else error = EOPNOTSUPP; } else { error = sc->start(sc, bp); } if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) { /* * Devstat uses (bio_bcount, bio_resid) for * determining the length of the completed part of * the i/o. g_io_deliver() will translate from * bio_completed to that, but it also destroys the * bio so we must do our own translation. */ bp->bio_bcount = bp->bio_length; bp->bio_resid = (error == -1 ? bp->bio_bcount : 0); devstat_end_transaction_bio(sc->devstat, bp); } if (error != -1) { bp->bio_completed = bp->bio_length; g_io_deliver(bp, error); } } } static struct md_s * mdfind(int unit) { struct md_s *sc; LIST_FOREACH(sc, &md_softc_list, list) { if (sc->unit == unit) break; } return (sc); } static struct md_s * mdnew(int unit, int *errp, enum md_types type) { struct md_s *sc; int error; *errp = 0; if (unit == -1) unit = alloc_unr(md_uh); else unit = alloc_unr_specific(md_uh, unit); if (unit == -1) { *errp = EBUSY; return (NULL); } sc = (struct md_s *)malloc(sizeof *sc, M_MD, M_WAITOK | M_ZERO); sc->type = type; bioq_init(&sc->bio_queue); mtx_init(&sc->queue_mtx, "md bio queue", NULL, MTX_DEF); sc->unit = unit; sprintf(sc->name, "md%d", unit); LIST_INSERT_HEAD(&md_softc_list, sc, list); error = kproc_create(md_kthread, sc, &sc->procp, 0, 0,"%s", sc->name); if (error == 0) return (sc); LIST_REMOVE(sc, list); mtx_destroy(&sc->queue_mtx); free_unr(md_uh, sc->unit); free(sc, M_MD); *errp = error; return (NULL); } static void mdinit(struct md_s *sc) { struct g_geom *gp; struct g_provider *pp; g_topology_lock(); gp = g_new_geomf(&g_md_class, "md%d", sc->unit); gp->softc = sc; pp = g_new_providerf(gp, "md%d", sc->unit); devstat_remove_entry(pp->stat); pp->stat = NULL; pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; pp->mediasize = sc->mediasize; pp->sectorsize = sc->sectorsize; switch (sc->type) { case MD_MALLOC: case MD_VNODE: case MD_SWAP: pp->flags |= G_PF_ACCEPT_UNMAPPED; break; case MD_PRELOAD: case MD_NULL: break; } sc->gp = gp; sc->pp = pp; sc->devstat = devstat_new_entry("md", sc->unit, sc->sectorsize, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); sc->devstat->id = pp; g_error_provider(pp, 0); g_topology_unlock(); } static int mdcreate_malloc(struct md_s *sc, struct md_req *mdr) { uintptr_t sp; int error; off_t u; error = 0; if (mdr->md_options & ~(MD_AUTOUNIT | MD_COMPRESS | MD_RESERVE)) return (EINVAL); if (mdr->md_sectorsize != 0 && !powerof2(mdr->md_sectorsize)) return (EINVAL); /* Compression doesn't make sense if we have reserved space */ if (mdr->md_options & MD_RESERVE) mdr->md_options &= ~MD_COMPRESS; if (mdr->md_fwsectors != 0) sc->fwsectors = mdr->md_fwsectors; if (mdr->md_fwheads != 0) sc->fwheads = mdr->md_fwheads; sc->flags = mdr->md_options & (MD_COMPRESS | MD_FORCE); sc->indir = dimension(sc->mediasize / sc->sectorsize); sc->uma = uma_zcreate(sc->name, sc->sectorsize, NULL, NULL, NULL, NULL, 0x1ff, 0); if (mdr->md_options & MD_RESERVE) { off_t nsectors; nsectors = sc->mediasize / sc->sectorsize; for (u = 0; u < nsectors; u++) { sp = (uintptr_t)uma_zalloc(sc->uma, (md_malloc_wait ? M_WAITOK : M_NOWAIT) | M_ZERO); if (sp != 0) error = s_write(sc->indir, u, sp); else error = ENOMEM; if (error != 0) break; } } return (error); } static int mdsetcred(struct md_s *sc, struct ucred *cred) { char *tmpbuf; int error = 0; /* * Set credits in our softc */ if (sc->cred) crfree(sc->cred); sc->cred = crhold(cred); /* * Horrible kludge to establish credentials for NFS XXX. */ if (sc->vnode) { struct uio auio; struct iovec aiov; tmpbuf = malloc(sc->sectorsize, M_TEMP, M_WAITOK); bzero(&auio, sizeof(auio)); aiov.iov_base = tmpbuf; aiov.iov_len = sc->sectorsize; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_resid = aiov.iov_len; vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY); error = VOP_READ(sc->vnode, &auio, 0, sc->cred); VOP_UNLOCK(sc->vnode); free(tmpbuf, M_TEMP); } return (error); } static int mdcreate_vnode(struct md_s *sc, struct md_req *mdr, struct thread *td) { struct vattr vattr; struct nameidata nd; char *fname; int error, flags; fname = mdr->md_file; if (mdr->md_file_seg == UIO_USERSPACE) { error = copyinstr(fname, sc->file, sizeof(sc->file), NULL); if (error != 0) return (error); } else if (mdr->md_file_seg == UIO_SYSSPACE) strlcpy(sc->file, fname, sizeof(sc->file)); else return (EDOOFUS); /* * If the user specified that this is a read only device, don't * set the FWRITE mask before trying to open the backing store. */ flags = FREAD | ((mdr->md_options & MD_READONLY) ? 0 : FWRITE) \ | ((mdr->md_options & MD_VERIFY) ? O_VERIFY : 0); NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, sc->file, td); error = vn_open(&nd, &flags, 0, NULL); if (error != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp->v_type != VREG) { error = EINVAL; goto bad; } error = VOP_GETATTR(nd.ni_vp, &vattr, td->td_ucred); if (error != 0) goto bad; if (VOP_ISLOCKED(nd.ni_vp) != LK_EXCLUSIVE) { vn_lock(nd.ni_vp, LK_UPGRADE | LK_RETRY); if (VN_IS_DOOMED(nd.ni_vp)) { /* Forced unmount. */ error = EBADF; goto bad; } } nd.ni_vp->v_vflag |= VV_MD; VOP_UNLOCK(nd.ni_vp); if (mdr->md_fwsectors != 0) sc->fwsectors = mdr->md_fwsectors; if (mdr->md_fwheads != 0) sc->fwheads = mdr->md_fwheads; snprintf(sc->ident, sizeof(sc->ident), "MD-DEV%ju-INO%ju", (uintmax_t)vattr.va_fsid, (uintmax_t)vattr.va_fileid); sc->flags = mdr->md_options & (MD_ASYNC | MD_CACHE | MD_FORCE | MD_VERIFY); if (!(flags & FWRITE)) sc->flags |= MD_READONLY; sc->vnode = nd.ni_vp; error = mdsetcred(sc, td->td_ucred); if (error != 0) { sc->vnode = NULL; vn_lock(nd.ni_vp, LK_EXCLUSIVE | LK_RETRY); nd.ni_vp->v_vflag &= ~VV_MD; goto bad; } return (0); bad: VOP_UNLOCK(nd.ni_vp); (void)vn_close(nd.ni_vp, flags, td->td_ucred, td); return (error); } static void g_md_providergone(struct g_provider *pp) { struct md_s *sc = pp->geom->softc; mtx_lock(&sc->queue_mtx); sc->flags |= MD_PROVIDERGONE; wakeup(&sc->flags); mtx_unlock(&sc->queue_mtx); } static int mddestroy(struct md_s *sc, struct thread *td) { if (sc->gp) { g_topology_lock(); g_wither_geom(sc->gp, ENXIO); g_topology_unlock(); mtx_lock(&sc->queue_mtx); while (!(sc->flags & MD_PROVIDERGONE)) msleep(&sc->flags, &sc->queue_mtx, PRIBIO, "mddestroy", 0); mtx_unlock(&sc->queue_mtx); } if (sc->devstat) { devstat_remove_entry(sc->devstat); sc->devstat = NULL; } mtx_lock(&sc->queue_mtx); sc->flags |= MD_SHUTDOWN; wakeup(sc); while (!(sc->flags & MD_EXITING)) msleep(sc->procp, &sc->queue_mtx, PRIBIO, "mddestroy", hz / 10); mtx_unlock(&sc->queue_mtx); mtx_destroy(&sc->queue_mtx); if (sc->vnode != NULL) { vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY); sc->vnode->v_vflag &= ~VV_MD; VOP_UNLOCK(sc->vnode); (void)vn_close(sc->vnode, sc->flags & MD_READONLY ? FREAD : (FREAD|FWRITE), sc->cred, td); } if (sc->cred != NULL) crfree(sc->cred); if (sc->object != NULL) vm_object_deallocate(sc->object); if (sc->indir) destroy_indir(sc, sc->indir); if (sc->uma) uma_zdestroy(sc->uma); LIST_REMOVE(sc, list); free_unr(md_uh, sc->unit); free(sc, M_MD); return (0); } static int mdresize(struct md_s *sc, struct md_req *mdr) { int error, res; vm_pindex_t oldpages, newpages; switch (sc->type) { case MD_VNODE: case MD_NULL: break; case MD_SWAP: if (mdr->md_mediasize <= 0 || (mdr->md_mediasize % PAGE_SIZE) != 0) return (EDOM); oldpages = OFF_TO_IDX(round_page(sc->mediasize)); newpages = OFF_TO_IDX(round_page(mdr->md_mediasize)); if (newpages < oldpages) { VM_OBJECT_WLOCK(sc->object); vm_object_page_remove(sc->object, newpages, 0, 0); swap_pager_freespace(sc->object, newpages, oldpages - newpages); swap_release_by_cred(IDX_TO_OFF(oldpages - newpages), sc->cred); sc->object->charge = IDX_TO_OFF(newpages); sc->object->size = newpages; VM_OBJECT_WUNLOCK(sc->object); } else if (newpages > oldpages) { res = swap_reserve_by_cred(IDX_TO_OFF(newpages - oldpages), sc->cred); if (!res) return (ENOMEM); if ((mdr->md_options & MD_RESERVE) || (sc->flags & MD_RESERVE)) { error = swap_pager_reserve(sc->object, oldpages, newpages - oldpages); if (error < 0) { swap_release_by_cred( IDX_TO_OFF(newpages - oldpages), sc->cred); return (EDOM); } } VM_OBJECT_WLOCK(sc->object); sc->object->charge = IDX_TO_OFF(newpages); sc->object->size = newpages; VM_OBJECT_WUNLOCK(sc->object); } break; default: return (EOPNOTSUPP); } sc->mediasize = mdr->md_mediasize; g_topology_lock(); g_resize_provider(sc->pp, sc->mediasize); g_topology_unlock(); return (0); } static int mdcreate_swap(struct md_s *sc, struct md_req *mdr, struct thread *td) { vm_ooffset_t npage; int error; /* * Range check. Disallow negative sizes and sizes not being * multiple of page size. */ if (sc->mediasize <= 0 || (sc->mediasize % PAGE_SIZE) != 0) return (EDOM); /* * Allocate an OBJT_SWAP object. * * Note the truncation. */ if ((mdr->md_options & MD_VERIFY) != 0) return (EINVAL); npage = mdr->md_mediasize / PAGE_SIZE; if (mdr->md_fwsectors != 0) sc->fwsectors = mdr->md_fwsectors; if (mdr->md_fwheads != 0) sc->fwheads = mdr->md_fwheads; sc->object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * npage, VM_PROT_DEFAULT, 0, td->td_ucred); if (sc->object == NULL) return (ENOMEM); sc->flags = mdr->md_options & (MD_FORCE | MD_RESERVE); if (mdr->md_options & MD_RESERVE) { if (swap_pager_reserve(sc->object, 0, npage) < 0) { error = EDOM; goto finish; } } error = mdsetcred(sc, td->td_ucred); finish: if (error != 0) { vm_object_deallocate(sc->object); sc->object = NULL; } return (error); } static int mdcreate_null(struct md_s *sc, struct md_req *mdr, struct thread *td) { /* * Range check. Disallow negative sizes and sizes not being * multiple of page size. */ if (sc->mediasize <= 0 || (sc->mediasize % PAGE_SIZE) != 0) return (EDOM); return (0); } static int kern_mdattach_locked(struct thread *td, struct md_req *mdr) { struct md_s *sc; unsigned sectsize; int error, i; sx_assert(&md_sx, SA_XLOCKED); switch (mdr->md_type) { case MD_MALLOC: case MD_PRELOAD: case MD_VNODE: case MD_SWAP: case MD_NULL: break; default: return (EINVAL); } if (mdr->md_sectorsize == 0) sectsize = DEV_BSIZE; else sectsize = mdr->md_sectorsize; if (sectsize > MAXPHYS || mdr->md_mediasize < sectsize) return (EINVAL); if (mdr->md_options & MD_AUTOUNIT) sc = mdnew(-1, &error, mdr->md_type); else { if (mdr->md_unit > INT_MAX) return (EINVAL); sc = mdnew(mdr->md_unit, &error, mdr->md_type); } if (sc == NULL) return (error); if (mdr->md_label != NULL) error = copyinstr(mdr->md_label, sc->label, sizeof(sc->label), NULL); if (error != 0) goto err_after_new; if (mdr->md_options & MD_AUTOUNIT) mdr->md_unit = sc->unit; sc->mediasize = mdr->md_mediasize; sc->sectorsize = sectsize; error = EDOOFUS; switch (sc->type) { case MD_MALLOC: sc->start = mdstart_malloc; error = mdcreate_malloc(sc, mdr); break; case MD_PRELOAD: /* * We disallow attaching preloaded memory disks via * ioctl. Preloaded memory disks are automatically * attached in g_md_init(). */ error = EOPNOTSUPP; break; case MD_VNODE: sc->start = mdstart_vnode; error = mdcreate_vnode(sc, mdr, td); break; case MD_SWAP: sc->start = mdstart_swap; error = mdcreate_swap(sc, mdr, td); break; case MD_NULL: sc->start = mdstart_null; error = mdcreate_null(sc, mdr, td); break; } err_after_new: if (error != 0) { mddestroy(sc, td); return (error); } /* Prune off any residual fractional sector */ i = sc->mediasize % sc->sectorsize; sc->mediasize -= i; mdinit(sc); return (0); } static int kern_mdattach(struct thread *td, struct md_req *mdr) { int error; sx_xlock(&md_sx); error = kern_mdattach_locked(td, mdr); sx_xunlock(&md_sx); return (error); } static int kern_mddetach_locked(struct thread *td, struct md_req *mdr) { struct md_s *sc; sx_assert(&md_sx, SA_XLOCKED); if (mdr->md_mediasize != 0 || (mdr->md_options & ~MD_FORCE) != 0) return (EINVAL); sc = mdfind(mdr->md_unit); if (sc == NULL) return (ENOENT); if (sc->opencount != 0 && !(sc->flags & MD_FORCE) && !(mdr->md_options & MD_FORCE)) return (EBUSY); return (mddestroy(sc, td)); } static int kern_mddetach(struct thread *td, struct md_req *mdr) { int error; sx_xlock(&md_sx); error = kern_mddetach_locked(td, mdr); sx_xunlock(&md_sx); return (error); } static int kern_mdresize_locked(struct md_req *mdr) { struct md_s *sc; sx_assert(&md_sx, SA_XLOCKED); if ((mdr->md_options & ~(MD_FORCE | MD_RESERVE)) != 0) return (EINVAL); sc = mdfind(mdr->md_unit); if (sc == NULL) return (ENOENT); if (mdr->md_mediasize < sc->sectorsize) return (EINVAL); if (mdr->md_mediasize < sc->mediasize && !(sc->flags & MD_FORCE) && !(mdr->md_options & MD_FORCE)) return (EBUSY); return (mdresize(sc, mdr)); } static int kern_mdresize(struct md_req *mdr) { int error; sx_xlock(&md_sx); error = kern_mdresize_locked(mdr); sx_xunlock(&md_sx); return (error); } static int kern_mdquery_locked(struct md_req *mdr) { struct md_s *sc; int error; sx_assert(&md_sx, SA_XLOCKED); sc = mdfind(mdr->md_unit); if (sc == NULL) return (ENOENT); mdr->md_type = sc->type; mdr->md_options = sc->flags; mdr->md_mediasize = sc->mediasize; mdr->md_sectorsize = sc->sectorsize; error = 0; if (mdr->md_label != NULL) { error = copyout(sc->label, mdr->md_label, strlen(sc->label) + 1); if (error != 0) return (error); } if (sc->type == MD_VNODE || (sc->type == MD_PRELOAD && mdr->md_file != NULL)) error = copyout(sc->file, mdr->md_file, strlen(sc->file) + 1); return (error); } static int kern_mdquery(struct md_req *mdr) { int error; sx_xlock(&md_sx); error = kern_mdquery_locked(mdr); sx_xunlock(&md_sx); return (error); } /* Copy members that are not userspace pointers. */ #define MD_IOCTL2REQ(mdio, mdr) do { \ (mdr)->md_unit = (mdio)->md_unit; \ (mdr)->md_type = (mdio)->md_type; \ (mdr)->md_mediasize = (mdio)->md_mediasize; \ (mdr)->md_sectorsize = (mdio)->md_sectorsize; \ (mdr)->md_options = (mdio)->md_options; \ (mdr)->md_fwheads = (mdio)->md_fwheads; \ (mdr)->md_fwsectors = (mdio)->md_fwsectors; \ (mdr)->md_units = &(mdio)->md_pad[0]; \ (mdr)->md_units_nitems = nitems((mdio)->md_pad); \ } while(0) /* Copy members that might have been updated */ #define MD_REQ2IOCTL(mdr, mdio) do { \ (mdio)->md_unit = (mdr)->md_unit; \ (mdio)->md_type = (mdr)->md_type; \ (mdio)->md_mediasize = (mdr)->md_mediasize; \ (mdio)->md_sectorsize = (mdr)->md_sectorsize; \ (mdio)->md_options = (mdr)->md_options; \ (mdio)->md_fwheads = (mdr)->md_fwheads; \ (mdio)->md_fwsectors = (mdr)->md_fwsectors; \ } while(0) static int mdctlioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) { struct md_req mdr; int error; if (md_debug) printf("mdctlioctl(%s %lx %p %x %p)\n", devtoname(dev), cmd, addr, flags, td); bzero(&mdr, sizeof(mdr)); switch (cmd) { case MDIOCATTACH: case MDIOCDETACH: case MDIOCRESIZE: case MDIOCQUERY: { struct md_ioctl *mdio = (struct md_ioctl *)addr; if (mdio->md_version != MDIOVERSION) return (EINVAL); MD_IOCTL2REQ(mdio, &mdr); mdr.md_file = mdio->md_file; mdr.md_file_seg = UIO_USERSPACE; /* If the file is adjacent to the md_ioctl it's in kernel. */ if ((void *)mdio->md_file == (void *)(mdio + 1)) mdr.md_file_seg = UIO_SYSSPACE; mdr.md_label = mdio->md_label; break; } #ifdef COMPAT_FREEBSD32 case MDIOCATTACH_32: case MDIOCDETACH_32: case MDIOCRESIZE_32: case MDIOCQUERY_32: { struct md_ioctl32 *mdio = (struct md_ioctl32 *)addr; if (mdio->md_version != MDIOVERSION) return (EINVAL); MD_IOCTL2REQ(mdio, &mdr); mdr.md_file = (void *)(uintptr_t)mdio->md_file; mdr.md_file_seg = UIO_USERSPACE; mdr.md_label = (void *)(uintptr_t)mdio->md_label; break; } #endif default: /* Fall through to handler switch. */ break; } error = 0; switch (cmd) { case MDIOCATTACH: #ifdef COMPAT_FREEBSD32 case MDIOCATTACH_32: #endif error = kern_mdattach(td, &mdr); break; case MDIOCDETACH: #ifdef COMPAT_FREEBSD32 case MDIOCDETACH_32: #endif error = kern_mddetach(td, &mdr); break; case MDIOCRESIZE: #ifdef COMPAT_FREEBSD32 case MDIOCRESIZE_32: #endif error = kern_mdresize(&mdr); break; case MDIOCQUERY: #ifdef COMPAT_FREEBSD32 case MDIOCQUERY_32: #endif error = kern_mdquery(&mdr); break; default: error = ENOIOCTL; } switch (cmd) { case MDIOCATTACH: case MDIOCQUERY: { struct md_ioctl *mdio = (struct md_ioctl *)addr; MD_REQ2IOCTL(&mdr, mdio); break; } #ifdef COMPAT_FREEBSD32 case MDIOCATTACH_32: case MDIOCQUERY_32: { struct md_ioctl32 *mdio = (struct md_ioctl32 *)addr; MD_REQ2IOCTL(&mdr, mdio); break; } #endif default: /* Other commands to not alter mdr. */ break; } return (error); } static void md_preloaded(u_char *image, size_t length, const char *name) { struct md_s *sc; int error; sc = mdnew(-1, &error, MD_PRELOAD); if (sc == NULL) return; sc->mediasize = length; sc->sectorsize = DEV_BSIZE; sc->pl_ptr = image; sc->pl_len = length; sc->start = mdstart_preload; if (name != NULL) strlcpy(sc->file, name, sizeof(sc->file)); #ifdef MD_ROOT if (sc->unit == 0) { #ifndef ROOTDEVNAME rootdevnames[0] = MD_ROOT_FSTYPE ":/dev/md0"; #endif #ifdef MD_ROOT_READONLY sc->flags |= MD_READONLY; #endif } #endif mdinit(sc); if (name != NULL) { printf("%s%d: Preloaded image <%s> %zd bytes at %p\n", MD_NAME, sc->unit, name, length, image); } else { printf("%s%d: Embedded image %zd bytes at %p\n", MD_NAME, sc->unit, length, image); } } static void g_md_init(struct g_class *mp __unused) { caddr_t mod; u_char *ptr, *name, *type; unsigned len; int i; /* figure out log2(NINDIR) */ for (i = NINDIR, nshift = -1; i; nshift++) i >>= 1; mod = NULL; sx_init(&md_sx, "MD config lock"); g_topology_unlock(); md_uh = new_unrhdr(0, INT_MAX, NULL); #ifdef MD_ROOT if (mfs_root_size != 0) { sx_xlock(&md_sx); #ifdef MD_ROOT_MEM md_preloaded(mfs_root, mfs_root_size, NULL); #else md_preloaded(__DEVOLATILE(u_char *, &mfs_root), mfs_root_size, NULL); #endif sx_xunlock(&md_sx); } #endif /* XXX: are preload_* static or do they need Giant ? */ while ((mod = preload_search_next_name(mod)) != NULL) { name = (char *)preload_search_info(mod, MODINFO_NAME); if (name == NULL) continue; type = (char *)preload_search_info(mod, MODINFO_TYPE); if (type == NULL) continue; if (strcmp(type, "md_image") && strcmp(type, "mfs_root")) continue; ptr = preload_fetch_addr(mod); len = preload_fetch_size(mod); if (ptr != NULL && len != 0) { sx_xlock(&md_sx); md_preloaded(ptr, len, name); sx_xunlock(&md_sx); } } md_pbuf_zone = pbuf_zsecond_create("mdpbuf", nswbuf / 10); status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL, 0600, MDCTL_NAME); g_topology_lock(); } static void g_md_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp) { struct md_s *mp; char *type; mp = gp->softc; if (mp == NULL) return; switch (mp->type) { case MD_MALLOC: type = "malloc"; break; case MD_PRELOAD: type = "preload"; break; case MD_VNODE: type = "vnode"; break; case MD_SWAP: type = "swap"; break; case MD_NULL: type = "null"; break; default: type = "unknown"; break; } if (pp != NULL) { if (indent == NULL) { sbuf_printf(sb, " u %d", mp->unit); sbuf_printf(sb, " s %ju", (uintmax_t) mp->sectorsize); sbuf_printf(sb, " f %ju", (uintmax_t) mp->fwheads); sbuf_printf(sb, " fs %ju", (uintmax_t) mp->fwsectors); sbuf_printf(sb, " l %ju", (uintmax_t) mp->mediasize); sbuf_printf(sb, " t %s", type); if ((mp->type == MD_VNODE && mp->vnode != NULL) || (mp->type == MD_PRELOAD && mp->file[0] != '\0')) sbuf_printf(sb, " file %s", mp->file); sbuf_printf(sb, " label %s", mp->label); } else { sbuf_printf(sb, "%s%d\n", indent, mp->unit); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t) mp->sectorsize); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t) mp->fwheads); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t) mp->fwsectors); if (mp->ident[0] != '\0') { sbuf_printf(sb, "%s", indent); g_conf_printf_escaped(sb, "%s", mp->ident); sbuf_printf(sb, "\n"); } sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t) mp->mediasize); sbuf_printf(sb, "%s%s\n", indent, (mp->flags & MD_COMPRESS) == 0 ? "off": "on"); sbuf_printf(sb, "%s%s\n", indent, (mp->flags & MD_READONLY) == 0 ? "read-write": "read-only"); sbuf_printf(sb, "%s%s\n", indent, type); if ((mp->type == MD_VNODE && mp->vnode != NULL) || (mp->type == MD_PRELOAD && mp->file[0] != '\0')) { sbuf_printf(sb, "%s", indent); g_conf_printf_escaped(sb, "%s", mp->file); sbuf_printf(sb, "\n"); } if (mp->type == MD_VNODE) sbuf_printf(sb, "%s%s\n", indent, (mp->flags & MD_CACHE) == 0 ? "off": "on"); sbuf_printf(sb, "%s\n"); } } } static void g_md_fini(struct g_class *mp __unused) { sx_destroy(&md_sx); if (status_dev != NULL) destroy_dev(status_dev); uma_zdestroy(md_pbuf_zone); delete_unrhdr(md_uh); } Index: projects/clang1000-import/sys/dev/mps/mps_pci.c =================================================================== --- projects/clang1000-import/sys/dev/mps/mps_pci.c (revision 356919) +++ projects/clang1000-import/sys/dev/mps/mps_pci.c (revision 356920) @@ -1,444 +1,445 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009 Yahoo! Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* PCI/PCI-X/PCIe bus interface for the Avago Tech (LSI) MPT2 controllers */ /* TODO Move headers to mpsvar */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int mps_pci_probe(device_t); static int mps_pci_attach(device_t); static int mps_pci_detach(device_t); static int mps_pci_suspend(device_t); static int mps_pci_resume(device_t); static void mps_pci_free(struct mps_softc *); static int mps_alloc_msix(struct mps_softc *sc, int msgs); static int mps_alloc_msi(struct mps_softc *sc, int msgs); static int mps_pci_alloc_interrupts(struct mps_softc *sc); static device_method_t mps_methods[] = { DEVMETHOD(device_probe, mps_pci_probe), DEVMETHOD(device_attach, mps_pci_attach), DEVMETHOD(device_detach, mps_pci_detach), DEVMETHOD(device_suspend, mps_pci_suspend), DEVMETHOD(device_resume, mps_pci_resume), DEVMETHOD_END }; static driver_t mps_pci_driver = { "mps", mps_methods, sizeof(struct mps_softc) }; struct mps_ident { uint16_t vendor; uint16_t device; uint16_t subvendor; uint16_t subdevice; u_int flags; const char *desc; } mps_identifiers[] = { { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2004, 0xffff, 0xffff, 0, "Avago Technologies (LSI) SAS2004" }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2008, 0xffff, 0xffff, 0, "Avago Technologies (LSI) SAS2008" }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2108_1, 0xffff, 0xffff, 0, "Avago Technologies (LSI) SAS2108" }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2108_2, 0xffff, 0xffff, 0, "Avago Technologies (LSI) SAS2108" }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2108_3, 0xffff, 0xffff, 0, "Avago Technologies (LSI) SAS2108" }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2116_1, 0xffff, 0xffff, 0, "Avago Technologies (LSI) SAS2116" }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2116_2, 0xffff, 0xffff, 0, "Avago Technologies (LSI) SAS2116" }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2208_1, 0xffff, 0xffff, 0, "Avago Technologies (LSI) SAS2208" }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2208_2, 0xffff, 0xffff, 0, "Avago Technologies (LSI) SAS2208" }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2208_3, 0xffff, 0xffff, 0, "Avago Technologies (LSI) SAS2208" }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2208_4, 0xffff, 0xffff, 0, "Avago Technologies (LSI) SAS2208" }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2208_5, 0xffff, 0xffff, 0, "Avago Technologies (LSI) SAS2208" }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2208_6, 0xffff, 0xffff, 0, "Avago Technologies (LSI) SAS2208" }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2308_1, 0xffff, 0xffff, 0, "Avago Technologies (LSI) SAS2308" }, // Add Customer specific vender/subdevice id before generic // (0xffff) vender/subdevice id. { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2308_2, 0x8086, 0x3516, 0, "Intel(R) Integrated RAID Module RMS25JB080" }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2308_2, 0x8086, 0x3517, 0, "Intel(R) Integrated RAID Module RMS25JB040" }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2308_2, 0x8086, 0x3518, 0, "Intel(R) Integrated RAID Module RMS25KB080" }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2308_2, 0x8086, 0x3519, 0, "Intel(R) Integrated RAID Module RMS25KB040" }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2308_2, 0xffff, 0xffff, 0, "Avago Technologies (LSI) SAS2308" }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2308_3, 0xffff, 0xffff, 0, "Avago Technologies (LSI) SAS2308" }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SSS6200, 0xffff, 0xffff, 0, "Avago Technologies (LSI) SSS6200" }, { 0, 0, 0, 0, 0, NULL } }; static devclass_t mps_devclass; DRIVER_MODULE(mps, pci, mps_pci_driver, mps_devclass, 0, 0); +MODULE_DEPEND(mps, cam, 1, 1, 1); MODULE_PNP_INFO("U16:vendor;U16:device;U16:subvendor;U16:subdevice", pci, mps, mps_identifiers, nitems(mps_identifiers) - 1); static struct mps_ident * mps_find_ident(device_t dev) { struct mps_ident *m; for (m = mps_identifiers; m->vendor != 0; m++) { if (m->vendor != pci_get_vendor(dev)) continue; if (m->device != pci_get_device(dev)) continue; if ((m->subvendor != 0xffff) && (m->subvendor != pci_get_subvendor(dev))) continue; if ((m->subdevice != 0xffff) && (m->subdevice != pci_get_subdevice(dev))) continue; return (m); } return (NULL); } static int mps_pci_probe(device_t dev) { struct mps_ident *id; if ((id = mps_find_ident(dev)) != NULL) { device_set_desc(dev, id->desc); return (BUS_PROBE_DEFAULT); } return (ENXIO); } static int mps_pci_attach(device_t dev) { struct mps_softc *sc; struct mps_ident *m; int error; sc = device_get_softc(dev); bzero(sc, sizeof(*sc)); sc->mps_dev = dev; m = mps_find_ident(dev); sc->mps_flags = m->flags; mps_get_tunables(sc); /* Twiddle basic PCI config bits for a sanity check */ pci_enable_busmaster(dev); /* Allocate the System Interface Register Set */ sc->mps_regs_rid = PCIR_BAR(1); if ((sc->mps_regs_resource = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->mps_regs_rid, RF_ACTIVE)) == NULL) { mps_printf(sc, "Cannot allocate PCI registers\n"); return (ENXIO); } sc->mps_btag = rman_get_bustag(sc->mps_regs_resource); sc->mps_bhandle = rman_get_bushandle(sc->mps_regs_resource); /* Allocate the parent DMA tag */ if (bus_dma_tag_create( bus_get_dma_tag(dev), /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ BUS_SPACE_UNRESTRICTED, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mps_parent_dmat)) { mps_printf(sc, "Cannot allocate parent DMA tag\n"); mps_pci_free(sc); return (ENOMEM); } if (((error = mps_pci_alloc_interrupts(sc)) != 0) || ((error = mps_attach(sc)) != 0)) mps_pci_free(sc); return (error); } /* * Allocate, but don't assign interrupts early. Doing it before requesting * the IOCFacts message informs the firmware that we want to do MSI-X * multiqueue. We might not use all of the available messages, but there's * no reason to re-alloc if we don't. */ static int mps_pci_alloc_interrupts(struct mps_softc *sc) { device_t dev; int error, msgs; dev = sc->mps_dev; error = 0; msgs = 0; if (sc->disable_msix == 0) { msgs = pci_msix_count(dev); mps_dprint(sc, MPS_INIT, "Counted %d MSI-X messages\n", msgs); msgs = min(msgs, sc->max_msix); msgs = min(msgs, MPS_MSIX_MAX); msgs = min(msgs, 1); /* XXX */ if (msgs != 0) { mps_dprint(sc, MPS_INIT, "Attempting to allocate %d " "MSI-X messages\n", msgs); error = mps_alloc_msix(sc, msgs); } } if (((error != 0) || (msgs == 0)) && (sc->disable_msi == 0)) { msgs = pci_msi_count(dev); mps_dprint(sc, MPS_INIT, "Counted %d MSI messages\n", msgs); msgs = min(msgs, MPS_MSI_MAX); if (msgs != 0) { mps_dprint(sc, MPS_INIT, "Attempting to allocate %d " "MSI messages\n", MPS_MSI_MAX); error = mps_alloc_msi(sc, MPS_MSI_MAX); } } if ((error != 0) || (msgs == 0)) { /* * If neither MSI or MSI-X are avaiable, assume legacy INTx. * This also implies that there will be only 1 queue. */ mps_dprint(sc, MPS_INIT, "Falling back to legacy INTx\n"); sc->mps_flags |= MPS_FLAGS_INTX; msgs = 1; } else sc->mps_flags |= MPS_FLAGS_MSI; sc->msi_msgs = msgs; mps_dprint(sc, MPS_INIT, "Allocated %d interrupts\n", msgs); return (error); } int mps_pci_setup_interrupts(struct mps_softc *sc) { device_t dev; struct mps_queue *q; void *ihandler; int i, error, rid, initial_rid; dev = sc->mps_dev; error = ENXIO; if (sc->mps_flags & MPS_FLAGS_INTX) { initial_rid = 0; ihandler = mps_intr; } else if (sc->mps_flags & MPS_FLAGS_MSI) { initial_rid = 1; ihandler = mps_intr_msi; } else { mps_dprint(sc, MPS_ERROR|MPS_INIT, "Unable to set up interrupts\n"); return (EINVAL); } for (i = 0; i < sc->msi_msgs; i++) { q = &sc->queues[i]; rid = i + initial_rid; q->irq_rid = rid; q->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &q->irq_rid, RF_ACTIVE); if (q->irq == NULL) { mps_dprint(sc, MPS_ERROR|MPS_INIT, "Cannot allocate interrupt RID %d\n", rid); sc->msi_msgs = i; break; } error = bus_setup_intr(dev, q->irq, INTR_TYPE_BIO | INTR_MPSAFE, NULL, ihandler, sc, &q->intrhand); if (error) { mps_dprint(sc, MPS_ERROR|MPS_INIT, "Cannot setup interrupt RID %d\n", rid); sc->msi_msgs = i; break; } } mps_dprint(sc, MPS_INIT, "Set up %d interrupts\n", sc->msi_msgs); return (error); } static int mps_pci_detach(device_t dev) { struct mps_softc *sc; int error; sc = device_get_softc(dev); if ((error = mps_free(sc)) != 0) return (error); mps_pci_free(sc); return (0); } void mps_pci_free_interrupts(struct mps_softc *sc) { struct mps_queue *q; int i; if (sc->queues == NULL) return; for (i = 0; i < sc->msi_msgs; i++) { q = &sc->queues[i]; if (q->irq != NULL) { bus_teardown_intr(sc->mps_dev, q->irq, q->intrhand); bus_release_resource(sc->mps_dev, SYS_RES_IRQ, q->irq_rid, q->irq); } } } static void mps_pci_free(struct mps_softc *sc) { if (sc->mps_parent_dmat != NULL) { bus_dma_tag_destroy(sc->mps_parent_dmat); } mps_pci_free_interrupts(sc); if (sc->mps_flags & MPS_FLAGS_MSI) pci_release_msi(sc->mps_dev); if (sc->mps_regs_resource != NULL) { bus_release_resource(sc->mps_dev, SYS_RES_MEMORY, sc->mps_regs_rid, sc->mps_regs_resource); } return; } static int mps_pci_suspend(device_t dev) { return (EINVAL); } static int mps_pci_resume(device_t dev) { return (EINVAL); } static int mps_alloc_msix(struct mps_softc *sc, int msgs) { int error; error = pci_alloc_msix(sc->mps_dev, &msgs); return (error); } static int mps_alloc_msi(struct mps_softc *sc, int msgs) { int error; error = pci_alloc_msi(sc->mps_dev, &msgs); return (error); } int mps_pci_restore(struct mps_softc *sc) { struct pci_devinfo *dinfo; mps_dprint(sc, MPS_TRACE, "%s\n", __func__); dinfo = device_get_ivars(sc->mps_dev); if (dinfo == NULL) { mps_dprint(sc, MPS_FAULT, "%s: NULL dinfo\n", __func__); return (EINVAL); } pci_cfg_restore(sc->mps_dev, dinfo); return (0); } Index: projects/clang1000-import/sys/dts/arm/zedboard.dts =================================================================== --- projects/clang1000-import/sys/dts/arm/zedboard.dts (revision 356919) +++ projects/clang1000-import/sys/dts/arm/zedboard.dts (revision 356920) @@ -1,71 +1,80 @@ /*- * Copyright (c) 2016 The FreeBSD Foundation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /dts-v1/; /include/ "zynq-7000.dtsi" / { model = "zedboard"; compatible = "digilent,zedboard", "xlnx,zynq-7000"; memory { // First megabyte isn't accessible by all interconnect masters. device_type = "memory"; reg = <0x100000 0x1ff00000>; /* 511MB RAM at 0x100000 */ }; chosen { stdin = &uart1; stdout = &uart1; }; }; &slcr { clock-frequency = <33333333>; // 33Mhz PS_CLK }; &global_timer { clock-frequency = <333333333>; // 333Mhz }; &uart1 { status = "okay"; }; ð0 { status = "okay"; }; +&qspi0 { + status = "okay"; + + flash0 { + compatible = "st,m25p", "s25fl128"; + spi-chipselect = <0>; + }; +}; + &sdhci0 { status = "okay"; }; &ehci0 { status = "okay"; phy_vbus_ext; }; Index: projects/clang1000-import/sys/dts/arm/zybo.dts =================================================================== --- projects/clang1000-import/sys/dts/arm/zybo.dts (revision 356919) +++ projects/clang1000-import/sys/dts/arm/zybo.dts (revision 356920) @@ -1,69 +1,78 @@ /*- * Copyright (c) 2016 The FreeBSD Foundation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /dts-v1/; /include/ "zynq-7000.dtsi" / { model = "zybo"; compatible = "digilent,zybo", "xlnx,zynq-7000"; memory { // First megabyte isn't accessible by all interconnect masters. device_type = "memory"; reg = <0x100000 0x1ff00000>; /* 511MB RAM at 0x100000 */ }; chosen { stdin = &uart1; stdout = &uart1; }; }; &slcr { clock-frequency = <50000000>; // 50Mhz PS_CLK }; &global_timer { clock-frequency = <325000000>; // 325Mhz }; &uart1 { status = "okay"; }; ð0 { status = "okay"; }; +&qspi0 { + status = "okay"; + + flash0 { + compatible = "st,m25p", "s25fl128"; + spi-chipselect = <0>; + }; +}; + &sdhci0 { status = "okay"; }; &ehci0 { status = "okay"; }; Index: projects/clang1000-import/sys/dts/arm/zynq-7000.dtsi =================================================================== --- projects/clang1000-import/sys/dts/arm/zynq-7000.dtsi (revision 356919) +++ projects/clang1000-import/sys/dts/arm/zynq-7000.dtsi (revision 356920) @@ -1,229 +1,230 @@ /*- * Copyright (c) 2016 The FreeBSD Foundation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ / { compatible = "xlnx,zynq-7000"; #address-cells = <1>; #size-cells = <1>; interrupt-parent = <&GIC>; // Reserve first half megabyte because it is not accessible to all // bus masters. memreserve = <0x00000000 0x00080000>; // Zynq PS System registers. // ps7sys@f8000000 { device_type = "soc"; compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0xf8000000 0xf10000>; // SLCR block slcr: slcr@7000 { compatible = "xlnx,zy7_slcr"; reg = <0x0 0x1000>; }; // Interrupt controller GIC: gic { compatible = "arm,gic"; interrupt-controller; #address-cells = <0>; #interrupt-cells = <3>; reg = <0xf01000 0x1000>, // distributer registers <0xf00100 0x0100>; // CPU if registers }; // L2 cache controller pl310@f02000 { compatible = "arm,pl310"; reg = <0xf02000 0x1000>; interrupts = <0 2 4>; interrupt-parent = <&GIC>; }; // Device Config devcfg: devcfg@7000 { compatible = "xlnx,zy7_devcfg"; reg = <0x7000 0x1000>; interrupts = <0 8 4>; interrupt-parent = <&GIC>; }; // triple timer counters0,1 ttc0: ttc@1000 { compatible = "xlnx,ttc"; reg = <0x1000 0x1000>; }; ttc1: ttc@2000 { compatible = "xlnx,ttc"; reg = <0x2000 0x1000>; }; // ARM Cortex A9 TWD Timer global_timer: timer@f00600 { compatible = "arm,mpcore-timers"; #address-cells = <1>; #size-cells = <0>; reg = <0xf00200 0x100>, // Global Timer Regs <0xf00600 0x20>; // Private Timer Regs interrupts = <1 11 1>, <1 13 1>; interrupt-parent = <&GIC>; }; // system watch-dog timer swdt@5000 { device_type = "watchdog"; compatible = "xlnx,zy7_wdt"; reg = <0x5000 0x1000>; interrupts = <0 9 1>; interrupt-parent = <&GIC>; }; scuwdt@f00620 { device_type = "watchdog"; compatible = "arm,mpcore_wdt"; reg = <0xf00620 0x20>; interrupts = <1 14 1>; interrupt-parent = <&GIC>; reset = <1>; }; }; // pssys@f8000000 // Zynq PS I/O Peripheral registers. // ps7io@e0000000 { device_type = "soc"; compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0xe0000000 0x300000>; // UART controllers uart0: uart@0000 { device_type = "serial"; compatible = "cadence,uart"; status = "disabled"; reg = <0x0000 0x1000>; interrupts = <0 27 4>; interrupt-parent = <&GIC>; clock-frequency = <50000000>; }; uart1: uart@1000 { device_type = "serial"; compatible = "cadence,uart"; status = "disabled"; reg = <0x1000 0x1000>; interrupts = <0 50 4>; interrupt-parent = <&GIC>; clock-frequency = <50000000>; }; // USB controllers ehci0: ehci@2000 { compatible = "xlnx,zy7_ehci"; status = "disabled"; reg = <0x2000 0x1000>; interrupts = <0 21 4>; interrupt-parent = <&GIC>; }; ehci1: ehci@3000 { compatible = "xlnx,zy7_ehci"; status = "disabled"; reg = <0x3000 0x1000>; interrupts = <0 44 4>; interrupt-parent = <&GIC>; }; // GPIO controller gpio: gpio@a000 { compatible = "xlnx,zy7_gpio"; reg = <0xa000 0x1000>; interrupts = <0 20 4>; interrupt-parent = <&GIC>; }; // Gigabit Ethernet controllers eth0: eth@b000 { device_type = "network"; compatible = "cadence,gem"; status = "disabled"; reg = <0xb000 0x1000>; interrupts = <0 22 4>; interrupt-parent = <&GIC>; ref-clock-num = <0>; }; eth1: eth@c000 { device_type = "network"; compatible = "cadence,gem"; status = "disabled"; reg = <0xc000 0x1000>; interrupts = <0 45 4>; interrupt-parent = <&GIC>; ref-clock-num = <1>; }; // Quad-SPI controller qspi0: qspi@d000 { compatible = "xlnx,zy7_qspi"; status = "disabled"; reg = <0xd000 0x1000>; interrupts = <0 19 4>; interrupt-parent = <&GIC>; - spi-clock = <50000000>; + ref-clock = <200000000>; // 200 Mhz + spi-clock = <50000000>; // 50 Mhz }; // SDIO controllers sdhci0: sdhci@100000 { compatible = "xlnx,zy7_sdhci"; status = "disabled"; reg = <0x100000 0x1000>; interrupts = <0 24 4>; interrupt-parent = <&GIC>; max-frequency = <50000000>; }; sdhci1: sdhci@101000 { compatible = "xlnx,zy7_sdhci"; status = "disabled"; reg = <0x101000 0x1000>; interrupts = <0 47 4>; interrupt-parent = <&GIC>; max-frequency = <50000000>; }; }; // ps7io@e0000000 }; Index: projects/clang1000-import/sys/fs/tmpfs/tmpfs_subr.c =================================================================== --- projects/clang1000-import/sys/fs/tmpfs/tmpfs_subr.c (revision 356919) +++ projects/clang1000-import/sys/fs/tmpfs/tmpfs_subr.c (revision 356920) @@ -1,1936 +1,1940 @@ /* $NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause-NetBSD * * Copyright (c) 2005 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Julio M. Merino Vidal, developed as part of Google's Summer of Code * 2005 program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Efficient memory file system supporting functions. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW, 0, "tmpfs file system"); static long tmpfs_pages_reserved = TMPFS_PAGES_MINRESERVED; static uma_zone_t tmpfs_dirent_pool; static uma_zone_t tmpfs_node_pool; static int tmpfs_node_ctor(void *mem, int size, void *arg, int flags) { struct tmpfs_node *node; node = mem; node->tn_gen++; node->tn_size = 0; node->tn_status = 0; node->tn_flags = 0; node->tn_links = 0; node->tn_vnode = NULL; node->tn_vpstate = 0; return (0); } static void tmpfs_node_dtor(void *mem, int size, void *arg) { struct tmpfs_node *node; node = mem; node->tn_type = VNON; } static int tmpfs_node_init(void *mem, int size, int flags) { struct tmpfs_node *node; node = mem; node->tn_id = 0; mtx_init(&node->tn_interlock, "tmpfsni", NULL, MTX_DEF); node->tn_gen = arc4random(); return (0); } static void tmpfs_node_fini(void *mem, int size) { struct tmpfs_node *node; node = mem; mtx_destroy(&node->tn_interlock); } void tmpfs_subr_init(void) { tmpfs_dirent_pool = uma_zcreate("TMPFS dirent", sizeof(struct tmpfs_dirent), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); tmpfs_node_pool = uma_zcreate("TMPFS node", sizeof(struct tmpfs_node), tmpfs_node_ctor, tmpfs_node_dtor, tmpfs_node_init, tmpfs_node_fini, UMA_ALIGN_PTR, 0); } void tmpfs_subr_uninit(void) { uma_zdestroy(tmpfs_node_pool); uma_zdestroy(tmpfs_dirent_pool); } static int sysctl_mem_reserved(SYSCTL_HANDLER_ARGS) { int error; long pages, bytes; pages = *(long *)arg1; bytes = pages * PAGE_SIZE; error = sysctl_handle_long(oidp, &bytes, 0, req); if (error || !req->newptr) return (error); pages = bytes / PAGE_SIZE; if (pages < TMPFS_PAGES_MINRESERVED) return (EINVAL); *(long *)arg1 = pages; return (0); } SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory_reserved, CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &tmpfs_pages_reserved, 0, sysctl_mem_reserved, "L", "Amount of available memory and swap below which tmpfs growth stops"); static __inline int tmpfs_dirtree_cmp(struct tmpfs_dirent *a, struct tmpfs_dirent *b); RB_PROTOTYPE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp); size_t tmpfs_mem_avail(void) { vm_ooffset_t avail; avail = swap_pager_avail + vm_free_count() - tmpfs_pages_reserved; if (__predict_false(avail < 0)) avail = 0; return (avail); } size_t tmpfs_pages_used(struct tmpfs_mount *tmp) { const size_t node_size = sizeof(struct tmpfs_node) + sizeof(struct tmpfs_dirent); size_t meta_pages; meta_pages = howmany((uintmax_t)tmp->tm_nodes_inuse * node_size, PAGE_SIZE); return (meta_pages + tmp->tm_pages_used); } static size_t tmpfs_pages_check_avail(struct tmpfs_mount *tmp, size_t req_pages) { if (tmpfs_mem_avail() < req_pages) return (0); if (tmp->tm_pages_max != ULONG_MAX && tmp->tm_pages_max < req_pages + tmpfs_pages_used(tmp)) return (0); return (1); } void tmpfs_ref_node(struct tmpfs_node *node) { TMPFS_NODE_LOCK(node); tmpfs_ref_node_locked(node); TMPFS_NODE_UNLOCK(node); } void tmpfs_ref_node_locked(struct tmpfs_node *node) { TMPFS_NODE_ASSERT_LOCKED(node); KASSERT(node->tn_refcount > 0, ("node %p zero refcount", node)); KASSERT(node->tn_refcount < UINT_MAX, ("node %p refcount %u", node, node->tn_refcount)); node->tn_refcount++; } /* * Allocates a new node of type 'type' inside the 'tmp' mount point, with * its owner set to 'uid', its group to 'gid' and its mode set to 'mode', * using the credentials of the process 'p'. * * If the node type is set to 'VDIR', then the parent parameter must point * to the parent directory of the node being created. It may only be NULL * while allocating the root node. * * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter * specifies the device the node represents. * * If the node type is set to 'VLNK', then the parameter target specifies * the file name of the target file for the symbolic link that is being * created. * * Note that new nodes are retrieved from the available list if it has * items or, if it is empty, from the node pool as long as there is enough * space to create them. * * Returns zero on success or an appropriate error code on failure. */ int tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *tmp, enum vtype type, uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent, const char *target, dev_t rdev, struct tmpfs_node **node) { struct tmpfs_node *nnode; vm_object_t obj; /* If the root directory of the 'tmp' file system is not yet * allocated, this must be the request to do it. */ MPASS(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR)); MPASS(IFF(type == VLNK, target != NULL)); MPASS(IFF(type == VBLK || type == VCHR, rdev != VNOVAL)); if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max) return (ENOSPC); if (tmpfs_pages_check_avail(tmp, 1) == 0) return (ENOSPC); if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) { /* * When a new tmpfs node is created for fully * constructed mount point, there must be a parent * node, which vnode is locked exclusively. As * consequence, if the unmount is executing in * parallel, vflush() cannot reclaim the parent vnode. * Due to this, the check for MNTK_UNMOUNT flag is not * racy: if we did not see MNTK_UNMOUNT flag, then tmp * cannot be destroyed until node construction is * finished and the parent vnode unlocked. * * Tmpfs does not need to instantiate new nodes during * unmount. */ return (EBUSY); } if ((mp->mnt_kern_flag & MNT_RDONLY) != 0) return (EROFS); nnode = uma_zalloc_arg(tmpfs_node_pool, tmp, M_WAITOK); /* Generic initialization. */ nnode->tn_type = type; vfs_timestamp(&nnode->tn_atime); nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime = nnode->tn_atime; nnode->tn_uid = uid; nnode->tn_gid = gid; nnode->tn_mode = mode; nnode->tn_id = alloc_unr64(&tmp->tm_ino_unr); nnode->tn_refcount = 1; /* Type-specific initialization. */ switch (nnode->tn_type) { case VBLK: case VCHR: nnode->tn_rdev = rdev; break; case VDIR: RB_INIT(&nnode->tn_dir.tn_dirhead); LIST_INIT(&nnode->tn_dir.tn_dupindex); MPASS(parent != nnode); MPASS(IMPLIES(parent == NULL, tmp->tm_root == NULL)); nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent; nnode->tn_dir.tn_readdir_lastn = 0; nnode->tn_dir.tn_readdir_lastp = NULL; nnode->tn_links++; TMPFS_NODE_LOCK(nnode->tn_dir.tn_parent); nnode->tn_dir.tn_parent->tn_links++; TMPFS_NODE_UNLOCK(nnode->tn_dir.tn_parent); break; case VFIFO: /* FALLTHROUGH */ case VSOCK: break; case VLNK: MPASS(strlen(target) < MAXPATHLEN); nnode->tn_size = strlen(target); nnode->tn_link = malloc(nnode->tn_size, M_TMPFSNAME, M_WAITOK); memcpy(nnode->tn_link, target, nnode->tn_size); break; case VREG: obj = nnode->tn_reg.tn_aobj = vm_pager_allocate(OBJT_SWAP, NULL, 0, VM_PROT_DEFAULT, 0, NULL /* XXXKIB - tmpfs needs swap reservation */); VM_OBJECT_WLOCK(obj); /* OBJ_TMPFS is set together with the setting of vp->v_object */ vm_object_set_flag(obj, OBJ_TMPFS_NODE); VM_OBJECT_WUNLOCK(obj); break; default: panic("tmpfs_alloc_node: type %p %d", nnode, (int)nnode->tn_type); } TMPFS_LOCK(tmp); LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries); nnode->tn_attached = true; tmp->tm_nodes_inuse++; tmp->tm_refcount++; TMPFS_UNLOCK(tmp); *node = nnode; return (0); } /* * Destroys the node pointed to by node from the file system 'tmp'. * If the node references a directory, no entries are allowed. */ void tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) { TMPFS_LOCK(tmp); TMPFS_NODE_LOCK(node); if (!tmpfs_free_node_locked(tmp, node, false)) { TMPFS_NODE_UNLOCK(node); TMPFS_UNLOCK(tmp); } } bool tmpfs_free_node_locked(struct tmpfs_mount *tmp, struct tmpfs_node *node, bool detach) { vm_object_t uobj; TMPFS_MP_ASSERT_LOCKED(tmp); TMPFS_NODE_ASSERT_LOCKED(node); KASSERT(node->tn_refcount > 0, ("node %p refcount zero", node)); node->tn_refcount--; if (node->tn_attached && (detach || node->tn_refcount == 0)) { MPASS(tmp->tm_nodes_inuse > 0); tmp->tm_nodes_inuse--; LIST_REMOVE(node, tn_entries); node->tn_attached = false; } if (node->tn_refcount > 0) return (false); #ifdef INVARIANTS MPASS(node->tn_vnode == NULL); MPASS((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0); #endif TMPFS_NODE_UNLOCK(node); TMPFS_UNLOCK(tmp); switch (node->tn_type) { case VBLK: /* FALLTHROUGH */ case VCHR: /* FALLTHROUGH */ case VDIR: /* FALLTHROUGH */ case VFIFO: /* FALLTHROUGH */ case VSOCK: break; case VLNK: free(node->tn_link, M_TMPFSNAME); break; case VREG: uobj = node->tn_reg.tn_aobj; if (uobj != NULL) { if (uobj->size != 0) atomic_subtract_long(&tmp->tm_pages_used, uobj->size); KASSERT((uobj->flags & OBJ_TMPFS) == 0, ("leaked OBJ_TMPFS node %p vm_obj %p", node, uobj)); vm_object_deallocate(uobj); } break; default: panic("tmpfs_free_node: type %p %d", node, (int)node->tn_type); } uma_zfree(tmpfs_node_pool, node); TMPFS_LOCK(tmp); tmpfs_free_tmp(tmp); return (true); } static __inline uint32_t tmpfs_dirent_hash(const char *name, u_int len) { uint32_t hash; hash = fnv_32_buf(name, len, FNV1_32_INIT + len) & TMPFS_DIRCOOKIE_MASK; #ifdef TMPFS_DEBUG_DIRCOOKIE_DUP hash &= 0xf; #endif if (hash < TMPFS_DIRCOOKIE_MIN) hash += TMPFS_DIRCOOKIE_MIN; return (hash); } static __inline off_t tmpfs_dirent_cookie(struct tmpfs_dirent *de) { if (de == NULL) return (TMPFS_DIRCOOKIE_EOF); MPASS(de->td_cookie >= TMPFS_DIRCOOKIE_MIN); return (de->td_cookie); } static __inline boolean_t tmpfs_dirent_dup(struct tmpfs_dirent *de) { return ((de->td_cookie & TMPFS_DIRCOOKIE_DUP) != 0); } static __inline boolean_t tmpfs_dirent_duphead(struct tmpfs_dirent *de) { return ((de->td_cookie & TMPFS_DIRCOOKIE_DUPHEAD) != 0); } void tmpfs_dirent_init(struct tmpfs_dirent *de, const char *name, u_int namelen) { de->td_hash = de->td_cookie = tmpfs_dirent_hash(name, namelen); memcpy(de->ud.td_name, name, namelen); de->td_namelen = namelen; } /* * Allocates a new directory entry for the node node with a name of name. * The new directory entry is returned in *de. * * The link count of node is increased by one to reflect the new object * referencing it. * * Returns zero on success or an appropriate error code on failure. */ int tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node, const char *name, u_int len, struct tmpfs_dirent **de) { struct tmpfs_dirent *nde; nde = uma_zalloc(tmpfs_dirent_pool, M_WAITOK); nde->td_node = node; if (name != NULL) { nde->ud.td_name = malloc(len, M_TMPFSNAME, M_WAITOK); tmpfs_dirent_init(nde, name, len); } else nde->td_namelen = 0; if (node != NULL) node->tn_links++; *de = nde; return 0; } /* * Frees a directory entry. It is the caller's responsibility to destroy * the node referenced by it if needed. * * The link count of node is decreased by one to reflect the removal of an * object that referenced it. This only happens if 'node_exists' is true; * otherwise the function will not access the node referred to by the * directory entry, as it may already have been released from the outside. */ void tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de) { struct tmpfs_node *node; node = de->td_node; if (node != NULL) { MPASS(node->tn_links > 0); node->tn_links--; } if (!tmpfs_dirent_duphead(de) && de->ud.td_name != NULL) free(de->ud.td_name, M_TMPFSNAME); uma_zfree(tmpfs_dirent_pool, de); } void tmpfs_destroy_vobject(struct vnode *vp, vm_object_t obj) { ASSERT_VOP_ELOCKED(vp, "tmpfs_destroy_vobject"); if (vp->v_type != VREG || obj == NULL) return; VM_OBJECT_WLOCK(obj); VI_LOCK(vp); vm_object_clear_flag(obj, OBJ_TMPFS); obj->un_pager.swp.swp_tmpfs = NULL; if (vp->v_writecount < 0) vp->v_writecount = 0; VI_UNLOCK(vp); VM_OBJECT_WUNLOCK(obj); } /* * Need to clear v_object for insmntque failure. */ static void tmpfs_insmntque_dtr(struct vnode *vp, void *dtr_arg) { tmpfs_destroy_vobject(vp, vp->v_object); vp->v_object = NULL; vp->v_data = NULL; vp->v_op = &dead_vnodeops; vgone(vp); vput(vp); } /* * Allocates a new vnode for the node node or returns a new reference to * an existing one if the node had already a vnode referencing it. The * resulting locked vnode is returned in *vpp. * * Returns zero on success or an appropriate error code on failure. */ int tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag, struct vnode **vpp) { struct vnode *vp; struct tmpfs_mount *tm; vm_object_t object; int error; error = 0; tm = VFS_TO_TMPFS(mp); TMPFS_NODE_LOCK(node); tmpfs_ref_node_locked(node); loop: TMPFS_NODE_ASSERT_LOCKED(node); if ((vp = node->tn_vnode) != NULL) { MPASS((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0); VI_LOCK(vp); if ((node->tn_type == VDIR && node->tn_dir.tn_parent == NULL) || (VN_IS_DOOMED(vp) && (lkflag & LK_NOWAIT) != 0)) { VI_UNLOCK(vp); TMPFS_NODE_UNLOCK(node); error = ENOENT; vp = NULL; goto out; } if (VN_IS_DOOMED(vp)) { VI_UNLOCK(vp); node->tn_vpstate |= TMPFS_VNODE_WRECLAIM; while ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0) { msleep(&node->tn_vnode, TMPFS_NODE_MTX(node), 0, "tmpfsE", 0); } goto loop; } TMPFS_NODE_UNLOCK(node); error = vget(vp, lkflag | LK_INTERLOCK, curthread); if (error == ENOENT) { TMPFS_NODE_LOCK(node); goto loop; } if (error != 0) { vp = NULL; goto out; } /* * Make sure the vnode is still there after * getting the interlock to avoid racing a free. */ if (node->tn_vnode == NULL || node->tn_vnode != vp) { vput(vp); TMPFS_NODE_LOCK(node); goto loop; } goto out; } if ((node->tn_vpstate & TMPFS_VNODE_DOOMED) || (node->tn_type == VDIR && node->tn_dir.tn_parent == NULL)) { TMPFS_NODE_UNLOCK(node); error = ENOENT; vp = NULL; goto out; } /* * otherwise lock the vp list while we call getnewvnode * since that can block. */ if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) { node->tn_vpstate |= TMPFS_VNODE_WANT; error = msleep((caddr_t) &node->tn_vpstate, TMPFS_NODE_MTX(node), 0, "tmpfs_alloc_vp", 0); if (error != 0) goto out; goto loop; } else node->tn_vpstate |= TMPFS_VNODE_ALLOCATING; TMPFS_NODE_UNLOCK(node); /* Get a new vnode and associate it with our node. */ error = getnewvnode("tmpfs", mp, VFS_TO_TMPFS(mp)->tm_nonc ? &tmpfs_vnodeop_nonc_entries : &tmpfs_vnodeop_entries, &vp); if (error != 0) goto unlock; MPASS(vp != NULL); /* lkflag is ignored, the lock is exclusive */ (void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vp->v_data = node; vp->v_type = node->tn_type; /* Type-specific initialization. */ switch (node->tn_type) { case VBLK: /* FALLTHROUGH */ case VCHR: /* FALLTHROUGH */ case VLNK: /* FALLTHROUGH */ case VSOCK: break; case VFIFO: vp->v_op = &tmpfs_fifoop_entries; break; case VREG: object = node->tn_reg.tn_aobj; VM_OBJECT_WLOCK(object); VI_LOCK(vp); KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs")); vp->v_object = object; object->un_pager.swp.swp_tmpfs = vp; vm_object_set_flag(object, OBJ_TMPFS); VI_UNLOCK(vp); VM_OBJECT_WUNLOCK(object); break; case VDIR: MPASS(node->tn_dir.tn_parent != NULL); if (node->tn_dir.tn_parent == node) vp->v_vflag |= VV_ROOT; break; default: panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type); } if (vp->v_type != VFIFO) VN_LOCK_ASHARE(vp); error = insmntque1(vp, mp, tmpfs_insmntque_dtr, NULL); if (error != 0) vp = NULL; unlock: TMPFS_NODE_LOCK(node); MPASS(node->tn_vpstate & TMPFS_VNODE_ALLOCATING); node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING; node->tn_vnode = vp; if (node->tn_vpstate & TMPFS_VNODE_WANT) { node->tn_vpstate &= ~TMPFS_VNODE_WANT; TMPFS_NODE_UNLOCK(node); wakeup((caddr_t) &node->tn_vpstate); } else TMPFS_NODE_UNLOCK(node); out: if (error == 0) { *vpp = vp; #ifdef INVARIANTS MPASS(*vpp != NULL && VOP_ISLOCKED(*vpp)); TMPFS_NODE_LOCK(node); MPASS(*vpp == node->tn_vnode); TMPFS_NODE_UNLOCK(node); #endif } tmpfs_free_node(tm, node); return (error); } /* * Destroys the association between the vnode vp and the node it * references. */ void tmpfs_free_vp(struct vnode *vp) { struct tmpfs_node *node; node = VP_TO_TMPFS_NODE(vp); TMPFS_NODE_ASSERT_LOCKED(node); node->tn_vnode = NULL; if ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0) wakeup(&node->tn_vnode); node->tn_vpstate &= ~TMPFS_VNODE_WRECLAIM; vp->v_data = NULL; } /* * Allocates a new file of type 'type' and adds it to the parent directory * 'dvp'; this addition is done using the component name given in 'cnp'. * The ownership of the new file is automatically assigned based on the * credentials of the caller (through 'cnp'), the group is set based on * the parent directory and the mode is determined from the 'vap' argument. * If successful, *vpp holds a vnode to the newly created file and zero * is returned. Otherwise *vpp is NULL and the function returns an * appropriate error code. */ int tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, struct componentname *cnp, const char *target) { int error; struct tmpfs_dirent *de; struct tmpfs_mount *tmp; struct tmpfs_node *dnode; struct tmpfs_node *node; struct tmpfs_node *parent; ASSERT_VOP_ELOCKED(dvp, "tmpfs_alloc_file"); MPASS(cnp->cn_flags & HASBUF); tmp = VFS_TO_TMPFS(dvp->v_mount); dnode = VP_TO_TMPFS_DIR(dvp); *vpp = NULL; /* If the entry we are creating is a directory, we cannot overflow * the number of links of its parent, because it will get a new * link. */ if (vap->va_type == VDIR) { /* Ensure that we do not overflow the maximum number of links * imposed by the system. */ MPASS(dnode->tn_links <= TMPFS_LINK_MAX); if (dnode->tn_links == TMPFS_LINK_MAX) { return (EMLINK); } parent = dnode; MPASS(parent != NULL); } else parent = NULL; /* Allocate a node that represents the new file. */ error = tmpfs_alloc_node(dvp->v_mount, tmp, vap->va_type, cnp->cn_cred->cr_uid, dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev, &node); if (error != 0) return (error); /* Allocate a directory entry that points to the new file. */ error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen, &de); if (error != 0) { tmpfs_free_node(tmp, node); return (error); } /* Allocate a vnode for the new file. */ error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp); if (error != 0) { tmpfs_free_dirent(tmp, de); tmpfs_free_node(tmp, node); return (error); } /* Now that all required items are allocated, we can proceed to * insert the new node into the directory, an operation that * cannot fail. */ if (cnp->cn_flags & ISWHITEOUT) tmpfs_dir_whiteout_remove(dvp, cnp); tmpfs_dir_attach(dvp, de); return (0); } struct tmpfs_dirent * tmpfs_dir_first(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc) { struct tmpfs_dirent *de; de = RB_MIN(tmpfs_dir, &dnode->tn_dir.tn_dirhead); dc->tdc_tree = de; if (de != NULL && tmpfs_dirent_duphead(de)) de = LIST_FIRST(&de->ud.td_duphead); dc->tdc_current = de; return (dc->tdc_current); } struct tmpfs_dirent * tmpfs_dir_next(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc) { struct tmpfs_dirent *de; MPASS(dc->tdc_tree != NULL); if (tmpfs_dirent_dup(dc->tdc_current)) { dc->tdc_current = LIST_NEXT(dc->tdc_current, uh.td_dup.entries); if (dc->tdc_current != NULL) return (dc->tdc_current); } dc->tdc_tree = dc->tdc_current = RB_NEXT(tmpfs_dir, &dnode->tn_dir.tn_dirhead, dc->tdc_tree); if ((de = dc->tdc_current) != NULL && tmpfs_dirent_duphead(de)) { dc->tdc_current = LIST_FIRST(&de->ud.td_duphead); MPASS(dc->tdc_current != NULL); } return (dc->tdc_current); } /* Lookup directory entry in RB-Tree. Function may return duphead entry. */ static struct tmpfs_dirent * tmpfs_dir_xlookup_hash(struct tmpfs_node *dnode, uint32_t hash) { struct tmpfs_dirent *de, dekey; dekey.td_hash = hash; de = RB_FIND(tmpfs_dir, &dnode->tn_dir.tn_dirhead, &dekey); return (de); } /* Lookup directory entry by cookie, initialize directory cursor accordingly. */ static struct tmpfs_dirent * tmpfs_dir_lookup_cookie(struct tmpfs_node *node, off_t cookie, struct tmpfs_dir_cursor *dc) { struct tmpfs_dir *dirhead = &node->tn_dir.tn_dirhead; struct tmpfs_dirent *de, dekey; MPASS(cookie >= TMPFS_DIRCOOKIE_MIN); if (cookie == node->tn_dir.tn_readdir_lastn && (de = node->tn_dir.tn_readdir_lastp) != NULL) { /* Protect against possible race, tn_readdir_last[pn] * may be updated with only shared vnode lock held. */ if (cookie == tmpfs_dirent_cookie(de)) goto out; } if ((cookie & TMPFS_DIRCOOKIE_DUP) != 0) { LIST_FOREACH(de, &node->tn_dir.tn_dupindex, uh.td_dup.index_entries) { MPASS(tmpfs_dirent_dup(de)); if (de->td_cookie == cookie) goto out; /* dupindex list is sorted. */ if (de->td_cookie < cookie) { de = NULL; goto out; } } MPASS(de == NULL); goto out; } if ((cookie & TMPFS_DIRCOOKIE_MASK) != cookie) { de = NULL; } else { dekey.td_hash = cookie; /* Recover if direntry for cookie was removed */ de = RB_NFIND(tmpfs_dir, dirhead, &dekey); } dc->tdc_tree = de; dc->tdc_current = de; if (de != NULL && tmpfs_dirent_duphead(de)) { dc->tdc_current = LIST_FIRST(&de->ud.td_duphead); MPASS(dc->tdc_current != NULL); } return (dc->tdc_current); out: dc->tdc_tree = de; dc->tdc_current = de; if (de != NULL && tmpfs_dirent_dup(de)) dc->tdc_tree = tmpfs_dir_xlookup_hash(node, de->td_hash); return (dc->tdc_current); } /* * Looks for a directory entry in the directory represented by node. * 'cnp' describes the name of the entry to look for. Note that the . * and .. components are not allowed as they do not physically exist * within directories. * * Returns a pointer to the entry when found, otherwise NULL. */ struct tmpfs_dirent * tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f, struct componentname *cnp) { struct tmpfs_dir_duphead *duphead; struct tmpfs_dirent *de; uint32_t hash; MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.')); MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' && cnp->cn_nameptr[1] == '.'))); TMPFS_VALIDATE_DIR(node); hash = tmpfs_dirent_hash(cnp->cn_nameptr, cnp->cn_namelen); de = tmpfs_dir_xlookup_hash(node, hash); if (de != NULL && tmpfs_dirent_duphead(de)) { duphead = &de->ud.td_duphead; LIST_FOREACH(de, duphead, uh.td_dup.entries) { if (TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr, cnp->cn_namelen)) break; } } else if (de != NULL) { if (!TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr, cnp->cn_namelen)) de = NULL; } if (de != NULL && f != NULL && de->td_node != f) de = NULL; return (de); } /* * Attach duplicate-cookie directory entry nde to dnode and insert to dupindex * list, allocate new cookie value. */ static void tmpfs_dir_attach_dup(struct tmpfs_node *dnode, struct tmpfs_dir_duphead *duphead, struct tmpfs_dirent *nde) { struct tmpfs_dir_duphead *dupindex; struct tmpfs_dirent *de, *pde; dupindex = &dnode->tn_dir.tn_dupindex; de = LIST_FIRST(dupindex); if (de == NULL || de->td_cookie < TMPFS_DIRCOOKIE_DUP_MAX) { if (de == NULL) nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN; else nde->td_cookie = de->td_cookie + 1; MPASS(tmpfs_dirent_dup(nde)); LIST_INSERT_HEAD(dupindex, nde, uh.td_dup.index_entries); LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); return; } /* * Cookie numbers are near exhaustion. Scan dupindex list for unused * numbers. dupindex list is sorted in descending order. Keep it so * after inserting nde. */ while (1) { pde = de; de = LIST_NEXT(de, uh.td_dup.index_entries); if (de == NULL && pde->td_cookie != TMPFS_DIRCOOKIE_DUP_MIN) { /* * Last element of the index doesn't have minimal cookie * value, use it. */ nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN; LIST_INSERT_AFTER(pde, nde, uh.td_dup.index_entries); LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); return; } else if (de == NULL) { /* * We are so lucky have 2^30 hash duplicates in single * directory :) Return largest possible cookie value. * It should be fine except possible issues with * VOP_READDIR restart. */ nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MAX; LIST_INSERT_HEAD(dupindex, nde, uh.td_dup.index_entries); LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); return; } if (de->td_cookie + 1 == pde->td_cookie || de->td_cookie >= TMPFS_DIRCOOKIE_DUP_MAX) continue; /* No hole or invalid cookie. */ nde->td_cookie = de->td_cookie + 1; MPASS(tmpfs_dirent_dup(nde)); MPASS(pde->td_cookie > nde->td_cookie); MPASS(nde->td_cookie > de->td_cookie); LIST_INSERT_BEFORE(de, nde, uh.td_dup.index_entries); LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries); return; } } /* * Attaches the directory entry de to the directory represented by vp. * Note that this does not change the link count of the node pointed by * the directory entry, as this is done by tmpfs_alloc_dirent. */ void tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de) { struct tmpfs_node *dnode; struct tmpfs_dirent *xde, *nde; ASSERT_VOP_ELOCKED(vp, __func__); MPASS(de->td_namelen > 0); MPASS(de->td_hash >= TMPFS_DIRCOOKIE_MIN); MPASS(de->td_cookie == de->td_hash); dnode = VP_TO_TMPFS_DIR(vp); dnode->tn_dir.tn_readdir_lastn = 0; dnode->tn_dir.tn_readdir_lastp = NULL; MPASS(!tmpfs_dirent_dup(de)); xde = RB_INSERT(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de); if (xde != NULL && tmpfs_dirent_duphead(xde)) tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de); else if (xde != NULL) { /* * Allocate new duphead. Swap xde with duphead to avoid * adding/removing elements with the same hash. */ MPASS(!tmpfs_dirent_dup(xde)); tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), NULL, NULL, 0, &nde); /* *nde = *xde; XXX gcc 4.2.1 may generate invalid code. */ memcpy(nde, xde, sizeof(*xde)); xde->td_cookie |= TMPFS_DIRCOOKIE_DUPHEAD; LIST_INIT(&xde->ud.td_duphead); xde->td_namelen = 0; xde->td_node = NULL; tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, nde); tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de); } dnode->tn_size += sizeof(struct tmpfs_dirent); dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ TMPFS_NODE_MODIFIED; tmpfs_update(vp); } /* * Detaches the directory entry de from the directory represented by vp. * Note that this does not change the link count of the node pointed by * the directory entry, as this is done by tmpfs_free_dirent. */ void tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de) { struct tmpfs_mount *tmp; struct tmpfs_dir *head; struct tmpfs_node *dnode; struct tmpfs_dirent *xde; ASSERT_VOP_ELOCKED(vp, __func__); dnode = VP_TO_TMPFS_DIR(vp); head = &dnode->tn_dir.tn_dirhead; dnode->tn_dir.tn_readdir_lastn = 0; dnode->tn_dir.tn_readdir_lastp = NULL; if (tmpfs_dirent_dup(de)) { /* Remove duphead if de was last entry. */ if (LIST_NEXT(de, uh.td_dup.entries) == NULL) { xde = tmpfs_dir_xlookup_hash(dnode, de->td_hash); MPASS(tmpfs_dirent_duphead(xde)); } else xde = NULL; LIST_REMOVE(de, uh.td_dup.entries); LIST_REMOVE(de, uh.td_dup.index_entries); if (xde != NULL) { if (LIST_EMPTY(&xde->ud.td_duphead)) { RB_REMOVE(tmpfs_dir, head, xde); tmp = VFS_TO_TMPFS(vp->v_mount); MPASS(xde->td_node == NULL); tmpfs_free_dirent(tmp, xde); } } de->td_cookie = de->td_hash; } else RB_REMOVE(tmpfs_dir, head, de); dnode->tn_size -= sizeof(struct tmpfs_dirent); dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ TMPFS_NODE_MODIFIED; tmpfs_update(vp); } void tmpfs_dir_destroy(struct tmpfs_mount *tmp, struct tmpfs_node *dnode) { struct tmpfs_dirent *de, *dde, *nde; RB_FOREACH_SAFE(de, tmpfs_dir, &dnode->tn_dir.tn_dirhead, nde) { RB_REMOVE(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de); /* Node may already be destroyed. */ de->td_node = NULL; if (tmpfs_dirent_duphead(de)) { while ((dde = LIST_FIRST(&de->ud.td_duphead)) != NULL) { LIST_REMOVE(dde, uh.td_dup.entries); dde->td_node = NULL; tmpfs_free_dirent(tmp, dde); } } tmpfs_free_dirent(tmp, de); } } /* * Helper function for tmpfs_readdir. Creates a '.' entry for the given * directory and returns it in the uio space. The function returns 0 * on success, -1 if there was not enough space in the uio structure to * hold the directory entry or an appropriate error code if another * error happens. */ static int tmpfs_dir_getdotdent(struct tmpfs_mount *tm, struct tmpfs_node *node, struct uio *uio) { int error; struct dirent dent; TMPFS_VALIDATE_DIR(node); MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); dent.d_fileno = node->tn_id; dent.d_type = DT_DIR; dent.d_namlen = 1; dent.d_name[0] = '.'; dent.d_reclen = GENERIC_DIRSIZ(&dent); dirent_terminate(&dent); if (dent.d_reclen > uio->uio_resid) error = EJUSTRETURN; else error = uiomove(&dent, dent.d_reclen, uio); tmpfs_set_status(tm, node, TMPFS_NODE_ACCESSED); return (error); } /* * Helper function for tmpfs_readdir. Creates a '..' entry for the given * directory and returns it in the uio space. The function returns 0 * on success, -1 if there was not enough space in the uio structure to * hold the directory entry or an appropriate error code if another * error happens. */ static int tmpfs_dir_getdotdotdent(struct tmpfs_mount *tm, struct tmpfs_node *node, struct uio *uio) { struct tmpfs_node *parent; struct dirent dent; int error; TMPFS_VALIDATE_DIR(node); MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); /* * Return ENOENT if the current node is already removed. */ TMPFS_ASSERT_LOCKED(node); parent = node->tn_dir.tn_parent; if (parent == NULL) return (ENOENT); TMPFS_NODE_LOCK(parent); dent.d_fileno = parent->tn_id; TMPFS_NODE_UNLOCK(parent); dent.d_type = DT_DIR; dent.d_namlen = 2; dent.d_name[0] = '.'; dent.d_name[1] = '.'; dent.d_reclen = GENERIC_DIRSIZ(&dent); dirent_terminate(&dent); if (dent.d_reclen > uio->uio_resid) error = EJUSTRETURN; else error = uiomove(&dent, dent.d_reclen, uio); tmpfs_set_status(tm, node, TMPFS_NODE_ACCESSED); return (error); } /* * Helper function for tmpfs_readdir. Returns as much directory entries * as can fit in the uio space. The read starts at uio->uio_offset. * The function returns 0 on success, -1 if there was not enough space * in the uio structure to hold the directory entry or an appropriate * error code if another error happens. */ int tmpfs_dir_getdents(struct tmpfs_mount *tm, struct tmpfs_node *node, struct uio *uio, int maxcookies, u_long *cookies, int *ncookies) { struct tmpfs_dir_cursor dc; struct tmpfs_dirent *de; off_t off; int error; TMPFS_VALIDATE_DIR(node); off = 0; /* * Lookup the node from the current offset. The starting offset of * 0 will lookup both '.' and '..', and then the first real entry, * or EOF if there are none. Then find all entries for the dir that * fit into the buffer. Once no more entries are found (de == NULL), * the offset is set to TMPFS_DIRCOOKIE_EOF, which will cause the next * call to return 0. */ switch (uio->uio_offset) { case TMPFS_DIRCOOKIE_DOT: error = tmpfs_dir_getdotdent(tm, node, uio); if (error != 0) return (error); uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT; if (cookies != NULL) cookies[(*ncookies)++] = off = uio->uio_offset; /* FALLTHROUGH */ case TMPFS_DIRCOOKIE_DOTDOT: error = tmpfs_dir_getdotdotdent(tm, node, uio); if (error != 0) return (error); de = tmpfs_dir_first(node, &dc); uio->uio_offset = tmpfs_dirent_cookie(de); if (cookies != NULL) cookies[(*ncookies)++] = off = uio->uio_offset; /* EOF. */ if (de == NULL) return (0); break; case TMPFS_DIRCOOKIE_EOF: return (0); default: de = tmpfs_dir_lookup_cookie(node, uio->uio_offset, &dc); if (de == NULL) return (EINVAL); if (cookies != NULL) off = tmpfs_dirent_cookie(de); } /* Read as much entries as possible; i.e., until we reach the end of * the directory or we exhaust uio space. */ do { struct dirent d; /* Create a dirent structure representing the current * tmpfs_node and fill it. */ if (de->td_node == NULL) { d.d_fileno = 1; d.d_type = DT_WHT; } else { d.d_fileno = de->td_node->tn_id; switch (de->td_node->tn_type) { case VBLK: d.d_type = DT_BLK; break; case VCHR: d.d_type = DT_CHR; break; case VDIR: d.d_type = DT_DIR; break; case VFIFO: d.d_type = DT_FIFO; break; case VLNK: d.d_type = DT_LNK; break; case VREG: d.d_type = DT_REG; break; case VSOCK: d.d_type = DT_SOCK; break; default: panic("tmpfs_dir_getdents: type %p %d", de->td_node, (int)de->td_node->tn_type); } } d.d_namlen = de->td_namelen; MPASS(de->td_namelen < sizeof(d.d_name)); (void)memcpy(d.d_name, de->ud.td_name, de->td_namelen); d.d_reclen = GENERIC_DIRSIZ(&d); dirent_terminate(&d); /* Stop reading if the directory entry we are treating is * bigger than the amount of data that can be returned. */ if (d.d_reclen > uio->uio_resid) { error = EJUSTRETURN; break; } /* Copy the new dirent structure into the output buffer and * advance pointers. */ error = uiomove(&d, d.d_reclen, uio); if (error == 0) { de = tmpfs_dir_next(node, &dc); if (cookies != NULL) { off = tmpfs_dirent_cookie(de); MPASS(*ncookies < maxcookies); cookies[(*ncookies)++] = off; } } } while (error == 0 && uio->uio_resid > 0 && de != NULL); /* Skip setting off when using cookies as it is already done above. */ if (cookies == NULL) off = tmpfs_dirent_cookie(de); /* Update the offset and cache. */ uio->uio_offset = off; node->tn_dir.tn_readdir_lastn = off; node->tn_dir.tn_readdir_lastp = de; tmpfs_set_status(tm, node, TMPFS_NODE_ACCESSED); return error; } int tmpfs_dir_whiteout_add(struct vnode *dvp, struct componentname *cnp) { struct tmpfs_dirent *de; int error; error = tmpfs_alloc_dirent(VFS_TO_TMPFS(dvp->v_mount), NULL, cnp->cn_nameptr, cnp->cn_namelen, &de); if (error != 0) return (error); tmpfs_dir_attach(dvp, de); return (0); } void tmpfs_dir_whiteout_remove(struct vnode *dvp, struct componentname *cnp) { struct tmpfs_dirent *de; de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp); MPASS(de != NULL && de->td_node == NULL); tmpfs_dir_detach(dvp, de); tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de); } /* * Resizes the aobj associated with the regular file pointed to by 'vp' to the * size 'newsize'. 'vp' must point to a vnode that represents a regular file. * 'newsize' must be positive. * * Returns zero on success or an appropriate error code on failure. */ int tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr) { struct tmpfs_mount *tmp; struct tmpfs_node *node; vm_object_t uobj; vm_page_t m; vm_pindex_t idx, newpages, oldpages; off_t oldsize; int base, rv; MPASS(vp->v_type == VREG); MPASS(newsize >= 0); node = VP_TO_TMPFS_NODE(vp); uobj = node->tn_reg.tn_aobj; tmp = VFS_TO_TMPFS(vp->v_mount); /* * Convert the old and new sizes to the number of pages needed to * store them. It may happen that we do not need to do anything * because the last allocated page can accommodate the change on * its own. */ oldsize = node->tn_size; oldpages = OFF_TO_IDX(oldsize + PAGE_MASK); MPASS(oldpages == uobj->size); newpages = OFF_TO_IDX(newsize + PAGE_MASK); if (__predict_true(newpages == oldpages && newsize >= oldsize)) { node->tn_size = newsize; return (0); } if (newpages > oldpages && tmpfs_pages_check_avail(tmp, newpages - oldpages) == 0) return (ENOSPC); VM_OBJECT_WLOCK(uobj); if (newsize < oldsize) { /* * Zero the truncated part of the last page. */ base = newsize & PAGE_MASK; if (base != 0) { idx = OFF_TO_IDX(newsize); retry: m = vm_page_grab(uobj, idx, VM_ALLOC_NOCREAT); if (m != NULL) { MPASS(vm_page_all_valid(m)); } else if (vm_pager_has_page(uobj, idx, NULL, NULL)) { m = vm_page_alloc(uobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL); if (m == NULL) goto retry; + vm_object_pip_add(uobj, 1); + VM_OBJECT_WUNLOCK(uobj); rv = vm_pager_get_pages(uobj, &m, 1, NULL, NULL); + VM_OBJECT_WLOCK(uobj); + vm_object_pip_wakeup(uobj); if (rv == VM_PAGER_OK) { /* * Since the page was not resident, * and therefore not recently * accessed, immediately enqueue it * for asynchronous laundering. The * current operation is not regarded * as an access. */ vm_page_launder(m); } else { vm_page_free(m); if (ignerr) m = NULL; else { VM_OBJECT_WUNLOCK(uobj); return (EIO); } } } if (m != NULL) { pmap_zero_page_area(m, base, PAGE_SIZE - base); vm_page_set_dirty(m); vm_page_xunbusy(m); } } /* * Release any swap space and free any whole pages. */ if (newpages < oldpages) { swap_pager_freespace(uobj, newpages, oldpages - newpages); vm_object_page_remove(uobj, newpages, 0, 0); } } uobj->size = newpages; VM_OBJECT_WUNLOCK(uobj); atomic_add_long(&tmp->tm_pages_used, newpages - oldpages); node->tn_size = newsize; return (0); } void tmpfs_check_mtime(struct vnode *vp) { struct tmpfs_node *node; struct vm_object *obj; ASSERT_VOP_ELOCKED(vp, "check_mtime"); if (vp->v_type != VREG) return; obj = vp->v_object; KASSERT((obj->flags & (OBJ_TMPFS_NODE | OBJ_TMPFS)) == (OBJ_TMPFS_NODE | OBJ_TMPFS), ("non-tmpfs obj")); /* unlocked read */ if (obj->generation != obj->cleangeneration) { VM_OBJECT_WLOCK(obj); if (obj->generation != obj->cleangeneration) { obj->cleangeneration = obj->generation; node = VP_TO_TMPFS_NODE(vp); node->tn_status |= TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED; } VM_OBJECT_WUNLOCK(obj); } } /* * Change flags of the given vnode. * Caller should execute tmpfs_update on vp after a successful execution. * The vnode must be locked on entry and remain locked on exit. */ int tmpfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred, struct thread *p) { int error; struct tmpfs_node *node; ASSERT_VOP_ELOCKED(vp, "chflags"); node = VP_TO_TMPFS_NODE(vp); if ((flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | SF_NOUNLINK | UF_APPEND | UF_ARCHIVE | UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP | UF_NOUNLINK | UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE | UF_SPARSE | UF_SYSTEM)) != 0) return (EOPNOTSUPP); /* Disallow this operation if the file system is mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return EROFS; /* * Callers may only modify the file flags on objects they * have VADMIN rights for. */ if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) return (error); /* * Unprivileged processes are not permitted to unset system * flags, or modify flags if any system flags are set. */ if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS)) { if (node->tn_flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { error = securelevel_gt(cred, 0); if (error) return (error); } } else { if (node->tn_flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || ((flags ^ node->tn_flags) & SF_SETTABLE)) return (EPERM); } node->tn_flags = flags; node->tn_status |= TMPFS_NODE_CHANGED; ASSERT_VOP_ELOCKED(vp, "chflags2"); return (0); } /* * Change access mode on the given vnode. * Caller should execute tmpfs_update on vp after a successful execution. * The vnode must be locked on entry and remain locked on exit. */ int tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct thread *p) { int error; struct tmpfs_node *node; ASSERT_VOP_ELOCKED(vp, "chmod"); node = VP_TO_TMPFS_NODE(vp); /* Disallow this operation if the file system is mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return EROFS; /* Immutable or append-only files cannot be modified, either. */ if (node->tn_flags & (IMMUTABLE | APPEND)) return EPERM; /* * To modify the permissions on a file, must possess VADMIN * for that file. */ if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) return (error); /* * Privileged processes may set the sticky bit on non-directories, * as well as set the setgid bit on a file with a group that the * process is not a member of. */ if (vp->v_type != VDIR && (mode & S_ISTXT)) { if (priv_check_cred(cred, PRIV_VFS_STICKYFILE)) return (EFTYPE); } if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID)) { error = priv_check_cred(cred, PRIV_VFS_SETGID); if (error) return (error); } node->tn_mode &= ~ALLPERMS; node->tn_mode |= mode & ALLPERMS; node->tn_status |= TMPFS_NODE_CHANGED; ASSERT_VOP_ELOCKED(vp, "chmod2"); return (0); } /* * Change ownership of the given vnode. At least one of uid or gid must * be different than VNOVAL. If one is set to that value, the attribute * is unchanged. * Caller should execute tmpfs_update on vp after a successful execution. * The vnode must be locked on entry and remain locked on exit. */ int tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, struct thread *p) { int error; struct tmpfs_node *node; uid_t ouid; gid_t ogid; ASSERT_VOP_ELOCKED(vp, "chown"); node = VP_TO_TMPFS_NODE(vp); /* Assign default values if they are unknown. */ MPASS(uid != VNOVAL || gid != VNOVAL); if (uid == VNOVAL) uid = node->tn_uid; if (gid == VNOVAL) gid = node->tn_gid; MPASS(uid != VNOVAL && gid != VNOVAL); /* Disallow this operation if the file system is mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return EROFS; /* Immutable or append-only files cannot be modified, either. */ if (node->tn_flags & (IMMUTABLE | APPEND)) return EPERM; /* * To modify the ownership of a file, must possess VADMIN for that * file. */ if ((error = VOP_ACCESS(vp, VADMIN, cred, p))) return (error); /* * To change the owner of a file, or change the group of a file to a * group of which we are not a member, the caller must have * privilege. */ if ((uid != node->tn_uid || (gid != node->tn_gid && !groupmember(gid, cred))) && (error = priv_check_cred(cred, PRIV_VFS_CHOWN))) return (error); ogid = node->tn_gid; ouid = node->tn_uid; node->tn_uid = uid; node->tn_gid = gid; node->tn_status |= TMPFS_NODE_CHANGED; if ((node->tn_mode & (S_ISUID | S_ISGID)) && (ouid != uid || ogid != gid)) { if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) node->tn_mode &= ~(S_ISUID | S_ISGID); } ASSERT_VOP_ELOCKED(vp, "chown2"); return (0); } /* * Change size of the given vnode. * Caller should execute tmpfs_update on vp after a successful execution. * The vnode must be locked on entry and remain locked on exit. */ int tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred, struct thread *p) { int error; struct tmpfs_node *node; ASSERT_VOP_ELOCKED(vp, "chsize"); node = VP_TO_TMPFS_NODE(vp); /* Decide whether this is a valid operation based on the file type. */ error = 0; switch (vp->v_type) { case VDIR: return EISDIR; case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return EROFS; break; case VBLK: /* FALLTHROUGH */ case VCHR: /* FALLTHROUGH */ case VFIFO: /* Allow modifications of special files even if in the file * system is mounted read-only (we are not modifying the * files themselves, but the objects they represent). */ return 0; default: /* Anything else is unsupported. */ return EOPNOTSUPP; } /* Immutable or append-only files cannot be modified, either. */ if (node->tn_flags & (IMMUTABLE | APPEND)) return EPERM; error = tmpfs_truncate(vp, size); /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents * for us, as will update tn_status; no need to do that here. */ ASSERT_VOP_ELOCKED(vp, "chsize2"); return (error); } /* * Change access and modification times of the given vnode. * Caller should execute tmpfs_update on vp after a successful execution. * The vnode must be locked on entry and remain locked on exit. */ int tmpfs_chtimes(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *l) { int error; struct tmpfs_node *node; ASSERT_VOP_ELOCKED(vp, "chtimes"); node = VP_TO_TMPFS_NODE(vp); /* Disallow this operation if the file system is mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return EROFS; /* Immutable or append-only files cannot be modified, either. */ if (node->tn_flags & (IMMUTABLE | APPEND)) return EPERM; error = vn_utimes_perm(vp, vap, cred, l); if (error != 0) return (error); if (vap->va_atime.tv_sec != VNOVAL) node->tn_status |= TMPFS_NODE_ACCESSED; if (vap->va_mtime.tv_sec != VNOVAL) node->tn_status |= TMPFS_NODE_MODIFIED; if (vap->va_birthtime.tv_sec != VNOVAL) node->tn_status |= TMPFS_NODE_MODIFIED; tmpfs_itimes(vp, &vap->va_atime, &vap->va_mtime); if (vap->va_birthtime.tv_sec != VNOVAL) node->tn_birthtime = vap->va_birthtime; ASSERT_VOP_ELOCKED(vp, "chtimes2"); return (0); } void tmpfs_set_status(struct tmpfs_mount *tm, struct tmpfs_node *node, int status) { if ((node->tn_status & status) == status || tm->tm_ronly) return; TMPFS_NODE_LOCK(node); node->tn_status |= status; TMPFS_NODE_UNLOCK(node); } /* Sync timestamps */ void tmpfs_itimes(struct vnode *vp, const struct timespec *acc, const struct timespec *mod) { struct tmpfs_node *node; struct timespec now; ASSERT_VOP_LOCKED(vp, "tmpfs_itimes"); node = VP_TO_TMPFS_NODE(vp); if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED)) == 0) return; vfs_timestamp(&now); TMPFS_NODE_LOCK(node); if (node->tn_status & TMPFS_NODE_ACCESSED) { if (acc == NULL) acc = &now; node->tn_atime = *acc; } if (node->tn_status & TMPFS_NODE_MODIFIED) { if (mod == NULL) mod = &now; node->tn_mtime = *mod; } if (node->tn_status & TMPFS_NODE_CHANGED) node->tn_ctime = now; node->tn_status &= ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); TMPFS_NODE_UNLOCK(node); /* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */ random_harvest_queue(node, sizeof(*node), RANDOM_FS_ATIME); } void tmpfs_update(struct vnode *vp) { tmpfs_itimes(vp, NULL, NULL); } int tmpfs_truncate(struct vnode *vp, off_t length) { int error; struct tmpfs_node *node; node = VP_TO_TMPFS_NODE(vp); if (length < 0) { error = EINVAL; goto out; } if (node->tn_size == length) { error = 0; goto out; } if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) return (EFBIG); error = tmpfs_reg_resize(vp, length, FALSE); if (error == 0) node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; out: tmpfs_update(vp); return (error); } static __inline int tmpfs_dirtree_cmp(struct tmpfs_dirent *a, struct tmpfs_dirent *b) { if (a->td_hash > b->td_hash) return (1); else if (a->td_hash < b->td_hash) return (-1); return (0); } RB_GENERATE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp); Index: projects/clang1000-import/sys/geom/label/g_label.c =================================================================== --- projects/clang1000-import/sys/geom/label/g_label.c (revision 356919) +++ projects/clang1000-import/sys/geom/label/g_label.c (revision 356920) @@ -1,561 +1,577 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004-2005 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_geom.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(geom_label, "GEOM labeling support"); SYSCTL_DECL(_kern_geom); SYSCTL_NODE(_kern_geom, OID_AUTO, label, CTLFLAG_RW, 0, "GEOM_LABEL stuff"); u_int g_label_debug = 0; SYSCTL_UINT(_kern_geom_label, OID_AUTO, debug, CTLFLAG_RWTUN, &g_label_debug, 0, "Debug level"); static int g_label_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp); static int g_label_destroy(struct g_geom *gp, boolean_t force); static struct g_geom *g_label_taste(struct g_class *mp, struct g_provider *pp, int flags __unused); static void g_label_config(struct gctl_req *req, struct g_class *mp, const char *verb); struct g_class g_label_class = { .name = G_LABEL_CLASS_NAME, .version = G_VERSION, .ctlreq = g_label_config, .taste = g_label_taste, .destroy_geom = g_label_destroy_geom }; /* * To add a new file system where you want to look for volume labels, * you have to: * 1. Add a file g_label_.c which implements labels recognition. * 2. Add an 'extern const struct g_label_desc g_label_;' into * g_label.h file. * 3. Add an element to the table below '&g_label_,'. * 4. Add your file to sys/conf/files. * 5. Add your file to sys/modules/geom/geom_label/Makefile. * 6. Add your file system to manual page sbin/geom/class/label/glabel.8. */ const struct g_label_desc *g_labels[] = { &g_label_gpt, &g_label_gpt_uuid, #ifdef GEOM_LABEL &g_label_ufs_id, &g_label_ufs_volume, &g_label_iso9660, &g_label_msdosfs, &g_label_ext2fs, &g_label_reiserfs, &g_label_ntfs, &g_label_disk_ident, &g_label_flashmap, #endif NULL }; void g_label_rtrim(char *label, size_t size) { ptrdiff_t i; for (i = size - 1; i >= 0; i--) { if (label[i] == '\0') continue; else if (label[i] == ' ') label[i] = '\0'; else break; } } static int g_label_destroy_geom(struct gctl_req *req __unused, struct g_class *mp, struct g_geom *gp __unused) { /* * XXX: Unloading a class which is using geom_slice:1.56 is currently * XXX: broken, so we deny unloading when we have geoms. */ return (EOPNOTSUPP); } static void g_label_orphan(struct g_consumer *cp) { G_LABEL_DEBUG(1, "Label %s removed.", LIST_FIRST(&cp->geom->provider)->name); g_slice_orphan(cp); } static void g_label_spoiled(struct g_consumer *cp) { G_LABEL_DEBUG(1, "Label %s removed.", LIST_FIRST(&cp->geom->provider)->name); g_slice_spoiled(cp); } static void g_label_resize(struct g_consumer *cp) { G_LABEL_DEBUG(1, "Label %s resized.", LIST_FIRST(&cp->geom->provider)->name); g_slice_config(cp->geom, 0, G_SLICE_CONFIG_FORCE, (off_t)0, cp->provider->mediasize, cp->provider->sectorsize, "notused"); } static int g_label_is_name_ok(const char *label) { const char *s; /* Check if the label starts from ../ */ if (strncmp(label, "../", 3) == 0) return (0); /* Check if the label contains /../ */ if (strstr(label, "/../") != NULL) return (0); /* Check if the label ends at ../ */ if ((s = strstr(label, "/..")) != NULL && s[3] == '\0') return (0); return (1); } static void g_label_mangle_name(char *label, size_t size) { struct sbuf *sb; const u_char *c; + size_t len, i; + /* Trim trailing whitespace. */ + len = strlen(label); + for (i = len; i > 0; i--) { + if (isspace(label[i - 1])) + label[i - 1] = '\0'; + else + break; + } + if (*label == '\0') + return; + + sb = sbuf_new(NULL, NULL, size, SBUF_FIXEDLEN); for (c = label; *c != '\0'; c++) { + /* Trim leading whitespace. */ + if (isspace(*c) && sbuf_len(sb) == 0) + continue; if (!isprint(*c) || isspace(*c) || *c =='"' || *c == '%') sbuf_printf(sb, "%%%02X", *c); else sbuf_putc(sb, *c); } if (sbuf_finish(sb) != 0) label[0] = '\0'; else strlcpy(label, sbuf_data(sb), size); sbuf_delete(sb); } static struct g_geom * g_label_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp, const char *label, const char *dir, off_t mediasize) { struct g_geom *gp; struct g_provider *pp2; struct g_consumer *cp; char name[64]; g_topology_assert(); if (!g_label_is_name_ok(label)) { G_LABEL_DEBUG(0, "%s contains suspicious label, skipping.", pp->name); G_LABEL_DEBUG(1, "%s suspicious label is: %s", pp->name, label); if (req != NULL) gctl_error(req, "Label name %s is invalid.", label); return (NULL); } gp = NULL; cp = NULL; if (snprintf(name, sizeof(name), "%s/%s", dir, label) >= sizeof(name)) { if (req != NULL) gctl_error(req, "Label name %s is too long.", label); return (NULL); } LIST_FOREACH(gp, &mp->geom, geom) { pp2 = LIST_FIRST(&gp->provider); if (pp2 == NULL) continue; if ((pp2->flags & G_PF_ORPHAN) != 0) continue; if (strcmp(pp2->name, name) == 0) { G_LABEL_DEBUG(1, "Label %s(%s) already exists (%s).", label, name, pp->name); if (req != NULL) { gctl_error(req, "Provider %s already exists.", name); } return (NULL); } } gp = g_slice_new(mp, 1, pp, &cp, NULL, 0, NULL); if (gp == NULL) { G_LABEL_DEBUG(0, "Cannot create slice %s.", label); if (req != NULL) gctl_error(req, "Cannot create slice %s.", label); return (NULL); } gp->orphan = g_label_orphan; gp->spoiled = g_label_spoiled; gp->resize = g_label_resize; g_access(cp, -1, 0, 0); g_slice_config(gp, 0, G_SLICE_CONFIG_SET, (off_t)0, mediasize, pp->sectorsize, "%s", name); G_LABEL_DEBUG(1, "Label for provider %s is %s.", pp->name, name); return (gp); } static int g_label_destroy(struct g_geom *gp, boolean_t force) { struct g_provider *pp; g_topology_assert(); pp = LIST_FIRST(&gp->provider); if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { if (force) { G_LABEL_DEBUG(0, "Provider %s is still open, so it " "can't be definitely removed.", pp->name); } else { G_LABEL_DEBUG(1, "Provider %s is still open (r%dw%de%d).", pp->name, pp->acr, pp->acw, pp->ace); return (EBUSY); } } else if (pp != NULL) G_LABEL_DEBUG(1, "Label %s removed.", pp->name); g_slice_spoiled(LIST_FIRST(&gp->consumer)); return (0); } static int g_label_read_metadata(struct g_consumer *cp, struct g_label_metadata *md) { struct g_provider *pp; u_char *buf; int error; g_topology_assert(); pp = cp->provider; g_topology_unlock(); buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); g_topology_lock(); if (buf == NULL) return (error); /* Decode metadata. */ label_metadata_decode(buf, md); g_free(buf); return (0); } static void g_label_orphan_taste(struct g_consumer *cp __unused) { KASSERT(1 == 0, ("%s called?", __func__)); } static void g_label_start_taste(struct bio *bp __unused) { KASSERT(1 == 0, ("%s called?", __func__)); } static int g_label_access_taste(struct g_provider *pp __unused, int dr __unused, int dw __unused, int de __unused) { KASSERT(1 == 0, ("%s called", __func__)); return (EOPNOTSUPP); } static struct g_geom * g_label_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_label_metadata md; struct g_consumer *cp; struct g_geom *gp; int i; g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); g_topology_assert(); G_LABEL_DEBUG(2, "Tasting %s.", pp->name); /* Skip providers that are already open for writing. */ if (pp->acw > 0) return (NULL); if (strcmp(pp->geom->class->name, mp->name) == 0) return (NULL); gp = g_new_geomf(mp, "label:taste"); gp->start = g_label_start_taste; gp->access = g_label_access_taste; gp->orphan = g_label_orphan_taste; cp = g_new_consumer(gp); g_attach(cp, pp); if (g_access(cp, 1, 0, 0) != 0) goto end; do { if (g_label_read_metadata(cp, &md) != 0) break; if (strcmp(md.md_magic, G_LABEL_MAGIC) != 0) break; if (md.md_version > G_LABEL_VERSION) { printf("geom_label.ko module is too old to handle %s.\n", pp->name); break; } /* * Backward compatibility: */ /* * There was no md_provsize field in earlier versions of * metadata. */ if (md.md_version < 2) md.md_provsize = pp->mediasize; if (md.md_provsize != pp->mediasize) break; g_label_create(NULL, mp, pp, md.md_label, G_LABEL_DIR, pp->mediasize - pp->sectorsize); } while (0); for (i = 0; g_labels[i] != NULL; i++) { char label[128]; if (g_labels[i]->ld_enabled == 0) continue; g_topology_unlock(); g_labels[i]->ld_taste(cp, label, sizeof(label)); g_label_mangle_name(label, sizeof(label)); g_topology_lock(); if (label[0] == '\0') continue; g_label_create(NULL, mp, pp, label, g_labels[i]->ld_dir, pp->mediasize); } g_access(cp, -1, 0, 0); end: g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); return (NULL); } static void g_label_ctl_create(struct gctl_req *req, struct g_class *mp) { struct g_provider *pp; const char *name; int *nargs; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs != 2) { gctl_error(req, "Invalid number of arguments."); return; } /* * arg1 is the name of provider. */ name = gctl_get_asciiparam(req, "arg1"); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", 1); return; } if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); pp = g_provider_by_name(name); if (pp == NULL) { G_LABEL_DEBUG(1, "Provider %s is invalid.", name); gctl_error(req, "Provider %s is invalid.", name); return; } /* * arg0 is the label. */ name = gctl_get_asciiparam(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", 0); return; } g_label_create(req, mp, pp, name, G_LABEL_DIR, pp->mediasize); } static const char * g_label_skip_dir(const char *name) { char path[64]; u_int i; if (strncmp(name, "/dev/", strlen("/dev/")) == 0) name += strlen("/dev/"); if (strncmp(name, G_LABEL_DIR "/", strlen(G_LABEL_DIR "/")) == 0) name += strlen(G_LABEL_DIR "/"); for (i = 0; g_labels[i] != NULL; i++) { snprintf(path, sizeof(path), "%s/", g_labels[i]->ld_dir); if (strncmp(name, path, strlen(path)) == 0) { name += strlen(path); break; } } return (name); } static struct g_geom * g_label_find_geom(struct g_class *mp, const char *name) { struct g_geom *gp; struct g_provider *pp; const char *pname; name = g_label_skip_dir(name); LIST_FOREACH(gp, &mp->geom, geom) { pp = LIST_FIRST(&gp->provider); pname = g_label_skip_dir(pp->name); if (strcmp(pname, name) == 0) return (gp); } return (NULL); } static void g_label_ctl_destroy(struct gctl_req *req, struct g_class *mp) { int *nargs, *force, error, i; struct g_geom *gp; const char *name; char param[16]; g_topology_assert(); nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "No '%s' argument", "nargs"); return; } if (*nargs <= 0) { gctl_error(req, "Missing device(s)."); return; } force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "No 'force' argument"); return; } for (i = 0; i < *nargs; i++) { snprintf(param, sizeof(param), "arg%d", i); name = gctl_get_asciiparam(req, param); if (name == NULL) { gctl_error(req, "No 'arg%d' argument", i); return; } gp = g_label_find_geom(mp, name); if (gp == NULL) { G_LABEL_DEBUG(1, "Label %s is invalid.", name); gctl_error(req, "Label %s is invalid.", name); return; } error = g_label_destroy(gp, *force); if (error != 0) { gctl_error(req, "Cannot destroy label %s (error=%d).", LIST_FIRST(&gp->provider)->name, error); return; } } } static void g_label_config(struct gctl_req *req, struct g_class *mp, const char *verb) { uint32_t *version; g_topology_assert(); version = gctl_get_paraml(req, "version", sizeof(*version)); if (version == NULL) { gctl_error(req, "No '%s' argument.", "version"); return; } if (*version != G_LABEL_VERSION) { gctl_error(req, "Userland and kernel parts are out of sync."); return; } if (strcmp(verb, "create") == 0) { g_label_ctl_create(req, mp); return; } else if (strcmp(verb, "destroy") == 0 || strcmp(verb, "stop") == 0) { g_label_ctl_destroy(req, mp); return; } gctl_error(req, "Unknown verb."); } DECLARE_GEOM_CLASS(g_label_class, g_label); MODULE_VERSION(geom_label, 0); Index: projects/clang1000-import/sys/i386/linux/Makefile =================================================================== --- projects/clang1000-import/sys/i386/linux/Makefile (revision 356919) +++ projects/clang1000-import/sys/i386/linux/Makefile (revision 356920) @@ -1,28 +1,7 @@ # Makefile for syscall tables # # $FreeBSD$ -# Don't use an OBJDIR -.OBJDIR: ${.CURDIR} +GENERATED_PREFIX= linux_ -.include - -MAKESYSCALLS= ../../tools/makesyscalls.lua -SRCS= syscalls.conf \ - syscalls.master -GENERATED= linux_proto.h \ - linux_syscall.h \ - linux_syscalls.c \ - linux_sysent.c \ - linux_systrace_args.c - -all: - @echo "make sysent only" - -# We .ORDER these explicitly so that we only run MAKESYSCALLS once, rather than -# potentially once for each ${GENERATED} file. -.ORDER: ${GENERATED} -sysent: ${GENERATED} - -${GENERATED}: ${MAKESYSCALLS} ${SRCS} - ${LUA} ${MAKESYSCALLS} syscalls.master syscalls.conf +.include "../../conf/sysent.mk" Index: projects/clang1000-import/sys/kern/Makefile =================================================================== --- projects/clang1000-import/sys/kern/Makefile (revision 356919) +++ projects/clang1000-import/sys/kern/Makefile (revision 356920) @@ -1,30 +1,14 @@ # @(#)Makefile 8.2 (Berkeley) 3/21/94 # $FreeBSD$ # # Makefile for init_sysent -# Don't use an OBJDIR -.OBJDIR: ${.CURDIR} +SYSENT_CONF= +GENERATED= init_sysent.c \ + syscalls.c \ + systrace_args.c \ + ${SYSDIR}/sys/syscall.h \ + ${SYSDIR}/sys/syscall.mk \ + ${SYSDIR}/sys/sysproto.h -.include - -MAKESYSCALLS= ../tools/makesyscalls.lua -SRCS= capabilities.conf \ - syscalls.master -GENERATED= init_sysent.c \ - syscalls.c \ - systrace_args.c \ - ../sys/syscall.h \ - ../sys/syscall.mk \ - ../sys/sysproto.h - -all: - @echo "make sysent only" - -# We .ORDER these explicitly so that we only run MAKESYSCALLS once, rather than -# potentially once for each ${GENERATED} file. -.ORDER: ${GENERATED} -sysent: ${GENERATED} - -${GENERATED}: ${MAKESYSCALLS} ${SRCS} - ${LUA} ${MAKESYSCALLS} syscalls.master +.include "../conf/sysent.mk" Index: projects/clang1000-import/sys/kern/kern_sendfile.c =================================================================== --- projects/clang1000-import/sys/kern/kern_sendfile.c (revision 356919) +++ projects/clang1000-import/sys/kern/kern_sendfile.c (revision 356920) @@ -1,1247 +1,1255 @@ /*- * Copyright (c) 2013-2015 Gleb Smirnoff * Copyright (c) 1998, David Greenman. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_kern_tls.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define EXT_FLAG_SYNC EXT_FLAG_VENDOR1 #define EXT_FLAG_NOCACHE EXT_FLAG_VENDOR2 #define EXT_FLAG_CACHE_LAST EXT_FLAG_VENDOR3 /* * Structure describing a single sendfile(2) I/O, which may consist of * several underlying pager I/Os. * * The syscall context allocates the structure and initializes 'nios' * to 1. As sendfile_swapin() runs through pages and starts asynchronous * paging operations, it increments 'nios'. * * Every I/O completion calls sendfile_iodone(), which decrements the 'nios', * and the syscall also calls sendfile_iodone() after allocating all mbufs, * linking them and sending to socket. Whoever reaches zero 'nios' is * responsible to * call pru_ready on the socket, to notify it of readyness * of the data. */ struct sf_io { volatile u_int nios; u_int error; int npages; struct socket *so; struct mbuf *m; + vm_object_t obj; #ifdef KERN_TLS struct ktls_session *tls; #endif vm_page_t pa[]; }; /* * Structure used to track requests with SF_SYNC flag. */ struct sendfile_sync { struct mtx mtx; struct cv cv; unsigned count; }; counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)]; static void sfstat_init(const void *unused) { COUNTER_ARRAY_ALLOC(sfstat, sizeof(struct sfstat) / sizeof(uint64_t), M_WAITOK); } SYSINIT(sfstat, SI_SUB_MBUF, SI_ORDER_FIRST, sfstat_init, NULL); static int sfstat_sysctl(SYSCTL_HANDLER_ARGS) { struct sfstat s; COUNTER_ARRAY_COPY(sfstat, &s, sizeof(s) / sizeof(uint64_t)); if (req->newptr) COUNTER_ARRAY_ZERO(sfstat, sizeof(s) / sizeof(uint64_t)); return (SYSCTL_OUT(req, &s, sizeof(s))); } SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW, NULL, 0, sfstat_sysctl, "I", "sendfile statistics"); static void sendfile_free_mext(struct mbuf *m) { struct sf_buf *sf; vm_page_t pg; int flags; KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_SFBUF, ("%s: m %p !M_EXT or !EXT_SFBUF", __func__, m)); sf = m->m_ext.ext_arg1; pg = sf_buf_page(sf); flags = (m->m_ext.ext_flags & EXT_FLAG_NOCACHE) != 0 ? VPR_TRYFREE : 0; sf_buf_free(sf); vm_page_release(pg, flags); if (m->m_ext.ext_flags & EXT_FLAG_SYNC) { struct sendfile_sync *sfs = m->m_ext.ext_arg2; mtx_lock(&sfs->mtx); KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0")); if (--sfs->count == 0) cv_signal(&sfs->cv); mtx_unlock(&sfs->mtx); } } static void sendfile_free_mext_pg(struct mbuf *m) { struct mbuf_ext_pgs *ext_pgs; vm_page_t pg; int flags, i; bool cache_last; KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_PGS, ("%s: m %p !M_EXT or !EXT_PGS", __func__, m)); cache_last = m->m_ext.ext_flags & EXT_FLAG_CACHE_LAST; ext_pgs = m->m_ext.ext_pgs; flags = (m->m_ext.ext_flags & EXT_FLAG_NOCACHE) != 0 ? VPR_TRYFREE : 0; for (i = 0; i < ext_pgs->npgs; i++) { if (cache_last && i == ext_pgs->npgs - 1) flags = 0; pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]); vm_page_release(pg, flags); } if (m->m_ext.ext_flags & EXT_FLAG_SYNC) { struct sendfile_sync *sfs = m->m_ext.ext_arg2; mtx_lock(&sfs->mtx); KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0")); if (--sfs->count == 0) cv_signal(&sfs->cv); mtx_unlock(&sfs->mtx); } } /* * Helper function to calculate how much data to put into page i of n. * Only first and last pages are special. */ static inline off_t xfsize(int i, int n, off_t off, off_t len) { if (i == 0) return (omin(PAGE_SIZE - (off & PAGE_MASK), len)); if (i == n - 1 && ((off + len) & PAGE_MASK) > 0) return ((off + len) & PAGE_MASK); return (PAGE_SIZE); } /* * Helper function to get offset within object for i page. */ static inline vm_ooffset_t vmoff(int i, off_t off) { if (i == 0) return ((vm_ooffset_t)off); return (trunc_page(off + i * PAGE_SIZE)); } /* * Helper function used when allocation of a page or sf_buf failed. * Pretend as if we don't have enough space, subtract xfsize() of * all pages that failed. */ static inline void fixspace(int old, int new, off_t off, int *space) { KASSERT(old > new, ("%s: old %d new %d", __func__, old, new)); /* Subtract last one. */ *space -= xfsize(old - 1, old, off, *space); old--; if (new == old) /* There was only one page. */ return; /* Subtract first one. */ if (new == 0) { *space -= xfsize(0, old, off, *space); new++; } /* Rest of pages are full sized. */ *space -= (old - new) * PAGE_SIZE; KASSERT(*space >= 0, ("%s: space went backwards", __func__)); } /* * I/O completion callback. */ static void sendfile_iodone(void *arg, vm_page_t *pg, int count, int error) { struct sf_io *sfio = arg; struct socket *so = sfio->so; for (int i = 0; i < count; i++) if (pg[i] != bogus_page) vm_page_xunbusy_unchecked(pg[i]); if (error) sfio->error = error; if (!refcount_release(&sfio->nios)) return; + vm_object_pip_wakeup(sfio->obj); + if (__predict_false(sfio->error && sfio->m == NULL)) { /* * I/O operation failed, but pru_send hadn't been executed - * nothing had been sent to the socket. The syscall has * returned error to the user. */ free(sfio, M_TEMP); return; } #if defined(KERN_TLS) && defined(INVARIANTS) if ((sfio->m->m_flags & M_EXT) != 0 && sfio->m->m_ext.ext_type == EXT_PGS) KASSERT(sfio->tls == sfio->m->m_ext.ext_pgs->tls, ("TLS session mismatch")); else KASSERT(sfio->tls == NULL, ("non-ext_pgs mbuf with TLS session")); #endif CURVNET_SET(so->so_vnet); if (__predict_false(sfio->error)) { /* * I/O operation failed. The state of data in the socket * is now inconsistent, and all what we can do is to tear * it down. Protocol abort method would tear down protocol * state, free all ready mbufs and detach not ready ones. * We will free the mbufs corresponding to this I/O manually. * * The socket would be marked with EIO and made available * for read, so that application receives EIO on next * syscall and eventually closes the socket. */ so->so_proto->pr_usrreqs->pru_abort(so); so->so_error = EIO; mb_free_notready(sfio->m, sfio->npages); #ifdef KERN_TLS } else if (sfio->tls != NULL && sfio->tls->mode == TCP_TLS_MODE_SW) { /* * I/O operation is complete, but we still need to * encrypt. We cannot do this in the interrupt thread * of the disk controller, so forward the mbufs to a * different thread. * * Donate the socket reference from sfio to rather * than explicitly invoking soref(). */ ktls_enqueue(sfio->m, so, sfio->npages); goto out_with_ref; #endif } else (void)(so->so_proto->pr_usrreqs->pru_ready)(so, sfio->m, sfio->npages); SOCK_LOCK(so); sorele(so); #ifdef KERN_TLS out_with_ref: #endif CURVNET_RESTORE(); free(sfio, M_TEMP); } /* * Iterate through pages vector and request paging for non-valid pages. */ static int sendfile_swapin(vm_object_t obj, struct sf_io *sfio, int *nios, off_t off, off_t len, int npages, int rhpages, int flags) { vm_page_t *pa = sfio->pa; int grabbed; *nios = 0; flags = (flags & SF_NODISKIO) ? VM_ALLOC_NOWAIT : 0; /* * First grab all the pages and wire them. Note that we grab * only required pages. Readahead pages are dealt with later. */ VM_OBJECT_WLOCK(obj); grabbed = vm_page_grab_pages(obj, OFF_TO_IDX(off), VM_ALLOC_NORMAL | VM_ALLOC_WIRED | flags, pa, npages); if (grabbed < npages) { for (int i = grabbed; i < npages; i++) pa[i] = NULL; npages = grabbed; rhpages = 0; } for (int i = 0; i < npages;) { int j, a, count, rv; /* Skip valid pages. */ if (vm_page_is_valid(pa[i], vmoff(i, off) & PAGE_MASK, xfsize(i, npages, off, len))) { vm_page_xunbusy(pa[i]); SFSTAT_INC(sf_pages_valid); i++; continue; } /* * Next page is invalid. Check if it belongs to pager. It * may not be there, which is a regular situation for shmem * pager. For vnode pager this happens only in case of * a sparse file. * * Important feature of vm_pager_has_page() is the hint * stored in 'a', about how many pages we can pagein after * this page in a single I/O. */ if (!vm_pager_has_page(obj, OFF_TO_IDX(vmoff(i, off)), NULL, &a)) { pmap_zero_page(pa[i]); pa[i]->valid = VM_PAGE_BITS_ALL; MPASS(pa[i]->dirty == 0); vm_page_xunbusy(pa[i]); i++; continue; } /* * We want to pagein as many pages as possible, limited only * by the 'a' hint and actual request. */ count = min(a + 1, npages - i); /* * We should not pagein into a valid page, thus we first trim * any valid pages off the end of request, and substitute * to bogus_page those, that are in the middle. */ for (j = i + count - 1; j > i; j--) { if (vm_page_is_valid(pa[j], vmoff(j, off) & PAGE_MASK, xfsize(j, npages, off, len))) { count--; rhpages = 0; } else break; } for (j = i + 1; j < i + count - 1; j++) if (vm_page_is_valid(pa[j], vmoff(j, off) & PAGE_MASK, xfsize(j, npages, off, len))) { vm_page_xunbusy(pa[j]); SFSTAT_INC(sf_pages_valid); SFSTAT_INC(sf_pages_bogus); pa[j] = bogus_page; } refcount_acquire(&sfio->nios); + VM_OBJECT_WUNLOCK(obj); rv = vm_pager_get_pages_async(obj, pa + i, count, NULL, i + count == npages ? &rhpages : NULL, &sendfile_iodone, sfio); + VM_OBJECT_WLOCK(obj); if (__predict_false(rv != VM_PAGER_OK)) { /* * Perform full pages recovery before returning EIO. * Pages from 0 to npages are wired. * Pages from i to npages are also busied. * Pages from (i + 1) to (i + count - 1) may be * substituted to bogus page, and not busied. */ for (j = 0; j < npages; j++) { if (j > i && j < i + count - 1 && pa[j] == bogus_page) pa[j] = vm_page_lookup(obj, OFF_TO_IDX(vmoff(j, off))); else if (j >= i) vm_page_xunbusy(pa[j]); KASSERT(pa[j] != NULL && pa[j] != bogus_page, ("%s: page %p[%d] I/O recovery failure", __func__, pa, j)); vm_page_unwire(pa[j], PQ_INACTIVE); } VM_OBJECT_WUNLOCK(obj); return (EIO); } SFSTAT_INC(sf_iocnt); SFSTAT_ADD(sf_pages_read, count); if (i + count == npages) SFSTAT_ADD(sf_rhpages_read, rhpages); /* * Restore the valid page pointers. They are already * unbusied, but still wired. */ for (j = i; j < i + count; j++) if (pa[j] == bogus_page) { pa[j] = vm_page_lookup(obj, OFF_TO_IDX(vmoff(j, off))); KASSERT(pa[j], ("%s: page %p[%d] disappeared", __func__, pa, j)); } i += count; (*nios)++; } VM_OBJECT_WUNLOCK(obj); if (*nios == 0 && npages != 0) SFSTAT_INC(sf_noiocnt); return (0); } static int sendfile_getobj(struct thread *td, struct file *fp, vm_object_t *obj_res, struct vnode **vp_res, struct shmfd **shmfd_res, off_t *obj_size, int *bsize) { struct vattr va; vm_object_t obj; struct vnode *vp; struct shmfd *shmfd; int error; vp = *vp_res = NULL; obj = NULL; shmfd = *shmfd_res = NULL; *bsize = 0; /* * The file descriptor must be a regular file and have a * backing VM object. */ if (fp->f_type == DTYPE_VNODE) { vp = fp->f_vnode; vn_lock(vp, LK_SHARED | LK_RETRY); if (vp->v_type != VREG) { error = EINVAL; goto out; } *bsize = vp->v_mount->mnt_stat.f_iosize; error = VOP_GETATTR(vp, &va, td->td_ucred); if (error != 0) goto out; *obj_size = va.va_size; obj = vp->v_object; if (obj == NULL) { error = EINVAL; goto out; } } else if (fp->f_type == DTYPE_SHM) { error = 0; shmfd = fp->f_data; obj = shmfd->shm_object; *obj_size = shmfd->shm_size; } else { error = EINVAL; goto out; } VM_OBJECT_WLOCK(obj); if ((obj->flags & OBJ_DEAD) != 0) { VM_OBJECT_WUNLOCK(obj); error = EBADF; goto out; } /* * Temporarily increase the backing VM object's reference * count so that a forced reclamation of its vnode does not * immediately destroy it. */ vm_object_reference_locked(obj); VM_OBJECT_WUNLOCK(obj); *obj_res = obj; *vp_res = vp; *shmfd_res = shmfd; out: if (vp != NULL) VOP_UNLOCK(vp); return (error); } static int sendfile_getsock(struct thread *td, int s, struct file **sock_fp, struct socket **so) { int error; *sock_fp = NULL; *so = NULL; /* * The socket must be a stream socket and connected. */ error = getsock_cap(td, s, &cap_send_rights, sock_fp, NULL, NULL); if (error != 0) return (error); *so = (*sock_fp)->f_data; if ((*so)->so_type != SOCK_STREAM) return (EINVAL); if (SOLISTENING(*so)) return (ENOTCONN); return (0); } int vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, struct thread *td) { struct file *sock_fp; struct vnode *vp; struct vm_object *obj; struct socket *so; #ifdef KERN_TLS struct ktls_session *tls; #endif struct mbuf_ext_pgs *ext_pgs; struct mbuf *m, *mh, *mhtail; struct sf_buf *sf; struct shmfd *shmfd; struct sendfile_sync *sfs; struct vattr va; off_t off, sbytes, rem, obj_size; int bsize, error, ext_pgs_idx, hdrlen, max_pgs, softerr; #ifdef KERN_TLS int tls_enq_cnt; #endif bool use_ext_pgs; obj = NULL; so = NULL; m = mh = NULL; sfs = NULL; #ifdef KERN_TLS tls = NULL; #endif hdrlen = sbytes = 0; softerr = 0; use_ext_pgs = false; error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize); if (error != 0) return (error); error = sendfile_getsock(td, sockfd, &sock_fp, &so); if (error != 0) goto out; #ifdef MAC error = mac_socket_check_send(td->td_ucred, so); if (error != 0) goto out; #endif SFSTAT_INC(sf_syscalls); SFSTAT_ADD(sf_rhpages_requested, SF_READAHEAD(flags)); if (flags & SF_SYNC) { sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO); mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); cv_init(&sfs->cv, "sendfile"); } rem = nbytes ? omin(nbytes, obj_size - offset) : obj_size - offset; /* * Protect against multiple writers to the socket. * * XXXRW: Historically this has assumed non-interruptibility, so now * we implement that, but possibly shouldn't. */ (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR); #ifdef KERN_TLS tls = ktls_hold(so->so_snd.sb_tls_info); #endif /* * Loop through the pages of the file, starting with the requested * offset. Get a file page (do I/O if necessary), map the file page * into an sf_buf, attach an mbuf header to the sf_buf, and queue * it on the socket. * This is done in two loops. The inner loop turns as many pages * as it can, up to available socket buffer space, without blocking * into mbufs to have it bulk delivered into the socket send buffer. * The outer loop checks the state and available space of the socket * and takes care of the overall progress. */ for (off = offset; rem > 0; ) { struct sf_io *sfio; vm_page_t *pa; struct mbuf *mtail; int nios, space, npages, rhpages; mtail = NULL; /* * Check the socket state for ongoing connection, * no errors and space in socket buffer. * If space is low allow for the remainder of the * file to be processed if it fits the socket buffer. * Otherwise block in waiting for sufficient space * to proceed, or if the socket is nonblocking, return * to userland with EAGAIN while reporting how far * we've come. * We wait until the socket buffer has significant free * space to do bulk sends. This makes good use of file * system read ahead and allows packet segmentation * offloading hardware to take over lots of work. If * we were not careful here we would send off only one * sfbuf at a time. */ SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2) so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2; retry_space: if (so->so_snd.sb_state & SBS_CANTSENDMORE) { error = EPIPE; SOCKBUF_UNLOCK(&so->so_snd); goto done; } else if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(&so->so_snd); goto done; } if ((so->so_state & SS_ISCONNECTED) == 0) { SOCKBUF_UNLOCK(&so->so_snd); error = ENOTCONN; goto done; } space = sbspace(&so->so_snd); if (space < rem && (space <= 0 || space < so->so_snd.sb_lowat)) { if (so->so_state & SS_NBIO) { SOCKBUF_UNLOCK(&so->so_snd); error = EAGAIN; goto done; } /* * sbwait drops the lock while sleeping. * When we loop back to retry_space the * state may have changed and we retest * for it. */ error = sbwait(&so->so_snd); /* * An error from sbwait usually indicates that we've * been interrupted by a signal. If we've sent anything * then return bytes sent, otherwise return the error. */ if (error != 0) { SOCKBUF_UNLOCK(&so->so_snd); goto done; } goto retry_space; } SOCKBUF_UNLOCK(&so->so_snd); /* * At the beginning of the first loop check if any headers * are specified and copy them into mbufs. Reduce space in * the socket buffer by the size of the header mbuf chain. * Clear hdr_uio here and hdrlen at the end of the first loop. */ if (hdr_uio != NULL && hdr_uio->uio_resid > 0) { hdr_uio->uio_td = td; hdr_uio->uio_rw = UIO_WRITE; #ifdef KERN_TLS if (tls != NULL) mh = m_uiotombuf(hdr_uio, M_WAITOK, space, tls->params.max_frame_len, M_NOMAP); else #endif mh = m_uiotombuf(hdr_uio, M_WAITOK, space, 0, 0); hdrlen = m_length(mh, &mhtail); space -= hdrlen; /* * If header consumed all the socket buffer space, * don't waste CPU cycles and jump to the end. */ if (space == 0) { sfio = NULL; nios = 0; goto prepend_header; } hdr_uio = NULL; } if (vp != NULL) { error = vn_lock(vp, LK_SHARED); if (error != 0) goto done; error = VOP_GETATTR(vp, &va, td->td_ucred); if (error != 0 || off >= va.va_size) { VOP_UNLOCK(vp); goto done; } if (va.va_size != obj_size) { obj_size = va.va_size; rem = nbytes ? omin(nbytes + offset, obj_size) : obj_size; rem -= off; } } if (space > rem) space = rem; else if (space > PAGE_SIZE) { /* * Use page boundaries when possible for large * requests. */ if (off & PAGE_MASK) space -= (PAGE_SIZE - (off & PAGE_MASK)); space = trunc_page(space); if (off & PAGE_MASK) space += (PAGE_SIZE - (off & PAGE_MASK)); } npages = howmany(space + (off & PAGE_MASK), PAGE_SIZE); /* * Calculate maximum allowed number of pages for readahead * at this iteration. If SF_USER_READAHEAD was set, we don't * do any heuristics and use exactly the value supplied by * application. Otherwise, we allow readahead up to "rem". * If application wants more, let it be, but there is no * reason to go above MAXPHYS. Also check against "obj_size", * since vm_pager_has_page() can hint beyond EOF. */ if (flags & SF_USER_READAHEAD) { rhpages = SF_READAHEAD(flags); } else { rhpages = howmany(rem + (off & PAGE_MASK), PAGE_SIZE) - npages; rhpages += SF_READAHEAD(flags); } rhpages = min(howmany(MAXPHYS, PAGE_SIZE), rhpages); rhpages = min(howmany(obj_size - trunc_page(off), PAGE_SIZE) - npages, rhpages); sfio = malloc(sizeof(struct sf_io) + npages * sizeof(vm_page_t), M_TEMP, M_WAITOK); refcount_init(&sfio->nios, 1); sfio->so = so; + sfio->obj = obj; sfio->error = 0; + vm_object_pip_add(obj, 1); #ifdef KERN_TLS /* * This doesn't use ktls_hold() because sfio->m will * also have a reference on 'tls' that will be valid * for all of sfio's lifetime. */ sfio->tls = tls; #endif error = sendfile_swapin(obj, sfio, &nios, off, space, npages, rhpages, flags); if (error != 0) { if (vp != NULL) VOP_UNLOCK(vp); sfio->m = NULL; sendfile_iodone(sfio, NULL, 0, error); goto done; } /* * Loop and construct maximum sized mbuf chain to be bulk * dumped into socket buffer. */ pa = sfio->pa; /* * Use unmapped mbufs if enabled for TCP. Unmapped * bufs are restricted to TCP as that is what has been * tested. In particular, unmapped mbufs have not * been tested with UNIX-domain sockets. * * TLS frames always require unmapped mbufs. */ if ((mb_use_ext_pgs && so->so_proto->pr_protocol == IPPROTO_TCP) #ifdef KERN_TLS || tls != NULL #endif ) { use_ext_pgs = true; #ifdef KERN_TLS if (tls != NULL) max_pgs = num_pages(tls->params.max_frame_len); else #endif max_pgs = MBUF_PEXT_MAX_PGS; /* Start at last index, to wrap on first use. */ ext_pgs_idx = max_pgs - 1; } for (int i = 0; i < npages; i++) { struct mbuf *m0; /* * If a page wasn't grabbed successfully, then * trim the array. Can happen only with SF_NODISKIO. */ if (pa[i] == NULL) { SFSTAT_INC(sf_busy); fixspace(npages, i, off, &space); npages = i; softerr = EBUSY; break; } if (use_ext_pgs) { off_t xfs; ext_pgs_idx++; if (ext_pgs_idx == max_pgs) { m0 = mb_alloc_ext_pgs(M_WAITOK, false, sendfile_free_mext_pg); if (flags & SF_NOCACHE) { m0->m_ext.ext_flags |= EXT_FLAG_NOCACHE; /* * See comment below regarding * ignoring SF_NOCACHE for the * last page. */ if ((npages - i <= max_pgs) && ((off + space) & PAGE_MASK) && (rem > space || rhpages > 0)) m0->m_ext.ext_flags |= EXT_FLAG_CACHE_LAST; } if (sfs != NULL) { m0->m_ext.ext_flags |= EXT_FLAG_SYNC; m0->m_ext.ext_arg2 = sfs; mtx_lock(&sfs->mtx); sfs->count++; mtx_unlock(&sfs->mtx); } ext_pgs = m0->m_ext.ext_pgs; if (i == 0) sfio->m = m0; ext_pgs_idx = 0; /* Append to mbuf chain. */ if (mtail != NULL) mtail->m_next = m0; else m = m0; mtail = m0; ext_pgs->first_pg_off = vmoff(i, off) & PAGE_MASK; } if (nios) { mtail->m_flags |= M_NOTREADY; ext_pgs->nrdy++; } ext_pgs->pa[ext_pgs_idx] = VM_PAGE_TO_PHYS(pa[i]); ext_pgs->npgs++; xfs = xfsize(i, npages, off, space); ext_pgs->last_pg_len = xfs; MBUF_EXT_PGS_ASSERT_SANITY(ext_pgs); mtail->m_len += xfs; mtail->m_ext.ext_size += PAGE_SIZE; continue; } /* * Get a sendfile buf. When allocating the * first buffer for mbuf chain, we usually * wait as long as necessary, but this wait * can be interrupted. For consequent * buffers, do not sleep, since several * threads might exhaust the buffers and then * deadlock. */ sf = sf_buf_alloc(pa[i], m != NULL ? SFB_NOWAIT : SFB_CATCH); if (sf == NULL) { SFSTAT_INC(sf_allocfail); for (int j = i; j < npages; j++) vm_page_unwire(pa[j], PQ_INACTIVE); if (m == NULL) softerr = ENOBUFS; fixspace(npages, i, off, &space); npages = i; break; } m0 = m_get(M_WAITOK, MT_DATA); m0->m_ext.ext_buf = (char *)sf_buf_kva(sf); m0->m_ext.ext_size = PAGE_SIZE; m0->m_ext.ext_arg1 = sf; m0->m_ext.ext_type = EXT_SFBUF; m0->m_ext.ext_flags = EXT_FLAG_EMBREF; m0->m_ext.ext_free = sendfile_free_mext; /* * SF_NOCACHE sets the page as being freed upon send. * However, we ignore it for the last page in 'space', * if the page is truncated, and we got more data to * send (rem > space), or if we have readahead * configured (rhpages > 0). */ if ((flags & SF_NOCACHE) && (i != npages - 1 || !((off + space) & PAGE_MASK) || !(rem > space || rhpages > 0))) m0->m_ext.ext_flags |= EXT_FLAG_NOCACHE; if (sfs != NULL) { m0->m_ext.ext_flags |= EXT_FLAG_SYNC; m0->m_ext.ext_arg2 = sfs; mtx_lock(&sfs->mtx); sfs->count++; mtx_unlock(&sfs->mtx); } m0->m_ext.ext_count = 1; m0->m_flags |= (M_EXT | M_RDONLY); if (nios) m0->m_flags |= M_NOTREADY; m0->m_data = (char *)sf_buf_kva(sf) + (vmoff(i, off) & PAGE_MASK); m0->m_len = xfsize(i, npages, off, space); if (i == 0) sfio->m = m0; /* Append to mbuf chain. */ if (mtail != NULL) mtail->m_next = m0; else m = m0; mtail = m0; } if (vp != NULL) VOP_UNLOCK(vp); /* Keep track of bytes processed. */ off += space; rem -= space; /* Prepend header, if any. */ if (hdrlen) { prepend_header: mhtail->m_next = m; m = mh; mh = NULL; } if (m == NULL) { KASSERT(softerr, ("%s: m NULL, no error", __func__)); error = softerr; free(sfio, M_TEMP); goto done; } /* Add the buffer chain to the socket buffer. */ KASSERT(m_length(m, NULL) == space + hdrlen, ("%s: mlen %u space %d hdrlen %d", __func__, m_length(m, NULL), space, hdrlen)); CURVNET_SET(so->so_vnet); #ifdef KERN_TLS if (tls != NULL) { error = ktls_frame(m, tls, &tls_enq_cnt, TLS_RLTYPE_APP); if (error != 0) goto done; } #endif if (nios == 0) { /* * If sendfile_swapin() didn't initiate any I/Os, * which happens if all data is cached in VM, then * we can send data right now without the * PRUS_NOTREADY flag. */ + vm_object_pip_wakeup(sfio->obj); free(sfio, M_TEMP); #ifdef KERN_TLS if (tls != NULL && tls->mode == TCP_TLS_MODE_SW) { error = (*so->so_proto->pr_usrreqs->pru_send) (so, PRUS_NOTREADY, m, NULL, NULL, td); soref(so); ktls_enqueue(m, so, tls_enq_cnt); } else #endif error = (*so->so_proto->pr_usrreqs->pru_send) (so, 0, m, NULL, NULL, td); } else { sfio->npages = npages; soref(so); error = (*so->so_proto->pr_usrreqs->pru_send) (so, PRUS_NOTREADY, m, NULL, NULL, td); sendfile_iodone(sfio, NULL, 0, 0); } CURVNET_RESTORE(); m = NULL; /* pru_send always consumes */ if (error) goto done; sbytes += space + hdrlen; if (hdrlen) hdrlen = 0; if (softerr) { error = softerr; goto done; } } /* * Send trailers. Wimp out and use writev(2). */ if (trl_uio != NULL) { sbunlock(&so->so_snd); error = kern_writev(td, sockfd, trl_uio); if (error == 0) sbytes += td->td_retval[0]; goto out; } done: sbunlock(&so->so_snd); out: /* * If there was no error we have to clear td->td_retval[0] * because it may have been set by writev. */ if (error == 0) { td->td_retval[0] = 0; } if (sent != NULL) { (*sent) = sbytes; } if (obj != NULL) vm_object_deallocate(obj); if (so) fdrop(sock_fp, td); if (m) m_freem(m); if (mh) m_freem(mh); if (sfs != NULL) { mtx_lock(&sfs->mtx); if (sfs->count != 0) cv_wait(&sfs->cv, &sfs->mtx); KASSERT(sfs->count == 0, ("sendfile sync still busy")); cv_destroy(&sfs->cv); mtx_destroy(&sfs->mtx); free(sfs, M_TEMP); } #ifdef KERN_TLS if (tls != NULL) ktls_free(tls); #endif if (error == ERESTART) error = EINTR; return (error); } static int sendfile(struct thread *td, struct sendfile_args *uap, int compat) { struct sf_hdtr hdtr; struct uio *hdr_uio, *trl_uio; struct file *fp; off_t sbytes; int error; /* * File offset must be positive. If it goes beyond EOF * we send only the header/trailer and no payload data. */ if (uap->offset < 0) return (EINVAL); sbytes = 0; hdr_uio = trl_uio = NULL; if (uap->hdtr != NULL) { error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); if (error != 0) goto out; if (hdtr.headers != NULL) { error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); if (error != 0) goto out; #ifdef COMPAT_FREEBSD4 /* * In FreeBSD < 5.0 the nbytes to send also included * the header. If compat is specified subtract the * header size from nbytes. */ if (compat) { if (uap->nbytes > hdr_uio->uio_resid) uap->nbytes -= hdr_uio->uio_resid; else uap->nbytes = 0; } #endif } if (hdtr.trailers != NULL) { error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); if (error != 0) goto out; } } AUDIT_ARG_FD(uap->fd); /* * sendfile(2) can start at any offset within a file so we require * CAP_READ+CAP_SEEK = CAP_PREAD. */ if ((error = fget_read(td, uap->fd, &cap_pread_rights, &fp)) != 0) goto out; error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, uap->offset, uap->nbytes, &sbytes, uap->flags, td); fdrop(fp, td); if (uap->sbytes != NULL) copyout(&sbytes, uap->sbytes, sizeof(off_t)); out: free(hdr_uio, M_IOV); free(trl_uio, M_IOV); return (error); } /* * sendfile(2) * * int sendfile(int fd, int s, off_t offset, size_t nbytes, * struct sf_hdtr *hdtr, off_t *sbytes, int flags) * * Send a file specified by 'fd' and starting at 'offset' to a socket * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes == * 0. Optionally add a header and/or trailer to the socket output. If * specified, write the total number of bytes sent into *sbytes. */ int sys_sendfile(struct thread *td, struct sendfile_args *uap) { return (sendfile(td, uap, 0)); } #ifdef COMPAT_FREEBSD4 int freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) { struct sendfile_args args; args.fd = uap->fd; args.s = uap->s; args.offset = uap->offset; args.nbytes = uap->nbytes; args.hdtr = uap->hdtr; args.sbytes = uap->sbytes; args.flags = uap->flags; return (sendfile(td, &args, 1)); } #endif /* COMPAT_FREEBSD4 */ Index: projects/clang1000-import/sys/kern/kern_synch.c =================================================================== --- projects/clang1000-import/sys/kern/kern_synch.c (revision 356919) +++ projects/clang1000-import/sys/kern/kern_synch.c (revision 356920) @@ -1,668 +1,674 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #include #endif #ifdef EPOCH_TRACE #include #endif #include static void synch_setup(void *dummy); SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup, NULL); int hogticks; static const char pause_wchan[MAXCPU]; static struct callout loadav_callout; struct loadavg averunnable = { {0, 0, 0}, FSCALE }; /* load average, of runnable procs */ /* * Constants for averages over 1, 5, and 15 minutes * when sampling at 5 second intervals. */ static fixpt_t cexp[3] = { 0.9200444146293232 * FSCALE, /* exp(-1/12) */ 0.9834714538216174 * FSCALE, /* exp(-1/60) */ 0.9944598480048967 * FSCALE, /* exp(-1/180) */ }; /* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */ SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, FSCALE, ""); static void loadav(void *arg); SDT_PROVIDER_DECLARE(sched); SDT_PROBE_DEFINE(sched, , , preempt); static void sleepinit(void *unused) { hogticks = (hz / 10) * 2; /* Default only. */ init_sleepqueues(); } /* * vmem tries to lock the sleepq mutexes when free'ing kva, so make sure * it is available. */ SYSINIT(sleepinit, SI_SUB_KMEM, SI_ORDER_ANY, sleepinit, NULL); /* * General sleep call. Suspends the current thread until a wakeup is * performed on the specified identifier. The thread will then be made * runnable with the specified priority. Sleeps at most sbt units of time * (0 means no timeout). If pri includes the PCATCH flag, let signals * interrupt the sleep, otherwise ignore them while sleeping. Returns 0 if * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a * signal becomes pending, ERESTART is returned if the current system * call should be restarted if possible, and EINTR is returned if the system * call should be interrupted by the signal (return EINTR). * * The lock argument is unlocked before the caller is suspended, and * re-locked before _sleep() returns. If priority includes the PDROP * flag the lock is not re-locked before returning. */ int _sleep(const void *ident, struct lock_object *lock, int priority, const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags) { struct thread *td; struct lock_class *class; uintptr_t lock_state; int catch, pri, rval, sleepq_flags; WITNESS_SAVE_DECL(lock_witness); td = curthread; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0, wmesg); #endif WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Sleeping on \"%s\"", wmesg); KASSERT(sbt != 0 || mtx_owned(&Giant) || lock != NULL, ("sleeping without a lock")); KASSERT(ident != NULL, ("_sleep: NULL ident")); KASSERT(TD_IS_RUNNING(td), ("_sleep: curthread not running")); if (priority & PDROP) KASSERT(lock != NULL && lock != &Giant.lock_object, ("PDROP requires a non-Giant lock")); if (lock != NULL) class = LOCK_CLASS(lock); else class = NULL; if (SCHEDULER_STOPPED_TD(td)) { if (lock != NULL && priority & PDROP) class->lc_unlock(lock); return (0); } catch = priority & PCATCH; pri = priority & PRIMASK; KASSERT(!TD_ON_SLEEPQ(td), ("recursive sleep")); if ((uintptr_t)ident >= (uintptr_t)&pause_wchan[0] && (uintptr_t)ident <= (uintptr_t)&pause_wchan[MAXCPU - 1]) sleepq_flags = SLEEPQ_PAUSE; else sleepq_flags = SLEEPQ_SLEEP; if (catch) sleepq_flags |= SLEEPQ_INTERRUPTIBLE; sleepq_lock(ident); CTR5(KTR_PROC, "sleep: thread %ld (pid %ld, %s) on %s (%p)", td->td_tid, td->td_proc->p_pid, td->td_name, wmesg, ident); if (lock == &Giant.lock_object) mtx_assert(&Giant, MA_OWNED); DROP_GIANT(); if (lock != NULL && lock != &Giant.lock_object && !(class->lc_flags & LC_SLEEPABLE)) { WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); } else /* GCC needs to follow the Yellow Brick Road */ lock_state = -1; /* * We put ourselves on the sleep queue and start our timeout * before calling thread_suspend_check, as we could stop there, * and a wakeup or a SIGCONT (or both) could occur while we were * stopped without resuming us. Thus, we must be ready for sleep * when cursig() is called. If the wakeup happens while we're * stopped, then td will no longer be on a sleep queue upon * return from cursig(). */ sleepq_add(ident, lock, wmesg, sleepq_flags, 0); if (sbt != 0) sleepq_set_timeout_sbt(ident, sbt, pr, flags); if (lock != NULL && class->lc_flags & LC_SLEEPABLE) { sleepq_release(ident); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); sleepq_lock(ident); } if (sbt != 0 && catch) rval = sleepq_timedwait_sig(ident, pri); else if (sbt != 0) rval = sleepq_timedwait(ident, pri); else if (catch) rval = sleepq_wait_sig(ident, pri); else { sleepq_wait(ident, pri); rval = 0; } #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, wmesg); #endif PICKUP_GIANT(); if (lock != NULL && lock != &Giant.lock_object && !(priority & PDROP)) { class->lc_lock(lock, lock_state); WITNESS_RESTORE(lock, lock_witness); } return (rval); } int msleep_spin_sbt(const void *ident, struct mtx *mtx, const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags) { struct thread *td; int rval; WITNESS_SAVE_DECL(mtx); td = curthread; KASSERT(mtx != NULL, ("sleeping without a mutex")); KASSERT(ident != NULL, ("msleep_spin_sbt: NULL ident")); KASSERT(TD_IS_RUNNING(td), ("msleep_spin_sbt: curthread not running")); if (SCHEDULER_STOPPED_TD(td)) return (0); sleepq_lock(ident); CTR5(KTR_PROC, "msleep_spin: thread %ld (pid %ld, %s) on %s (%p)", td->td_tid, td->td_proc->p_pid, td->td_name, wmesg, ident); DROP_GIANT(); mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED); WITNESS_SAVE(&mtx->lock_object, mtx); mtx_unlock_spin(mtx); /* * We put ourselves on the sleep queue and start our timeout. */ sleepq_add(ident, &mtx->lock_object, wmesg, SLEEPQ_SLEEP, 0); if (sbt != 0) sleepq_set_timeout_sbt(ident, sbt, pr, flags); /* * Can't call ktrace with any spin locks held so it can lock the * ktrace_mtx lock, and WITNESS_WARN considers it an error to hold * any spin lock. Thus, we have to drop the sleepq spin lock while * we handle those requests. This is safe since we have placed our * thread on the sleep queue already. */ #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) { sleepq_release(ident); ktrcsw(1, 0, wmesg); sleepq_lock(ident); } #endif #ifdef WITNESS sleepq_release(ident); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Sleeping on \"%s\"", wmesg); sleepq_lock(ident); #endif if (sbt != 0) rval = sleepq_timedwait(ident, 0); else { sleepq_wait(ident, 0); rval = 0; } #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, wmesg); #endif PICKUP_GIANT(); mtx_lock_spin(mtx); WITNESS_RESTORE(&mtx->lock_object, mtx); return (rval); } /* * pause_sbt() delays the calling thread by the given signed binary * time. During cold bootup, pause_sbt() uses the DELAY() function * instead of the _sleep() function to do the waiting. The "sbt" * argument must be greater than or equal to zero. A "sbt" value of * zero is equivalent to a "sbt" value of one tick. */ int pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags) { KASSERT(sbt >= 0, ("pause_sbt: timeout must be >= 0")); /* silently convert invalid timeouts */ if (sbt == 0) sbt = tick_sbt; if ((cold && curthread == &thread0) || kdb_active || SCHEDULER_STOPPED()) { /* * We delay one second at a time to avoid overflowing the * system specific DELAY() function(s): */ while (sbt >= SBT_1S) { DELAY(1000000); sbt -= SBT_1S; } /* Do the delay remainder, if any */ sbt = howmany(sbt, SBT_1US); if (sbt > 0) DELAY(sbt); return (EWOULDBLOCK); } return (_sleep(&pause_wchan[curcpu], NULL, (flags & C_CATCH) ? PCATCH : 0, wmesg, sbt, pr, flags)); } /* * Potentially release the last reference for refcount. Check for * unlikely conditions and signal the caller as to whether it was * the final ref. */ bool refcount_release_last(volatile u_int *count, u_int n, u_int old) { u_int waiter; waiter = old & REFCOUNT_WAITER; old = REFCOUNT_COUNT(old); if (__predict_false(n > old || REFCOUNT_SATURATED(old))) { /* * Avoid multiple destructor invocations if underflow occurred. * This is not perfect since the memory backing the containing * object may already have been reallocated. */ _refcount_update_saturated(count); return (false); } /* * Attempt to atomically clear the waiter bit. Wakeup waiters * if we are successful. */ if (waiter != 0 && atomic_cmpset_int(count, REFCOUNT_WAITER, 0)) wakeup(__DEVOLATILE(u_int *, count)); /* * Last reference. Signal the user to call the destructor. * * Ensure that the destructor sees all updates. The fence_rel * at the start of refcount_releasen synchronizes with this fence. */ atomic_thread_fence_acq(); return (true); } /* * Wait for a refcount wakeup. This does not guarantee that the ref is still * zero on return and may be subject to transient wakeups. Callers wanting * a precise answer should use refcount_wait(). */ void -refcount_sleep(volatile u_int *count, const char *wmesg, int pri) +_refcount_sleep(volatile u_int *count, struct lock_object *lock, + const char *wmesg, int pri) { void *wchan; u_int old; - if (REFCOUNT_COUNT(*count) == 0) + if (REFCOUNT_COUNT(*count) == 0) { + if (lock != NULL) + LOCK_CLASS(lock)->lc_unlock(lock); return; + } wchan = __DEVOLATILE(void *, count); sleepq_lock(wchan); + if (lock != NULL) + LOCK_CLASS(lock)->lc_unlock(lock); old = *count; for (;;) { if (REFCOUNT_COUNT(old) == 0) { sleepq_release(wchan); return; } if (old & REFCOUNT_WAITER) break; if (atomic_fcmpset_int(count, &old, old | REFCOUNT_WAITER)) break; } sleepq_add(wchan, NULL, wmesg, 0, 0); sleepq_wait(wchan, pri); } /* * Make all threads sleeping on the specified identifier runnable. */ void wakeup(const void *ident) { int wakeup_swapper; sleepq_lock(ident); wakeup_swapper = sleepq_broadcast(ident, SLEEPQ_SLEEP, 0, 0); sleepq_release(ident); if (wakeup_swapper) { KASSERT(ident != &proc0, ("wakeup and wakeup_swapper and proc0")); kick_proc0(); } } /* * Make a thread sleeping on the specified identifier runnable. * May wake more than one thread if a target thread is currently * swapped out. */ void wakeup_one(const void *ident) { int wakeup_swapper; sleepq_lock(ident); wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP, 0, 0); sleepq_release(ident); if (wakeup_swapper) kick_proc0(); } void wakeup_any(const void *ident) { int wakeup_swapper; sleepq_lock(ident); wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP | SLEEPQ_UNFAIR, 0, 0); sleepq_release(ident); if (wakeup_swapper) kick_proc0(); } static void kdb_switch(void) { thread_unlock(curthread); kdb_backtrace(); kdb_reenter(); panic("%s: did not reenter debugger", __func__); } /* * The machine independent parts of context switching. * * The thread lock is required on entry and is no longer held on return. */ void mi_switch(int flags) { uint64_t runtime, new_switchtime; struct thread *td; td = curthread; /* XXX */ THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED); KASSERT(!TD_ON_RUNQ(td), ("mi_switch: called by old code")); #ifdef INVARIANTS if (!TD_ON_LOCK(td) && !TD_IS_RUNNING(td)) mtx_assert(&Giant, MA_NOTOWNED); #endif KASSERT(td->td_critnest == 1 || KERNEL_PANICKED(), ("mi_switch: switch in a critical section")); KASSERT((flags & (SW_INVOL | SW_VOL)) != 0, ("mi_switch: switch must be voluntary or involuntary")); /* * Don't perform context switches from the debugger. */ if (kdb_active) kdb_switch(); if (SCHEDULER_STOPPED_TD(td)) return; if (flags & SW_VOL) { td->td_ru.ru_nvcsw++; td->td_swvoltick = ticks; } else { td->td_ru.ru_nivcsw++; td->td_swinvoltick = ticks; } #ifdef SCHED_STATS SCHED_STAT_INC(sched_switch_stats[flags & SW_TYPE_MASK]); #endif /* * Compute the amount of time during which the current * thread was running, and add that to its total so far. */ new_switchtime = cpu_ticks(); runtime = new_switchtime - PCPU_GET(switchtime); td->td_runtime += runtime; td->td_incruntime += runtime; PCPU_SET(switchtime, new_switchtime); td->td_generation++; /* bump preempt-detect counter */ VM_CNT_INC(v_swtch); PCPU_SET(switchticks, ticks); CTR4(KTR_PROC, "mi_switch: old thread %ld (td_sched %p, pid %ld, %s)", td->td_tid, td_get_sched(td), td->td_proc->p_pid, td->td_name); #ifdef KDTRACE_HOOKS if (SDT_PROBES_ENABLED() && ((flags & SW_PREEMPT) != 0 || ((flags & SW_INVOL) != 0 && (flags & SW_TYPE_MASK) == SWT_NEEDRESCHED))) SDT_PROBE0(sched, , , preempt); #endif sched_switch(td, flags); CTR4(KTR_PROC, "mi_switch: new thread %ld (td_sched %p, pid %ld, %s)", td->td_tid, td_get_sched(td), td->td_proc->p_pid, td->td_name); /* * If the last thread was exiting, finish cleaning it up. */ if ((td = PCPU_GET(deadthread))) { PCPU_SET(deadthread, NULL); thread_stash(td); } spinlock_exit(); } /* * Change thread state to be runnable, placing it on the run queue if * it is in memory. If it is swapped out, return true so our caller * will know to awaken the swapper. * * Requires the thread lock on entry, drops on exit. */ int setrunnable(struct thread *td, int srqflags) { int swapin; THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT(td->td_proc->p_state != PRS_ZOMBIE, ("setrunnable: pid %d is a zombie", td->td_proc->p_pid)); swapin = 0; switch (td->td_state) { case TDS_RUNNING: case TDS_RUNQ: break; case TDS_CAN_RUN: KASSERT((td->td_flags & TDF_INMEM) != 0, ("setrunnable: td %p not in mem, flags 0x%X inhibit 0x%X", td, td->td_flags, td->td_inhibitors)); /* unlocks thread lock according to flags */ sched_wakeup(td, srqflags); return (0); case TDS_INHIBITED: /* * If we are only inhibited because we are swapped out * arrange to swap in this process. */ if (td->td_inhibitors == TDI_SWAPPED && (td->td_flags & TDF_SWAPINREQ) == 0) { td->td_flags |= TDF_SWAPINREQ; swapin = 1; } break; default: panic("setrunnable: state 0x%x", td->td_state); } if ((srqflags & (SRQ_HOLD | SRQ_HOLDTD)) == 0) thread_unlock(td); return (swapin); } /* * Compute a tenex style load average of a quantity on * 1, 5 and 15 minute intervals. */ static void loadav(void *arg) { int i, nrun; struct loadavg *avg; nrun = sched_load(); avg = &averunnable; for (i = 0; i < 3; i++) avg->ldavg[i] = (cexp[i] * avg->ldavg[i] + nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT; /* * Schedule the next update to occur after 5 seconds, but add a * random variation to avoid synchronisation with processes that * run at regular intervals. */ callout_reset_sbt(&loadav_callout, SBT_1US * (4000000 + (int)(random() % 2000001)), SBT_1US, loadav, NULL, C_DIRECT_EXEC | C_PREL(32)); } /* ARGSUSED */ static void synch_setup(void *dummy) { callout_init(&loadav_callout, 1); /* Kick off timeout driven events by calling first time. */ loadav(NULL); } int should_yield(void) { return ((u_int)ticks - (u_int)curthread->td_swvoltick >= hogticks); } void maybe_yield(void) { if (should_yield()) kern_yield(PRI_USER); } void kern_yield(int prio) { struct thread *td; td = curthread; DROP_GIANT(); thread_lock(td); if (prio == PRI_USER) prio = td->td_user_pri; if (prio >= 0) sched_prio(td, prio); mi_switch(SW_VOL | SWT_RELINQUISH); PICKUP_GIANT(); } /* * General purpose yield system call. */ int sys_yield(struct thread *td, struct yield_args *uap) { thread_lock(td); if (PRI_BASE(td->td_pri_class) == PRI_TIMESHARE) sched_prio(td, PRI_MAX_TIMESHARE); mi_switch(SW_VOL | SWT_RELINQUISH); td->td_retval[0] = 0; return (0); } Index: projects/clang1000-import/sys/kern/uipc_shm.c =================================================================== --- projects/clang1000-import/sys/kern/uipc_shm.c (revision 356919) +++ projects/clang1000-import/sys/kern/uipc_shm.c (revision 356920) @@ -1,1547 +1,1551 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006, 2011, 2016-2017 Robert N. M. Watson * All rights reserved. * * Portions of this software were developed by BAE Systems, the University of * Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL * contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent * Computing (TC) research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Support for shared swap-backed anonymous memory objects via * shm_open(2), shm_rename(2), and shm_unlink(2). * While most of the implementation is here, vm_mmap.c contains * mapping logic changes. * * posixshmcontrol(1) allows users to inspect the state of the memory * objects. Per-uid swap resource limit controls total amount of * memory that user can consume for anonymous objects, including * shared. */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct shm_mapping { char *sm_path; Fnv32_t sm_fnv; struct shmfd *sm_shmfd; LIST_ENTRY(shm_mapping) sm_link; }; static MALLOC_DEFINE(M_SHMFD, "shmfd", "shared memory file descriptor"); static LIST_HEAD(, shm_mapping) *shm_dictionary; static struct sx shm_dict_lock; static struct mtx shm_timestamp_lock; static u_long shm_hash; static struct unrhdr64 shm_ino_unr; static dev_t shm_dev_ino; #define SHM_HASH(fnv) (&shm_dictionary[(fnv) & shm_hash]) static void shm_init(void *arg); static void shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd); static struct shmfd *shm_lookup(char *path, Fnv32_t fnv); static int shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred); static int shm_dotruncate_locked(struct shmfd *shmfd, off_t length, void *rl_cookie); static int shm_copyin_path(struct thread *td, const char *userpath_in, char **path_out); static fo_rdwr_t shm_read; static fo_rdwr_t shm_write; static fo_truncate_t shm_truncate; static fo_ioctl_t shm_ioctl; static fo_stat_t shm_stat; static fo_close_t shm_close; static fo_chmod_t shm_chmod; static fo_chown_t shm_chown; static fo_seek_t shm_seek; static fo_fill_kinfo_t shm_fill_kinfo; static fo_mmap_t shm_mmap; static fo_get_seals_t shm_get_seals; static fo_add_seals_t shm_add_seals; static fo_fallocate_t shm_fallocate; /* File descriptor operations. */ struct fileops shm_ops = { .fo_read = shm_read, .fo_write = shm_write, .fo_truncate = shm_truncate, .fo_ioctl = shm_ioctl, .fo_poll = invfo_poll, .fo_kqfilter = invfo_kqfilter, .fo_stat = shm_stat, .fo_close = shm_close, .fo_chmod = shm_chmod, .fo_chown = shm_chown, .fo_sendfile = vn_sendfile, .fo_seek = shm_seek, .fo_fill_kinfo = shm_fill_kinfo, .fo_mmap = shm_mmap, .fo_get_seals = shm_get_seals, .fo_add_seals = shm_add_seals, .fo_fallocate = shm_fallocate, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; FEATURE(posix_shm, "POSIX shared memory"); static int uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio) { vm_page_t m; vm_pindex_t idx; size_t tlen; int error, offset, rv; idx = OFF_TO_IDX(uio->uio_offset); offset = uio->uio_offset & PAGE_MASK; tlen = MIN(PAGE_SIZE - offset, len); VM_OBJECT_WLOCK(obj); /* * Read I/O without either a corresponding resident page or swap * page: use zero_region. This is intended to avoid instantiating * pages on read from a sparse region. */ if (uio->uio_rw == UIO_READ && vm_page_lookup(obj, idx) == NULL && !vm_pager_has_page(obj, idx, NULL, NULL)) { VM_OBJECT_WUNLOCK(obj); return (uiomove(__DECONST(void *, zero_region), tlen, uio)); } /* * Parallel reads of the page content from disk are prevented * by exclusive busy. * * Although the tmpfs vnode lock is held here, it is * nonetheless safe to sleep waiting for a free page. The * pageout daemon does not need to acquire the tmpfs vnode * lock to page out tobj's pages because tobj is a OBJT_SWAP * type object. */ rv = vm_page_grab_valid(&m, obj, idx, VM_ALLOC_NORMAL | VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY); if (rv != VM_PAGER_OK) { VM_OBJECT_WUNLOCK(obj); printf("uiomove_object: vm_obj %p idx %jd pager error %d\n", obj, idx, rv); return (EIO); } VM_OBJECT_WUNLOCK(obj); error = uiomove_fromphys(&m, offset, tlen, uio); if (uio->uio_rw == UIO_WRITE && error == 0) vm_page_set_dirty(m); vm_page_activate(m); vm_page_sunbusy(m); return (error); } int uiomove_object(vm_object_t obj, off_t obj_size, struct uio *uio) { ssize_t resid; size_t len; int error; error = 0; while ((resid = uio->uio_resid) > 0) { if (obj_size <= uio->uio_offset) break; len = MIN(obj_size - uio->uio_offset, resid); if (len == 0) break; error = uiomove_object_page(obj, len, uio); if (error != 0 || resid == uio->uio_resid) break; } return (error); } static int shm_seek(struct file *fp, off_t offset, int whence, struct thread *td) { struct shmfd *shmfd; off_t foffset; int error; shmfd = fp->f_data; foffset = foffset_lock(fp, 0); error = 0; switch (whence) { case L_INCR: if (foffset < 0 || (offset > 0 && foffset > OFF_MAX - offset)) { error = EOVERFLOW; break; } offset += foffset; break; case L_XTND: if (offset > 0 && shmfd->shm_size > OFF_MAX - offset) { error = EOVERFLOW; break; } offset += shmfd->shm_size; break; case L_SET: break; default: error = EINVAL; } if (error == 0) { if (offset < 0 || offset > shmfd->shm_size) error = EINVAL; else td->td_uretoff.tdu_off = offset; } foffset_unlock(fp, offset, error != 0 ? FOF_NOUPDATE : 0); return (error); } static int shm_read(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { struct shmfd *shmfd; void *rl_cookie; int error; shmfd = fp->f_data; #ifdef MAC error = mac_posixshm_check_read(active_cred, fp->f_cred, shmfd); if (error) return (error); #endif foffset_lock_uio(fp, uio, flags); rl_cookie = rangelock_rlock(&shmfd->shm_rl, uio->uio_offset, uio->uio_offset + uio->uio_resid, &shmfd->shm_mtx); error = uiomove_object(shmfd->shm_object, shmfd->shm_size, uio); rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx); foffset_unlock_uio(fp, uio, flags); return (error); } static int shm_write(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { struct shmfd *shmfd; void *rl_cookie; int error; shmfd = fp->f_data; #ifdef MAC error = mac_posixshm_check_write(active_cred, fp->f_cred, shmfd); if (error) return (error); #endif foffset_lock_uio(fp, uio, flags); if ((flags & FOF_OFFSET) == 0) { rl_cookie = rangelock_wlock(&shmfd->shm_rl, 0, OFF_MAX, &shmfd->shm_mtx); } else { rl_cookie = rangelock_wlock(&shmfd->shm_rl, uio->uio_offset, uio->uio_offset + uio->uio_resid, &shmfd->shm_mtx); } if ((shmfd->shm_seals & F_SEAL_WRITE) != 0) error = EPERM; else error = uiomove_object(shmfd->shm_object, shmfd->shm_size, uio); rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx); foffset_unlock_uio(fp, uio, flags); return (error); } static int shm_truncate(struct file *fp, off_t length, struct ucred *active_cred, struct thread *td) { struct shmfd *shmfd; #ifdef MAC int error; #endif shmfd = fp->f_data; #ifdef MAC error = mac_posixshm_check_truncate(active_cred, fp->f_cred, shmfd); if (error) return (error); #endif return (shm_dotruncate(shmfd, length)); } int shm_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td) { switch (com) { case FIONBIO: case FIOASYNC: /* * Allow fcntl(fd, F_SETFL, O_NONBLOCK) to work, * just like it would on an unlinked regular file */ return (0); default: return (ENOTTY); } } static int shm_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td) { struct shmfd *shmfd; #ifdef MAC int error; #endif shmfd = fp->f_data; #ifdef MAC error = mac_posixshm_check_stat(active_cred, fp->f_cred, shmfd); if (error) return (error); #endif /* * Attempt to return sanish values for fstat() on a memory file * descriptor. */ bzero(sb, sizeof(*sb)); sb->st_blksize = PAGE_SIZE; sb->st_size = shmfd->shm_size; sb->st_blocks = howmany(sb->st_size, sb->st_blksize); mtx_lock(&shm_timestamp_lock); sb->st_atim = shmfd->shm_atime; sb->st_ctim = shmfd->shm_ctime; sb->st_mtim = shmfd->shm_mtime; sb->st_birthtim = shmfd->shm_birthtime; sb->st_mode = S_IFREG | shmfd->shm_mode; /* XXX */ sb->st_uid = shmfd->shm_uid; sb->st_gid = shmfd->shm_gid; mtx_unlock(&shm_timestamp_lock); sb->st_dev = shm_dev_ino; sb->st_ino = shmfd->shm_ino; sb->st_nlink = shmfd->shm_object->ref_count; return (0); } static int shm_close(struct file *fp, struct thread *td) { struct shmfd *shmfd; shmfd = fp->f_data; fp->f_data = NULL; shm_drop(shmfd); return (0); } static int shm_copyin_path(struct thread *td, const char *userpath_in, char **path_out) { int error; char *path; const char *pr_path; size_t pr_pathlen; path = malloc(MAXPATHLEN, M_SHMFD, M_WAITOK); pr_path = td->td_ucred->cr_prison->pr_path; /* Construct a full pathname for jailed callers. */ pr_pathlen = strcmp(pr_path, "/") == 0 ? 0 : strlcpy(path, pr_path, MAXPATHLEN); error = copyinstr(userpath_in, path + pr_pathlen, MAXPATHLEN - pr_pathlen, NULL); if (error != 0) goto out; #ifdef KTRACE if (KTRPOINT(curthread, KTR_NAMEI)) ktrnamei(path); #endif /* Require paths to start with a '/' character. */ if (path[pr_pathlen] != '/') { error = EINVAL; goto out; } *path_out = path; out: if (error != 0) free(path, M_SHMFD); return (error); } static int shm_dotruncate_locked(struct shmfd *shmfd, off_t length, void *rl_cookie) { vm_object_t object; vm_page_t m; vm_pindex_t idx, nobjsize; vm_ooffset_t delta; int base, rv; KASSERT(length >= 0, ("shm_dotruncate: length < 0")); object = shmfd->shm_object; VM_OBJECT_ASSERT_WLOCKED(object); rangelock_cookie_assert(rl_cookie, RA_WLOCKED); if (length == shmfd->shm_size) return (0); nobjsize = OFF_TO_IDX(length + PAGE_MASK); /* Are we shrinking? If so, trim the end. */ if (length < shmfd->shm_size) { if ((shmfd->shm_seals & F_SEAL_SHRINK) != 0) return (EPERM); /* * Disallow any requests to shrink the size if this * object is mapped into the kernel. */ if (shmfd->shm_kmappings > 0) return (EBUSY); /* * Zero the truncated part of the last page. */ base = length & PAGE_MASK; if (base != 0) { idx = OFF_TO_IDX(length); retry: m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT); if (m != NULL) { MPASS(vm_page_all_valid(m)); } else if (vm_pager_has_page(object, idx, NULL, NULL)) { m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL); if (m == NULL) goto retry; + vm_object_pip_add(object, 1); + VM_OBJECT_WUNLOCK(object); rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); + VM_OBJECT_WLOCK(object); + vm_object_pip_wakeup(object); if (rv == VM_PAGER_OK) { /* * Since the page was not resident, * and therefore not recently * accessed, immediately enqueue it * for asynchronous laundering. The * current operation is not regarded * as an access. */ vm_page_launder(m); } else { vm_page_free(m); VM_OBJECT_WUNLOCK(object); return (EIO); } } if (m != NULL) { pmap_zero_page_area(m, base, PAGE_SIZE - base); KASSERT(vm_page_all_valid(m), ("shm_dotruncate: page %p is invalid", m)); vm_page_set_dirty(m); vm_page_xunbusy(m); } } delta = IDX_TO_OFF(object->size - nobjsize); /* Toss in memory pages. */ if (nobjsize < object->size) vm_object_page_remove(object, nobjsize, object->size, 0); /* Toss pages from swap. */ if (object->type == OBJT_SWAP) swap_pager_freespace(object, nobjsize, delta); /* Free the swap accounted for shm */ swap_release_by_cred(delta, object->cred); object->charge -= delta; } else { if ((shmfd->shm_seals & F_SEAL_GROW) != 0) return (EPERM); /* Try to reserve additional swap space. */ delta = IDX_TO_OFF(nobjsize - object->size); if (!swap_reserve_by_cred(delta, object->cred)) return (ENOMEM); object->charge += delta; } shmfd->shm_size = length; mtx_lock(&shm_timestamp_lock); vfs_timestamp(&shmfd->shm_ctime); shmfd->shm_mtime = shmfd->shm_ctime; mtx_unlock(&shm_timestamp_lock); object->size = nobjsize; return (0); } int shm_dotruncate(struct shmfd *shmfd, off_t length) { void *rl_cookie; int error; rl_cookie = rangelock_wlock(&shmfd->shm_rl, 0, OFF_MAX, &shmfd->shm_mtx); VM_OBJECT_WLOCK(shmfd->shm_object); error = shm_dotruncate_locked(shmfd, length, rl_cookie); VM_OBJECT_WUNLOCK(shmfd->shm_object); rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx); return (error); } /* * shmfd object management including creation and reference counting * routines. */ struct shmfd * shm_alloc(struct ucred *ucred, mode_t mode) { struct shmfd *shmfd; shmfd = malloc(sizeof(*shmfd), M_SHMFD, M_WAITOK | M_ZERO); shmfd->shm_size = 0; shmfd->shm_uid = ucred->cr_uid; shmfd->shm_gid = ucred->cr_gid; shmfd->shm_mode = mode; shmfd->shm_object = vm_pager_allocate(OBJT_SWAP, NULL, shmfd->shm_size, VM_PROT_DEFAULT, 0, ucred); KASSERT(shmfd->shm_object != NULL, ("shm_create: vm_pager_allocate")); vfs_timestamp(&shmfd->shm_birthtime); shmfd->shm_atime = shmfd->shm_mtime = shmfd->shm_ctime = shmfd->shm_birthtime; shmfd->shm_ino = alloc_unr64(&shm_ino_unr); refcount_init(&shmfd->shm_refs, 1); mtx_init(&shmfd->shm_mtx, "shmrl", NULL, MTX_DEF); rangelock_init(&shmfd->shm_rl); #ifdef MAC mac_posixshm_init(shmfd); mac_posixshm_create(ucred, shmfd); #endif return (shmfd); } struct shmfd * shm_hold(struct shmfd *shmfd) { refcount_acquire(&shmfd->shm_refs); return (shmfd); } void shm_drop(struct shmfd *shmfd) { if (refcount_release(&shmfd->shm_refs)) { #ifdef MAC mac_posixshm_destroy(shmfd); #endif rangelock_destroy(&shmfd->shm_rl); mtx_destroy(&shmfd->shm_mtx); vm_object_deallocate(shmfd->shm_object); free(shmfd, M_SHMFD); } } /* * Determine if the credentials have sufficient permissions for a * specified combination of FREAD and FWRITE. */ int shm_access(struct shmfd *shmfd, struct ucred *ucred, int flags) { accmode_t accmode; int error; accmode = 0; if (flags & FREAD) accmode |= VREAD; if (flags & FWRITE) accmode |= VWRITE; mtx_lock(&shm_timestamp_lock); error = vaccess(VREG, shmfd->shm_mode, shmfd->shm_uid, shmfd->shm_gid, accmode, ucred, NULL); mtx_unlock(&shm_timestamp_lock); return (error); } /* * Dictionary management. We maintain an in-kernel dictionary to map * paths to shmfd objects. We use the FNV hash on the path to store * the mappings in a hash table. */ static void shm_init(void *arg) { mtx_init(&shm_timestamp_lock, "shm timestamps", NULL, MTX_DEF); sx_init(&shm_dict_lock, "shm dictionary"); shm_dictionary = hashinit(1024, M_SHMFD, &shm_hash); new_unrhdr64(&shm_ino_unr, 1); shm_dev_ino = devfs_alloc_cdp_inode(); KASSERT(shm_dev_ino > 0, ("shm dev inode not initialized")); } SYSINIT(shm_init, SI_SUB_SYSV_SHM, SI_ORDER_ANY, shm_init, NULL); static struct shmfd * shm_lookup(char *path, Fnv32_t fnv) { struct shm_mapping *map; LIST_FOREACH(map, SHM_HASH(fnv), sm_link) { if (map->sm_fnv != fnv) continue; if (strcmp(map->sm_path, path) == 0) return (map->sm_shmfd); } return (NULL); } static void shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd) { struct shm_mapping *map; map = malloc(sizeof(struct shm_mapping), M_SHMFD, M_WAITOK); map->sm_path = path; map->sm_fnv = fnv; map->sm_shmfd = shm_hold(shmfd); shmfd->shm_path = path; LIST_INSERT_HEAD(SHM_HASH(fnv), map, sm_link); } static int shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred) { struct shm_mapping *map; int error; LIST_FOREACH(map, SHM_HASH(fnv), sm_link) { if (map->sm_fnv != fnv) continue; if (strcmp(map->sm_path, path) == 0) { #ifdef MAC error = mac_posixshm_check_unlink(ucred, map->sm_shmfd); if (error) return (error); #endif error = shm_access(map->sm_shmfd, ucred, FREAD | FWRITE); if (error) return (error); map->sm_shmfd->shm_path = NULL; LIST_REMOVE(map, sm_link); shm_drop(map->sm_shmfd); free(map->sm_path, M_SHMFD); free(map, M_SHMFD); return (0); } } return (ENOENT); } int kern_shm_open2(struct thread *td, const char *userpath, int flags, mode_t mode, int shmflags, struct filecaps *fcaps, const char *name __unused) { struct filedesc *fdp; struct shmfd *shmfd; struct file *fp; char *path; void *rl_cookie; Fnv32_t fnv; mode_t cmode; int error, fd, initial_seals; if ((shmflags & ~SHM_ALLOW_SEALING) != 0) return (EINVAL); initial_seals = F_SEAL_SEAL; if ((shmflags & SHM_ALLOW_SEALING) != 0) initial_seals &= ~F_SEAL_SEAL; #ifdef CAPABILITY_MODE /* * shm_open(2) is only allowed for anonymous objects. */ if (IN_CAPABILITY_MODE(td) && (userpath != SHM_ANON)) return (ECAPMODE); #endif AUDIT_ARG_FFLAGS(flags); AUDIT_ARG_MODE(mode); if ((flags & O_ACCMODE) != O_RDONLY && (flags & O_ACCMODE) != O_RDWR) return (EINVAL); if ((flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC)) != 0) return (EINVAL); /* * Currently only F_SEAL_SEAL may be set when creating or opening shmfd. * If the decision is made later to allow additional seals, care must be * taken below to ensure that the seals are properly set if the shmfd * already existed -- this currently assumes that only F_SEAL_SEAL can * be set and doesn't take further precautions to ensure the validity of * the seals being added with respect to current mappings. */ if ((initial_seals & ~F_SEAL_SEAL) != 0) return (EINVAL); fdp = td->td_proc->p_fd; cmode = (mode & ~fdp->fd_cmask) & ACCESSPERMS; /* * shm_open(2) created shm should always have O_CLOEXEC set, as mandated * by POSIX. We allow it to be unset here so that an in-kernel * interface may be written as a thin layer around shm, optionally not * setting CLOEXEC. For shm_open(2), O_CLOEXEC is set unconditionally * in sys_shm_open() to keep this implementation compliant. */ error = falloc_caps(td, &fp, &fd, flags & O_CLOEXEC, fcaps); if (error) return (error); /* A SHM_ANON path pointer creates an anonymous object. */ if (userpath == SHM_ANON) { /* A read-only anonymous object is pointless. */ if ((flags & O_ACCMODE) == O_RDONLY) { fdclose(td, fp, fd); fdrop(fp, td); return (EINVAL); } shmfd = shm_alloc(td->td_ucred, cmode); shmfd->shm_seals = initial_seals; } else { error = shm_copyin_path(td, userpath, &path); if (error != 0) { fdclose(td, fp, fd); fdrop(fp, td); return (error); } AUDIT_ARG_UPATH1_CANON(path); fnv = fnv_32_str(path, FNV1_32_INIT); sx_xlock(&shm_dict_lock); shmfd = shm_lookup(path, fnv); if (shmfd == NULL) { /* Object does not yet exist, create it if requested. */ if (flags & O_CREAT) { #ifdef MAC error = mac_posixshm_check_create(td->td_ucred, path); if (error == 0) { #endif shmfd = shm_alloc(td->td_ucred, cmode); shmfd->shm_seals = initial_seals; shm_insert(path, fnv, shmfd); #ifdef MAC } #endif } else { free(path, M_SHMFD); error = ENOENT; } } else { rl_cookie = rangelock_wlock(&shmfd->shm_rl, 0, OFF_MAX, &shmfd->shm_mtx); /* * kern_shm_open() likely shouldn't ever error out on * trying to set a seal that already exists, unlike * F_ADD_SEALS. This would break terribly as * shm_open(2) actually sets F_SEAL_SEAL to maintain * historical behavior where the underlying file could * not be sealed. */ initial_seals &= ~shmfd->shm_seals; /* * Object already exists, obtain a new * reference if requested and permitted. */ free(path, M_SHMFD); /* * initial_seals can't set additional seals if we've * already been set F_SEAL_SEAL. If F_SEAL_SEAL is set, * then we've already removed that one from * initial_seals. This is currently redundant as we * only allow setting F_SEAL_SEAL at creation time, but * it's cheap to check and decreases the effort required * to allow additional seals. */ if ((shmfd->shm_seals & F_SEAL_SEAL) != 0 && initial_seals != 0) error = EPERM; else if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) error = EEXIST; else { #ifdef MAC error = mac_posixshm_check_open(td->td_ucred, shmfd, FFLAGS(flags & O_ACCMODE)); if (error == 0) #endif error = shm_access(shmfd, td->td_ucred, FFLAGS(flags & O_ACCMODE)); } /* * Truncate the file back to zero length if * O_TRUNC was specified and the object was * opened with read/write. */ if (error == 0 && (flags & (O_ACCMODE | O_TRUNC)) == (O_RDWR | O_TRUNC)) { VM_OBJECT_WLOCK(shmfd->shm_object); #ifdef MAC error = mac_posixshm_check_truncate( td->td_ucred, fp->f_cred, shmfd); if (error == 0) #endif error = shm_dotruncate_locked(shmfd, 0, rl_cookie); VM_OBJECT_WUNLOCK(shmfd->shm_object); } if (error == 0) { /* * Currently we only allow F_SEAL_SEAL to be * set initially. As noted above, this would * need to be reworked should that change. */ shmfd->shm_seals |= initial_seals; shm_hold(shmfd); } rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx); } sx_xunlock(&shm_dict_lock); if (error) { fdclose(td, fp, fd); fdrop(fp, td); return (error); } } finit(fp, FFLAGS(flags & O_ACCMODE), DTYPE_SHM, shmfd, &shm_ops); td->td_retval[0] = fd; fdrop(fp, td); return (0); } /* System calls. */ #ifdef COMPAT_FREEBSD12 int freebsd12_shm_open(struct thread *td, struct freebsd12_shm_open_args *uap) { return (kern_shm_open(td, uap->path, uap->flags | O_CLOEXEC, uap->mode, NULL)); } #endif int sys_shm_unlink(struct thread *td, struct shm_unlink_args *uap) { char *path; Fnv32_t fnv; int error; error = shm_copyin_path(td, uap->path, &path); if (error != 0) return (error); AUDIT_ARG_UPATH1_CANON(path); fnv = fnv_32_str(path, FNV1_32_INIT); sx_xlock(&shm_dict_lock); error = shm_remove(path, fnv, td->td_ucred); sx_xunlock(&shm_dict_lock); free(path, M_TEMP); return (error); } int sys_shm_rename(struct thread *td, struct shm_rename_args *uap) { char *path_from = NULL, *path_to = NULL; Fnv32_t fnv_from, fnv_to; struct shmfd *fd_from; struct shmfd *fd_to; int error; int flags; flags = uap->flags; AUDIT_ARG_FFLAGS(flags); /* * Make sure the user passed only valid flags. * If you add a new flag, please add a new term here. */ if ((flags & ~( SHM_RENAME_NOREPLACE | SHM_RENAME_EXCHANGE )) != 0) { error = EINVAL; goto out; } /* * EXCHANGE and NOREPLACE don't quite make sense together. Let's * force the user to choose one or the other. */ if ((flags & SHM_RENAME_NOREPLACE) != 0 && (flags & SHM_RENAME_EXCHANGE) != 0) { error = EINVAL; goto out; } /* Renaming to or from anonymous makes no sense */ if (uap->path_from == SHM_ANON || uap->path_to == SHM_ANON) { error = EINVAL; goto out; } error = shm_copyin_path(td, uap->path_from, &path_from); if (error != 0) goto out; error = shm_copyin_path(td, uap->path_to, &path_to); if (error != 0) goto out; AUDIT_ARG_UPATH1_CANON(path_from); AUDIT_ARG_UPATH2_CANON(path_to); /* Rename with from/to equal is a no-op */ if (strcmp(path_from, path_to) == 0) goto out; fnv_from = fnv_32_str(path_from, FNV1_32_INIT); fnv_to = fnv_32_str(path_to, FNV1_32_INIT); sx_xlock(&shm_dict_lock); fd_from = shm_lookup(path_from, fnv_from); if (fd_from == NULL) { error = ENOENT; goto out_locked; } fd_to = shm_lookup(path_to, fnv_to); if ((flags & SHM_RENAME_NOREPLACE) != 0 && fd_to != NULL) { error = EEXIST; goto out_locked; } /* * Unconditionally prevents shm_remove from invalidating the 'from' * shm's state. */ shm_hold(fd_from); error = shm_remove(path_from, fnv_from, td->td_ucred); /* * One of my assumptions failed if ENOENT (e.g. locking didn't * protect us) */ KASSERT(error != ENOENT, ("Our shm disappeared during shm_rename: %s", path_from)); if (error != 0) { shm_drop(fd_from); goto out_locked; } /* * If we are exchanging, we need to ensure the shm_remove below * doesn't invalidate the dest shm's state. */ if ((flags & SHM_RENAME_EXCHANGE) != 0 && fd_to != NULL) shm_hold(fd_to); /* * NOTE: if path_to is not already in the hash, c'est la vie; * it simply means we have nothing already at path_to to unlink. * That is the ENOENT case. * * If we somehow don't have access to unlink this guy, but * did for the shm at path_from, then relink the shm to path_from * and abort with EACCES. * * All other errors: that is weird; let's relink and abort the * operation. */ error = shm_remove(path_to, fnv_to, td->td_ucred); if (error != 0 && error != ENOENT) { shm_insert(path_from, fnv_from, fd_from); shm_drop(fd_from); /* Don't free path_from now, since the hash references it */ path_from = NULL; goto out_locked; } error = 0; shm_insert(path_to, fnv_to, fd_from); /* Don't free path_to now, since the hash references it */ path_to = NULL; /* We kept a ref when we removed, and incremented again in insert */ shm_drop(fd_from); KASSERT(fd_from->shm_refs > 0, ("Expected >0 refs; got: %d\n", fd_from->shm_refs)); if ((flags & SHM_RENAME_EXCHANGE) != 0 && fd_to != NULL) { shm_insert(path_from, fnv_from, fd_to); path_from = NULL; shm_drop(fd_to); KASSERT(fd_to->shm_refs > 0, ("Expected >0 refs; got: %d\n", fd_to->shm_refs)); } out_locked: sx_xunlock(&shm_dict_lock); out: free(path_from, M_SHMFD); free(path_to, M_SHMFD); return (error); } int shm_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t objsize, vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff, struct thread *td) { struct shmfd *shmfd; vm_prot_t maxprot; int error; bool writecnt; void *rl_cookie; shmfd = fp->f_data; maxprot = VM_PROT_NONE; rl_cookie = rangelock_rlock(&shmfd->shm_rl, 0, objsize, &shmfd->shm_mtx); /* FREAD should always be set. */ if ((fp->f_flag & FREAD) != 0) maxprot |= VM_PROT_EXECUTE | VM_PROT_READ; /* * If FWRITE's set, we can allow VM_PROT_WRITE unless it's a shared * mapping with a write seal applied. */ if ((fp->f_flag & FWRITE) != 0 && ((flags & MAP_SHARED) == 0 || (shmfd->shm_seals & F_SEAL_WRITE) == 0)) maxprot |= VM_PROT_WRITE; writecnt = (flags & MAP_SHARED) != 0 && (prot & VM_PROT_WRITE) != 0; if (writecnt && (shmfd->shm_seals & F_SEAL_WRITE) != 0) { error = EPERM; goto out; } /* Don't permit shared writable mappings on read-only descriptors. */ if (writecnt && (maxprot & VM_PROT_WRITE) == 0) { error = EACCES; goto out; } maxprot &= cap_maxprot; /* See comment in vn_mmap(). */ if ( #ifdef _LP64 objsize > OFF_MAX || #endif foff < 0 || foff > OFF_MAX - objsize) { error = EINVAL; goto out; } #ifdef MAC error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, flags); if (error != 0) goto out; #endif mtx_lock(&shm_timestamp_lock); vfs_timestamp(&shmfd->shm_atime); mtx_unlock(&shm_timestamp_lock); vm_object_reference(shmfd->shm_object); if (writecnt) vm_pager_update_writecount(shmfd->shm_object, 0, objsize); error = vm_mmap_object(map, addr, objsize, prot, maxprot, flags, shmfd->shm_object, foff, writecnt, td); if (error != 0) { if (writecnt) vm_pager_release_writecount(shmfd->shm_object, 0, objsize); vm_object_deallocate(shmfd->shm_object); } out: rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx); return (error); } static int shm_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td) { struct shmfd *shmfd; int error; error = 0; shmfd = fp->f_data; mtx_lock(&shm_timestamp_lock); /* * SUSv4 says that x bits of permission need not be affected. * Be consistent with our shm_open there. */ #ifdef MAC error = mac_posixshm_check_setmode(active_cred, shmfd, mode); if (error != 0) goto out; #endif error = vaccess(VREG, shmfd->shm_mode, shmfd->shm_uid, shmfd->shm_gid, VADMIN, active_cred, NULL); if (error != 0) goto out; shmfd->shm_mode = mode & ACCESSPERMS; out: mtx_unlock(&shm_timestamp_lock); return (error); } static int shm_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, struct thread *td) { struct shmfd *shmfd; int error; error = 0; shmfd = fp->f_data; mtx_lock(&shm_timestamp_lock); #ifdef MAC error = mac_posixshm_check_setowner(active_cred, shmfd, uid, gid); if (error != 0) goto out; #endif if (uid == (uid_t)-1) uid = shmfd->shm_uid; if (gid == (gid_t)-1) gid = shmfd->shm_gid; if (((uid != shmfd->shm_uid && uid != active_cred->cr_uid) || (gid != shmfd->shm_gid && !groupmember(gid, active_cred))) && (error = priv_check_cred(active_cred, PRIV_VFS_CHOWN))) goto out; shmfd->shm_uid = uid; shmfd->shm_gid = gid; out: mtx_unlock(&shm_timestamp_lock); return (error); } /* * Helper routines to allow the backing object of a shared memory file * descriptor to be mapped in the kernel. */ int shm_map(struct file *fp, size_t size, off_t offset, void **memp) { struct shmfd *shmfd; vm_offset_t kva, ofs; vm_object_t obj; int rv; if (fp->f_type != DTYPE_SHM) return (EINVAL); shmfd = fp->f_data; obj = shmfd->shm_object; VM_OBJECT_WLOCK(obj); /* * XXXRW: This validation is probably insufficient, and subject to * sign errors. It should be fixed. */ if (offset >= shmfd->shm_size || offset + size > round_page(shmfd->shm_size)) { VM_OBJECT_WUNLOCK(obj); return (EINVAL); } shmfd->shm_kmappings++; vm_object_reference_locked(obj); VM_OBJECT_WUNLOCK(obj); /* Map the object into the kernel_map and wire it. */ kva = vm_map_min(kernel_map); ofs = offset & PAGE_MASK; offset = trunc_page(offset); size = round_page(size + ofs); rv = vm_map_find(kernel_map, obj, offset, &kva, size, 0, VMFS_OPTIMAL_SPACE, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0); if (rv == KERN_SUCCESS) { rv = vm_map_wire(kernel_map, kva, kva + size, VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES); if (rv == KERN_SUCCESS) { *memp = (void *)(kva + ofs); return (0); } vm_map_remove(kernel_map, kva, kva + size); } else vm_object_deallocate(obj); /* On failure, drop our mapping reference. */ VM_OBJECT_WLOCK(obj); shmfd->shm_kmappings--; VM_OBJECT_WUNLOCK(obj); return (vm_mmap_to_errno(rv)); } /* * We require the caller to unmap the entire entry. This allows us to * safely decrement shm_kmappings when a mapping is removed. */ int shm_unmap(struct file *fp, void *mem, size_t size) { struct shmfd *shmfd; vm_map_entry_t entry; vm_offset_t kva, ofs; vm_object_t obj; vm_pindex_t pindex; vm_prot_t prot; boolean_t wired; vm_map_t map; int rv; if (fp->f_type != DTYPE_SHM) return (EINVAL); shmfd = fp->f_data; kva = (vm_offset_t)mem; ofs = kva & PAGE_MASK; kva = trunc_page(kva); size = round_page(size + ofs); map = kernel_map; rv = vm_map_lookup(&map, kva, VM_PROT_READ | VM_PROT_WRITE, &entry, &obj, &pindex, &prot, &wired); if (rv != KERN_SUCCESS) return (EINVAL); if (entry->start != kva || entry->end != kva + size) { vm_map_lookup_done(map, entry); return (EINVAL); } vm_map_lookup_done(map, entry); if (obj != shmfd->shm_object) return (EINVAL); vm_map_remove(map, kva, kva + size); VM_OBJECT_WLOCK(obj); KASSERT(shmfd->shm_kmappings > 0, ("shm_unmap: object not mapped")); shmfd->shm_kmappings--; VM_OBJECT_WUNLOCK(obj); return (0); } static int shm_fill_kinfo_locked(struct shmfd *shmfd, struct kinfo_file *kif, bool list) { const char *path, *pr_path; size_t pr_pathlen; bool visible; sx_assert(&shm_dict_lock, SA_LOCKED); kif->kf_type = KF_TYPE_SHM; kif->kf_un.kf_file.kf_file_mode = S_IFREG | shmfd->shm_mode; kif->kf_un.kf_file.kf_file_size = shmfd->shm_size; if (shmfd->shm_path != NULL) { if (shmfd->shm_path != NULL) { path = shmfd->shm_path; pr_path = curthread->td_ucred->cr_prison->pr_path; if (strcmp(pr_path, "/") != 0) { /* Return the jail-rooted pathname. */ pr_pathlen = strlen(pr_path); visible = strncmp(path, pr_path, pr_pathlen) == 0 && path[pr_pathlen] == '/'; if (list && !visible) return (EPERM); if (visible) path += pr_pathlen; } strlcpy(kif->kf_path, path, sizeof(kif->kf_path)); } } return (0); } static int shm_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp __unused) { int res; sx_slock(&shm_dict_lock); res = shm_fill_kinfo_locked(fp->f_data, kif, false); sx_sunlock(&shm_dict_lock); return (res); } static int shm_add_seals(struct file *fp, int seals) { struct shmfd *shmfd; void *rl_cookie; vm_ooffset_t writemappings; int error, nseals; error = 0; shmfd = fp->f_data; rl_cookie = rangelock_wlock(&shmfd->shm_rl, 0, OFF_MAX, &shmfd->shm_mtx); /* Even already-set seals should result in EPERM. */ if ((shmfd->shm_seals & F_SEAL_SEAL) != 0) { error = EPERM; goto out; } nseals = seals & ~shmfd->shm_seals; if ((nseals & F_SEAL_WRITE) != 0) { /* * The rangelock above prevents writable mappings from being * added after we've started applying seals. The RLOCK here * is to avoid torn reads on ILP32 arches as unmapping/reducing * writemappings will be done without a rangelock. */ VM_OBJECT_RLOCK(shmfd->shm_object); writemappings = shmfd->shm_object->un_pager.swp.writemappings; VM_OBJECT_RUNLOCK(shmfd->shm_object); /* kmappings are also writable */ if (writemappings > 0) { error = EBUSY; goto out; } } shmfd->shm_seals |= nseals; out: rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx); return (error); } static int shm_get_seals(struct file *fp, int *seals) { struct shmfd *shmfd; shmfd = fp->f_data; *seals = shmfd->shm_seals; return (0); } static int shm_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td) { void *rl_cookie; struct shmfd *shmfd; size_t size; int error; /* This assumes that the caller already checked for overflow. */ error = 0; shmfd = fp->f_data; size = offset + len; /* * Just grab the rangelock for the range that we may be attempting to * grow, rather than blocking read/write for regions we won't be * touching while this (potential) resize is in progress. Other * attempts to resize the shmfd will have to take a write lock from 0 to * OFF_MAX, so this being potentially beyond the current usable range of * the shmfd is not necessarily a concern. If other mechanisms are * added to grow a shmfd, this may need to be re-evaluated. */ rl_cookie = rangelock_wlock(&shmfd->shm_rl, offset, size, &shmfd->shm_mtx); if (size > shmfd->shm_size) { VM_OBJECT_WLOCK(shmfd->shm_object); error = shm_dotruncate_locked(shmfd, size, rl_cookie); VM_OBJECT_WUNLOCK(shmfd->shm_object); } rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx); /* Translate to posix_fallocate(2) return value as needed. */ if (error == ENOMEM) error = ENOSPC; return (error); } static int sysctl_posix_shm_list(SYSCTL_HANDLER_ARGS) { struct shm_mapping *shmm; struct sbuf sb; struct kinfo_file kif; u_long i; ssize_t curlen; int error, error2; sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_file) * 5, req); sbuf_clear_flags(&sb, SBUF_INCLUDENUL); curlen = 0; error = 0; sx_slock(&shm_dict_lock); for (i = 0; i < shm_hash + 1; i++) { LIST_FOREACH(shmm, &shm_dictionary[i], sm_link) { error = shm_fill_kinfo_locked(shmm->sm_shmfd, &kif, true); if (error == EPERM) continue; if (error != 0) break; pack_kinfo(&kif); if (req->oldptr != NULL && kif.kf_structsize + curlen > req->oldlen) break; error = sbuf_bcat(&sb, &kif, kif.kf_structsize) == 0 ? 0 : ENOMEM; if (error != 0) break; curlen += kif.kf_structsize; } } sx_sunlock(&shm_dict_lock); error2 = sbuf_finish(&sb); sbuf_delete(&sb); return (error != 0 ? error : error2); } SYSCTL_PROC(_kern_ipc, OID_AUTO, posix_shm_list, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLTYPE_OPAQUE, NULL, 0, sysctl_posix_shm_list, "", "POSIX SHM list"); int kern_shm_open(struct thread *td, const char *path, int flags, mode_t mode, struct filecaps *caps) { return (kern_shm_open2(td, path, flags, mode, 0, caps, NULL)); } /* * This version of the shm_open() interface leaves CLOEXEC behavior up to the * caller, and libc will enforce it for the traditional shm_open() call. This * allows other consumers, like memfd_create(), to opt-in for CLOEXEC. This * interface also includes a 'name' argument that is currently unused, but could * potentially be exported later via some interface for debugging purposes. * From the kernel's perspective, it is optional. Individual consumers like * memfd_create() may require it in order to be compatible with other systems * implementing the same function. */ int sys_shm_open2(struct thread *td, struct shm_open2_args *uap) { return (kern_shm_open2(td, uap->path, uap->flags, uap->mode, uap->shmflags, NULL, uap->name)); } Index: projects/clang1000-import/sys/kern/vfs_cache.c =================================================================== --- projects/clang1000-import/sys/kern/vfs_cache.c (revision 356919) +++ projects/clang1000-import/sys/kern/vfs_cache.c (revision 356920) @@ -1,2655 +1,2655 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Poul-Henning Kamp of the FreeBSD Project. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #ifdef DDB #include #endif #include SDT_PROVIDER_DECLARE(vfs); SDT_PROBE_DEFINE3(vfs, namecache, enter, done, "struct vnode *", "char *", "struct vnode *"); SDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, "struct vnode *", "char *"); SDT_PROBE_DEFINE1(vfs, namecache, fullpath, entry, "struct vnode *"); SDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, "struct vnode *", "char *", "struct vnode *"); SDT_PROBE_DEFINE1(vfs, namecache, fullpath, miss, "struct vnode *"); SDT_PROBE_DEFINE3(vfs, namecache, fullpath, return, "int", "struct vnode *", "char *"); SDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, "struct vnode *", "char *", "struct vnode *"); SDT_PROBE_DEFINE2(vfs, namecache, lookup, hit__negative, "struct vnode *", "char *"); SDT_PROBE_DEFINE2(vfs, namecache, lookup, miss, "struct vnode *", "char *"); SDT_PROBE_DEFINE1(vfs, namecache, purge, done, "struct vnode *"); SDT_PROBE_DEFINE1(vfs, namecache, purge_negative, done, "struct vnode *"); SDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, "struct mount *"); SDT_PROBE_DEFINE3(vfs, namecache, zap, done, "struct vnode *", "char *", "struct vnode *"); SDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, "struct vnode *", "char *"); SDT_PROBE_DEFINE2(vfs, namecache, shrink_negative, done, "struct vnode *", "char *"); /* * This structure describes the elements in the cache of recent * names looked up by namei. */ struct namecache { LIST_ENTRY(namecache) nc_hash; /* hash chain */ LIST_ENTRY(namecache) nc_src; /* source vnode list */ TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ struct vnode *nc_dvp; /* vnode of parent of name */ union { struct vnode *nu_vp; /* vnode the name refers to */ } n_un; u_char nc_flag; /* flag bits */ u_char nc_nlen; /* length of name */ char nc_name[0]; /* segment name + nul */ }; /* * struct namecache_ts repeats struct namecache layout up to the * nc_nlen member. * struct namecache_ts is used in place of struct namecache when time(s) need * to be stored. The nc_dotdottime field is used when a cache entry is mapping * both a non-dotdot directory name plus dotdot for the directory's * parent. */ struct namecache_ts { struct timespec nc_time; /* timespec provided by fs */ struct timespec nc_dotdottime; /* dotdot timespec provided by fs */ int nc_ticks; /* ticks value when entry was added */ struct namecache nc_nc; }; #define nc_vp n_un.nu_vp /* * Flags in namecache.nc_flag */ #define NCF_WHITE 0x01 #define NCF_ISDOTDOT 0x02 #define NCF_TS 0x04 #define NCF_DTS 0x08 #define NCF_DVDROP 0x10 #define NCF_NEGATIVE 0x20 #define NCF_HOTNEGATIVE 0x40 /* * Name caching works as follows: * * Names found by directory scans are retained in a cache * for future reference. It is managed LRU, so frequently * used names will hang around. Cache is indexed by hash value * obtained from (dvp, name) where dvp refers to the directory * containing name. * * If it is a "negative" entry, (i.e. for a name that is known NOT to * exist) the vnode pointer will be NULL. * * Upon reaching the last segment of a path, if the reference * is for DELETE, or NOCACHE is set (rewrite), and the * name is located in the cache, it will be dropped. * * These locks are used (in the order in which they can be taken): * NAME TYPE ROLE * vnodelock mtx vnode lists and v_cache_dd field protection * bucketlock rwlock for access to given set of hash buckets * neglist mtx negative entry LRU management * * Additionally, ncneg_shrink_lock mtx is used to have at most one thread * shrinking the LRU list. * * It is legal to take multiple vnodelock and bucketlock locks. The locking * order is lower address first. Both are recursive. * * "." lookups are lockless. * * ".." and vnode -> name lookups require vnodelock. * * name -> vnode lookup requires the relevant bucketlock to be held for reading. * * Insertions and removals of entries require involved vnodes and bucketlocks * to be write-locked to prevent other threads from seeing the entry. * * Some lookups result in removal of the found entry (e.g. getting rid of a * negative entry with the intent to create a positive one), which poses a * problem when multiple threads reach the state. Similarly, two different * threads can purge two different vnodes and try to remove the same name. * * If the already held vnode lock is lower than the second required lock, we * can just take the other lock. However, in the opposite case, this could * deadlock. As such, this is resolved by trylocking and if that fails unlocking * the first node, locking everything in order and revalidating the state. */ /* * Structures associated with name caching. */ #define NCHHASH(hash) \ (&nchashtbl[(hash) & nchash]) static __read_mostly LIST_HEAD(nchashhead, namecache) *nchashtbl;/* Hash Table */ static u_long __read_mostly nchash; /* size of hash table */ SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "Size of namecache hash table"); static u_long __read_mostly ncnegfactor = 5; /* ratio of negative entries */ SYSCTL_ULONG(_vfs, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "Ratio of negative namecache entries"); static u_long __exclusive_cache_line numneg; /* number of negative entries allocated */ static u_long __exclusive_cache_line numcache;/* number of cache entries allocated */ -static u_long __exclusive_cache_line numcachehv;/* number of cache entries with vnodes held */ u_int ncsizefactor = 2; SYSCTL_UINT(_vfs, OID_AUTO, ncsizefactor, CTLFLAG_RW, &ncsizefactor, 0, "Size factor for namecache"); static u_int __read_mostly ncpurgeminvnodes; SYSCTL_UINT(_vfs, OID_AUTO, ncpurgeminvnodes, CTLFLAG_RW, &ncpurgeminvnodes, 0, "Number of vnodes below which purgevfs ignores the request"); static u_int __read_mostly ncsize; /* the size as computed on creation or resizing */ struct nchstats nchstats; /* cache effectiveness statistics */ static struct mtx __exclusive_cache_line ncneg_shrink_lock; static int shrink_list_turn; struct neglist { struct mtx nl_lock; TAILQ_HEAD(, namecache) nl_list; } __aligned(CACHE_LINE_SIZE); static struct neglist __read_mostly *neglists; static struct neglist ncneg_hot; static u_long numhotneg; #define numneglists (ncneghash + 1) static u_int __read_mostly ncneghash; static inline struct neglist * NCP2NEGLIST(struct namecache *ncp) { return (&neglists[(((uintptr_t)(ncp) >> 8) & ncneghash)]); } #define numbucketlocks (ncbuckethash + 1) static u_int __read_mostly ncbuckethash; static struct rwlock_padalign __read_mostly *bucketlocks; #define HASH2BUCKETLOCK(hash) \ ((struct rwlock *)(&bucketlocks[((hash) & ncbuckethash)])) #define numvnodelocks (ncvnodehash + 1) static u_int __read_mostly ncvnodehash; static struct mtx __read_mostly *vnodelocks; static inline struct mtx * VP2VNODELOCK(struct vnode *vp) { return (&vnodelocks[(((uintptr_t)(vp) >> 8) & ncvnodehash)]); } /* * UMA zones for the VFS cache. * * The small cache is used for entries with short names, which are the * most common. The large cache is used for entries which are too big to * fit in the small cache. */ static uma_zone_t __read_mostly cache_zone_small; static uma_zone_t __read_mostly cache_zone_small_ts; static uma_zone_t __read_mostly cache_zone_large; static uma_zone_t __read_mostly cache_zone_large_ts; #define CACHE_PATH_CUTOFF 35 static struct namecache * cache_alloc(int len, int ts) { struct namecache_ts *ncp_ts; struct namecache *ncp; if (__predict_false(ts)) { if (len <= CACHE_PATH_CUTOFF) ncp_ts = uma_zalloc(cache_zone_small_ts, M_WAITOK); else ncp_ts = uma_zalloc(cache_zone_large_ts, M_WAITOK); ncp = &ncp_ts->nc_nc; } else { if (len <= CACHE_PATH_CUTOFF) ncp = uma_zalloc(cache_zone_small, M_WAITOK); else ncp = uma_zalloc(cache_zone_large, M_WAITOK); } return (ncp); } static void cache_free(struct namecache *ncp) { struct namecache_ts *ncp_ts; if (ncp == NULL) return; if ((ncp->nc_flag & NCF_DVDROP) != 0) vdrop(ncp->nc_dvp); if (__predict_false(ncp->nc_flag & NCF_TS)) { ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc); if (ncp->nc_nlen <= CACHE_PATH_CUTOFF) uma_zfree(cache_zone_small_ts, ncp_ts); else uma_zfree(cache_zone_large_ts, ncp_ts); } else { if (ncp->nc_nlen <= CACHE_PATH_CUTOFF) uma_zfree(cache_zone_small, ncp); else uma_zfree(cache_zone_large, ncp); } } static void cache_out_ts(struct namecache *ncp, struct timespec *tsp, int *ticksp) { struct namecache_ts *ncp_ts; KASSERT((ncp->nc_flag & NCF_TS) != 0 || (tsp == NULL && ticksp == NULL), ("No NCF_TS")); if (tsp == NULL && ticksp == NULL) return; ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc); if (tsp != NULL) *tsp = ncp_ts->nc_time; if (ticksp != NULL) *ticksp = ncp_ts->nc_ticks; } #ifdef DEBUG_CACHE static int __read_mostly doingcache = 1; /* 1 => enable the cache */ SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, "VFS namecache enabled"); #endif /* Export size information to userland */ SYSCTL_INT(_debug_sizeof, OID_AUTO, namecache, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, sizeof(struct namecache), "sizeof(struct namecache)"); /* * The new name cache statistics */ static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics"); #define STATNODE_ULONG(name, descr) \ SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, descr); #define STATNODE_COUNTER(name, descr) \ static counter_u64_t __read_mostly name; \ SYSCTL_COUNTER_U64(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, descr); STATNODE_ULONG(numneg, "Number of negative cache entries"); STATNODE_ULONG(numcache, "Number of cache entries"); -STATNODE_ULONG(numcachehv, "Number of namecache entries with vnodes held"); +STATNODE_COUNTER(numcachehv, "Number of namecache entries with vnodes held"); STATNODE_COUNTER(numcalls, "Number of cache lookups"); STATNODE_COUNTER(dothits, "Number of '.' hits"); STATNODE_COUNTER(dotdothits, "Number of '..' hits"); STATNODE_COUNTER(numchecks, "Number of checks in lookup"); STATNODE_COUNTER(nummiss, "Number of cache misses"); STATNODE_COUNTER(nummisszap, "Number of cache misses we do not want to cache"); STATNODE_COUNTER(numposzaps, "Number of cache hits (positive) we do not want to cache"); STATNODE_COUNTER(numposhits, "Number of cache hits (positive)"); STATNODE_COUNTER(numnegzaps, "Number of cache hits (negative) we do not want to cache"); STATNODE_COUNTER(numneghits, "Number of cache hits (negative)"); /* These count for kern___getcwd(), too. */ STATNODE_COUNTER(numfullpathcalls, "Number of fullpath search calls"); STATNODE_COUNTER(numfullpathfail1, "Number of fullpath search errors (ENOTDIR)"); STATNODE_COUNTER(numfullpathfail2, "Number of fullpath search errors (VOP_VPTOCNP failures)"); STATNODE_COUNTER(numfullpathfail4, "Number of fullpath search errors (ENOMEM)"); STATNODE_COUNTER(numfullpathfound, "Number of successful fullpath calls"); STATNODE_COUNTER(zap_and_exit_bucket_relock_success, "Number of successful removals after relocking"); static long zap_and_exit_bucket_fail; STATNODE_ULONG(zap_and_exit_bucket_fail, "Number of times zap_and_exit failed to lock"); static long zap_and_exit_bucket_fail2; STATNODE_ULONG(zap_and_exit_bucket_fail2, "Number of times zap_and_exit failed to lock"); static long cache_lock_vnodes_cel_3_failures; STATNODE_ULONG(cache_lock_vnodes_cel_3_failures, "Number of times 3-way vnode locking failed"); STATNODE_ULONG(numhotneg, "Number of hot negative entries"); STATNODE_COUNTER(numneg_evicted, "Number of negative entries evicted when adding a new entry"); STATNODE_COUNTER(shrinking_skipped, "Number of times shrinking was already in progress"); static void cache_zap_locked(struct namecache *ncp, bool neg_locked); static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, char *buf, char **retbuf, u_int buflen); static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries"); static int cache_yield; SYSCTL_INT(_vfs_cache, OID_AUTO, yield, CTLFLAG_RD, &cache_yield, 0, "Number of times cache called yield"); static void __noinline cache_maybe_yield(void) { if (should_yield()) { cache_yield++; kern_yield(PRI_USER); } } static inline void cache_assert_vlp_locked(struct mtx *vlp) { if (vlp != NULL) mtx_assert(vlp, MA_OWNED); } static inline void cache_assert_vnode_locked(struct vnode *vp) { struct mtx *vlp; vlp = VP2VNODELOCK(vp); cache_assert_vlp_locked(vlp); } static uint32_t cache_get_hash(char *name, u_char len, struct vnode *dvp) { uint32_t hash; hash = fnv_32_buf(name, len, FNV1_32_INIT); hash = fnv_32_buf(&dvp, sizeof(dvp), hash); return (hash); } static inline struct rwlock * NCP2BUCKETLOCK(struct namecache *ncp) { uint32_t hash; hash = cache_get_hash(ncp->nc_name, ncp->nc_nlen, ncp->nc_dvp); return (HASH2BUCKETLOCK(hash)); } #ifdef INVARIANTS static void cache_assert_bucket_locked(struct namecache *ncp, int mode) { struct rwlock *blp; blp = NCP2BUCKETLOCK(ncp); rw_assert(blp, mode); } #else #define cache_assert_bucket_locked(x, y) do { } while (0) #endif #define cache_sort_vnodes(x, y) _cache_sort_vnodes((void **)(x), (void **)(y)) static void _cache_sort_vnodes(void **p1, void **p2) { void *tmp; MPASS(*p1 != NULL || *p2 != NULL); if (*p1 > *p2) { tmp = *p2; *p2 = *p1; *p1 = tmp; } } static void cache_lock_all_buckets(void) { u_int i; for (i = 0; i < numbucketlocks; i++) rw_wlock(&bucketlocks[i]); } static void cache_unlock_all_buckets(void) { u_int i; for (i = 0; i < numbucketlocks; i++) rw_wunlock(&bucketlocks[i]); } static void cache_lock_all_vnodes(void) { u_int i; for (i = 0; i < numvnodelocks; i++) mtx_lock(&vnodelocks[i]); } static void cache_unlock_all_vnodes(void) { u_int i; for (i = 0; i < numvnodelocks; i++) mtx_unlock(&vnodelocks[i]); } static int cache_trylock_vnodes(struct mtx *vlp1, struct mtx *vlp2) { cache_sort_vnodes(&vlp1, &vlp2); if (vlp1 != NULL) { if (!mtx_trylock(vlp1)) return (EAGAIN); } if (!mtx_trylock(vlp2)) { if (vlp1 != NULL) mtx_unlock(vlp1); return (EAGAIN); } return (0); } static void cache_lock_vnodes(struct mtx *vlp1, struct mtx *vlp2) { MPASS(vlp1 != NULL || vlp2 != NULL); MPASS(vlp1 <= vlp2); if (vlp1 != NULL) mtx_lock(vlp1); if (vlp2 != NULL) mtx_lock(vlp2); } static void cache_unlock_vnodes(struct mtx *vlp1, struct mtx *vlp2) { MPASS(vlp1 != NULL || vlp2 != NULL); if (vlp1 != NULL) mtx_unlock(vlp1); if (vlp2 != NULL) mtx_unlock(vlp2); } static int sysctl_nchstats(SYSCTL_HANDLER_ARGS) { struct nchstats snap; if (req->oldptr == NULL) return (SYSCTL_OUT(req, 0, sizeof(snap))); snap = nchstats; snap.ncs_goodhits = counter_u64_fetch(numposhits); snap.ncs_neghits = counter_u64_fetch(numneghits); snap.ncs_badhits = counter_u64_fetch(numposzaps) + counter_u64_fetch(numnegzaps); snap.ncs_miss = counter_u64_fetch(nummisszap) + counter_u64_fetch(nummiss); return (SYSCTL_OUT(req, &snap, sizeof(snap))); } SYSCTL_PROC(_vfs_cache, OID_AUTO, nchstats, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, sysctl_nchstats, "LU", "VFS cache effectiveness statistics"); #ifdef DIAGNOSTIC /* * Grab an atomic snapshot of the name cache hash chain lengths */ static SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats"); static int sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS) { struct nchashhead *ncpp; struct namecache *ncp; int i, error, n_nchash, *cntbuf; retry: n_nchash = nchash + 1; /* nchash is max index, not count */ if (req->oldptr == NULL) return SYSCTL_OUT(req, 0, n_nchash * sizeof(int)); cntbuf = malloc(n_nchash * sizeof(int), M_TEMP, M_ZERO | M_WAITOK); cache_lock_all_buckets(); if (n_nchash != nchash + 1) { cache_unlock_all_buckets(); free(cntbuf, M_TEMP); goto retry; } /* Scan hash tables counting entries */ for (ncpp = nchashtbl, i = 0; i < n_nchash; ncpp++, i++) LIST_FOREACH(ncp, ncpp, nc_hash) cntbuf[i]++; cache_unlock_all_buckets(); for (error = 0, i = 0; i < n_nchash; i++) if ((error = SYSCTL_OUT(req, &cntbuf[i], sizeof(int))) != 0) break; free(cntbuf, M_TEMP); return (error); } SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD| CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths"); static int sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS) { int error; struct nchashhead *ncpp; struct namecache *ncp; int n_nchash; int count, maxlength, used, pct; if (!req->oldptr) return SYSCTL_OUT(req, 0, 4 * sizeof(int)); cache_lock_all_buckets(); n_nchash = nchash + 1; /* nchash is max index, not count */ used = 0; maxlength = 0; /* Scan hash tables for applicable entries */ for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { count = 0; LIST_FOREACH(ncp, ncpp, nc_hash) { count++; } if (count) used++; if (maxlength < count) maxlength = count; } n_nchash = nchash + 1; cache_unlock_all_buckets(); pct = (used * 100) / (n_nchash / 100); error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash)); if (error) return (error); error = SYSCTL_OUT(req, &used, sizeof(used)); if (error) return (error); error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength)); if (error) return (error); error = SYSCTL_OUT(req, &pct, sizeof(pct)); if (error) return (error); return (0); } SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD| CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_nchash, "I", "nchash statistics (number of total/used buckets, maximum chain length, usage percentage)"); #endif /* * Negative entries management * * A variation of LRU scheme is used. New entries are hashed into one of * numneglists cold lists. Entries get promoted to the hot list on first hit. * * The shrinker will demote hot list head and evict from the cold list in a * round-robin manner. */ static void cache_negative_hit(struct namecache *ncp) { struct neglist *neglist; MPASS(ncp->nc_flag & NCF_NEGATIVE); if (ncp->nc_flag & NCF_HOTNEGATIVE) return; neglist = NCP2NEGLIST(ncp); mtx_lock(&ncneg_hot.nl_lock); mtx_lock(&neglist->nl_lock); if (!(ncp->nc_flag & NCF_HOTNEGATIVE)) { numhotneg++; TAILQ_REMOVE(&neglist->nl_list, ncp, nc_dst); TAILQ_INSERT_TAIL(&ncneg_hot.nl_list, ncp, nc_dst); ncp->nc_flag |= NCF_HOTNEGATIVE; } mtx_unlock(&neglist->nl_lock); mtx_unlock(&ncneg_hot.nl_lock); } static void cache_negative_insert(struct namecache *ncp, bool neg_locked) { struct neglist *neglist; MPASS(ncp->nc_flag & NCF_NEGATIVE); cache_assert_bucket_locked(ncp, RA_WLOCKED); neglist = NCP2NEGLIST(ncp); if (!neg_locked) { mtx_lock(&neglist->nl_lock); } else { mtx_assert(&neglist->nl_lock, MA_OWNED); } TAILQ_INSERT_TAIL(&neglist->nl_list, ncp, nc_dst); if (!neg_locked) mtx_unlock(&neglist->nl_lock); atomic_add_rel_long(&numneg, 1); } static void cache_negative_remove(struct namecache *ncp, bool neg_locked) { struct neglist *neglist; bool hot_locked = false; bool list_locked = false; MPASS(ncp->nc_flag & NCF_NEGATIVE); cache_assert_bucket_locked(ncp, RA_WLOCKED); neglist = NCP2NEGLIST(ncp); if (!neg_locked) { if (ncp->nc_flag & NCF_HOTNEGATIVE) { hot_locked = true; mtx_lock(&ncneg_hot.nl_lock); if (!(ncp->nc_flag & NCF_HOTNEGATIVE)) { list_locked = true; mtx_lock(&neglist->nl_lock); } } else { list_locked = true; mtx_lock(&neglist->nl_lock); } } if (ncp->nc_flag & NCF_HOTNEGATIVE) { mtx_assert(&ncneg_hot.nl_lock, MA_OWNED); TAILQ_REMOVE(&ncneg_hot.nl_list, ncp, nc_dst); numhotneg--; } else { mtx_assert(&neglist->nl_lock, MA_OWNED); TAILQ_REMOVE(&neglist->nl_list, ncp, nc_dst); } if (list_locked) mtx_unlock(&neglist->nl_lock); if (hot_locked) mtx_unlock(&ncneg_hot.nl_lock); atomic_subtract_rel_long(&numneg, 1); } static void cache_negative_shrink_select(int start, struct namecache **ncpp, struct neglist **neglistpp) { struct neglist *neglist; struct namecache *ncp; int i; *ncpp = ncp = NULL; neglist = NULL; for (i = start; i < numneglists; i++) { neglist = &neglists[i]; if (TAILQ_FIRST(&neglist->nl_list) == NULL) continue; mtx_lock(&neglist->nl_lock); ncp = TAILQ_FIRST(&neglist->nl_list); if (ncp != NULL) break; mtx_unlock(&neglist->nl_lock); } *neglistpp = neglist; *ncpp = ncp; } static void cache_negative_zap_one(void) { struct namecache *ncp, *ncp2; struct neglist *neglist; struct mtx *dvlp; struct rwlock *blp; if (mtx_owner(&ncneg_shrink_lock) != NULL || !mtx_trylock(&ncneg_shrink_lock)) { counter_u64_add(shrinking_skipped, 1); return; } mtx_lock(&ncneg_hot.nl_lock); ncp = TAILQ_FIRST(&ncneg_hot.nl_list); if (ncp != NULL) { neglist = NCP2NEGLIST(ncp); mtx_lock(&neglist->nl_lock); TAILQ_REMOVE(&ncneg_hot.nl_list, ncp, nc_dst); TAILQ_INSERT_TAIL(&neglist->nl_list, ncp, nc_dst); ncp->nc_flag &= ~NCF_HOTNEGATIVE; numhotneg--; mtx_unlock(&neglist->nl_lock); } mtx_unlock(&ncneg_hot.nl_lock); cache_negative_shrink_select(shrink_list_turn, &ncp, &neglist); shrink_list_turn++; if (shrink_list_turn == numneglists) shrink_list_turn = 0; if (ncp == NULL && shrink_list_turn == 0) cache_negative_shrink_select(shrink_list_turn, &ncp, &neglist); mtx_unlock(&ncneg_shrink_lock); if (ncp == NULL) return; MPASS(ncp->nc_flag & NCF_NEGATIVE); dvlp = VP2VNODELOCK(ncp->nc_dvp); blp = NCP2BUCKETLOCK(ncp); mtx_unlock(&neglist->nl_lock); mtx_lock(dvlp); rw_wlock(blp); mtx_lock(&neglist->nl_lock); ncp2 = TAILQ_FIRST(&neglist->nl_list); if (ncp != ncp2 || dvlp != VP2VNODELOCK(ncp2->nc_dvp) || blp != NCP2BUCKETLOCK(ncp2) || !(ncp2->nc_flag & NCF_NEGATIVE)) { ncp = NULL; } else { SDT_PROBE2(vfs, namecache, shrink_negative, done, ncp->nc_dvp, ncp->nc_name); cache_zap_locked(ncp, true); counter_u64_add(numneg_evicted, 1); } mtx_unlock(&neglist->nl_lock); rw_wunlock(blp); mtx_unlock(dvlp); cache_free(ncp); } /* * cache_zap_locked(): * * Removes a namecache entry from cache, whether it contains an actual * pointer to a vnode or if it is just a negative cache entry. */ static void cache_zap_locked(struct namecache *ncp, bool neg_locked) { if (!(ncp->nc_flag & NCF_NEGATIVE)) cache_assert_vnode_locked(ncp->nc_vp); cache_assert_vnode_locked(ncp->nc_dvp); cache_assert_bucket_locked(ncp, RA_WLOCKED); CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, (ncp->nc_flag & NCF_NEGATIVE) ? NULL : ncp->nc_vp); LIST_REMOVE(ncp, nc_hash); if (!(ncp->nc_flag & NCF_NEGATIVE)) { SDT_PROBE3(vfs, namecache, zap, done, ncp->nc_dvp, ncp->nc_name, ncp->nc_vp); TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); if (ncp == ncp->nc_vp->v_cache_dd) ncp->nc_vp->v_cache_dd = NULL; } else { SDT_PROBE2(vfs, namecache, zap_negative, done, ncp->nc_dvp, ncp->nc_name); cache_negative_remove(ncp, neg_locked); } if (ncp->nc_flag & NCF_ISDOTDOT) { if (ncp == ncp->nc_dvp->v_cache_dd) ncp->nc_dvp->v_cache_dd = NULL; } else { LIST_REMOVE(ncp, nc_src); if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) { ncp->nc_flag |= NCF_DVDROP; - atomic_subtract_rel_long(&numcachehv, 1); + counter_u64_add(numcachehv, -1); } } atomic_subtract_rel_long(&numcache, 1); } static void cache_zap_negative_locked_vnode_kl(struct namecache *ncp, struct vnode *vp) { struct rwlock *blp; MPASS(ncp->nc_dvp == vp); MPASS(ncp->nc_flag & NCF_NEGATIVE); cache_assert_vnode_locked(vp); blp = NCP2BUCKETLOCK(ncp); rw_wlock(blp); cache_zap_locked(ncp, false); rw_wunlock(blp); } static bool cache_zap_locked_vnode_kl2(struct namecache *ncp, struct vnode *vp, struct mtx **vlpp) { struct mtx *pvlp, *vlp1, *vlp2, *to_unlock; struct rwlock *blp; MPASS(vp == ncp->nc_dvp || vp == ncp->nc_vp); cache_assert_vnode_locked(vp); if (ncp->nc_flag & NCF_NEGATIVE) { if (*vlpp != NULL) { mtx_unlock(*vlpp); *vlpp = NULL; } cache_zap_negative_locked_vnode_kl(ncp, vp); return (true); } pvlp = VP2VNODELOCK(vp); blp = NCP2BUCKETLOCK(ncp); vlp1 = VP2VNODELOCK(ncp->nc_dvp); vlp2 = VP2VNODELOCK(ncp->nc_vp); if (*vlpp == vlp1 || *vlpp == vlp2) { to_unlock = *vlpp; *vlpp = NULL; } else { if (*vlpp != NULL) { mtx_unlock(*vlpp); *vlpp = NULL; } cache_sort_vnodes(&vlp1, &vlp2); if (vlp1 == pvlp) { mtx_lock(vlp2); to_unlock = vlp2; } else { if (!mtx_trylock(vlp1)) goto out_relock; to_unlock = vlp1; } } rw_wlock(blp); cache_zap_locked(ncp, false); rw_wunlock(blp); if (to_unlock != NULL) mtx_unlock(to_unlock); return (true); out_relock: mtx_unlock(vlp2); mtx_lock(vlp1); mtx_lock(vlp2); MPASS(*vlpp == NULL); *vlpp = vlp1; return (false); } static int __noinline cache_zap_locked_vnode(struct namecache *ncp, struct vnode *vp) { struct mtx *pvlp, *vlp1, *vlp2, *to_unlock; struct rwlock *blp; int error = 0; MPASS(vp == ncp->nc_dvp || vp == ncp->nc_vp); cache_assert_vnode_locked(vp); pvlp = VP2VNODELOCK(vp); if (ncp->nc_flag & NCF_NEGATIVE) { cache_zap_negative_locked_vnode_kl(ncp, vp); goto out; } blp = NCP2BUCKETLOCK(ncp); vlp1 = VP2VNODELOCK(ncp->nc_dvp); vlp2 = VP2VNODELOCK(ncp->nc_vp); cache_sort_vnodes(&vlp1, &vlp2); if (vlp1 == pvlp) { mtx_lock(vlp2); to_unlock = vlp2; } else { if (!mtx_trylock(vlp1)) { error = EAGAIN; goto out; } to_unlock = vlp1; } rw_wlock(blp); cache_zap_locked(ncp, false); rw_wunlock(blp); mtx_unlock(to_unlock); out: mtx_unlock(pvlp); return (error); } /* * If trylocking failed we can get here. We know enough to take all needed locks * in the right order and re-lookup the entry. */ static int cache_zap_unlocked_bucket(struct namecache *ncp, struct componentname *cnp, struct vnode *dvp, struct mtx *dvlp, struct mtx *vlp, uint32_t hash, struct rwlock *blp) { struct namecache *rncp; cache_assert_bucket_locked(ncp, RA_UNLOCKED); cache_sort_vnodes(&dvlp, &vlp); cache_lock_vnodes(dvlp, vlp); rw_wlock(blp); LIST_FOREACH(rncp, (NCHHASH(hash)), nc_hash) { if (rncp == ncp && rncp->nc_dvp == dvp && rncp->nc_nlen == cnp->cn_namelen && !bcmp(rncp->nc_name, cnp->cn_nameptr, rncp->nc_nlen)) break; } if (rncp != NULL) { cache_zap_locked(rncp, false); rw_wunlock(blp); cache_unlock_vnodes(dvlp, vlp); counter_u64_add(zap_and_exit_bucket_relock_success, 1); return (0); } rw_wunlock(blp); cache_unlock_vnodes(dvlp, vlp); return (EAGAIN); } static int __noinline cache_zap_wlocked_bucket(struct namecache *ncp, struct componentname *cnp, uint32_t hash, struct rwlock *blp) { struct mtx *dvlp, *vlp; struct vnode *dvp; cache_assert_bucket_locked(ncp, RA_WLOCKED); dvlp = VP2VNODELOCK(ncp->nc_dvp); vlp = NULL; if (!(ncp->nc_flag & NCF_NEGATIVE)) vlp = VP2VNODELOCK(ncp->nc_vp); if (cache_trylock_vnodes(dvlp, vlp) == 0) { cache_zap_locked(ncp, false); rw_wunlock(blp); cache_unlock_vnodes(dvlp, vlp); return (0); } dvp = ncp->nc_dvp; rw_wunlock(blp); return (cache_zap_unlocked_bucket(ncp, cnp, dvp, dvlp, vlp, hash, blp)); } static int __noinline cache_zap_rlocked_bucket(struct namecache *ncp, struct componentname *cnp, uint32_t hash, struct rwlock *blp) { struct mtx *dvlp, *vlp; struct vnode *dvp; cache_assert_bucket_locked(ncp, RA_RLOCKED); dvlp = VP2VNODELOCK(ncp->nc_dvp); vlp = NULL; if (!(ncp->nc_flag & NCF_NEGATIVE)) vlp = VP2VNODELOCK(ncp->nc_vp); if (cache_trylock_vnodes(dvlp, vlp) == 0) { rw_runlock(blp); rw_wlock(blp); cache_zap_locked(ncp, false); rw_wunlock(blp); cache_unlock_vnodes(dvlp, vlp); return (0); } dvp = ncp->nc_dvp; rw_runlock(blp); return (cache_zap_unlocked_bucket(ncp, cnp, dvp, dvlp, vlp, hash, blp)); } static int cache_zap_wlocked_bucket_kl(struct namecache *ncp, struct rwlock *blp, struct mtx **vlpp1, struct mtx **vlpp2) { struct mtx *dvlp, *vlp; cache_assert_bucket_locked(ncp, RA_WLOCKED); dvlp = VP2VNODELOCK(ncp->nc_dvp); vlp = NULL; if (!(ncp->nc_flag & NCF_NEGATIVE)) vlp = VP2VNODELOCK(ncp->nc_vp); cache_sort_vnodes(&dvlp, &vlp); if (*vlpp1 == dvlp && *vlpp2 == vlp) { cache_zap_locked(ncp, false); cache_unlock_vnodes(dvlp, vlp); *vlpp1 = NULL; *vlpp2 = NULL; return (0); } if (*vlpp1 != NULL) mtx_unlock(*vlpp1); if (*vlpp2 != NULL) mtx_unlock(*vlpp2); *vlpp1 = NULL; *vlpp2 = NULL; if (cache_trylock_vnodes(dvlp, vlp) == 0) { cache_zap_locked(ncp, false); cache_unlock_vnodes(dvlp, vlp); return (0); } rw_wunlock(blp); *vlpp1 = dvlp; *vlpp2 = vlp; if (*vlpp1 != NULL) mtx_lock(*vlpp1); mtx_lock(*vlpp2); rw_wlock(blp); return (EAGAIN); } static void cache_lookup_unlock(struct rwlock *blp, struct mtx *vlp) { if (blp != NULL) { rw_runlock(blp); } else { mtx_unlock(vlp); } } static int __noinline cache_lookup_dot(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct timespec *tsp, int *ticksp) { int ltype; *vpp = dvp; CTR2(KTR_VFS, "cache_lookup(%p, %s) found via .", dvp, cnp->cn_nameptr); counter_u64_add(dothits, 1); SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ".", *vpp); if (tsp != NULL) timespecclear(tsp); if (ticksp != NULL) *ticksp = ticks; vrefact(*vpp); /* * When we lookup "." we still can be asked to lock it * differently... */ ltype = cnp->cn_lkflags & LK_TYPE_MASK; if (ltype != VOP_ISLOCKED(*vpp)) { if (ltype == LK_EXCLUSIVE) { vn_lock(*vpp, LK_UPGRADE | LK_RETRY); if (VN_IS_DOOMED((*vpp))) { /* forced unmount */ vrele(*vpp); *vpp = NULL; return (ENOENT); } } else vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY); } return (-1); } static __noinline int cache_lookup_nomakeentry(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct timespec *tsp, int *ticksp) { struct namecache *ncp; struct rwlock *blp; struct mtx *dvlp, *dvlp2; uint32_t hash; int error; if (cnp->cn_namelen == 2 && cnp->cn_nameptr[0] == '.' && cnp->cn_nameptr[1] == '.') { counter_u64_add(dotdothits, 1); dvlp = VP2VNODELOCK(dvp); dvlp2 = NULL; mtx_lock(dvlp); retry_dotdot: ncp = dvp->v_cache_dd; if (ncp == NULL) { SDT_PROBE3(vfs, namecache, lookup, miss, dvp, "..", NULL); mtx_unlock(dvlp); if (dvlp2 != NULL) mtx_unlock(dvlp2); return (0); } if ((ncp->nc_flag & NCF_ISDOTDOT) != 0) { if (ncp->nc_dvp != dvp) panic("dvp %p v_cache_dd %p\n", dvp, ncp); if (!cache_zap_locked_vnode_kl2(ncp, dvp, &dvlp2)) goto retry_dotdot; MPASS(dvp->v_cache_dd == NULL); mtx_unlock(dvlp); if (dvlp2 != NULL) mtx_unlock(dvlp2); cache_free(ncp); } else { dvp->v_cache_dd = NULL; mtx_unlock(dvlp); if (dvlp2 != NULL) mtx_unlock(dvlp2); } return (0); } hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp); blp = HASH2BUCKETLOCK(hash); retry: if (LIST_EMPTY(NCHHASH(hash))) goto out_no_entry; rw_wlock(blp); LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { counter_u64_add(numchecks, 1); if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) break; } /* We failed to find an entry */ if (ncp == NULL) { rw_wunlock(blp); goto out_no_entry; } error = cache_zap_wlocked_bucket(ncp, cnp, hash, blp); if (__predict_false(error != 0)) { zap_and_exit_bucket_fail++; cache_maybe_yield(); goto retry; } counter_u64_add(numposzaps, 1); cache_free(ncp); return (0); out_no_entry: SDT_PROBE3(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr, NULL); counter_u64_add(nummisszap, 1); return (0); } /** * Lookup a name in the name cache * * # Arguments * * - dvp: Parent directory in which to search. * - vpp: Return argument. Will contain desired vnode on cache hit. * - cnp: Parameters of the name search. The most interesting bits of * the cn_flags field have the following meanings: * - MAKEENTRY: If clear, free an entry from the cache rather than look * it up. * - ISDOTDOT: Must be set if and only if cn_nameptr == ".." * - tsp: Return storage for cache timestamp. On a successful (positive * or negative) lookup, tsp will be filled with any timespec that * was stored when this cache entry was created. However, it will * be clear for "." entries. * - ticks: Return storage for alternate cache timestamp. On a successful * (positive or negative) lookup, it will contain the ticks value * that was current when the cache entry was created, unless cnp * was ".". * * # Returns * * - -1: A positive cache hit. vpp will contain the desired vnode. * - ENOENT: A negative cache hit, or dvp was recycled out from under us due * to a forced unmount. vpp will not be modified. If the entry * is a whiteout, then the ISWHITEOUT flag will be set in * cnp->cn_flags. * - 0: A cache miss. vpp will not be modified. * * # Locking * * On a cache hit, vpp will be returned locked and ref'd. If we're looking up * .., dvp is unlocked. If we're looking up . an extra ref is taken, but the * lock is not recursively acquired. */ int cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct timespec *tsp, int *ticksp) { struct namecache_ts *ncp_ts; struct namecache *ncp; struct rwlock *blp; struct mtx *dvlp; uint32_t hash; enum vgetstate vs; int error, ltype; #ifdef DEBUG_CACHE if (__predict_false(!doingcache)) { cnp->cn_flags &= ~MAKEENTRY; return (0); } #endif counter_u64_add(numcalls, 1); if (__predict_false(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')) return (cache_lookup_dot(dvp, vpp, cnp, tsp, ticksp)); if ((cnp->cn_flags & MAKEENTRY) == 0) return (cache_lookup_nomakeentry(dvp, vpp, cnp, tsp, ticksp)); retry: blp = NULL; dvlp = NULL; error = 0; if (cnp->cn_namelen == 2 && cnp->cn_nameptr[0] == '.' && cnp->cn_nameptr[1] == '.') { counter_u64_add(dotdothits, 1); dvlp = VP2VNODELOCK(dvp); mtx_lock(dvlp); ncp = dvp->v_cache_dd; if (ncp == NULL) { SDT_PROBE3(vfs, namecache, lookup, miss, dvp, "..", NULL); mtx_unlock(dvlp); return (0); } if ((ncp->nc_flag & NCF_ISDOTDOT) != 0) { if (ncp->nc_flag & NCF_NEGATIVE) *vpp = NULL; else *vpp = ncp->nc_vp; } else *vpp = ncp->nc_dvp; /* Return failure if negative entry was found. */ if (*vpp == NULL) goto negative_success; CTR3(KTR_VFS, "cache_lookup(%p, %s) found %p via ..", dvp, cnp->cn_nameptr, *vpp); SDT_PROBE3(vfs, namecache, lookup, hit, dvp, "..", *vpp); cache_out_ts(ncp, tsp, ticksp); if ((ncp->nc_flag & (NCF_ISDOTDOT | NCF_DTS)) == NCF_DTS && tsp != NULL) { ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc); *tsp = ncp_ts->nc_dotdottime; } goto success; } hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp); blp = HASH2BUCKETLOCK(hash); rw_rlock(blp); LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { counter_u64_add(numchecks, 1); if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) break; } /* We failed to find an entry */ if (__predict_false(ncp == NULL)) { rw_runlock(blp); SDT_PROBE3(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr, NULL); counter_u64_add(nummiss, 1); return (0); } if (ncp->nc_flag & NCF_NEGATIVE) goto negative_success; /* We found a "positive" match, return the vnode */ counter_u64_add(numposhits, 1); *vpp = ncp->nc_vp; CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p", dvp, cnp->cn_nameptr, *vpp, ncp); SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ncp->nc_name, *vpp); cache_out_ts(ncp, tsp, ticksp); success: /* * On success we return a locked and ref'd vnode as per the lookup * protocol. */ MPASS(dvp != *vpp); ltype = 0; /* silence gcc warning */ if (cnp->cn_flags & ISDOTDOT) { ltype = VOP_ISLOCKED(dvp); VOP_UNLOCK(dvp); } vs = vget_prep(*vpp); cache_lookup_unlock(blp, dvlp); error = vget_finish(*vpp, cnp->cn_lkflags, vs); if (cnp->cn_flags & ISDOTDOT) { vn_lock(dvp, ltype | LK_RETRY); if (VN_IS_DOOMED(dvp)) { if (error == 0) vput(*vpp); *vpp = NULL; return (ENOENT); } } if (error) { *vpp = NULL; goto retry; } if ((cnp->cn_flags & ISLASTCN) && (cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE) { ASSERT_VOP_ELOCKED(*vpp, "cache_lookup"); } return (-1); negative_success: /* We found a negative match, and want to create it, so purge */ if (cnp->cn_nameiop == CREATE) { counter_u64_add(numnegzaps, 1); goto zap_and_exit; } counter_u64_add(numneghits, 1); cache_negative_hit(ncp); if (ncp->nc_flag & NCF_WHITE) cnp->cn_flags |= ISWHITEOUT; SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp, ncp->nc_name); cache_out_ts(ncp, tsp, ticksp); cache_lookup_unlock(blp, dvlp); return (ENOENT); zap_and_exit: if (blp != NULL) error = cache_zap_rlocked_bucket(ncp, cnp, hash, blp); else error = cache_zap_locked_vnode(ncp, dvp); if (__predict_false(error != 0)) { zap_and_exit_bucket_fail2++; cache_maybe_yield(); goto retry; } cache_free(ncp); return (0); } struct celockstate { struct mtx *vlp[3]; struct rwlock *blp[2]; }; CTASSERT((nitems(((struct celockstate *)0)->vlp) == 3)); CTASSERT((nitems(((struct celockstate *)0)->blp) == 2)); static inline void cache_celockstate_init(struct celockstate *cel) { bzero(cel, sizeof(*cel)); } static void cache_lock_vnodes_cel(struct celockstate *cel, struct vnode *vp, struct vnode *dvp) { struct mtx *vlp1, *vlp2; MPASS(cel->vlp[0] == NULL); MPASS(cel->vlp[1] == NULL); MPASS(cel->vlp[2] == NULL); MPASS(vp != NULL || dvp != NULL); vlp1 = VP2VNODELOCK(vp); vlp2 = VP2VNODELOCK(dvp); cache_sort_vnodes(&vlp1, &vlp2); if (vlp1 != NULL) { mtx_lock(vlp1); cel->vlp[0] = vlp1; } mtx_lock(vlp2); cel->vlp[1] = vlp2; } static void cache_unlock_vnodes_cel(struct celockstate *cel) { MPASS(cel->vlp[0] != NULL || cel->vlp[1] != NULL); if (cel->vlp[0] != NULL) mtx_unlock(cel->vlp[0]); if (cel->vlp[1] != NULL) mtx_unlock(cel->vlp[1]); if (cel->vlp[2] != NULL) mtx_unlock(cel->vlp[2]); } static bool cache_lock_vnodes_cel_3(struct celockstate *cel, struct vnode *vp) { struct mtx *vlp; bool ret; cache_assert_vlp_locked(cel->vlp[0]); cache_assert_vlp_locked(cel->vlp[1]); MPASS(cel->vlp[2] == NULL); MPASS(vp != NULL); vlp = VP2VNODELOCK(vp); ret = true; if (vlp >= cel->vlp[1]) { mtx_lock(vlp); } else { if (mtx_trylock(vlp)) goto out; cache_lock_vnodes_cel_3_failures++; cache_unlock_vnodes_cel(cel); if (vlp < cel->vlp[0]) { mtx_lock(vlp); mtx_lock(cel->vlp[0]); mtx_lock(cel->vlp[1]); } else { if (cel->vlp[0] != NULL) mtx_lock(cel->vlp[0]); mtx_lock(vlp); mtx_lock(cel->vlp[1]); } ret = false; } out: cel->vlp[2] = vlp; return (ret); } static void cache_lock_buckets_cel(struct celockstate *cel, struct rwlock *blp1, struct rwlock *blp2) { MPASS(cel->blp[0] == NULL); MPASS(cel->blp[1] == NULL); cache_sort_vnodes(&blp1, &blp2); if (blp1 != NULL) { rw_wlock(blp1); cel->blp[0] = blp1; } rw_wlock(blp2); cel->blp[1] = blp2; } static void cache_unlock_buckets_cel(struct celockstate *cel) { if (cel->blp[0] != NULL) rw_wunlock(cel->blp[0]); rw_wunlock(cel->blp[1]); } /* * Lock part of the cache affected by the insertion. * * This means vnodelocks for dvp, vp and the relevant bucketlock. * However, insertion can result in removal of an old entry. In this * case we have an additional vnode and bucketlock pair to lock. If the * entry is negative, ncelock is locked instead of the vnode. * * That is, in the worst case we have to lock 3 vnodes and 2 bucketlocks, while * preserving the locking order (smaller address first). */ static void cache_enter_lock(struct celockstate *cel, struct vnode *dvp, struct vnode *vp, uint32_t hash) { struct namecache *ncp; struct rwlock *blps[2]; blps[0] = HASH2BUCKETLOCK(hash); for (;;) { blps[1] = NULL; cache_lock_vnodes_cel(cel, dvp, vp); if (vp == NULL || vp->v_type != VDIR) break; ncp = vp->v_cache_dd; if (ncp == NULL) break; if ((ncp->nc_flag & NCF_ISDOTDOT) == 0) break; MPASS(ncp->nc_dvp == vp); blps[1] = NCP2BUCKETLOCK(ncp); if (ncp->nc_flag & NCF_NEGATIVE) break; if (cache_lock_vnodes_cel_3(cel, ncp->nc_vp)) break; /* * All vnodes got re-locked. Re-validate the state and if * nothing changed we are done. Otherwise restart. */ if (ncp == vp->v_cache_dd && (ncp->nc_flag & NCF_ISDOTDOT) != 0 && blps[1] == NCP2BUCKETLOCK(ncp) && VP2VNODELOCK(ncp->nc_vp) == cel->vlp[2]) break; cache_unlock_vnodes_cel(cel); cel->vlp[0] = NULL; cel->vlp[1] = NULL; cel->vlp[2] = NULL; } cache_lock_buckets_cel(cel, blps[0], blps[1]); } static void cache_enter_lock_dd(struct celockstate *cel, struct vnode *dvp, struct vnode *vp, uint32_t hash) { struct namecache *ncp; struct rwlock *blps[2]; blps[0] = HASH2BUCKETLOCK(hash); for (;;) { blps[1] = NULL; cache_lock_vnodes_cel(cel, dvp, vp); ncp = dvp->v_cache_dd; if (ncp == NULL) break; if ((ncp->nc_flag & NCF_ISDOTDOT) == 0) break; MPASS(ncp->nc_dvp == dvp); blps[1] = NCP2BUCKETLOCK(ncp); if (ncp->nc_flag & NCF_NEGATIVE) break; if (cache_lock_vnodes_cel_3(cel, ncp->nc_vp)) break; if (ncp == dvp->v_cache_dd && (ncp->nc_flag & NCF_ISDOTDOT) != 0 && blps[1] == NCP2BUCKETLOCK(ncp) && VP2VNODELOCK(ncp->nc_vp) == cel->vlp[2]) break; cache_unlock_vnodes_cel(cel); cel->vlp[0] = NULL; cel->vlp[1] = NULL; cel->vlp[2] = NULL; } cache_lock_buckets_cel(cel, blps[0], blps[1]); } static void cache_enter_unlock(struct celockstate *cel) { cache_unlock_buckets_cel(cel); cache_unlock_vnodes_cel(cel); } static void __noinline cache_enter_dotdot_prep(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) { struct celockstate cel; struct namecache *ncp; uint32_t hash; int len; if (dvp->v_cache_dd == NULL) return; len = cnp->cn_namelen; cache_celockstate_init(&cel); hash = cache_get_hash(cnp->cn_nameptr, len, dvp); cache_enter_lock_dd(&cel, dvp, vp, hash); ncp = dvp->v_cache_dd; if (ncp != NULL && (ncp->nc_flag & NCF_ISDOTDOT)) { KASSERT(ncp->nc_dvp == dvp, ("wrong isdotdot parent")); cache_zap_locked(ncp, false); } else { ncp = NULL; } dvp->v_cache_dd = NULL; cache_enter_unlock(&cel); cache_free(ncp); } /* * Add an entry to the cache. */ void cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, struct timespec *tsp, struct timespec *dtsp) { struct celockstate cel; struct namecache *ncp, *n2, *ndd; struct namecache_ts *ncp_ts, *n2_ts; struct nchashhead *ncpp; uint32_t hash; int flag; int len; bool held_dvp; u_long lnumcache; CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr); VNASSERT(vp == NULL || !VN_IS_DOOMED(vp), vp, ("cache_enter: Adding a doomed vnode")); VNASSERT(dvp == NULL || !VN_IS_DOOMED(dvp), dvp, ("cache_enter: Doomed vnode used as src")); #ifdef DEBUG_CACHE if (__predict_false(!doingcache)) return; #endif flag = 0; if (__predict_false(cnp->cn_nameptr[0] == '.')) { if (cnp->cn_namelen == 1) return; if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { cache_enter_dotdot_prep(dvp, vp, cnp); flag = NCF_ISDOTDOT; } } /* * Avoid blowout in namecache entries. */ lnumcache = atomic_fetchadd_long(&numcache, 1) + 1; if (__predict_false(lnumcache >= ncsize)) { atomic_add_long(&numcache, -1); return; } cache_celockstate_init(&cel); ndd = NULL; ncp_ts = NULL; held_dvp = false; if (LIST_EMPTY(&dvp->v_cache_src) && flag != NCF_ISDOTDOT) { vhold(dvp); - atomic_add_long(&numcachehv, 1); + counter_u64_add(numcachehv, 1); held_dvp = true; } /* * Calculate the hash key and setup as much of the new * namecache entry as possible before acquiring the lock. */ ncp = cache_alloc(cnp->cn_namelen, tsp != NULL); ncp->nc_flag = flag; ncp->nc_vp = vp; if (vp == NULL) ncp->nc_flag |= NCF_NEGATIVE; ncp->nc_dvp = dvp; if (tsp != NULL) { ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc); ncp_ts->nc_time = *tsp; ncp_ts->nc_ticks = ticks; ncp_ts->nc_nc.nc_flag |= NCF_TS; if (dtsp != NULL) { ncp_ts->nc_dotdottime = *dtsp; ncp_ts->nc_nc.nc_flag |= NCF_DTS; } } len = ncp->nc_nlen = cnp->cn_namelen; hash = cache_get_hash(cnp->cn_nameptr, len, dvp); strlcpy(ncp->nc_name, cnp->cn_nameptr, len + 1); cache_enter_lock(&cel, dvp, vp, hash); /* * See if this vnode or negative entry is already in the cache * with this name. This can happen with concurrent lookups of * the same path name. */ ncpp = NCHHASH(hash); LIST_FOREACH(n2, ncpp, nc_hash) { if (n2->nc_dvp == dvp && n2->nc_nlen == cnp->cn_namelen && !bcmp(n2->nc_name, cnp->cn_nameptr, n2->nc_nlen)) { if (tsp != NULL) { KASSERT((n2->nc_flag & NCF_TS) != 0, ("no NCF_TS")); n2_ts = __containerof(n2, struct namecache_ts, nc_nc); n2_ts->nc_time = ncp_ts->nc_time; n2_ts->nc_ticks = ncp_ts->nc_ticks; if (dtsp != NULL) { n2_ts->nc_dotdottime = ncp_ts->nc_dotdottime; if (ncp->nc_flag & NCF_NEGATIVE) mtx_lock(&ncneg_hot.nl_lock); n2_ts->nc_nc.nc_flag |= NCF_DTS; if (ncp->nc_flag & NCF_NEGATIVE) mtx_unlock(&ncneg_hot.nl_lock); } } goto out_unlock_free; } } if (flag == NCF_ISDOTDOT) { /* * See if we are trying to add .. entry, but some other lookup * has populated v_cache_dd pointer already. */ if (dvp->v_cache_dd != NULL) goto out_unlock_free; KASSERT(vp == NULL || vp->v_type == VDIR, ("wrong vnode type %p", vp)); dvp->v_cache_dd = ncp; } if (vp != NULL) { if (vp->v_type == VDIR) { if (flag != NCF_ISDOTDOT) { /* * For this case, the cache entry maps both the * directory name in it and the name ".." for the * directory's parent. */ if ((ndd = vp->v_cache_dd) != NULL) { if ((ndd->nc_flag & NCF_ISDOTDOT) != 0) cache_zap_locked(ndd, false); else ndd = NULL; } vp->v_cache_dd = ncp; } } else { vp->v_cache_dd = NULL; } } if (flag != NCF_ISDOTDOT) { if (LIST_EMPTY(&dvp->v_cache_src)) { if (!held_dvp) { vhold(dvp); - atomic_add_long(&numcachehv, 1); + counter_u64_add(numcachehv, 1); } } else { if (held_dvp) { /* * This will not take the interlock as someone * else already holds the vnode on account of * the namecache and we hold locks preventing * this from changing. */ vdrop(dvp); - atomic_subtract_long(&numcachehv, 1); + counter_u64_add(numcachehv, -1); } } LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src); } /* * Insert the new namecache entry into the appropriate chain * within the cache entries table. */ LIST_INSERT_HEAD(ncpp, ncp, nc_hash); /* * If the entry is "negative", we place it into the * "negative" cache queue, otherwise, we place it into the * destination vnode's cache entries queue. */ if (vp != NULL) { TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); SDT_PROBE3(vfs, namecache, enter, done, dvp, ncp->nc_name, vp); } else { if (cnp->cn_flags & ISWHITEOUT) ncp->nc_flag |= NCF_WHITE; cache_negative_insert(ncp, false); SDT_PROBE2(vfs, namecache, enter_negative, done, dvp, ncp->nc_name); } cache_enter_unlock(&cel); if (numneg * ncnegfactor > lnumcache) cache_negative_zap_one(); cache_free(ndd); return; out_unlock_free: cache_enter_unlock(&cel); cache_free(ncp); if (held_dvp) { vdrop(dvp); - atomic_subtract_long(&numcachehv, 1); + counter_u64_add(numcachehv, -1); } return; } static u_int cache_roundup_2(u_int val) { u_int res; for (res = 1; res <= val; res <<= 1) continue; return (res); } /* * Name cache initialization, from vfs_init() when we are booting */ static void nchinit(void *dummy __unused) { u_int i; cache_zone_small = uma_zcreate("S VFS Cache", sizeof(struct namecache) + CACHE_PATH_CUTOFF + 1, NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct namecache), UMA_ZONE_ZINIT); cache_zone_small_ts = uma_zcreate("STS VFS Cache", sizeof(struct namecache_ts) + CACHE_PATH_CUTOFF + 1, NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct namecache_ts), UMA_ZONE_ZINIT); cache_zone_large = uma_zcreate("L VFS Cache", sizeof(struct namecache) + NAME_MAX + 1, NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct namecache), UMA_ZONE_ZINIT); cache_zone_large_ts = uma_zcreate("LTS VFS Cache", sizeof(struct namecache_ts) + NAME_MAX + 1, NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct namecache_ts), UMA_ZONE_ZINIT); ncsize = desiredvnodes * ncsizefactor; nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash); ncbuckethash = cache_roundup_2(mp_ncpus * mp_ncpus) - 1; if (ncbuckethash < 7) /* arbitrarily chosen to avoid having one lock */ ncbuckethash = 7; if (ncbuckethash > nchash) ncbuckethash = nchash; bucketlocks = malloc(sizeof(*bucketlocks) * numbucketlocks, M_VFSCACHE, M_WAITOK | M_ZERO); for (i = 0; i < numbucketlocks; i++) rw_init_flags(&bucketlocks[i], "ncbuc", RW_DUPOK | RW_RECURSE); ncvnodehash = ncbuckethash; vnodelocks = malloc(sizeof(*vnodelocks) * numvnodelocks, M_VFSCACHE, M_WAITOK | M_ZERO); for (i = 0; i < numvnodelocks; i++) mtx_init(&vnodelocks[i], "ncvn", NULL, MTX_DUPOK | MTX_RECURSE); ncpurgeminvnodes = numbucketlocks * 2; ncneghash = 3; neglists = malloc(sizeof(*neglists) * numneglists, M_VFSCACHE, M_WAITOK | M_ZERO); for (i = 0; i < numneglists; i++) { mtx_init(&neglists[i].nl_lock, "ncnegl", NULL, MTX_DEF); TAILQ_INIT(&neglists[i].nl_list); } mtx_init(&ncneg_hot.nl_lock, "ncneglh", NULL, MTX_DEF); TAILQ_INIT(&ncneg_hot.nl_list); mtx_init(&ncneg_shrink_lock, "ncnegs", NULL, MTX_DEF); + numcachehv = counter_u64_alloc(M_WAITOK); numcalls = counter_u64_alloc(M_WAITOK); dothits = counter_u64_alloc(M_WAITOK); dotdothits = counter_u64_alloc(M_WAITOK); numchecks = counter_u64_alloc(M_WAITOK); nummiss = counter_u64_alloc(M_WAITOK); nummisszap = counter_u64_alloc(M_WAITOK); numposzaps = counter_u64_alloc(M_WAITOK); numposhits = counter_u64_alloc(M_WAITOK); numnegzaps = counter_u64_alloc(M_WAITOK); numneghits = counter_u64_alloc(M_WAITOK); numfullpathcalls = counter_u64_alloc(M_WAITOK); numfullpathfail1 = counter_u64_alloc(M_WAITOK); numfullpathfail2 = counter_u64_alloc(M_WAITOK); numfullpathfail4 = counter_u64_alloc(M_WAITOK); numfullpathfound = counter_u64_alloc(M_WAITOK); zap_and_exit_bucket_relock_success = counter_u64_alloc(M_WAITOK); numneg_evicted = counter_u64_alloc(M_WAITOK); shrinking_skipped = counter_u64_alloc(M_WAITOK); } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL); void cache_changesize(u_long newmaxvnodes) { struct nchashhead *new_nchashtbl, *old_nchashtbl; u_long new_nchash, old_nchash; struct namecache *ncp; uint32_t hash; u_long newncsize; int i; newncsize = newmaxvnodes * ncsizefactor; newmaxvnodes = cache_roundup_2(newmaxvnodes * 2); if (newmaxvnodes < numbucketlocks) newmaxvnodes = numbucketlocks; new_nchashtbl = hashinit(newmaxvnodes, M_VFSCACHE, &new_nchash); /* If same hash table size, nothing to do */ if (nchash == new_nchash) { free(new_nchashtbl, M_VFSCACHE); return; } /* * Move everything from the old hash table to the new table. * None of the namecache entries in the table can be removed * because to do so, they have to be removed from the hash table. */ cache_lock_all_vnodes(); cache_lock_all_buckets(); old_nchashtbl = nchashtbl; old_nchash = nchash; nchashtbl = new_nchashtbl; nchash = new_nchash; for (i = 0; i <= old_nchash; i++) { while ((ncp = LIST_FIRST(&old_nchashtbl[i])) != NULL) { hash = cache_get_hash(ncp->nc_name, ncp->nc_nlen, ncp->nc_dvp); LIST_REMOVE(ncp, nc_hash); LIST_INSERT_HEAD(NCHHASH(hash), ncp, nc_hash); } } ncsize = newncsize; cache_unlock_all_buckets(); cache_unlock_all_vnodes(); free(old_nchashtbl, M_VFSCACHE); } /* * Invalidate all entries from and to a particular vnode. */ void cache_purge(struct vnode *vp) { TAILQ_HEAD(, namecache) ncps; struct namecache *ncp, *nnp; struct mtx *vlp, *vlp2; CTR1(KTR_VFS, "cache_purge(%p)", vp); SDT_PROBE1(vfs, namecache, purge, done, vp); if (LIST_EMPTY(&vp->v_cache_src) && TAILQ_EMPTY(&vp->v_cache_dst) && vp->v_cache_dd == NULL) return; TAILQ_INIT(&ncps); vlp = VP2VNODELOCK(vp); vlp2 = NULL; mtx_lock(vlp); retry: while (!LIST_EMPTY(&vp->v_cache_src)) { ncp = LIST_FIRST(&vp->v_cache_src); if (!cache_zap_locked_vnode_kl2(ncp, vp, &vlp2)) goto retry; TAILQ_INSERT_TAIL(&ncps, ncp, nc_dst); } while (!TAILQ_EMPTY(&vp->v_cache_dst)) { ncp = TAILQ_FIRST(&vp->v_cache_dst); if (!cache_zap_locked_vnode_kl2(ncp, vp, &vlp2)) goto retry; TAILQ_INSERT_TAIL(&ncps, ncp, nc_dst); } ncp = vp->v_cache_dd; if (ncp != NULL) { KASSERT(ncp->nc_flag & NCF_ISDOTDOT, ("lost dotdot link")); if (!cache_zap_locked_vnode_kl2(ncp, vp, &vlp2)) goto retry; TAILQ_INSERT_TAIL(&ncps, ncp, nc_dst); } KASSERT(vp->v_cache_dd == NULL, ("incomplete purge")); mtx_unlock(vlp); if (vlp2 != NULL) mtx_unlock(vlp2); TAILQ_FOREACH_SAFE(ncp, &ncps, nc_dst, nnp) { cache_free(ncp); } } /* * Invalidate all negative entries for a particular directory vnode. */ void cache_purge_negative(struct vnode *vp) { TAILQ_HEAD(, namecache) ncps; struct namecache *ncp, *nnp; struct mtx *vlp; CTR1(KTR_VFS, "cache_purge_negative(%p)", vp); SDT_PROBE1(vfs, namecache, purge_negative, done, vp); if (LIST_EMPTY(&vp->v_cache_src)) return; TAILQ_INIT(&ncps); vlp = VP2VNODELOCK(vp); mtx_lock(vlp); LIST_FOREACH_SAFE(ncp, &vp->v_cache_src, nc_src, nnp) { if (!(ncp->nc_flag & NCF_NEGATIVE)) continue; cache_zap_negative_locked_vnode_kl(ncp, vp); TAILQ_INSERT_TAIL(&ncps, ncp, nc_dst); } mtx_unlock(vlp); TAILQ_FOREACH_SAFE(ncp, &ncps, nc_dst, nnp) { cache_free(ncp); } } /* * Flush all entries referencing a particular filesystem. */ void cache_purgevfs(struct mount *mp, bool force) { TAILQ_HEAD(, namecache) ncps; struct mtx *vlp1, *vlp2; struct rwlock *blp; struct nchashhead *bucket; struct namecache *ncp, *nnp; u_long i, j, n_nchash; int error; /* Scan hash tables for applicable entries */ SDT_PROBE1(vfs, namecache, purgevfs, done, mp); if (!force && mp->mnt_nvnodelistsize <= ncpurgeminvnodes) return; TAILQ_INIT(&ncps); n_nchash = nchash + 1; vlp1 = vlp2 = NULL; for (i = 0; i < numbucketlocks; i++) { blp = (struct rwlock *)&bucketlocks[i]; rw_wlock(blp); for (j = i; j < n_nchash; j += numbucketlocks) { retry: bucket = &nchashtbl[j]; LIST_FOREACH_SAFE(ncp, bucket, nc_hash, nnp) { cache_assert_bucket_locked(ncp, RA_WLOCKED); if (ncp->nc_dvp->v_mount != mp) continue; error = cache_zap_wlocked_bucket_kl(ncp, blp, &vlp1, &vlp2); if (error != 0) goto retry; TAILQ_INSERT_HEAD(&ncps, ncp, nc_dst); } } rw_wunlock(blp); if (vlp1 == NULL && vlp2 == NULL) cache_maybe_yield(); } if (vlp1 != NULL) mtx_unlock(vlp1); if (vlp2 != NULL) mtx_unlock(vlp2); TAILQ_FOREACH_SAFE(ncp, &ncps, nc_dst, nnp) { cache_free(ncp); } } /* * Perform canonical checks and cache lookup and pass on to filesystem * through the vop_cachedlookup only if needed. */ int vfs_cache_lookup(struct vop_lookup_args *ap) { struct vnode *dvp; int error; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct ucred *cred = cnp->cn_cred; int flags = cnp->cn_flags; struct thread *td = cnp->cn_thread; *vpp = NULL; dvp = ap->a_dvp; if (dvp->v_type != VDIR) return (ENOTDIR); if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); error = VOP_ACCESS(dvp, VEXEC, cred, td); if (error) return (error); error = cache_lookup(dvp, vpp, cnp, NULL, NULL); if (error == 0) return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); if (error == -1) return (0); return (error); } /* * XXX All of these sysctls would probably be more productive dead. */ static int __read_mostly disablecwd; SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, "Disable the getcwd syscall"); /* Implementation of the getcwd syscall. */ int sys___getcwd(struct thread *td, struct __getcwd_args *uap) { return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen, MAXPATHLEN)); } int kern___getcwd(struct thread *td, char *buf, enum uio_seg bufseg, size_t buflen, size_t path_max) { char *bp, *tmpbuf; struct filedesc *fdp; struct vnode *cdir, *rdir; int error; if (__predict_false(disablecwd)) return (ENODEV); if (__predict_false(buflen < 2)) return (EINVAL); if (buflen > path_max) buflen = path_max; tmpbuf = malloc(buflen, M_TEMP, M_WAITOK); fdp = td->td_proc->p_fd; FILEDESC_SLOCK(fdp); cdir = fdp->fd_cdir; vrefact(cdir); rdir = fdp->fd_rdir; vrefact(rdir); FILEDESC_SUNLOCK(fdp); error = vn_fullpath1(td, cdir, rdir, tmpbuf, &bp, buflen); vrele(rdir); vrele(cdir); if (!error) { if (bufseg == UIO_SYSSPACE) bcopy(bp, buf, strlen(bp) + 1); else error = copyout(bp, buf, strlen(bp) + 1); #ifdef KTRACE if (KTRPOINT(curthread, KTR_NAMEI)) ktrnamei(bp); #endif } free(tmpbuf, M_TEMP); return (error); } /* * Thus begins the fullpath magic. */ static int __read_mostly disablefullpath; SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0, "Disable the vn_fullpath function"); /* * Retrieve the full filesystem path that correspond to a vnode from the name * cache (if available) */ int vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) { char *buf; struct filedesc *fdp; struct vnode *rdir; int error; if (__predict_false(disablefullpath)) return (ENODEV); if (__predict_false(vn == NULL)) return (EINVAL); buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); fdp = td->td_proc->p_fd; FILEDESC_SLOCK(fdp); rdir = fdp->fd_rdir; vrefact(rdir); FILEDESC_SUNLOCK(fdp); error = vn_fullpath1(td, vn, rdir, buf, retbuf, MAXPATHLEN); vrele(rdir); if (!error) *freebuf = buf; else free(buf, M_TEMP); return (error); } /* * This function is similar to vn_fullpath, but it attempts to lookup the * pathname relative to the global root mount point. This is required for the * auditing sub-system, as audited pathnames must be absolute, relative to the * global root mount point. */ int vn_fullpath_global(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) { char *buf; int error; if (__predict_false(disablefullpath)) return (ENODEV); if (__predict_false(vn == NULL)) return (EINVAL); buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); error = vn_fullpath1(td, vn, rootvnode, buf, retbuf, MAXPATHLEN); if (!error) *freebuf = buf; else free(buf, M_TEMP); return (error); } int vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen) { struct vnode *dvp; struct namecache *ncp; struct mtx *vlp; int error; vlp = VP2VNODELOCK(*vp); mtx_lock(vlp); TAILQ_FOREACH(ncp, &((*vp)->v_cache_dst), nc_dst) { if ((ncp->nc_flag & NCF_ISDOTDOT) == 0) break; } if (ncp != NULL) { if (*buflen < ncp->nc_nlen) { mtx_unlock(vlp); vrele(*vp); counter_u64_add(numfullpathfail4, 1); error = ENOMEM; SDT_PROBE3(vfs, namecache, fullpath, return, error, vp, NULL); return (error); } *buflen -= ncp->nc_nlen; memcpy(buf + *buflen, ncp->nc_name, ncp->nc_nlen); SDT_PROBE3(vfs, namecache, fullpath, hit, ncp->nc_dvp, ncp->nc_name, vp); dvp = *vp; *vp = ncp->nc_dvp; vref(*vp); mtx_unlock(vlp); vrele(dvp); return (0); } SDT_PROBE1(vfs, namecache, fullpath, miss, vp); mtx_unlock(vlp); vn_lock(*vp, LK_SHARED | LK_RETRY); error = VOP_VPTOCNP(*vp, &dvp, cred, buf, buflen); vput(*vp); if (error) { counter_u64_add(numfullpathfail2, 1); SDT_PROBE3(vfs, namecache, fullpath, return, error, vp, NULL); return (error); } *vp = dvp; if (VN_IS_DOOMED(dvp)) { /* forced unmount */ vrele(dvp); error = ENOENT; SDT_PROBE3(vfs, namecache, fullpath, return, error, vp, NULL); return (error); } /* * *vp has its use count incremented still. */ return (0); } /* * The magic behind kern___getcwd() and vn_fullpath(). */ static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, char *buf, char **retbuf, u_int buflen) { int error, slash_prefixed; #ifdef KDTRACE_HOOKS struct vnode *startvp = vp; #endif struct vnode *vp1; buflen--; buf[buflen] = '\0'; error = 0; slash_prefixed = 0; SDT_PROBE1(vfs, namecache, fullpath, entry, vp); counter_u64_add(numfullpathcalls, 1); vref(vp); if (vp->v_type != VDIR) { error = vn_vptocnp(&vp, td->td_ucred, buf, &buflen); if (error) return (error); if (buflen == 0) { vrele(vp); return (ENOMEM); } buf[--buflen] = '/'; slash_prefixed = 1; } while (vp != rdir && vp != rootvnode) { /* * The vp vnode must be already fully constructed, * since it is either found in namecache or obtained * from VOP_VPTOCNP(). We may test for VV_ROOT safely * without obtaining the vnode lock. */ if ((vp->v_vflag & VV_ROOT) != 0) { vn_lock(vp, LK_RETRY | LK_SHARED); /* * With the vnode locked, check for races with * unmount, forced or not. Note that we * already verified that vp is not equal to * the root vnode, which means that * mnt_vnodecovered can be NULL only for the * case of unmount. */ if (VN_IS_DOOMED(vp) || (vp1 = vp->v_mount->mnt_vnodecovered) == NULL || vp1->v_mountedhere != vp->v_mount) { vput(vp); error = ENOENT; SDT_PROBE3(vfs, namecache, fullpath, return, error, vp, NULL); break; } vref(vp1); vput(vp); vp = vp1; continue; } if (vp->v_type != VDIR) { vrele(vp); counter_u64_add(numfullpathfail1, 1); error = ENOTDIR; SDT_PROBE3(vfs, namecache, fullpath, return, error, vp, NULL); break; } error = vn_vptocnp(&vp, td->td_ucred, buf, &buflen); if (error) break; if (buflen == 0) { vrele(vp); error = ENOMEM; SDT_PROBE3(vfs, namecache, fullpath, return, error, startvp, NULL); break; } buf[--buflen] = '/'; slash_prefixed = 1; } if (error) return (error); if (!slash_prefixed) { if (buflen == 0) { vrele(vp); counter_u64_add(numfullpathfail4, 1); SDT_PROBE3(vfs, namecache, fullpath, return, ENOMEM, startvp, NULL); return (ENOMEM); } buf[--buflen] = '/'; } counter_u64_add(numfullpathfound, 1); vrele(vp); SDT_PROBE3(vfs, namecache, fullpath, return, 0, startvp, buf + buflen); *retbuf = buf + buflen; return (0); } struct vnode * vn_dir_dd_ino(struct vnode *vp) { struct namecache *ncp; struct vnode *ddvp; struct mtx *vlp; enum vgetstate vs; ASSERT_VOP_LOCKED(vp, "vn_dir_dd_ino"); vlp = VP2VNODELOCK(vp); mtx_lock(vlp); TAILQ_FOREACH(ncp, &(vp->v_cache_dst), nc_dst) { if ((ncp->nc_flag & NCF_ISDOTDOT) != 0) continue; ddvp = ncp->nc_dvp; vs = vget_prep(ddvp); mtx_unlock(vlp); if (vget_finish(ddvp, LK_SHARED | LK_NOWAIT, vs)) return (NULL); return (ddvp); } mtx_unlock(vlp); return (NULL); } int vn_commname(struct vnode *vp, char *buf, u_int buflen) { struct namecache *ncp; struct mtx *vlp; int l; vlp = VP2VNODELOCK(vp); mtx_lock(vlp); TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst) if ((ncp->nc_flag & NCF_ISDOTDOT) == 0) break; if (ncp == NULL) { mtx_unlock(vlp); return (ENOENT); } l = min(ncp->nc_nlen, buflen - 1); memcpy(buf, ncp->nc_name, l); mtx_unlock(vlp); buf[l] = '\0'; return (0); } /* * This function updates path string to vnode's full global path * and checks the size of the new path string against the pathlen argument. * * Requires a locked, referenced vnode. * Vnode is re-locked on success or ENODEV, otherwise unlocked. * * If sysctl debug.disablefullpath is set, ENODEV is returned, * vnode is left locked and path remain untouched. * * If vp is a directory, the call to vn_fullpath_global() always succeeds * because it falls back to the ".." lookup if the namecache lookup fails. */ int vn_path_to_global_path(struct thread *td, struct vnode *vp, char *path, u_int pathlen) { struct nameidata nd; struct vnode *vp1; char *rpath, *fbuf; int error; ASSERT_VOP_ELOCKED(vp, __func__); /* Return ENODEV if sysctl debug.disablefullpath==1 */ if (__predict_false(disablefullpath)) return (ENODEV); /* Construct global filesystem path from vp. */ VOP_UNLOCK(vp); error = vn_fullpath_global(td, vp, &rpath, &fbuf); if (error != 0) { vrele(vp); return (error); } if (strlen(rpath) >= pathlen) { vrele(vp); error = ENAMETOOLONG; goto out; } /* * Re-lookup the vnode by path to detect a possible rename. * As a side effect, the vnode is relocked. * If vnode was renamed, return ENOENT. */ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE, path, td); error = namei(&nd); if (error != 0) { vrele(vp); goto out; } NDFREE(&nd, NDF_ONLY_PNBUF); vp1 = nd.ni_vp; vrele(vp); if (vp1 == vp) strcpy(path, rpath); else { vput(vp1); error = ENOENT; } out: free(fbuf, M_TEMP); return (error); } #ifdef DDB static void db_print_vpath(struct vnode *vp) { while (vp != NULL) { db_printf("%p: ", vp); if (vp == rootvnode) { db_printf("/"); vp = NULL; } else { if (vp->v_vflag & VV_ROOT) { db_printf(""); vp = vp->v_mount->mnt_vnodecovered; } else { struct namecache *ncp; char *ncn; int i; ncp = TAILQ_FIRST(&vp->v_cache_dst); if (ncp != NULL) { ncn = ncp->nc_name; for (i = 0; i < ncp->nc_nlen; i++) db_printf("%c", *ncn++); vp = ncp->nc_dvp; } else { vp = NULL; } } } db_printf("\n"); } return; } DB_SHOW_COMMAND(vpath, db_show_vpath) { struct vnode *vp; if (!have_addr) { db_printf("usage: show vpath \n"); return; } vp = (struct vnode *)addr; db_print_vpath(vp); } #endif Index: projects/clang1000-import/sys/kern/vfs_default.c =================================================================== --- projects/clang1000-import/sys/kern/vfs_default.c (revision 356919) +++ projects/clang1000-import/sys/kern/vfs_default.c (revision 356920) @@ -1,1473 +1,1473 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed * to Berkeley by John Heidemann of the UCLA Ficus project. * * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int vop_nolookup(struct vop_lookup_args *); static int vop_norename(struct vop_rename_args *); static int vop_nostrategy(struct vop_strategy_args *); static int get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf, int dirbuflen, off_t *off, char **cpos, int *len, int *eofflag, struct thread *td); static int dirent_exists(struct vnode *vp, const char *dirname, struct thread *td); #define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4) static int vop_stdis_text(struct vop_is_text_args *ap); static int vop_stdunset_text(struct vop_unset_text_args *ap); static int vop_stdadd_writecount(struct vop_add_writecount_args *ap); static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap); static int vop_stdfdatasync(struct vop_fdatasync_args *ap); static int vop_stdgetpages_async(struct vop_getpages_async_args *ap); /* * This vnode table stores what we want to do if the filesystem doesn't * implement a particular VOP. * * If there is no specific entry here, we will return EOPNOTSUPP. * * Note that every filesystem has to implement either vop_access * or vop_accessx; failing to do so will result in immediate crash * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(), * which calls vop_stdaccess() etc. */ struct vop_vector default_vnodeops = { .vop_default = NULL, .vop_bypass = VOP_EOPNOTSUPP, .vop_access = vop_stdaccess, .vop_accessx = vop_stdaccessx, .vop_advise = vop_stdadvise, .vop_advlock = vop_stdadvlock, .vop_advlockasync = vop_stdadvlockasync, .vop_advlockpurge = vop_stdadvlockpurge, .vop_allocate = vop_stdallocate, .vop_bmap = vop_stdbmap, .vop_close = VOP_NULL, .vop_fsync = VOP_NULL, .vop_fdatasync = vop_stdfdatasync, .vop_getpages = vop_stdgetpages, .vop_getpages_async = vop_stdgetpages_async, .vop_getwritemount = vop_stdgetwritemount, .vop_inactive = VOP_NULL, .vop_need_inactive = vop_stdneed_inactive, .vop_ioctl = vop_stdioctl, .vop_kqfilter = vop_stdkqfilter, .vop_islocked = vop_stdislocked, .vop_lock1 = vop_stdlock, .vop_lookup = vop_nolookup, .vop_open = VOP_NULL, .vop_pathconf = VOP_EINVAL, .vop_poll = vop_nopoll, .vop_putpages = vop_stdputpages, .vop_readlink = VOP_EINVAL, .vop_rename = vop_norename, .vop_revoke = VOP_PANIC, .vop_strategy = vop_nostrategy, .vop_unlock = vop_stdunlock, .vop_vptocnp = vop_stdvptocnp, .vop_vptofh = vop_stdvptofh, .vop_unp_bind = vop_stdunp_bind, .vop_unp_connect = vop_stdunp_connect, .vop_unp_detach = vop_stdunp_detach, .vop_is_text = vop_stdis_text, .vop_set_text = vop_stdset_text, .vop_unset_text = vop_stdunset_text, .vop_add_writecount = vop_stdadd_writecount, .vop_copy_file_range = vop_stdcopy_file_range, }; VFS_VOP_VECTOR_REGISTER(default_vnodeops); /* * Series of placeholder functions for various error returns for * VOPs. */ int vop_eopnotsupp(struct vop_generic_args *ap) { /* printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name); */ return (EOPNOTSUPP); } int vop_ebadf(struct vop_generic_args *ap) { return (EBADF); } int vop_enotty(struct vop_generic_args *ap) { return (ENOTTY); } int vop_einval(struct vop_generic_args *ap) { return (EINVAL); } int vop_enoent(struct vop_generic_args *ap) { return (ENOENT); } int vop_null(struct vop_generic_args *ap) { return (0); } /* * Helper function to panic on some bad VOPs in some filesystems. */ int vop_panic(struct vop_generic_args *ap) { panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name); } /* * vop_std and vop_no are default functions for use by * filesystems that need the "default reasonable" implementation for a * particular operation. * * The documentation for the operations they implement exists (if it exists) * in the VOP_(9) manpage (all uppercase). */ /* * Default vop for filesystems that do not support name lookup */ static int vop_nolookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { *ap->a_vpp = NULL; return (ENOTDIR); } /* * vop_norename: * * Handle unlock and reference counting for arguments of vop_rename * for filesystems that do not implement rename operation. */ static int vop_norename(struct vop_rename_args *ap) { vop_rename_fail(ap); return (EOPNOTSUPP); } /* * vop_nostrategy: * * Strategy routine for VFS devices that have none. * * BIO_ERROR and B_INVAL must be cleared prior to calling any strategy * routine. Typically this is done for a BIO_READ strategy call. * Typically B_INVAL is assumed to already be clear prior to a write * and should not be cleared manually unless you just made the buffer * invalid. BIO_ERROR should be cleared either way. */ static int vop_nostrategy (struct vop_strategy_args *ap) { printf("No strategy for buffer at %p\n", ap->a_bp); vn_printf(ap->a_vp, "vnode "); ap->a_bp->b_ioflags |= BIO_ERROR; ap->a_bp->b_error = EOPNOTSUPP; bufdone(ap->a_bp); return (EOPNOTSUPP); } static int get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf, int dirbuflen, off_t *off, char **cpos, int *len, int *eofflag, struct thread *td) { int error, reclen; struct uio uio; struct iovec iov; struct dirent *dp; KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp)); KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp)); if (*len == 0) { iov.iov_base = dirbuf; iov.iov_len = dirbuflen; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = *off; uio.uio_resid = dirbuflen; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_td = td; *eofflag = 0; #ifdef MAC error = mac_vnode_check_readdir(td->td_ucred, vp); if (error == 0) #endif error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag, NULL, NULL); if (error) return (error); *off = uio.uio_offset; *cpos = dirbuf; *len = (dirbuflen - uio.uio_resid); if (*len == 0) return (ENOENT); } dp = (struct dirent *)(*cpos); reclen = dp->d_reclen; *dpp = dp; /* check for malformed directory.. */ if (reclen < DIRENT_MINSIZE) return (EINVAL); *cpos += reclen; *len -= reclen; return (0); } /* * Check if a named file exists in a given directory vnode. */ static int dirent_exists(struct vnode *vp, const char *dirname, struct thread *td) { char *dirbuf, *cpos; int error, eofflag, dirbuflen, len, found; off_t off; struct dirent *dp; struct vattr va; KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp)); KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp)); found = 0; error = VOP_GETATTR(vp, &va, td->td_ucred); if (error) return (found); dirbuflen = DEV_BSIZE; if (dirbuflen < va.va_blocksize) dirbuflen = va.va_blocksize; dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK); off = 0; len = 0; do { error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off, &cpos, &len, &eofflag, td); if (error) goto out; if (dp->d_type != DT_WHT && dp->d_fileno != 0 && strcmp(dp->d_name, dirname) == 0) { found = 1; goto out; } } while (len > 0 || !eofflag); out: free(dirbuf, M_TEMP); return (found); } int vop_stdaccess(struct vop_access_args *ap) { KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0, ("invalid bit in accmode")); return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td)); } int vop_stdaccessx(struct vop_accessx_args *ap) { int error; accmode_t accmode = ap->a_accmode; error = vfs_unixify_accmode(&accmode); if (error != 0) return (error); if (accmode == 0) return (0); return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td)); } /* * Advisory record locking support */ int vop_stdadvlock(struct vop_advlock_args *ap) { struct vnode *vp; struct vattr vattr; int error; vp = ap->a_vp; if (ap->a_fl->l_whence == SEEK_END) { /* * The NFSv4 server must avoid doing a vn_lock() here, since it * can deadlock the nfsd threads, due to a LOR. Fortunately * the NFSv4 server always uses SEEK_SET and this code is * only required for the SEEK_END case. */ vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &vattr, curthread->td_ucred); VOP_UNLOCK(vp); if (error) return (error); } else vattr.va_size = 0; return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size)); } int vop_stdadvlockasync(struct vop_advlockasync_args *ap) { struct vnode *vp; struct vattr vattr; int error; vp = ap->a_vp; if (ap->a_fl->l_whence == SEEK_END) { /* The size argument is only needed for SEEK_END. */ vn_lock(vp, LK_SHARED | LK_RETRY); error = VOP_GETATTR(vp, &vattr, curthread->td_ucred); VOP_UNLOCK(vp); if (error) return (error); } else vattr.va_size = 0; return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size)); } int vop_stdadvlockpurge(struct vop_advlockpurge_args *ap) { struct vnode *vp; vp = ap->a_vp; lf_purgelocks(vp, &vp->v_lockf); return (0); } /* * vop_stdpathconf: * * Standard implementation of POSIX pathconf, to get information about limits * for a filesystem. * Override per filesystem for the case where the filesystem has smaller * limits. */ int vop_stdpathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { switch (ap->a_name) { case _PC_ASYNC_IO: *ap->a_retval = _POSIX_ASYNCHRONOUS_IO; return (0); case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; return (0); case _PC_ACL_EXTENDED: case _PC_ACL_NFS4: case _PC_CAP_PRESENT: case _PC_INF_PRESENT: case _PC_MAC_PRESENT: *ap->a_retval = 0; return (0); default: return (EINVAL); } /* NOTREACHED */ } /* * Standard lock, unlock and islocked functions. */ int vop_stdlock(ap) struct vop_lock1_args /* { struct vnode *a_vp; int a_flags; char *file; int line; } */ *ap; { struct vnode *vp = ap->a_vp; struct mtx *ilk; ilk = VI_MTX(vp); return (lockmgr_lock_fast_path(vp->v_vnlock, ap->a_flags, &ilk->lock_object, ap->a_file, ap->a_line)); } /* See above. */ int vop_stdunlock(ap) struct vop_unlock_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; - return (lockmgr_unlock_fast_path(vp->v_vnlock, 0, NULL)); + return (lockmgr_unlock(vp->v_vnlock)); } /* See above. */ int vop_stdislocked(ap) struct vop_islocked_args /* { struct vnode *a_vp; } */ *ap; { return (lockstatus(ap->a_vp->v_vnlock)); } /* * Variants of the above set. * * Differences are: * - shared locking disablement is not supported * - v_vnlock pointer is not honored */ int vop_lock(ap) struct vop_lock1_args /* { struct vnode *a_vp; int a_flags; char *file; int line; } */ *ap; { struct vnode *vp = ap->a_vp; int flags = ap->a_flags; struct mtx *ilk; MPASS(vp->v_vnlock == &vp->v_lock); if (__predict_false((flags & ~(LK_TYPE_MASK | LK_NODDLKTREAT | LK_RETRY)) != 0)) goto other; switch (flags & LK_TYPE_MASK) { case LK_SHARED: return (lockmgr_slock(&vp->v_lock, flags, ap->a_file, ap->a_line)); case LK_EXCLUSIVE: return (lockmgr_xlock(&vp->v_lock, flags, ap->a_file, ap->a_line)); } other: ilk = VI_MTX(vp); return (lockmgr_lock_fast_path(&vp->v_lock, flags, &ilk->lock_object, ap->a_file, ap->a_line)); } int vop_unlock(ap) struct vop_unlock_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; MPASS(vp->v_vnlock == &vp->v_lock); return (lockmgr_unlock(&vp->v_lock)); } int vop_islocked(ap) struct vop_islocked_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; MPASS(vp->v_vnlock == &vp->v_lock); return (lockstatus(&vp->v_lock)); } /* * Return true for select/poll. */ int vop_nopoll(ap) struct vop_poll_args /* { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct thread *a_td; } */ *ap; { return (poll_no_poll(ap->a_events)); } /* * Implement poll for local filesystems that support it. */ int vop_stdpoll(ap) struct vop_poll_args /* { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct thread *a_td; } */ *ap; { if (ap->a_events & ~POLLSTANDARD) return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events)); return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); } /* * Return our mount point, as we will take charge of the writes. */ int vop_stdgetwritemount(ap) struct vop_getwritemount_args /* { struct vnode *a_vp; struct mount **a_mpp; } */ *ap; { struct mount *mp; struct vnode *vp; /* * Note that having a reference does not prevent forced unmount from * setting ->v_mount to NULL after the lock gets released. This is of * no consequence for typical consumers (most notably vn_start_write) * since in this case the vnode is VIRF_DOOMED. Unmount might have * progressed far enough that its completion is only delayed by the * reference obtained here. The consumer only needs to concern itself * with releasing it. */ vp = ap->a_vp; mp = vp->v_mount; if (mp == NULL) { *(ap->a_mpp) = NULL; return (0); } if (vfs_op_thread_enter(mp)) { if (mp == vp->v_mount) { vfs_mp_count_add_pcpu(mp, ref, 1); vfs_op_thread_exit(mp); } else { vfs_op_thread_exit(mp); mp = NULL; } } else { MNT_ILOCK(mp); if (mp == vp->v_mount) { MNT_REF(mp); MNT_IUNLOCK(mp); } else { MNT_IUNLOCK(mp); mp = NULL; } } *(ap->a_mpp) = mp; return (0); } /* * If the file system doesn't implement VOP_BMAP, then return sensible defaults: * - Return the vnode's bufobj instead of any underlying device's bufobj * - Calculate the physical block number as if there were equal size * consecutive blocks, but * - Report no contiguous runs of blocks. */ int vop_stdbmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct bufobj **a_bop; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { if (ap->a_bop != NULL) *ap->a_bop = &ap->a_vp->v_bufobj; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize); if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } int vop_stdfsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; int a_waitfor; struct thread *a_td; } */ *ap; { return (vn_fsync_buf(ap->a_vp, ap->a_waitfor)); } static int vop_stdfdatasync(struct vop_fdatasync_args *ap) { return (VOP_FSYNC(ap->a_vp, MNT_WAIT, ap->a_td)); } int vop_stdfdatasync_buf(struct vop_fdatasync_args *ap) { return (vn_fsync_buf(ap->a_vp, MNT_WAIT)); } /* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */ int vop_stdgetpages(ap) struct vop_getpages_args /* { struct vnode *a_vp; vm_page_t *a_m; int a_count; int *a_rbehind; int *a_rahead; } */ *ap; { return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead, NULL, NULL); } static int vop_stdgetpages_async(struct vop_getpages_async_args *ap) { int error; error = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead); ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error); return (error); } int vop_stdkqfilter(struct vop_kqfilter_args *ap) { return vfs_kqfilter(ap); } /* XXX Needs good comment and more info in the manpage (VOP_PUTPAGES(9)). */ int vop_stdputpages(ap) struct vop_putpages_args /* { struct vnode *a_vp; vm_page_t *a_m; int a_count; int a_sync; int *a_rtvals; } */ *ap; { return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, ap->a_rtvals); } int vop_stdvptofh(struct vop_vptofh_args *ap) { return (EOPNOTSUPP); } int vop_stdvptocnp(struct vop_vptocnp_args *ap) { struct vnode *vp = ap->a_vp; struct vnode **dvp = ap->a_vpp; struct ucred *cred = ap->a_cred; char *buf = ap->a_buf; int *buflen = ap->a_buflen; char *dirbuf, *cpos; int i, error, eofflag, dirbuflen, flags, locked, len, covered; off_t off; ino_t fileno; struct vattr va; struct nameidata nd; struct thread *td; struct dirent *dp; struct vnode *mvp; i = *buflen; error = 0; covered = 0; td = curthread; if (vp->v_type != VDIR) return (ENOENT); error = VOP_GETATTR(vp, &va, cred); if (error) return (error); VREF(vp); locked = VOP_ISLOCKED(vp); VOP_UNLOCK(vp); NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, "..", vp, td); flags = FREAD; error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL); if (error) { vn_lock(vp, locked | LK_RETRY); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); mvp = *dvp = nd.ni_vp; if (vp->v_mount != (*dvp)->v_mount && ((*dvp)->v_vflag & VV_ROOT) && ((*dvp)->v_mount->mnt_flag & MNT_UNION)) { *dvp = (*dvp)->v_mount->mnt_vnodecovered; VREF(mvp); VOP_UNLOCK(mvp); vn_close(mvp, FREAD, cred, td); VREF(*dvp); vn_lock(*dvp, LK_SHARED | LK_RETRY); covered = 1; } fileno = va.va_fileid; dirbuflen = DEV_BSIZE; if (dirbuflen < va.va_blocksize) dirbuflen = va.va_blocksize; dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK); if ((*dvp)->v_type != VDIR) { error = ENOENT; goto out; } off = 0; len = 0; do { /* call VOP_READDIR of parent */ error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off, &cpos, &len, &eofflag, td); if (error) goto out; if ((dp->d_type != DT_WHT) && (dp->d_fileno == fileno)) { if (covered) { VOP_UNLOCK(*dvp); vn_lock(mvp, LK_SHARED | LK_RETRY); if (dirent_exists(mvp, dp->d_name, td)) { error = ENOENT; VOP_UNLOCK(mvp); vn_lock(*dvp, LK_SHARED | LK_RETRY); goto out; } VOP_UNLOCK(mvp); vn_lock(*dvp, LK_SHARED | LK_RETRY); } i -= dp->d_namlen; if (i < 0) { error = ENOMEM; goto out; } if (dp->d_namlen == 1 && dp->d_name[0] == '.') { error = ENOENT; } else { bcopy(dp->d_name, buf + i, dp->d_namlen); error = 0; } goto out; } } while (len > 0 || !eofflag); error = ENOENT; out: free(dirbuf, M_TEMP); if (!error) { *buflen = i; vref(*dvp); } if (covered) { vput(*dvp); vrele(mvp); } else { VOP_UNLOCK(mvp); vn_close(mvp, FREAD, cred, td); } vn_lock(vp, locked | LK_RETRY); return (error); } int vop_stdallocate(struct vop_allocate_args *ap) { #ifdef __notyet__ struct statfs *sfs; off_t maxfilesize = 0; #endif struct iovec aiov; struct vattr vattr, *vap; struct uio auio; off_t fsize, len, cur, offset; uint8_t *buf; struct thread *td; struct vnode *vp; size_t iosize; int error; buf = NULL; error = 0; td = curthread; vap = &vattr; vp = ap->a_vp; len = *ap->a_len; offset = *ap->a_offset; error = VOP_GETATTR(vp, vap, td->td_ucred); if (error != 0) goto out; fsize = vap->va_size; iosize = vap->va_blocksize; if (iosize == 0) iosize = BLKDEV_IOSIZE; if (iosize > MAXPHYS) iosize = MAXPHYS; buf = malloc(iosize, M_TEMP, M_WAITOK); #ifdef __notyet__ /* * Check if the filesystem sets f_maxfilesize; if not use * VOP_SETATTR to perform the check. */ sfs = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = VFS_STATFS(vp->v_mount, sfs, td); if (error == 0) maxfilesize = sfs->f_maxfilesize; free(sfs, M_STATFS); if (error != 0) goto out; if (maxfilesize) { if (offset > maxfilesize || len > maxfilesize || offset + len > maxfilesize) { error = EFBIG; goto out; } } else #endif if (offset + len > vap->va_size) { /* * Test offset + len against the filesystem's maxfilesize. */ VATTR_NULL(vap); vap->va_size = offset + len; error = VOP_SETATTR(vp, vap, td->td_ucred); if (error != 0) goto out; VATTR_NULL(vap); vap->va_size = fsize; error = VOP_SETATTR(vp, vap, td->td_ucred); if (error != 0) goto out; } for (;;) { /* * Read and write back anything below the nominal file * size. There's currently no way outside the filesystem * to know whether this area is sparse or not. */ cur = iosize; if ((offset % iosize) != 0) cur -= (offset % iosize); if (cur > len) cur = len; if (offset < fsize) { aiov.iov_base = buf; aiov.iov_len = cur; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = offset; auio.uio_resid = cur; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_td = td; error = VOP_READ(vp, &auio, 0, td->td_ucred); if (error != 0) break; if (auio.uio_resid > 0) { bzero(buf + cur - auio.uio_resid, auio.uio_resid); } } else { bzero(buf, cur); } aiov.iov_base = buf; aiov.iov_len = cur; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = offset; auio.uio_resid = cur; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_WRITE; auio.uio_td = td; error = VOP_WRITE(vp, &auio, 0, td->td_ucred); if (error != 0) break; len -= cur; offset += cur; if (len == 0) break; if (should_yield()) break; } out: *ap->a_len = len; *ap->a_offset = offset; free(buf, M_TEMP); return (error); } int vop_stdadvise(struct vop_advise_args *ap) { struct vnode *vp; struct bufobj *bo; daddr_t startn, endn; off_t bstart, bend, start, end; int bsize, error; vp = ap->a_vp; switch (ap->a_advice) { case POSIX_FADV_WILLNEED: /* * Do nothing for now. Filesystems should provide a * custom method which starts an asynchronous read of * the requested region. */ error = 0; break; case POSIX_FADV_DONTNEED: error = 0; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (VN_IS_DOOMED(vp)) { VOP_UNLOCK(vp); break; } /* * Round to block boundaries (and later possibly further to * page boundaries). Applications cannot reasonably be aware * of the boundaries, and the rounding must be to expand at * both extremities to cover enough. It still doesn't cover * read-ahead. For partial blocks, this gives unnecessary * discarding of buffers but is efficient enough since the * pages usually remain in VMIO for some time. */ bsize = vp->v_bufobj.bo_bsize; bstart = rounddown(ap->a_start, bsize); bend = roundup(ap->a_end, bsize); /* * Deactivate pages in the specified range from the backing VM * object. Pages that are resident in the buffer cache will * remain wired until their corresponding buffers are released * below. */ if (vp->v_object != NULL) { start = trunc_page(bstart); end = round_page(bend); VM_OBJECT_RLOCK(vp->v_object); vm_object_page_noreuse(vp->v_object, OFF_TO_IDX(start), OFF_TO_IDX(end)); VM_OBJECT_RUNLOCK(vp->v_object); } bo = &vp->v_bufobj; BO_RLOCK(bo); startn = bstart / bsize; endn = bend / bsize; error = bnoreuselist(&bo->bo_clean, bo, startn, endn); if (error == 0) error = bnoreuselist(&bo->bo_dirty, bo, startn, endn); BO_RUNLOCK(bo); VOP_UNLOCK(vp); break; default: error = EINVAL; break; } return (error); } int vop_stdunp_bind(struct vop_unp_bind_args *ap) { ap->a_vp->v_unpcb = ap->a_unpcb; return (0); } int vop_stdunp_connect(struct vop_unp_connect_args *ap) { *ap->a_unpcb = ap->a_vp->v_unpcb; return (0); } int vop_stdunp_detach(struct vop_unp_detach_args *ap) { ap->a_vp->v_unpcb = NULL; return (0); } static int vop_stdis_text(struct vop_is_text_args *ap) { return (ap->a_vp->v_writecount < 0); } int vop_stdset_text(struct vop_set_text_args *ap) { struct vnode *vp; struct mount *mp; int error; vp = ap->a_vp; VI_LOCK(vp); if (vp->v_writecount > 0) { error = ETXTBSY; } else { /* * If requested by fs, keep a use reference to the * vnode until the last text reference is released. */ mp = vp->v_mount; if (mp != NULL && (mp->mnt_kern_flag & MNTK_TEXT_REFS) != 0 && vp->v_writecount == 0) { vp->v_iflag |= VI_TEXT_REF; vrefl(vp); } vp->v_writecount--; error = 0; } VI_UNLOCK(vp); return (error); } static int vop_stdunset_text(struct vop_unset_text_args *ap) { struct vnode *vp; int error; bool last; vp = ap->a_vp; last = false; VI_LOCK(vp); if (vp->v_writecount < 0) { if ((vp->v_iflag & VI_TEXT_REF) != 0 && vp->v_writecount == -1) { last = true; vp->v_iflag &= ~VI_TEXT_REF; } vp->v_writecount++; error = 0; } else { error = EINVAL; } VI_UNLOCK(vp); if (last) vunref(vp); return (error); } static int vop_stdadd_writecount(struct vop_add_writecount_args *ap) { struct vnode *vp; struct mount *mp; int error; vp = ap->a_vp; VI_LOCK_FLAGS(vp, MTX_DUPOK); if (vp->v_writecount < 0) { error = ETXTBSY; } else { VNASSERT(vp->v_writecount + ap->a_inc >= 0, vp, ("neg writecount increment %d", ap->a_inc)); if (vp->v_writecount == 0) { mp = vp->v_mount; if (mp != NULL && (mp->mnt_kern_flag & MNTK_NOMSYNC) == 0) vlazy(vp); } vp->v_writecount += ap->a_inc; error = 0; } VI_UNLOCK(vp); return (error); } int vop_stdneed_inactive(struct vop_need_inactive_args *ap) { return (1); } int vop_stdioctl(struct vop_ioctl_args *ap) { struct vnode *vp; struct vattr va; off_t *offp; int error; switch (ap->a_command) { case FIOSEEKDATA: case FIOSEEKHOLE: vp = ap->a_vp; error = vn_lock(vp, LK_SHARED); if (error != 0) return (EBADF); if (vp->v_type == VREG) error = VOP_GETATTR(vp, &va, ap->a_cred); else error = ENOTTY; if (error == 0) { offp = ap->a_data; if (*offp < 0 || *offp >= va.va_size) error = ENXIO; else if (ap->a_command == FIOSEEKHOLE) *offp = va.va_size; } VOP_UNLOCK(vp); break; default: error = ENOTTY; break; } return (error); } /* * vfs default ops * used to fill the vfs function table to get reasonable default return values. */ int vfs_stdroot (mp, flags, vpp) struct mount *mp; int flags; struct vnode **vpp; { return (EOPNOTSUPP); } int vfs_stdstatfs (mp, sbp) struct mount *mp; struct statfs *sbp; { return (EOPNOTSUPP); } int vfs_stdquotactl (mp, cmds, uid, arg) struct mount *mp; int cmds; uid_t uid; void *arg; { return (EOPNOTSUPP); } int vfs_stdsync(mp, waitfor) struct mount *mp; int waitfor; { struct vnode *vp, *mvp; struct thread *td; int error, lockreq, allerror = 0; td = curthread; lockreq = LK_EXCLUSIVE | LK_INTERLOCK; if (waitfor != MNT_WAIT) lockreq |= LK_NOWAIT; /* * Force stale buffer cache information to be flushed. */ loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { if (vp->v_bufobj.bo_dirty.bv_cnt == 0) { VI_UNLOCK(vp); continue; } if ((error = vget(vp, lockreq, td)) != 0) { if (error == ENOENT) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } continue; } error = VOP_FSYNC(vp, waitfor, td); if (error) allerror = error; vput(vp); } return (allerror); } int vfs_stdnosync (mp, waitfor) struct mount *mp; int waitfor; { return (0); } static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap) { int error; error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp, ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags, ap->a_incred, ap->a_outcred, ap->a_fsizetd); return (error); } int vfs_stdvget (mp, ino, flags, vpp) struct mount *mp; ino_t ino; int flags; struct vnode **vpp; { return (EOPNOTSUPP); } int vfs_stdfhtovp (mp, fhp, flags, vpp) struct mount *mp; struct fid *fhp; int flags; struct vnode **vpp; { return (EOPNOTSUPP); } int vfs_stdinit (vfsp) struct vfsconf *vfsp; { return (0); } int vfs_stduninit (vfsp) struct vfsconf *vfsp; { return(0); } int vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname) struct mount *mp; int cmd; struct vnode *filename_vp; int attrnamespace; const char *attrname; { if (filename_vp != NULL) VOP_UNLOCK(filename_vp); return (EOPNOTSUPP); } int vfs_stdsysctl(mp, op, req) struct mount *mp; fsctlop_t op; struct sysctl_req *req; { return (EOPNOTSUPP); } static vop_bypass_t * bp_by_off(struct vop_vector *vop, struct vop_generic_args *a) { return (*(vop_bypass_t **)((char *)vop + a->a_desc->vdesc_vop_offset)); } int vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a) { vop_bypass_t *bp; int prev_stops, rc; for (; vop != NULL; vop = vop->vop_default) { bp = bp_by_off(vop, a); if (bp != NULL) break; /* * Bypass is not really supported. It is done for * fallback to unimplemented vops in the default * vector. */ bp = vop->vop_bypass; if (bp != NULL) break; } MPASS(bp != NULL); prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); rc = bp(a); sigallowstop(prev_stops); return (rc); } Index: projects/clang1000-import/sys/kern/vfs_subr.c =================================================================== --- projects/clang1000-import/sys/kern/vfs_subr.c (revision 356919) +++ projects/clang1000-import/sys/kern/vfs_subr.c (revision 356920) @@ -1,6291 +1,6341 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 */ /* * External virtual filesystem routines */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif static void delmntque(struct vnode *vp); static int flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, int slptimeo); static void syncer_shutdown(void *arg, int howto); static int vtryrecycle(struct vnode *vp); static void v_init_counters(struct vnode *); static void v_incr_devcount(struct vnode *); static void v_decr_devcount(struct vnode *); static void vgonel(struct vnode *); static void vfs_knllock(void *arg); static void vfs_knlunlock(void *arg); static void vfs_knl_assert_locked(void *arg); static void vfs_knl_assert_unlocked(void *arg); static void destroy_vpollinfo(struct vpollinfo *vi); static int v_inval_buf_range_locked(struct vnode *vp, struct bufobj *bo, daddr_t startlbn, daddr_t endlbn); static void vnlru_recalc(void); /* * These fences are intended for cases where some synchronization is * needed between access of v_iflags and lockless vnode refcount (v_holdcnt * and v_usecount) updates. Access to v_iflags is generally synchronized * by the interlock, but we have some internal assertions that check vnode * flags without acquiring the lock. Thus, these fences are INVARIANTS-only * for now. */ #ifdef INVARIANTS #define VNODE_REFCOUNT_FENCE_ACQ() atomic_thread_fence_acq() #define VNODE_REFCOUNT_FENCE_REL() atomic_thread_fence_rel() #else #define VNODE_REFCOUNT_FENCE_ACQ() #define VNODE_REFCOUNT_FENCE_REL() #endif /* * Number of vnodes in existence. Increased whenever getnewvnode() * allocates a new vnode, decreased in vdropl() for VIRF_DOOMED vnode. */ static u_long __exclusive_cache_line numvnodes; SYSCTL_ULONG(_vfs, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "Number of vnodes in existence"); static counter_u64_t vnodes_created; SYSCTL_COUNTER_U64(_vfs, OID_AUTO, vnodes_created, CTLFLAG_RD, &vnodes_created, "Number of vnodes created by getnewvnode"); /* * Conversion tables for conversion from vnode types to inode formats * and back. */ enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON }; int vttoif_tab[10] = { 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFSOCK, S_IFIFO, S_IFMT, S_IFMT }; /* * List of allocates vnodes in the system. */ static TAILQ_HEAD(freelst, vnode) vnode_list; static struct vnode *vnode_list_free_marker; static struct vnode *vnode_list_reclaim_marker; /* * "Free" vnode target. Free vnodes are rarely completely free, but are * just ones that are cheap to recycle. Usually they are for files which * have been stat'd but not read; these usually have inode and namecache * data attached to them. This target is the preferred minimum size of a * sub-cache consisting mostly of such files. The system balances the size * of this sub-cache with its complement to try to prevent either from * thrashing while the other is relatively inactive. The targets express * a preference for the best balance. * * "Above" this target there are 2 further targets (watermarks) related * to recyling of free vnodes. In the best-operating case, the cache is * exactly full, the free list has size between vlowat and vhiwat above the * free target, and recycling from it and normal use maintains this state. * Sometimes the free list is below vlowat or even empty, but this state * is even better for immediate use provided the cache is not full. * Otherwise, vnlru_proc() runs to reclaim enough vnodes (usually non-free * ones) to reach one of these states. The watermarks are currently hard- * coded as 4% and 9% of the available space higher. These and the default * of 25% for wantfreevnodes are too large if the memory size is large. * E.g., 9% of 75% of MAXVNODES is more than 566000 vnodes to reclaim * whenever vnlru_proc() becomes active. */ -static u_long wantfreevnodes; -static u_long __exclusive_cache_line freevnodes; +static long wantfreevnodes; +static long __exclusive_cache_line freevnodes; SYSCTL_ULONG(_vfs, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "Number of \"free\" vnodes"); +static long freevnodes_old; static counter_u64_t recycles_count; SYSCTL_COUNTER_U64(_vfs, OID_AUTO, recycles, CTLFLAG_RD, &recycles_count, "Number of vnodes recycled to meet vnode cache targets"); static counter_u64_t recycles_free_count; SYSCTL_COUNTER_U64(_vfs, OID_AUTO, recycles_free, CTLFLAG_RD, &recycles_free_count, "Number of free vnodes recycled to meet vnode cache targets"); /* * Various variables used for debugging the new implementation of * reassignbuf(). * XXX these are probably of (very) limited utility now. */ static int reassignbufcalls; SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW | CTLFLAG_STATS, &reassignbufcalls, 0, "Number of calls to reassignbuf"); static counter_u64_t deferred_inact; SYSCTL_COUNTER_U64(_vfs, OID_AUTO, deferred_inact, CTLFLAG_RD, &deferred_inact, "Number of times inactive processing was deferred"); /* To keep more than one thread at a time from running vfs_getnewfsid */ static struct mtx mntid_mtx; /* * Lock for any access to the following: * vnode_list * numvnodes * freevnodes */ static struct mtx __exclusive_cache_line vnode_list_mtx; /* Publicly exported FS */ struct nfs_public nfs_pub; static uma_zone_t buf_trie_zone; /* Zone for allocation of new vnodes - used exclusively by getnewvnode() */ static uma_zone_t vnode_zone; static uma_zone_t vnodepoll_zone; /* * The workitem queue. * * It is useful to delay writes of file data and filesystem metadata * for tens of seconds so that quickly created and deleted files need * not waste disk bandwidth being created and removed. To realize this, * we append vnodes to a "workitem" queue. When running with a soft * updates implementation, most pending metadata dependencies should * not wait for more than a few seconds. Thus, mounted on block devices * are delayed only about a half the time that file data is delayed. * Similarly, directory updates are more critical, so are only delayed * about a third the time that file data is delayed. Thus, there are * SYNCER_MAXDELAY queues that are processed round-robin at a rate of * one each second (driven off the filesystem syncer process). The * syncer_delayno variable indicates the next queue that is to be processed. * Items that need to be processed soon are placed in this queue: * * syncer_workitem_pending[syncer_delayno] * * A delay of fifteen seconds is done by placing the request fifteen * entries later in the queue: * * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] * */ static int syncer_delayno; static long syncer_mask; LIST_HEAD(synclist, bufobj); static struct synclist *syncer_workitem_pending; /* * The sync_mtx protects: * bo->bo_synclist * sync_vnode_count * syncer_delayno * syncer_state * syncer_workitem_pending * syncer_worklist_len * rushjob */ static struct mtx sync_mtx; static struct cv sync_wakeup; #define SYNCER_MAXDELAY 32 static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ static int syncdelay = 30; /* max time to delay syncing data */ static int filedelay = 30; /* time to delay syncing files */ SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "Time to delay syncing files (in seconds)"); static int dirdelay = 29; /* time to delay syncing directories */ SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "Time to delay syncing directories (in seconds)"); static int metadelay = 28; /* time to delay syncing metadata */ SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "Time to delay syncing metadata (in seconds)"); static int rushjob; /* number of slots to run ASAP */ static int stat_rush_requests; /* number of times I/O speeded up */ SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "Number of times I/O speeded up (rush requests)"); #define VDBATCH_SIZE 8 struct vdbatch { u_int index; + long freevnodes; struct mtx lock; struct vnode *tab[VDBATCH_SIZE]; }; DPCPU_DEFINE_STATIC(struct vdbatch, vd); static void vdbatch_dequeue(struct vnode *vp); /* * When shutting down the syncer, run it at four times normal speed. */ #define SYNCER_SHUTDOWN_SPEEDUP 4 static int sync_vnode_count; static int syncer_worklist_len; static enum { SYNCER_RUNNING, SYNCER_SHUTTING_DOWN, SYNCER_FINAL_DELAY } syncer_state; /* Target for maximum number of vnodes. */ u_long desiredvnodes; static u_long gapvnodes; /* gap between wanted and desired */ static u_long vhiwat; /* enough extras after expansion */ static u_long vlowat; /* minimal extras before expansion */ static u_long vstir; /* nonzero to stir non-free vnodes */ static volatile int vsmalltrigger = 8; /* pref to keep if > this many pages */ +static u_long vnlru_read_freevnodes(void); + /* * Note that no attempt is made to sanitize these parameters. */ static int sysctl_maxvnodes(SYSCTL_HANDLER_ARGS) { u_long val; int error; val = desiredvnodes; error = sysctl_handle_long(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (val == desiredvnodes) return (0); mtx_lock(&vnode_list_mtx); desiredvnodes = val; wantfreevnodes = desiredvnodes / 4; vnlru_recalc(); mtx_unlock(&vnode_list_mtx); /* * XXX There is no protection against multiple threads changing * desiredvnodes at the same time. Locking above only helps vnlru and * getnewvnode. */ vfs_hash_changesize(desiredvnodes); cache_changesize(desiredvnodes); return (0); } SYSCTL_PROC(_kern, KERN_MAXVNODES, maxvnodes, CTLTYPE_ULONG | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, sysctl_maxvnodes, "UL", "Target for maximum number of vnodes"); static int sysctl_wantfreevnodes(SYSCTL_HANDLER_ARGS) { u_long val; int error; val = wantfreevnodes; error = sysctl_handle_long(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (val == wantfreevnodes) return (0); mtx_lock(&vnode_list_mtx); wantfreevnodes = val; vnlru_recalc(); mtx_unlock(&vnode_list_mtx); return (0); } SYSCTL_PROC(_vfs, OID_AUTO, wantfreevnodes, CTLTYPE_ULONG | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, sysctl_wantfreevnodes, "UL", "Target for minimum number of \"free\" vnodes"); SYSCTL_ULONG(_kern, OID_AUTO, minvnodes, CTLFLAG_RW, &wantfreevnodes, 0, "Old name for vfs.wantfreevnodes (legacy)"); static int vnlru_nowhere; SYSCTL_INT(_debug, OID_AUTO, vnlru_nowhere, CTLFLAG_RW, &vnlru_nowhere, 0, "Number of times the vnlru process ran without success"); static int sysctl_try_reclaim_vnode(SYSCTL_HANDLER_ARGS) { struct vnode *vp; struct nameidata nd; char *buf; unsigned long ndflags; int error; if (req->newptr == NULL) return (EINVAL); if (req->newlen >= PATH_MAX) return (E2BIG); buf = malloc(PATH_MAX, M_TEMP, M_WAITOK); error = SYSCTL_IN(req, buf, req->newlen); if (error != 0) goto out; buf[req->newlen] = '\0'; ndflags = LOCKLEAF | NOFOLLOW | AUDITVNODE1 | NOCACHE | SAVENAME; NDINIT(&nd, LOOKUP, ndflags, UIO_SYSSPACE, buf, curthread); if ((error = namei(&nd)) != 0) goto out; vp = nd.ni_vp; if (VN_IS_DOOMED(vp)) { /* * This vnode is being recycled. Return != 0 to let the caller * know that the sysctl had no effect. Return EAGAIN because a * subsequent call will likely succeed (since namei will create * a new vnode if necessary) */ error = EAGAIN; goto putvnode; } counter_u64_add(recycles_count, 1); vgone(vp); putvnode: NDFREE(&nd, 0); out: free(buf, M_TEMP); return (error); } static int sysctl_ftry_reclaim_vnode(SYSCTL_HANDLER_ARGS) { struct thread *td = curthread; struct vnode *vp; struct file *fp; int error; int fd; if (req->newptr == NULL) return (EBADF); error = sysctl_handle_int(oidp, &fd, 0, req); if (error != 0) return (error); error = getvnode(curthread, fd, &cap_fcntl_rights, &fp); if (error != 0) return (error); vp = fp->f_vnode; error = vn_lock(vp, LK_EXCLUSIVE); if (error != 0) goto drop; counter_u64_add(recycles_count, 1); vgone(vp); VOP_UNLOCK(vp); drop: fdrop(fp, td); return (error); } SYSCTL_PROC(_debug, OID_AUTO, try_reclaim_vnode, CTLTYPE_STRING | CTLFLAG_MPSAFE | CTLFLAG_WR, NULL, 0, sysctl_try_reclaim_vnode, "A", "Try to reclaim a vnode by its pathname"); SYSCTL_PROC(_debug, OID_AUTO, ftry_reclaim_vnode, CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_WR, NULL, 0, sysctl_ftry_reclaim_vnode, "I", "Try to reclaim a vnode by its file descriptor"); /* Shift count for (uintptr_t)vp to initialize vp->v_hash. */ static int vnsz2log; /* * Support for the bufobj clean & dirty pctrie. */ static void * buf_trie_alloc(struct pctrie *ptree) { return uma_zalloc(buf_trie_zone, M_NOWAIT); } static void buf_trie_free(struct pctrie *ptree, void *node) { uma_zfree(buf_trie_zone, node); } PCTRIE_DEFINE(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free); /* * Initialize the vnode management data structures. * * Reevaluate the following cap on the number of vnodes after the physical * memory size exceeds 512GB. In the limit, as the physical memory size * grows, the ratio of the memory size in KB to vnodes approaches 64:1. */ #ifndef MAXVNODES_MAX #define MAXVNODES_MAX (512UL * 1024 * 1024 / 64) /* 8M */ #endif static MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker"); static struct vnode * vn_alloc_marker(struct mount *mp) { struct vnode *vp; vp = malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO); vp->v_type = VMARKER; vp->v_mount = mp; return (vp); } static void vn_free_marker(struct vnode *vp) { MPASS(vp->v_type == VMARKER); free(vp, M_VNODE_MARKER); } /* * Initialize a vnode as it first enters the zone. */ static int vnode_init(void *mem, int size, int flags) { struct vnode *vp; vp = mem; bzero(vp, size); /* * Setup locks. */ vp->v_vnlock = &vp->v_lock; mtx_init(&vp->v_interlock, "vnode interlock", NULL, MTX_DEF); /* * By default, don't allow shared locks unless filesystems opt-in. */ lockinit(vp->v_vnlock, PVFS, "vnode", VLKTIMEOUT, LK_NOSHARE | LK_IS_VNODE); /* * Initialize bufobj. */ bufobj_init(&vp->v_bufobj, vp); /* * Initialize namecache. */ LIST_INIT(&vp->v_cache_src); TAILQ_INIT(&vp->v_cache_dst); /* * Initialize rangelocks. */ rangelock_init(&vp->v_rl); vp->v_dbatchcpu = NOCPU; mtx_lock(&vnode_list_mtx); TAILQ_INSERT_BEFORE(vnode_list_free_marker, vp, v_vnodelist); mtx_unlock(&vnode_list_mtx); return (0); } /* * Free a vnode when it is cleared from the zone. */ static void vnode_fini(void *mem, int size) { struct vnode *vp; struct bufobj *bo; vp = mem; vdbatch_dequeue(vp); mtx_lock(&vnode_list_mtx); TAILQ_REMOVE(&vnode_list, vp, v_vnodelist); mtx_unlock(&vnode_list_mtx); rangelock_destroy(&vp->v_rl); lockdestroy(vp->v_vnlock); mtx_destroy(&vp->v_interlock); bo = &vp->v_bufobj; rw_destroy(BO_LOCKPTR(bo)); } /* * Provide the size of NFS nclnode and NFS fh for calculation of the * vnode memory consumption. The size is specified directly to * eliminate dependency on NFS-private header. * * Other filesystems may use bigger or smaller (like UFS and ZFS) * private inode data, but the NFS-based estimation is ample enough. * Still, we care about differences in the size between 64- and 32-bit * platforms. * * Namecache structure size is heuristically * sizeof(struct namecache_ts) + CACHE_PATH_CUTOFF + 1. */ #ifdef _LP64 #define NFS_NCLNODE_SZ (528 + 64) #define NC_SZ 148 #else #define NFS_NCLNODE_SZ (360 + 32) #define NC_SZ 92 #endif static void vntblinit(void *dummy __unused) { struct vdbatch *vd; int cpu, physvnodes, virtvnodes; u_int i; /* * Desiredvnodes is a function of the physical memory size and the * kernel's heap size. Generally speaking, it scales with the * physical memory size. The ratio of desiredvnodes to the physical * memory size is 1:16 until desiredvnodes exceeds 98,304. * Thereafter, the * marginal ratio of desiredvnodes to the physical memory size is * 1:64. However, desiredvnodes is limited by the kernel's heap * size. The memory required by desiredvnodes vnodes and vm objects * must not exceed 1/10th of the kernel's heap size. */ physvnodes = maxproc + pgtok(vm_cnt.v_page_count) / 64 + 3 * min(98304 * 16, pgtok(vm_cnt.v_page_count)) / 64; virtvnodes = vm_kmem_size / (10 * (sizeof(struct vm_object) + sizeof(struct vnode) + NC_SZ * ncsizefactor + NFS_NCLNODE_SZ)); desiredvnodes = min(physvnodes, virtvnodes); if (desiredvnodes > MAXVNODES_MAX) { if (bootverbose) printf("Reducing kern.maxvnodes %lu -> %lu\n", desiredvnodes, MAXVNODES_MAX); desiredvnodes = MAXVNODES_MAX; } wantfreevnodes = desiredvnodes / 4; mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF); TAILQ_INIT(&vnode_list); mtx_init(&vnode_list_mtx, "vnode_list", NULL, MTX_DEF); /* * The lock is taken to appease WITNESS. */ mtx_lock(&vnode_list_mtx); vnlru_recalc(); mtx_unlock(&vnode_list_mtx); vnode_list_free_marker = vn_alloc_marker(NULL); TAILQ_INSERT_HEAD(&vnode_list, vnode_list_free_marker, v_vnodelist); vnode_list_reclaim_marker = vn_alloc_marker(NULL); TAILQ_INSERT_HEAD(&vnode_list, vnode_list_reclaim_marker, v_vnodelist); vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL, vnode_init, vnode_fini, UMA_ALIGN_PTR, 0); vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); /* * Preallocate enough nodes to support one-per buf so that * we can not fail an insert. reassignbuf() callers can not * tolerate the insertion failure. */ buf_trie_zone = uma_zcreate("BUF TRIE", pctrie_node_size(), NULL, NULL, pctrie_zone_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM); uma_prealloc(buf_trie_zone, nbuf); vnodes_created = counter_u64_alloc(M_WAITOK); recycles_count = counter_u64_alloc(M_WAITOK); recycles_free_count = counter_u64_alloc(M_WAITOK); deferred_inact = counter_u64_alloc(M_WAITOK); /* * Initialize the filesystem syncer. */ syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, &syncer_mask); syncer_maxdelay = syncer_mask + 1; mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF); cv_init(&sync_wakeup, "syncer"); for (i = 1; i <= sizeof(struct vnode); i <<= 1) vnsz2log++; vnsz2log--; CPU_FOREACH(cpu) { vd = DPCPU_ID_PTR((cpu), vd); bzero(vd, sizeof(*vd)); mtx_init(&vd->lock, "vdbatch", NULL, MTX_DEF); } } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL); /* * Mark a mount point as busy. Used to synchronize access and to delay * unmounting. Eventually, mountlist_mtx is not released on failure. * * vfs_busy() is a custom lock, it can block the caller. * vfs_busy() only sleeps if the unmount is active on the mount point. * For a mountpoint mp, vfs_busy-enforced lock is before lock of any * vnode belonging to mp. * * Lookup uses vfs_busy() to traverse mount points. * root fs var fs * / vnode lock A / vnode lock (/var) D * /var vnode lock B /log vnode lock(/var/log) E * vfs_busy lock C vfs_busy lock F * * Within each file system, the lock order is C->A->B and F->D->E. * * When traversing across mounts, the system follows that lock order: * * C->A->B * | * +->F->D->E * * The lookup() process for namei("/var") illustrates the process: * VOP_LOOKUP() obtains B while A is held * vfs_busy() obtains a shared lock on F while A and B are held * vput() releases lock on B * vput() releases lock on A * VFS_ROOT() obtains lock on D while shared lock on F is held * vfs_unbusy() releases shared lock on F * vn_lock() obtains lock on deadfs vnode vp_crossmp instead of A. * Attempt to lock A (instead of vp_crossmp) while D is held would * violate the global order, causing deadlocks. * * dounmount() locks B while F is drained. */ int vfs_busy(struct mount *mp, int flags) { MPASS((flags & ~MBF_MASK) == 0); CTR3(KTR_VFS, "%s: mp %p with flags %d", __func__, mp, flags); if (vfs_op_thread_enter(mp)) { MPASS((mp->mnt_kern_flag & MNTK_DRAINING) == 0); MPASS((mp->mnt_kern_flag & MNTK_UNMOUNT) == 0); MPASS((mp->mnt_kern_flag & MNTK_REFEXPIRE) == 0); vfs_mp_count_add_pcpu(mp, ref, 1); vfs_mp_count_add_pcpu(mp, lockref, 1); vfs_op_thread_exit(mp); if (flags & MBF_MNTLSTLOCK) mtx_unlock(&mountlist_mtx); return (0); } MNT_ILOCK(mp); vfs_assert_mount_counters(mp); MNT_REF(mp); /* * If mount point is currently being unmounted, sleep until the * mount point fate is decided. If thread doing the unmounting fails, * it will clear MNTK_UNMOUNT flag before waking us up, indicating * that this mount point has survived the unmount attempt and vfs_busy * should retry. Otherwise the unmounter thread will set MNTK_REFEXPIRE * flag in addition to MNTK_UNMOUNT, indicating that mount point is * about to be really destroyed. vfs_busy needs to release its * reference on the mount point in this case and return with ENOENT, * telling the caller that mount mount it tried to busy is no longer * valid. */ while (mp->mnt_kern_flag & MNTK_UNMOUNT) { if (flags & MBF_NOWAIT || mp->mnt_kern_flag & MNTK_REFEXPIRE) { MNT_REL(mp); MNT_IUNLOCK(mp); CTR1(KTR_VFS, "%s: failed busying before sleeping", __func__); return (ENOENT); } if (flags & MBF_MNTLSTLOCK) mtx_unlock(&mountlist_mtx); mp->mnt_kern_flag |= MNTK_MWAIT; msleep(mp, MNT_MTX(mp), PVFS | PDROP, "vfs_busy", 0); if (flags & MBF_MNTLSTLOCK) mtx_lock(&mountlist_mtx); MNT_ILOCK(mp); } if (flags & MBF_MNTLSTLOCK) mtx_unlock(&mountlist_mtx); mp->mnt_lockref++; MNT_IUNLOCK(mp); return (0); } /* * Free a busy filesystem. */ void vfs_unbusy(struct mount *mp) { int c; CTR2(KTR_VFS, "%s: mp %p", __func__, mp); if (vfs_op_thread_enter(mp)) { MPASS((mp->mnt_kern_flag & MNTK_DRAINING) == 0); vfs_mp_count_sub_pcpu(mp, lockref, 1); vfs_mp_count_sub_pcpu(mp, ref, 1); vfs_op_thread_exit(mp); return; } MNT_ILOCK(mp); vfs_assert_mount_counters(mp); MNT_REL(mp); c = --mp->mnt_lockref; if (mp->mnt_vfs_ops == 0) { MPASS((mp->mnt_kern_flag & MNTK_DRAINING) == 0); MNT_IUNLOCK(mp); return; } if (c < 0) vfs_dump_mount_counters(mp); if (c == 0 && (mp->mnt_kern_flag & MNTK_DRAINING) != 0) { MPASS(mp->mnt_kern_flag & MNTK_UNMOUNT); CTR1(KTR_VFS, "%s: waking up waiters", __func__); mp->mnt_kern_flag &= ~MNTK_DRAINING; wakeup(&mp->mnt_lockref); } MNT_IUNLOCK(mp); } /* * Lookup a mount point by filesystem identifier. */ struct mount * vfs_getvfs(fsid_t *fsid) { struct mount *mp; CTR2(KTR_VFS, "%s: fsid %p", __func__, fsid); mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { vfs_ref(mp); mtx_unlock(&mountlist_mtx); return (mp); } } mtx_unlock(&mountlist_mtx); CTR2(KTR_VFS, "%s: lookup failed for %p id", __func__, fsid); return ((struct mount *) 0); } /* * Lookup a mount point by filesystem identifier, busying it before * returning. * * To avoid congestion on mountlist_mtx, implement simple direct-mapped * cache for popular filesystem identifiers. The cache is lockess, using * the fact that struct mount's are never freed. In worst case we may * get pointer to unmounted or even different filesystem, so we have to * check what we got, and go slow way if so. */ struct mount * vfs_busyfs(fsid_t *fsid) { #define FSID_CACHE_SIZE 256 typedef struct mount * volatile vmp_t; static vmp_t cache[FSID_CACHE_SIZE]; struct mount *mp; int error; uint32_t hash; CTR2(KTR_VFS, "%s: fsid %p", __func__, fsid); hash = fsid->val[0] ^ fsid->val[1]; hash = (hash >> 16 ^ hash) & (FSID_CACHE_SIZE - 1); mp = cache[hash]; if (mp == NULL || mp->mnt_stat.f_fsid.val[0] != fsid->val[0] || mp->mnt_stat.f_fsid.val[1] != fsid->val[1]) goto slow; if (vfs_busy(mp, 0) != 0) { cache[hash] = NULL; goto slow; } if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) return (mp); else vfs_unbusy(mp); slow: mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { error = vfs_busy(mp, MBF_MNTLSTLOCK); if (error) { cache[hash] = NULL; mtx_unlock(&mountlist_mtx); return (NULL); } cache[hash] = mp; return (mp); } } CTR2(KTR_VFS, "%s: lookup failed for %p id", __func__, fsid); mtx_unlock(&mountlist_mtx); return ((struct mount *) 0); } /* * Check if a user can access privileged mount options. */ int vfs_suser(struct mount *mp, struct thread *td) { int error; if (jailed(td->td_ucred)) { /* * If the jail of the calling thread lacks permission for * this type of file system, deny immediately. */ if (!prison_allow(td->td_ucred, mp->mnt_vfc->vfc_prison_flag)) return (EPERM); /* * If the file system was mounted outside the jail of the * calling thread, deny immediately. */ if (prison_check(td->td_ucred, mp->mnt_cred) != 0) return (EPERM); } /* * If file system supports delegated administration, we don't check * for the PRIV_VFS_MOUNT_OWNER privilege - it will be better verified * by the file system itself. * If this is not the user that did original mount, we check for * the PRIV_VFS_MOUNT_OWNER privilege. */ if (!(mp->mnt_vfc->vfc_flags & VFCF_DELEGADMIN) && mp->mnt_cred->cr_uid != td->td_ucred->cr_uid) { if ((error = priv_check(td, PRIV_VFS_MOUNT_OWNER)) != 0) return (error); } return (0); } /* * Get a new unique fsid. Try to make its val[0] unique, since this value * will be used to create fake device numbers for stat(). Also try (but * not so hard) make its val[0] unique mod 2^16, since some emulators only * support 16-bit device numbers. We end up with unique val[0]'s for the * first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8 calls. * * Keep in mind that several mounts may be running in parallel. Starting * the search one past where the previous search terminated is both a * micro-optimization and a defense against returning the same fsid to * different mounts. */ void vfs_getnewfsid(struct mount *mp) { static uint16_t mntid_base; struct mount *nmp; fsid_t tfsid; int mtype; CTR2(KTR_VFS, "%s: mp %p", __func__, mp); mtx_lock(&mntid_mtx); mtype = mp->mnt_vfc->vfc_typenum; tfsid.val[1] = mtype; mtype = (mtype & 0xFF) << 24; for (;;) { tfsid.val[0] = makedev(255, mtype | ((mntid_base & 0xFF00) << 8) | (mntid_base & 0xFF)); mntid_base++; if ((nmp = vfs_getvfs(&tfsid)) == NULL) break; vfs_rel(nmp); } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; mp->mnt_stat.f_fsid.val[1] = tfsid.val[1]; mtx_unlock(&mntid_mtx); } /* * Knob to control the precision of file timestamps: * * 0 = seconds only; nanoseconds zeroed. * 1 = seconds and nanoseconds, accurate within 1/HZ. * 2 = seconds and nanoseconds, truncated to microseconds. * >=3 = seconds and nanoseconds, maximum precision. */ enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC }; static int timestamp_precision = TSP_USEC; SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW, ×tamp_precision, 0, "File timestamp precision (0: seconds, " "1: sec + ns accurate to 1/HZ, 2: sec + ns truncated to us, " "3+: sec + ns (max. precision))"); /* * Get a current timestamp. */ void vfs_timestamp(struct timespec *tsp) { struct timeval tv; switch (timestamp_precision) { case TSP_SEC: tsp->tv_sec = time_second; tsp->tv_nsec = 0; break; case TSP_HZ: getnanotime(tsp); break; case TSP_USEC: microtime(&tv); TIMEVAL_TO_TIMESPEC(&tv, tsp); break; case TSP_NSEC: default: nanotime(tsp); break; } } /* * Set vnode attributes to VNOVAL */ void vattr_null(struct vattr *vap) { vap->va_type = VNON; vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = VNOVAL; vap->va_nlink = VNOVAL; vap->va_uid = VNOVAL; vap->va_gid = VNOVAL; vap->va_fsid = VNOVAL; vap->va_fileid = VNOVAL; vap->va_blocksize = VNOVAL; vap->va_rdev = VNOVAL; vap->va_atime.tv_sec = VNOVAL; vap->va_atime.tv_nsec = VNOVAL; vap->va_mtime.tv_sec = VNOVAL; vap->va_mtime.tv_nsec = VNOVAL; vap->va_ctime.tv_sec = VNOVAL; vap->va_ctime.tv_nsec = VNOVAL; vap->va_birthtime.tv_sec = VNOVAL; vap->va_birthtime.tv_nsec = VNOVAL; vap->va_flags = VNOVAL; vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * Try to reduce the total number of vnodes. * * This routine (and its user) are buggy in at least the following ways: * - all parameters were picked years ago when RAM sizes were significantly * smaller * - it can pick vnodes based on pages used by the vm object, but filesystems * like ZFS don't use it making the pick broken * - since ZFS has its own aging policy it gets partially combated by this one * - a dedicated method should be provided for filesystems to let them decide * whether the vnode should be recycled * * This routine is called when we have too many vnodes. It attempts * to free vnodes and will potentially free vnodes that still * have VM backing store (VM backing store is typically the cause * of a vnode blowout so we want to do this). Therefore, this operation * is not considered cheap. * * A number of conditions may prevent a vnode from being reclaimed. * the buffer cache may have references on the vnode, a directory * vnode may still have references due to the namei cache representing * underlying files, or the vnode may be in active use. It is not * desirable to reuse such vnodes. These conditions may cause the * number of vnodes to reach some minimum value regardless of what * you set kern.maxvnodes to. Do not set kern.maxvnodes too low. * * @param reclaim_nc_src Only reclaim directories with outgoing namecache * entries if this argument is strue * @param trigger Only reclaim vnodes with fewer than this many resident * pages. * @param target How many vnodes to reclaim. * @return The number of vnodes that were reclaimed. */ static int vlrureclaim(bool reclaim_nc_src, int trigger, u_long target) { struct vnode *vp, *mvp; struct mount *mp; u_long done; bool retried; mtx_assert(&vnode_list_mtx, MA_OWNED); retried = false; done = 0; mvp = vnode_list_reclaim_marker; restart: vp = mvp; while (done < target) { vp = TAILQ_NEXT(vp, v_vnodelist); if (__predict_false(vp == NULL)) break; if (__predict_false(vp->v_type == VMARKER)) continue; /* * If it's been deconstructed already, it's still * referenced, or it exceeds the trigger, skip it. * Also skip free vnodes. We are trying to make space * to expand the free list, not reduce it. */ if (vp->v_usecount > 0 || vp->v_holdcnt == 0 || (!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src))) goto next_iter; if (vp->v_type == VBAD || vp->v_type == VNON) goto next_iter; if (!VI_TRYLOCK(vp)) goto next_iter; if (vp->v_usecount > 0 || vp->v_holdcnt == 0 || (!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)) || vp->v_type == VBAD || vp->v_type == VNON || (vp->v_object != NULL && vp->v_object->resident_page_count > trigger)) { VI_UNLOCK(vp); goto next_iter; } vholdl(vp); VI_UNLOCK(vp); TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist); TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist); mtx_unlock(&vnode_list_mtx); if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { vdrop(vp); goto next_iter_unlocked; } if (VOP_LOCK(vp, LK_EXCLUSIVE|LK_NOWAIT) != 0) { vdrop(vp); vn_finished_write(mp); goto next_iter_unlocked; } VI_LOCK(vp); if (vp->v_usecount > 0 || (!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)) || (vp->v_object != NULL && vp->v_object->resident_page_count > trigger)) { VOP_UNLOCK(vp); vdropl(vp); vn_finished_write(mp); goto next_iter_unlocked; } counter_u64_add(recycles_count, 1); vgonel(vp); VOP_UNLOCK(vp); vdropl(vp); vn_finished_write(mp); done++; next_iter_unlocked: if (should_yield()) kern_yield(PRI_USER); mtx_lock(&vnode_list_mtx); goto restart; next_iter: MPASS(vp->v_type != VMARKER); if (!should_yield()) continue; TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist); TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist); mtx_unlock(&vnode_list_mtx); kern_yield(PRI_USER); mtx_lock(&vnode_list_mtx); goto restart; } if (done == 0 && !retried) { TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist); TAILQ_INSERT_HEAD(&vnode_list, mvp, v_vnodelist); retried = true; goto restart; } return (done); } static int max_vnlru_free = 10000; /* limit on vnode free requests per call */ SYSCTL_INT(_debug, OID_AUTO, max_vnlru_free, CTLFLAG_RW, &max_vnlru_free, 0, "limit on vnode free requests per call to the vnlru_free routine"); /* * Attempt to reduce the free list by the requested amount. */ -static void +static int vnlru_free_locked(int count, struct vfsops *mnt_op) { struct vnode *vp, *mvp; struct mount *mp; + int ocount; mtx_assert(&vnode_list_mtx, MA_OWNED); if (count > max_vnlru_free) count = max_vnlru_free; + ocount = count; mvp = vnode_list_free_marker; restart: vp = mvp; while (count > 0) { vp = TAILQ_NEXT(vp, v_vnodelist); if (__predict_false(vp == NULL)) { TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist); TAILQ_INSERT_TAIL(&vnode_list, mvp, v_vnodelist); break; } if (__predict_false(vp->v_type == VMARKER)) continue; /* * Don't recycle if our vnode is from different type * of mount point. Note that mp is type-safe, the * check does not reach unmapped address even if * vnode is reclaimed. * Don't recycle if we can't get the interlock without * blocking. */ if (vp->v_holdcnt > 0 || (mnt_op != NULL && (mp = vp->v_mount) != NULL && mp->mnt_op != mnt_op) || !VI_TRYLOCK(vp)) { continue; } TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist); TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist); if (__predict_false(vp->v_type == VBAD || vp->v_type == VNON)) { VI_UNLOCK(vp); continue; } vholdl(vp); count--; mtx_unlock(&vnode_list_mtx); VI_UNLOCK(vp); vtryrecycle(vp); vdrop(vp); mtx_lock(&vnode_list_mtx); goto restart; } + return (ocount - count); } void vnlru_free(int count, struct vfsops *mnt_op) { mtx_lock(&vnode_list_mtx); vnlru_free_locked(count, mnt_op); mtx_unlock(&vnode_list_mtx); } static void vnlru_recalc(void) { mtx_assert(&vnode_list_mtx, MA_OWNED); gapvnodes = imax(desiredvnodes - wantfreevnodes, 100); vhiwat = gapvnodes / 11; /* 9% -- just under the 10% in vlrureclaim() */ vlowat = vhiwat / 2; } /* * Attempt to recycle vnodes in a context that is always safe to block. * Calling vlrurecycle() from the bowels of filesystem code has some * interesting deadlock problems. */ static struct proc *vnlruproc; static int vnlruproc_sig; +/* + * The main freevnodes counter is only updated when threads requeue their vnode + * batches. CPUs are conditionally walked to compute a more accurate total. + * + * Limit how much of a slop are we willing to tolerate. Note: the actual value + * at any given moment can still exceed slop, but it should not be by significant + * margin in practice. + */ +#define VNLRU_FREEVNODES_SLOP 128 + +static u_long +vnlru_read_freevnodes(void) +{ + struct vdbatch *vd; + long slop; + int cpu; + + mtx_assert(&vnode_list_mtx, MA_OWNED); + if (freevnodes > freevnodes_old) + slop = freevnodes - freevnodes_old; + else + slop = freevnodes_old - freevnodes; + if (slop < VNLRU_FREEVNODES_SLOP) + return (freevnodes >= 0 ? freevnodes : 0); + freevnodes_old = freevnodes; + CPU_FOREACH(cpu) { + vd = DPCPU_ID_PTR((cpu), vd); + freevnodes_old += vd->freevnodes; + } + return (freevnodes_old >= 0 ? freevnodes_old : 0); +} + static bool vnlru_under(u_long rnumvnodes, u_long limit) { u_long rfreevnodes, space; if (__predict_false(rnumvnodes > desiredvnodes)) return (true); space = desiredvnodes - rnumvnodes; if (space < limit) { + rfreevnodes = vnlru_read_freevnodes(); + if (rfreevnodes > wantfreevnodes) + space += rfreevnodes - wantfreevnodes; + } + return (space < limit); +} + +static bool +vnlru_under_unlocked(u_long rnumvnodes, u_long limit) +{ + long rfreevnodes, space; + + if (__predict_false(rnumvnodes > desiredvnodes)) + return (true); + + space = desiredvnodes - rnumvnodes; + if (space < limit) { rfreevnodes = atomic_load_long(&freevnodes); if (rfreevnodes > wantfreevnodes) space += rfreevnodes - wantfreevnodes; } return (space < limit); } static void vnlru_kick(void) { mtx_assert(&vnode_list_mtx, MA_OWNED); if (vnlruproc_sig == 0) { vnlruproc_sig = 1; wakeup(vnlruproc); } } static void vnlru_proc(void) { u_long rnumvnodes, rfreevnodes, target; unsigned long onumvnodes; int done, force, trigger, usevnodes; - bool reclaim_nc_src; + bool reclaim_nc_src, want_reread; EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, vnlruproc, SHUTDOWN_PRI_FIRST); force = 0; + want_reread = false; for (;;) { kproc_suspend_check(vnlruproc); mtx_lock(&vnode_list_mtx); rnumvnodes = atomic_load_long(&numvnodes); + + if (want_reread) { + force = vnlru_under(numvnodes, vhiwat) ? 1 : 0; + want_reread = false; + } + /* * If numvnodes is too large (due to desiredvnodes being * adjusted using its sysctl, or emergency growth), first * try to reduce it by discarding from the free list. */ if (rnumvnodes > desiredvnodes) { vnlru_free_locked(rnumvnodes - desiredvnodes, NULL); rnumvnodes = atomic_load_long(&numvnodes); } /* * Sleep if the vnode cache is in a good state. This is * when it is not over-full and has space for about a 4% * or 9% expansion (by growing its size or inexcessively * reducing its free list). Otherwise, try to reclaim * space for a 10% expansion. */ if (vstir && force == 0) { force = 1; vstir = 0; } if (force == 0 && !vnlru_under(rnumvnodes, vlowat)) { vnlruproc_sig = 0; wakeup(&vnlruproc_sig); msleep(vnlruproc, &vnode_list_mtx, PVFS|PDROP, "vlruwt", hz); continue; } - rfreevnodes = atomic_load_long(&freevnodes); + rfreevnodes = vnlru_read_freevnodes(); onumvnodes = rnumvnodes; /* * Calculate parameters for recycling. These are the same * throughout the loop to give some semblance of fairness. * The trigger point is to avoid recycling vnodes with lots * of resident pages. We aren't trying to free memory; we * are trying to recycle or at least free vnodes. */ if (rnumvnodes <= desiredvnodes) usevnodes = rnumvnodes - rfreevnodes; else usevnodes = rnumvnodes; if (usevnodes <= 0) usevnodes = 1; /* * The trigger value is is chosen to give a conservatively * large value to ensure that it alone doesn't prevent * making progress. The value can easily be so large that * it is effectively infinite in some congested and * misconfigured cases, and this is necessary. Normally * it is about 8 to 100 (pages), which is quite large. */ trigger = vm_cnt.v_page_count * 2 / usevnodes; if (force < 2) trigger = vsmalltrigger; reclaim_nc_src = force >= 3; target = rnumvnodes * (int64_t)gapvnodes / imax(desiredvnodes, 1); target = target / 10 + 1; done = vlrureclaim(reclaim_nc_src, trigger, target); mtx_unlock(&vnode_list_mtx); if (onumvnodes > desiredvnodes && numvnodes <= desiredvnodes) uma_reclaim(UMA_RECLAIM_DRAIN); if (done == 0) { if (force == 0 || force == 1) { force = 2; continue; } if (force == 2) { force = 3; continue; } + want_reread = true; force = 0; vnlru_nowhere++; tsleep(vnlruproc, PPAUSE, "vlrup", hz * 3); - } else + } else { + want_reread = true; kern_yield(PRI_USER); - /* - * After becoming active to expand above low water, keep - * active until above high water. - */ - force = vnlru_under(numvnodes, vhiwat) ? 1 : 0; + } } } static struct kproc_desc vnlru_kp = { "vnlru", vnlru_proc, &vnlruproc }; SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &vnlru_kp); /* * Routines having to do with the management of the vnode table. */ /* * Try to recycle a freed vnode. We abort if anyone picks up a reference * before we actually vgone(). This function must be called with the vnode * held to prevent the vnode from being returned to the free list midway * through vgone(). */ static int vtryrecycle(struct vnode *vp) { struct mount *vnmp; CTR2(KTR_VFS, "%s: vp %p", __func__, vp); VNASSERT(vp->v_holdcnt, vp, ("vtryrecycle: Recycling vp %p without a reference.", vp)); /* * This vnode may found and locked via some other list, if so we * can't recycle it yet. */ if (VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { CTR2(KTR_VFS, "%s: impossible to recycle, vp %p lock is already held", __func__, vp); return (EWOULDBLOCK); } /* * Don't recycle if its filesystem is being suspended. */ if (vn_start_write(vp, &vnmp, V_NOWAIT) != 0) { VOP_UNLOCK(vp); CTR2(KTR_VFS, "%s: impossible to recycle, cannot start the write for %p", __func__, vp); return (EBUSY); } /* * If we got this far, we need to acquire the interlock and see if * anyone picked up this vnode from another list. If not, we will * mark it with DOOMED via vgonel() so that anyone who does find it * will skip over it. */ VI_LOCK(vp); if (vp->v_usecount) { VOP_UNLOCK(vp); VI_UNLOCK(vp); vn_finished_write(vnmp); CTR2(KTR_VFS, "%s: impossible to recycle, %p is already referenced", __func__, vp); return (EBUSY); } if (!VN_IS_DOOMED(vp)) { counter_u64_add(recycles_free_count, 1); vgonel(vp); } VOP_UNLOCK(vp); VI_UNLOCK(vp); vn_finished_write(vnmp); return (0); } /* * Allocate a new vnode. * * The operation never returns an error. Returning an error was disabled * in r145385 (dated 2005) with the following comment: * * XXX Not all VFS_VGET/ffs_vget callers check returns. * * Given the age of this commit (almost 15 years at the time of writing this * comment) restoring the ability to fail requires a significant audit of * all codepaths. * * The routine can try to free a vnode or stall for up to 1 second waiting for * vnlru to clear things up, but ultimately always performs a M_WAITOK allocation. */ static u_long vn_alloc_cyclecount; static struct vnode * __noinline vn_alloc_hard(struct mount *mp) { u_long rnumvnodes, rfreevnodes; mtx_lock(&vnode_list_mtx); rnumvnodes = atomic_load_long(&numvnodes); if (rnumvnodes + 1 < desiredvnodes) { vn_alloc_cyclecount = 0; goto alloc; } - rfreevnodes = atomic_load_long(&freevnodes); + rfreevnodes = vnlru_read_freevnodes(); if (vn_alloc_cyclecount++ >= rfreevnodes) { vn_alloc_cyclecount = 0; vstir = 1; } /* * Grow the vnode cache if it will not be above its target max * after growing. Otherwise, if the free list is nonempty, try * to reclaim 1 item from it before growing the cache (possibly * above its target max if the reclamation failed or is delayed). * Otherwise, wait for some space. In all cases, schedule * vnlru_proc() if we are getting short of space. The watermarks * should be chosen so that we never wait or even reclaim from * the free list to below its target minimum. */ - if (rfreevnodes > 0) { - vnlru_free_locked(1, NULL); + if (vnlru_free_locked(1, NULL) > 0) goto alloc; - } if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPEND) == 0) { /* * Wait for space for a new vnode. */ vnlru_kick(); msleep(&vnlruproc_sig, &vnode_list_mtx, PVFS, "vlruwk", hz); if (atomic_load_long(&numvnodes) + 1 > desiredvnodes && - atomic_load_long(&freevnodes) > 1) + vnlru_read_freevnodes() > 1) vnlru_free_locked(1, NULL); } alloc: rnumvnodes = atomic_fetchadd_long(&numvnodes, 1) + 1; if (vnlru_under(rnumvnodes, vlowat)) vnlru_kick(); mtx_unlock(&vnode_list_mtx); return (uma_zalloc(vnode_zone, M_WAITOK)); } static struct vnode * vn_alloc(struct mount *mp) { u_long rnumvnodes; if (__predict_false(vn_alloc_cyclecount != 0)) return (vn_alloc_hard(mp)); rnumvnodes = atomic_fetchadd_long(&numvnodes, 1) + 1; - if (__predict_false(vnlru_under(rnumvnodes, vlowat))) { + if (__predict_false(vnlru_under_unlocked(rnumvnodes, vlowat))) { atomic_subtract_long(&numvnodes, 1); return (vn_alloc_hard(mp)); } return (uma_zalloc(vnode_zone, M_WAITOK)); } static void vn_free(struct vnode *vp) { atomic_subtract_long(&numvnodes, 1); uma_zfree(vnode_zone, vp); } /* * Return the next vnode from the free list. */ int getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops, struct vnode **vpp) { struct vnode *vp; struct thread *td; struct lock_object *lo; CTR3(KTR_VFS, "%s: mp %p with tag %s", __func__, mp, tag); KASSERT(vops->registered, ("%s: not registered vector op %p\n", __func__, vops)); td = curthread; if (td->td_vp_reserved != NULL) { vp = td->td_vp_reserved; td->td_vp_reserved = NULL; } else { vp = vn_alloc(mp); } counter_u64_add(vnodes_created, 1); /* * Locks are given the generic name "vnode" when created. * Follow the historic practice of using the filesystem * name when they allocated, e.g., "zfs", "ufs", "nfs, etc. * * Locks live in a witness group keyed on their name. Thus, * when a lock is renamed, it must also move from the witness * group of its old name to the witness group of its new name. * * The change only needs to be made when the vnode moves * from one filesystem type to another. We ensure that each * filesystem use a single static name pointer for its tag so * that we can compare pointers rather than doing a strcmp(). */ lo = &vp->v_vnlock->lock_object; +#ifdef WITNESS if (lo->lo_name != tag) { +#endif lo->lo_name = tag; +#ifdef WITNESS WITNESS_DESTROY(lo); WITNESS_INIT(lo, tag); } +#endif /* * By default, don't allow shared locks unless filesystems opt-in. */ vp->v_vnlock->lock_object.lo_flags |= LK_NOSHARE; /* * Finalize various vnode identity bits. */ KASSERT(vp->v_object == NULL, ("stale v_object %p", vp)); KASSERT(vp->v_lockf == NULL, ("stale v_lockf %p", vp)); KASSERT(vp->v_pollinfo == NULL, ("stale v_pollinfo %p", vp)); vp->v_type = VNON; vp->v_op = vops; v_init_counters(vp); vp->v_bufobj.bo_ops = &buf_ops_bio; #ifdef DIAGNOSTIC if (mp == NULL && vops != &dead_vnodeops) printf("NULL mp in getnewvnode(9), tag %s\n", tag); #endif #ifdef MAC mac_vnode_init(vp); if (mp != NULL && (mp->mnt_flag & MNT_MULTILABEL) == 0) mac_vnode_associate_singlelabel(mp, vp); #endif if (mp != NULL) { vp->v_bufobj.bo_bsize = mp->mnt_stat.f_iosize; if ((mp->mnt_kern_flag & MNTK_NOKNOTE) != 0) vp->v_vflag |= VV_NOKNOTE; } /* * For the filesystems which do not use vfs_hash_insert(), * still initialize v_hash to have vfs_hash_index() useful. * E.g., nullfs uses vfs_hash_index() on the lower vnode for * its own hashing. */ vp->v_hash = (uintptr_t)vp >> vnsz2log; *vpp = vp; return (0); } void getnewvnode_reserve(void) { struct thread *td; td = curthread; MPASS(td->td_vp_reserved == NULL); td->td_vp_reserved = vn_alloc(NULL); } void getnewvnode_drop_reserve(void) { struct thread *td; td = curthread; if (td->td_vp_reserved != NULL) { vn_free(td->td_vp_reserved); td->td_vp_reserved = NULL; } } static void freevnode(struct vnode *vp) { struct bufobj *bo; /* * The vnode has been marked for destruction, so free it. * * The vnode will be returned to the zone where it will * normally remain until it is needed for another vnode. We * need to cleanup (or verify that the cleanup has already * been done) any residual data left from its current use * so as not to contaminate the freshly allocated vnode. */ CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp); bo = &vp->v_bufobj; VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't")); VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count")); VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count")); VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count")); VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's")); VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0")); VNASSERT(pctrie_is_empty(&bo->bo_clean.bv_root), vp, ("clean blk trie not empty")); VNASSERT(bo->bo_dirty.bv_cnt == 0, vp, ("dirtybufcnt not 0")); VNASSERT(pctrie_is_empty(&bo->bo_dirty.bv_root), vp, ("dirty blk trie not empty")); VNASSERT(TAILQ_EMPTY(&vp->v_cache_dst), vp, ("vp has namecache dst")); VNASSERT(LIST_EMPTY(&vp->v_cache_src), vp, ("vp has namecache src")); VNASSERT(vp->v_cache_dd == NULL, vp, ("vp has namecache for ..")); VNASSERT(TAILQ_EMPTY(&vp->v_rl.rl_waiters), vp, ("Dangling rangelock waiters")); VI_UNLOCK(vp); #ifdef MAC mac_vnode_destroy(vp); #endif if (vp->v_pollinfo != NULL) { destroy_vpollinfo(vp->v_pollinfo); vp->v_pollinfo = NULL; } #ifdef INVARIANTS /* XXX Elsewhere we detect an already freed vnode via NULL v_op. */ vp->v_op = NULL; #endif vp->v_mountedhere = NULL; vp->v_unpcb = NULL; vp->v_rdev = NULL; vp->v_fifoinfo = NULL; vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; vp->v_irflag = 0; vp->v_iflag = 0; vp->v_vflag = 0; bo->bo_flag = 0; vn_free(vp); } /* * Delete from old mount point vnode list, if on one. */ static void delmntque(struct vnode *vp) { struct mount *mp; mp = vp->v_mount; if (mp == NULL) return; MNT_ILOCK(mp); VI_LOCK(vp); if (vp->v_mflag & VMP_LAZYLIST) { mtx_lock(&mp->mnt_listmtx); if (vp->v_mflag & VMP_LAZYLIST) { vp->v_mflag &= ~VMP_LAZYLIST; TAILQ_REMOVE(&mp->mnt_lazyvnodelist, vp, v_lazylist); mp->mnt_lazyvnodelistsize--; } mtx_unlock(&mp->mnt_listmtx); } vp->v_mount = NULL; VI_UNLOCK(vp); VNASSERT(mp->mnt_nvnodelistsize > 0, vp, ("bad mount point vnode list size")); TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes); mp->mnt_nvnodelistsize--; MNT_REL(mp); MNT_IUNLOCK(mp); } static void insmntque_stddtr(struct vnode *vp, void *dtr_arg) { vp->v_data = NULL; vp->v_op = &dead_vnodeops; vgone(vp); vput(vp); } /* * Insert into list of vnodes for the new mount point, if available. */ int insmntque1(struct vnode *vp, struct mount *mp, void (*dtr)(struct vnode *, void *), void *dtr_arg) { KASSERT(vp->v_mount == NULL, ("insmntque: vnode already on per mount vnode list")); VNASSERT(mp != NULL, vp, ("Don't call insmntque(foo, NULL)")); ASSERT_VOP_ELOCKED(vp, "insmntque: non-locked vp"); /* * We acquire the vnode interlock early to ensure that the * vnode cannot be recycled by another process releasing a * holdcnt on it before we get it on both the vnode list * and the active vnode list. The mount mutex protects only * manipulation of the vnode list and the vnode freelist * mutex protects only manipulation of the active vnode list. * Hence the need to hold the vnode interlock throughout. */ MNT_ILOCK(mp); VI_LOCK(vp); if (((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 && ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0 || mp->mnt_nvnodelistsize == 0)) && (vp->v_vflag & VV_FORCEINSMQ) == 0) { VI_UNLOCK(vp); MNT_IUNLOCK(mp); if (dtr != NULL) dtr(vp, dtr_arg); return (EBUSY); } vp->v_mount = mp; MNT_REF(mp); TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes); VNASSERT(mp->mnt_nvnodelistsize >= 0, vp, ("neg mount point vnode list size")); mp->mnt_nvnodelistsize++; VI_UNLOCK(vp); MNT_IUNLOCK(mp); return (0); } int insmntque(struct vnode *vp, struct mount *mp) { return (insmntque1(vp, mp, insmntque_stddtr, NULL)); } /* * Flush out and invalidate all buffers associated with a bufobj * Called with the underlying object locked. */ int bufobj_invalbuf(struct bufobj *bo, int flags, int slpflag, int slptimeo) { int error; BO_LOCK(bo); if (flags & V_SAVE) { error = bufobj_wwait(bo, slpflag, slptimeo); if (error) { BO_UNLOCK(bo); return (error); } if (bo->bo_dirty.bv_cnt > 0) { BO_UNLOCK(bo); if ((error = BO_SYNC(bo, MNT_WAIT)) != 0) return (error); /* * XXX We could save a lock/unlock if this was only * enabled under INVARIANTS */ BO_LOCK(bo); if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) panic("vinvalbuf: dirty bufs"); } } /* * If you alter this loop please notice that interlock is dropped and * reacquired in flushbuflist. Special care is needed to ensure that * no race conditions occur from this. */ do { error = flushbuflist(&bo->bo_clean, flags, bo, slpflag, slptimeo); if (error == 0 && !(flags & V_CLEANONLY)) error = flushbuflist(&bo->bo_dirty, flags, bo, slpflag, slptimeo); if (error != 0 && error != EAGAIN) { BO_UNLOCK(bo); return (error); } } while (error != 0); /* * Wait for I/O to complete. XXX needs cleaning up. The vnode can * have write I/O in-progress but if there is a VM object then the * VM object can also have read-I/O in-progress. */ do { bufobj_wwait(bo, 0, 0); if ((flags & V_VMIO) == 0 && bo->bo_object != NULL) { BO_UNLOCK(bo); vm_object_pip_wait_unlocked(bo->bo_object, "bovlbx"); BO_LOCK(bo); } } while (bo->bo_numoutput > 0); BO_UNLOCK(bo); /* * Destroy the copy in the VM cache, too. */ if (bo->bo_object != NULL && (flags & (V_ALT | V_NORMAL | V_CLEANONLY | V_VMIO)) == 0) { VM_OBJECT_WLOCK(bo->bo_object); vm_object_page_remove(bo->bo_object, 0, 0, (flags & V_SAVE) ? OBJPR_CLEANONLY : 0); VM_OBJECT_WUNLOCK(bo->bo_object); } #ifdef INVARIANTS BO_LOCK(bo); if ((flags & (V_ALT | V_NORMAL | V_CLEANONLY | V_VMIO | V_ALLOWCLEAN)) == 0 && (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0)) panic("vinvalbuf: flush failed"); if ((flags & (V_ALT | V_NORMAL | V_CLEANONLY | V_VMIO)) == 0 && bo->bo_dirty.bv_cnt > 0) panic("vinvalbuf: flush dirty failed"); BO_UNLOCK(bo); #endif return (0); } /* * Flush out and invalidate all buffers associated with a vnode. * Called with the underlying object locked. */ int vinvalbuf(struct vnode *vp, int flags, int slpflag, int slptimeo) { CTR3(KTR_VFS, "%s: vp %p with flags %d", __func__, vp, flags); ASSERT_VOP_LOCKED(vp, "vinvalbuf"); if (vp->v_object != NULL && vp->v_object->handle != vp) return (0); return (bufobj_invalbuf(&vp->v_bufobj, flags, slpflag, slptimeo)); } /* * Flush out buffers on the specified list. * */ static int flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, int slptimeo) { struct buf *bp, *nbp; int retval, error; daddr_t lblkno; b_xflags_t xflags; ASSERT_BO_WLOCKED(bo); retval = 0; TAILQ_FOREACH_SAFE(bp, &bufv->bv_hd, b_bobufs, nbp) { /* * If we are flushing both V_NORMAL and V_ALT buffers then * do not skip any buffers. If we are flushing only V_NORMAL * buffers then skip buffers marked as BX_ALTDATA. If we are * flushing only V_ALT buffers then skip buffers not marked * as BX_ALTDATA. */ if (((flags & (V_NORMAL | V_ALT)) != (V_NORMAL | V_ALT)) && (((flags & V_NORMAL) && (bp->b_xflags & BX_ALTDATA) != 0) || ((flags & V_ALT) && (bp->b_xflags & BX_ALTDATA) == 0))) { continue; } if (nbp != NULL) { lblkno = nbp->b_lblkno; xflags = nbp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN); } retval = EAGAIN; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo), "flushbuf", slpflag, slptimeo); if (error) { BO_LOCK(bo); return (error != ENOLCK ? error : EAGAIN); } KASSERT(bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); /* * XXX Since there are no node locks for NFS, I * believe there is a slight chance that a delayed * write will occur while sleeping just above, so * check for it. */ if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && (flags & V_SAVE)) { bremfree(bp); bp->b_flags |= B_ASYNC; bwrite(bp); BO_LOCK(bo); return (EAGAIN); /* XXX: why not loop ? */ } bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); BO_LOCK(bo); if (nbp == NULL) break; nbp = gbincore(bo, lblkno); if (nbp == NULL || (nbp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) != xflags) break; /* nbp invalid */ } return (retval); } int bnoreuselist(struct bufv *bufv, struct bufobj *bo, daddr_t startn, daddr_t endn) { struct buf *bp; int error; daddr_t lblkno; ASSERT_BO_LOCKED(bo); for (lblkno = startn;;) { again: bp = BUF_PCTRIE_LOOKUP_GE(&bufv->bv_root, lblkno); if (bp == NULL || bp->b_lblkno >= endn || bp->b_lblkno < startn) break; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo), "brlsfl", 0, 0); if (error != 0) { BO_RLOCK(bo); if (error == ENOLCK) goto again; return (error); } KASSERT(bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); lblkno = bp->b_lblkno + 1; if ((bp->b_flags & B_MANAGED) == 0) bremfree(bp); bp->b_flags |= B_RELBUF; /* * In the VMIO case, use the B_NOREUSE flag to hint that the * pages backing each buffer in the range are unlikely to be * reused. Dirty buffers will have the hint applied once * they've been written. */ if ((bp->b_flags & B_VMIO) != 0) bp->b_flags |= B_NOREUSE; brelse(bp); BO_RLOCK(bo); } return (0); } /* * Truncate a file's buffer and pages to a specified length. This * is in lieu of the old vinvalbuf mechanism, which performed unneeded * sync activity. */ int vtruncbuf(struct vnode *vp, off_t length, int blksize) { struct buf *bp, *nbp; struct bufobj *bo; daddr_t startlbn; CTR4(KTR_VFS, "%s: vp %p with block %d:%ju", __func__, vp, blksize, (uintmax_t)length); /* * Round up to the *next* lbn. */ startlbn = howmany(length, blksize); ASSERT_VOP_LOCKED(vp, "vtruncbuf"); bo = &vp->v_bufobj; restart_unlocked: BO_LOCK(bo); while (v_inval_buf_range_locked(vp, bo, startlbn, INT64_MAX) == EAGAIN) ; if (length > 0) { restartsync: TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno > 0) continue; /* * Since we hold the vnode lock this should only * fail if we're racing with the buf daemon. */ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) goto restart_unlocked; VNASSERT((bp->b_flags & B_DELWRI), vp, ("buf(%p) on dirty queue without DELWRI", bp)); bremfree(bp); bawrite(bp); BO_LOCK(bo); goto restartsync; } } bufobj_wwait(bo, 0, 0); BO_UNLOCK(bo); vnode_pager_setsize(vp, length); return (0); } /* * Invalidate the cached pages of a file's buffer within the range of block * numbers [startlbn, endlbn). */ void v_inval_buf_range(struct vnode *vp, daddr_t startlbn, daddr_t endlbn, int blksize) { struct bufobj *bo; off_t start, end; ASSERT_VOP_LOCKED(vp, "v_inval_buf_range"); start = blksize * startlbn; end = blksize * endlbn; bo = &vp->v_bufobj; BO_LOCK(bo); MPASS(blksize == bo->bo_bsize); while (v_inval_buf_range_locked(vp, bo, startlbn, endlbn) == EAGAIN) ; BO_UNLOCK(bo); vn_pages_remove(vp, OFF_TO_IDX(start), OFF_TO_IDX(end + PAGE_SIZE - 1)); } static int v_inval_buf_range_locked(struct vnode *vp, struct bufobj *bo, daddr_t startlbn, daddr_t endlbn) { struct buf *bp, *nbp; bool anyfreed; ASSERT_VOP_LOCKED(vp, "v_inval_buf_range_locked"); ASSERT_BO_LOCKED(bo); do { anyfreed = false; TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno < startlbn || bp->b_lblkno >= endlbn) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { BO_LOCK(bo); return (EAGAIN); } bremfree(bp); bp->b_flags |= B_INVAL | B_RELBUF; bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = true; BO_LOCK(bo); if (nbp != NULL && (((nbp->b_xflags & BX_VNCLEAN) == 0) || nbp->b_vp != vp || (nbp->b_flags & B_DELWRI) != 0)) return (EAGAIN); } TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno < startlbn || bp->b_lblkno >= endlbn) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { BO_LOCK(bo); return (EAGAIN); } bremfree(bp); bp->b_flags |= B_INVAL | B_RELBUF; bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = true; BO_LOCK(bo); if (nbp != NULL && (((nbp->b_xflags & BX_VNDIRTY) == 0) || (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI) == 0)) return (EAGAIN); } } while (anyfreed); return (0); } static void buf_vlist_remove(struct buf *bp) { struct bufv *bv; KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp)); ASSERT_BO_WLOCKED(bp->b_bufobj); KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) != (BX_VNDIRTY|BX_VNCLEAN), ("buf_vlist_remove: Buf %p is on two lists", bp)); if (bp->b_xflags & BX_VNDIRTY) bv = &bp->b_bufobj->bo_dirty; else bv = &bp->b_bufobj->bo_clean; BUF_PCTRIE_REMOVE(&bv->bv_root, bp->b_lblkno); TAILQ_REMOVE(&bv->bv_hd, bp, b_bobufs); bv->bv_cnt--; bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN); } /* * Add the buffer to the sorted clean or dirty block list. * * NOTE: xflags is passed as a constant, optimizing this inline function! */ static void buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags) { struct bufv *bv; struct buf *n; int error; ASSERT_BO_WLOCKED(bo); KASSERT((xflags & BX_VNDIRTY) == 0 || (bo->bo_flag & BO_DEAD) == 0, ("dead bo %p", bo)); KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, ("buf_vlist_add: Buf %p has existing xflags %d", bp, bp->b_xflags)); bp->b_xflags |= xflags; if (xflags & BX_VNDIRTY) bv = &bo->bo_dirty; else bv = &bo->bo_clean; /* * Keep the list ordered. Optimize empty list insertion. Assume * we tend to grow at the tail so lookup_le should usually be cheaper * than _ge. */ if (bv->bv_cnt == 0 || bp->b_lblkno > TAILQ_LAST(&bv->bv_hd, buflists)->b_lblkno) TAILQ_INSERT_TAIL(&bv->bv_hd, bp, b_bobufs); else if ((n = BUF_PCTRIE_LOOKUP_LE(&bv->bv_root, bp->b_lblkno)) == NULL) TAILQ_INSERT_HEAD(&bv->bv_hd, bp, b_bobufs); else TAILQ_INSERT_AFTER(&bv->bv_hd, n, bp, b_bobufs); error = BUF_PCTRIE_INSERT(&bv->bv_root, bp); if (error) panic("buf_vlist_add: Preallocated nodes insufficient."); bv->bv_cnt++; } /* * Look up a buffer using the buffer tries. */ struct buf * gbincore(struct bufobj *bo, daddr_t lblkno) { struct buf *bp; ASSERT_BO_LOCKED(bo); bp = BUF_PCTRIE_LOOKUP(&bo->bo_clean.bv_root, lblkno); if (bp != NULL) return (bp); return BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, lblkno); } /* * Associate a buffer with a vnode. */ void bgetvp(struct vnode *vp, struct buf *bp) { struct bufobj *bo; bo = &vp->v_bufobj; ASSERT_BO_WLOCKED(bo); VNASSERT(bp->b_vp == NULL, bp->b_vp, ("bgetvp: not free")); CTR3(KTR_BUF, "bgetvp(%p) vp %p flags %X", bp, vp, bp->b_flags); VNASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, vp, ("bgetvp: bp already attached! %p", bp)); vhold(vp); bp->b_vp = vp; bp->b_bufobj = bo; /* * Insert onto list for new vnode. */ buf_vlist_add(bp, bo, BX_VNCLEAN); } /* * Disassociate a buffer from a vnode. */ void brelvp(struct buf *bp) { struct bufobj *bo; struct vnode *vp; CTR3(KTR_BUF, "brelvp(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(bp->b_vp != NULL, ("brelvp: NULL")); /* * Delete from old vnode list, if on one. */ vp = bp->b_vp; /* XXX */ bo = bp->b_bufobj; BO_LOCK(bo); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); else panic("brelvp: Buffer %p not on queue.", bp); if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { bo->bo_flag &= ~BO_ONWORKLST; mtx_lock(&sync_mtx); LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; mtx_unlock(&sync_mtx); } bp->b_vp = NULL; bp->b_bufobj = NULL; BO_UNLOCK(bo); vdrop(vp); } /* * Add an item to the syncer work queue. */ static void vn_syncer_add_to_worklist(struct bufobj *bo, int delay) { int slot; ASSERT_BO_WLOCKED(bo); mtx_lock(&sync_mtx); if (bo->bo_flag & BO_ONWORKLST) LIST_REMOVE(bo, bo_synclist); else { bo->bo_flag |= BO_ONWORKLST; syncer_worklist_len++; } if (delay > syncer_maxdelay - 2) delay = syncer_maxdelay - 2; slot = (syncer_delayno + delay) & syncer_mask; LIST_INSERT_HEAD(&syncer_workitem_pending[slot], bo, bo_synclist); mtx_unlock(&sync_mtx); } static int sysctl_vfs_worklist_len(SYSCTL_HANDLER_ARGS) { int error, len; mtx_lock(&sync_mtx); len = syncer_worklist_len - sync_vnode_count; mtx_unlock(&sync_mtx); error = SYSCTL_OUT(req, &len, sizeof(len)); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, worklist_len, CTLTYPE_INT | CTLFLAG_MPSAFE| CTLFLAG_RD, NULL, 0, sysctl_vfs_worklist_len, "I", "Syncer thread worklist length"); static struct proc *updateproc; static void sched_sync(void); static struct kproc_desc up_kp = { "syncer", sched_sync, &updateproc }; SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp); static int sync_vnode(struct synclist *slp, struct bufobj **bo, struct thread *td) { struct vnode *vp; struct mount *mp; *bo = LIST_FIRST(slp); if (*bo == NULL) return (0); vp = bo2vnode(*bo); if (VOP_ISLOCKED(vp) != 0 || VI_TRYLOCK(vp) == 0) return (1); /* * We use vhold in case the vnode does not * successfully sync. vhold prevents the vnode from * going away when we unlock the sync_mtx so that * we can acquire the vnode interlock. */ vholdl(vp); mtx_unlock(&sync_mtx); VI_UNLOCK(vp); if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { vdrop(vp); mtx_lock(&sync_mtx); return (*bo == LIST_FIRST(slp)); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); (void) VOP_FSYNC(vp, MNT_LAZY, td); VOP_UNLOCK(vp); vn_finished_write(mp); BO_LOCK(*bo); if (((*bo)->bo_flag & BO_ONWORKLST) != 0) { /* * Put us back on the worklist. The worklist * routine will remove us from our current * position and then add us back in at a later * position. */ vn_syncer_add_to_worklist(*bo, syncdelay); } BO_UNLOCK(*bo); vdrop(vp); mtx_lock(&sync_mtx); return (0); } static int first_printf = 1; /* * System filesystem synchronizer daemon. */ static void sched_sync(void) { struct synclist *next, *slp; struct bufobj *bo; long starttime; struct thread *td = curthread; int last_work_seen; int net_worklist_len; int syncer_final_iter; int error; last_work_seen = 0; syncer_final_iter = 0; syncer_state = SYNCER_RUNNING; starttime = time_uptime; td->td_pflags |= TDP_NORUNNINGBUF; EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown, td->td_proc, SHUTDOWN_PRI_LAST); mtx_lock(&sync_mtx); for (;;) { if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter == 0) { mtx_unlock(&sync_mtx); kproc_suspend_check(td->td_proc); mtx_lock(&sync_mtx); } net_worklist_len = syncer_worklist_len - sync_vnode_count; if (syncer_state != SYNCER_RUNNING && starttime != time_uptime) { if (first_printf) { printf("\nSyncing disks, vnodes remaining... "); first_printf = 0; } printf("%d ", net_worklist_len); } starttime = time_uptime; /* * Push files whose dirty time has expired. Be careful * of interrupt race on slp queue. * * Skip over empty worklist slots when shutting down. */ do { slp = &syncer_workitem_pending[syncer_delayno]; syncer_delayno += 1; if (syncer_delayno == syncer_maxdelay) syncer_delayno = 0; next = &syncer_workitem_pending[syncer_delayno]; /* * If the worklist has wrapped since the * it was emptied of all but syncer vnodes, * switch to the FINAL_DELAY state and run * for one more second. */ if (syncer_state == SYNCER_SHUTTING_DOWN && net_worklist_len == 0 && last_work_seen == syncer_delayno) { syncer_state = SYNCER_FINAL_DELAY; syncer_final_iter = SYNCER_SHUTDOWN_SPEEDUP; } } while (syncer_state != SYNCER_RUNNING && LIST_EMPTY(slp) && syncer_worklist_len > 0); /* * Keep track of the last time there was anything * on the worklist other than syncer vnodes. * Return to the SHUTTING_DOWN state if any * new work appears. */ if (net_worklist_len > 0 || syncer_state == SYNCER_RUNNING) last_work_seen = syncer_delayno; if (net_worklist_len > 0 && syncer_state == SYNCER_FINAL_DELAY) syncer_state = SYNCER_SHUTTING_DOWN; while (!LIST_EMPTY(slp)) { error = sync_vnode(slp, &bo, td); if (error == 1) { LIST_REMOVE(bo, bo_synclist); LIST_INSERT_HEAD(next, bo, bo_synclist); continue; } if (first_printf == 0) { /* * Drop the sync mutex, because some watchdog * drivers need to sleep while patting */ mtx_unlock(&sync_mtx); wdog_kern_pat(WD_LASTVAL); mtx_lock(&sync_mtx); } } if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter > 0) syncer_final_iter--; /* * The variable rushjob allows the kernel to speed up the * processing of the filesystem syncer process. A rushjob * value of N tells the filesystem syncer to process the next * N seconds worth of work on its queue ASAP. Currently rushjob * is used by the soft update code to speed up the filesystem * syncer process when the incore state is getting so far * ahead of the disk that the kernel memory pool is being * threatened with exhaustion. */ if (rushjob > 0) { rushjob -= 1; continue; } /* * Just sleep for a short period of time between * iterations when shutting down to allow some I/O * to happen. * * If it has taken us less than a second to process the * current work, then wait. Otherwise start right over * again. We can still lose time if any single round * takes more than two seconds, but it does not really * matter as we are just trying to generally pace the * filesystem activity. */ if (syncer_state != SYNCER_RUNNING || time_uptime == starttime) { thread_lock(td); sched_prio(td, PPAUSE); thread_unlock(td); } if (syncer_state != SYNCER_RUNNING) cv_timedwait(&sync_wakeup, &sync_mtx, hz / SYNCER_SHUTDOWN_SPEEDUP); else if (time_uptime == starttime) cv_timedwait(&sync_wakeup, &sync_mtx, hz); } } /* * Request the syncer daemon to speed up its work. * We never push it to speed up more than half of its * normal turn time, otherwise it could take over the cpu. */ int speedup_syncer(void) { int ret = 0; mtx_lock(&sync_mtx); if (rushjob < syncdelay / 2) { rushjob += 1; stat_rush_requests += 1; ret = 1; } mtx_unlock(&sync_mtx); cv_broadcast(&sync_wakeup); return (ret); } /* * Tell the syncer to speed up its work and run though its work * list several times, then tell it to shut down. */ static void syncer_shutdown(void *arg, int howto) { if (howto & RB_NOSYNC) return; mtx_lock(&sync_mtx); syncer_state = SYNCER_SHUTTING_DOWN; rushjob = 0; mtx_unlock(&sync_mtx); cv_broadcast(&sync_wakeup); kproc_shutdown(arg, howto); } void syncer_suspend(void) { syncer_shutdown(updateproc, 0); } void syncer_resume(void) { mtx_lock(&sync_mtx); first_printf = 1; syncer_state = SYNCER_RUNNING; mtx_unlock(&sync_mtx); cv_broadcast(&sync_wakeup); kproc_resume(updateproc); } /* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. */ void reassignbuf(struct buf *bp) { struct vnode *vp; struct bufobj *bo; int delay; #ifdef INVARIANTS struct bufv *bv; #endif vp = bp->b_vp; bo = bp->b_bufobj; ++reassignbufcalls; CTR3(KTR_BUF, "reassignbuf(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); /* * B_PAGING flagged buffers cannot be reassigned because their vp * is not fully linked in. */ if (bp->b_flags & B_PAGING) panic("cannot reassign paging buffer"); /* * Delete from old vnode list, if on one. */ BO_LOCK(bo); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); else panic("reassignbuf: Buffer %p not on queue.", bp); /* * If dirty, put on list of dirty buffers; otherwise insert onto list * of clean buffers. */ if (bp->b_flags & B_DELWRI) { if ((bo->bo_flag & BO_ONWORKLST) == 0) { switch (vp->v_type) { case VDIR: delay = dirdelay; break; case VCHR: delay = metadelay; break; default: delay = filedelay; } vn_syncer_add_to_worklist(bo, delay); } buf_vlist_add(bp, bo, BX_VNDIRTY); } else { buf_vlist_add(bp, bo, BX_VNCLEAN); if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { mtx_lock(&sync_mtx); LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; mtx_unlock(&sync_mtx); bo->bo_flag &= ~BO_ONWORKLST; } } #ifdef INVARIANTS bv = &bo->bo_clean; bp = TAILQ_FIRST(&bv->bv_hd); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bp = TAILQ_LAST(&bv->bv_hd, buflists); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bv = &bo->bo_dirty; bp = TAILQ_FIRST(&bv->bv_hd); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bp = TAILQ_LAST(&bv->bv_hd, buflists); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); #endif BO_UNLOCK(bo); } static void v_init_counters(struct vnode *vp) { VNASSERT(vp->v_type == VNON && vp->v_data == NULL && vp->v_iflag == 0, vp, ("%s called for an initialized vnode", __FUNCTION__)); ASSERT_VI_UNLOCKED(vp, __FUNCTION__); refcount_init(&vp->v_holdcnt, 1); refcount_init(&vp->v_usecount, 1); } /* * Increment si_usecount of the associated device, if any. */ static void v_incr_devcount(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __FUNCTION__); if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount++; dev_unlock(); } } /* * Decrement si_usecount of the associated device, if any. */ static void v_decr_devcount(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __FUNCTION__); if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount--; dev_unlock(); } } /* * Grab a particular vnode from the free list, increment its * reference count and lock it. VIRF_DOOMED is set if the vnode * is being destroyed. Only callers who specify LK_RETRY will * see doomed vnodes. If inactive processing was delayed in * vput try to do it here. * - * Both holdcnt and usecount can be manipulated using atomics without holding - * any locks except in these cases which require the vnode interlock: - * holdcnt: 1->0 and 0->1 - * usecount: 0->1 - * - * usecount is permitted to transition 1->0 without the interlock because - * vnode is kept live by holdcnt. + * usecount is manipulated using atomics without holding any locks, + * except when transitioning 0->1 in which case the interlock is held. + + * holdcnt is manipulated using atomics without holding any locks, + * except when transitioning 1->0 in which case the interlock is held. */ -static enum vgetstate __always_inline -_vget_prep(struct vnode *vp, bool interlock) +enum vgetstate +vget_prep(struct vnode *vp) { enum vgetstate vs; if (refcount_acquire_if_not_zero(&vp->v_usecount)) { vs = VGET_USECOUNT; } else { - if (interlock) - vholdl(vp); - else - vhold(vp); + vhold(vp); vs = VGET_HOLDCNT; } return (vs); } -enum vgetstate -vget_prep(struct vnode *vp) -{ - - return (_vget_prep(vp, false)); -} - int vget(struct vnode *vp, int flags, struct thread *td) { enum vgetstate vs; MPASS(td == curthread); - vs = _vget_prep(vp, (flags & LK_INTERLOCK) != 0); + vs = vget_prep(vp); return (vget_finish(vp, flags, vs)); } int vget_finish(struct vnode *vp, int flags, enum vgetstate vs) { int error, oweinact; VNASSERT((flags & LK_TYPE_MASK) != 0, vp, ("%s: invalid lock operation", __func__)); if ((flags & LK_INTERLOCK) != 0) ASSERT_VI_LOCKED(vp, __func__); else ASSERT_VI_UNLOCKED(vp, __func__); VNASSERT(vp->v_holdcnt > 0, vp, ("%s: vnode not held", __func__)); if (vs == VGET_USECOUNT) { VNASSERT(vp->v_usecount > 0, vp, ("%s: vnode without usecount when VGET_USECOUNT was passed", __func__)); } if ((error = vn_lock(vp, flags)) != 0) { if (vs == VGET_USECOUNT) vrele(vp); else vdrop(vp); CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__, vp); return (error); } if (vs == VGET_USECOUNT) { VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp, ("%s: vnode with usecount and VI_OWEINACT set", __func__)); return (0); } /* * We hold the vnode. If the usecount is 0 it will be utilized to keep * the vnode around. Otherwise someone else lended their hold count and * we have to drop ours. */ if (refcount_acquire_if_not_zero(&vp->v_usecount)) { #ifdef INVARIANTS int old = atomic_fetchadd_int(&vp->v_holdcnt, -1); VNASSERT(old > 1, vp, ("%s: wrong hold count %d", __func__, old)); #else refcount_release(&vp->v_holdcnt); #endif VNODE_REFCOUNT_FENCE_ACQ(); VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp, ("%s: vnode with usecount and VI_OWEINACT set", __func__)); return (0); } /* * We don't guarantee that any particular close will * trigger inactive processing so just make a best effort * here at preventing a reference to a removed file. If * we don't succeed no harm is done. * * Upgrade our holdcnt to a usecount. */ VI_LOCK(vp); /* * See the previous section. By the time we get here we may find * ourselves in the same spot. */ if (refcount_acquire_if_not_zero(&vp->v_usecount)) { #ifdef INVARIANTS int old = atomic_fetchadd_int(&vp->v_holdcnt, -1); VNASSERT(old > 1, vp, ("%s: wrong hold count %d", __func__, old)); #else refcount_release(&vp->v_holdcnt); #endif VNODE_REFCOUNT_FENCE_ACQ(); VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp, ("%s: vnode with usecount and VI_OWEINACT set", __func__)); VI_UNLOCK(vp); return (0); } if ((vp->v_iflag & VI_OWEINACT) == 0) { oweinact = 0; } else { oweinact = 1; vp->v_iflag &= ~VI_OWEINACT; VNODE_REFCOUNT_FENCE_REL(); } v_incr_devcount(vp); refcount_acquire(&vp->v_usecount); if (oweinact && VOP_ISLOCKED(vp) == LK_EXCLUSIVE && (flags & LK_NOWAIT) == 0) vinactive(vp); VI_UNLOCK(vp); return (0); } /* * Increase the reference (use) and hold count of a vnode. * This will also remove the vnode from the free list if it is presently free. */ void vref(struct vnode *vp) { ASSERT_VI_UNLOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (refcount_acquire_if_not_zero(&vp->v_usecount)) { VNODE_REFCOUNT_FENCE_ACQ(); VNASSERT(vp->v_holdcnt > 0, vp, ("%s: active vnode not held", __func__)); VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp, ("%s: vnode with usecount and VI_OWEINACT set", __func__)); return; } VI_LOCK(vp); vrefl(vp); VI_UNLOCK(vp); } void vrefl(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (refcount_acquire_if_not_zero(&vp->v_usecount)) { VNODE_REFCOUNT_FENCE_ACQ(); VNASSERT(vp->v_holdcnt > 0, vp, ("%s: active vnode not held", __func__)); VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp, ("%s: vnode with usecount and VI_OWEINACT set", __func__)); return; } vholdl(vp); if ((vp->v_iflag & VI_OWEINACT) != 0) { vp->v_iflag &= ~VI_OWEINACT; VNODE_REFCOUNT_FENCE_REL(); } v_incr_devcount(vp); refcount_acquire(&vp->v_usecount); } void vrefact(struct vnode *vp) { CTR2(KTR_VFS, "%s: vp %p", __func__, vp); #ifdef INVARIANTS int old = atomic_fetchadd_int(&vp->v_usecount, 1); VNASSERT(old > 0, vp, ("%s: wrong use count %d", __func__, old)); #else refcount_acquire(&vp->v_usecount); #endif } /* * Return reference count of a vnode. * * The results of this call are only guaranteed when some mechanism is used to * stop other processes from gaining references to the vnode. This may be the * case if the caller holds the only reference. This is also useful when stale * data is acceptable as race conditions may be accounted for by some other * means. */ int vrefcnt(struct vnode *vp) { return (vp->v_usecount); } void vlazy(struct vnode *vp) { struct mount *mp; VNASSERT(vp->v_holdcnt > 0, vp, ("%s: vnode not held", __func__)); if ((vp->v_mflag & VMP_LAZYLIST) != 0) return; mp = vp->v_mount; mtx_lock(&mp->mnt_listmtx); if ((vp->v_mflag & VMP_LAZYLIST) == 0) { vp->v_mflag |= VMP_LAZYLIST; TAILQ_INSERT_TAIL(&mp->mnt_lazyvnodelist, vp, v_lazylist); mp->mnt_lazyvnodelistsize++; } mtx_unlock(&mp->mnt_listmtx); } static void vdefer_inactive(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __func__); VNASSERT(vp->v_iflag & VI_OWEINACT, vp, ("%s: vnode without VI_OWEINACT", __func__)); if (VN_IS_DOOMED(vp)) { vdropl(vp); return; } if (vp->v_iflag & VI_DEFINACT) { VNASSERT(vp->v_holdcnt > 1, vp, ("lost hold count")); vdropl(vp); return; } vlazy(vp); vp->v_iflag |= VI_DEFINACT; VI_UNLOCK(vp); counter_u64_add(deferred_inact, 1); } static void vdefer_inactive_cond(struct vnode *vp) { VI_LOCK(vp); VNASSERT(vp->v_holdcnt > 0, vp, ("vnode without hold count")); if ((vp->v_iflag & VI_OWEINACT) == 0) { vdropl(vp); return; } vdefer_inactive(vp); } enum vputx_op { VPUTX_VRELE, VPUTX_VPUT, VPUTX_VUNREF }; /* * Decrement the use and hold counts for a vnode. * * See an explanation near vget() as to why atomic operation is safe. */ static void vputx(struct vnode *vp, enum vputx_op func) { int error; KASSERT(vp != NULL, ("vputx: null vp")); if (func == VPUTX_VUNREF) ASSERT_VOP_LOCKED(vp, "vunref"); ASSERT_VI_UNLOCKED(vp, __func__); VNASSERT(vp->v_holdcnt > 0 && vp->v_usecount > 0, vp, ("%s: wrong ref counts", __func__)); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); /* * We want to hold the vnode until the inactive finishes to * prevent vgone() races. We drop the use count here and the * hold count below when we're done. * * If we release the last usecount we take ownership of the hold * count which provides liveness of the vnode, in which case we * have to vdrop. */ if (!refcount_release(&vp->v_usecount)) return; VI_LOCK(vp); v_decr_devcount(vp); /* * By the time we got here someone else might have transitioned * the count back to > 0. */ if (vp->v_usecount > 0) { vdropl(vp); return; } if (vp->v_iflag & VI_DOINGINACT) { vdropl(vp); return; } /* * Check if the fs wants to perform inactive processing. Note we * may be only holding the interlock, in which case it is possible * someone else called vgone on the vnode and ->v_data is now NULL. * Since vgone performs inactive on its own there is nothing to do * here but to drop our hold count. */ if (__predict_false(VN_IS_DOOMED(vp)) || VOP_NEED_INACTIVE(vp) == 0) { vdropl(vp); return; } /* * We must call VOP_INACTIVE with the node locked. Mark * as VI_DOINGINACT to avoid recursion. */ vp->v_iflag |= VI_OWEINACT; switch (func) { case VPUTX_VRELE: error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK); VI_LOCK(vp); break; case VPUTX_VPUT: error = VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_NOWAIT); VI_LOCK(vp); break; case VPUTX_VUNREF: error = 0; if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) { error = VOP_LOCK(vp, LK_TRYUPGRADE | LK_INTERLOCK); VI_LOCK(vp); } break; } VNASSERT(vp->v_usecount == 0 || (vp->v_iflag & VI_OWEINACT) == 0, vp, ("vnode with usecount and VI_OWEINACT set")); if (error == 0) { if (vp->v_iflag & VI_OWEINACT) vinactive(vp); if (func != VPUTX_VUNREF) VOP_UNLOCK(vp); vdropl(vp); } else if (vp->v_iflag & VI_OWEINACT) { vdefer_inactive(vp); } else { vdropl(vp); } } /* * Vnode put/release. * If count drops to zero, call inactive routine and return to freelist. */ void vrele(struct vnode *vp) { vputx(vp, VPUTX_VRELE); } /* * Release an already locked vnode. This give the same effects as * unlock+vrele(), but takes less time and avoids releasing and * re-aquiring the lock (as vrele() acquires the lock internally.) * * It is an invariant that all VOP_* calls operate on a held vnode. * We may be only having an implicit hold stemming from our usecount, * which we are about to release. If we unlock the vnode afterwards we * open a time window where someone else dropped the last usecount and * proceeded to free the vnode before our unlock finished. For this * reason we unlock the vnode early. This is a little bit wasteful as * it may be the vnode is exclusively locked and inactive processing is * needed, in which case we are adding work. */ void vput(struct vnode *vp) { VOP_UNLOCK(vp); vputx(vp, VPUTX_VPUT); } /* * Release an exclusively locked vnode. Do not unlock the vnode lock. */ void vunref(struct vnode *vp) { vputx(vp, VPUTX_VUNREF); } -/* - * Increase the hold count and activate if this is the first reference. - */ -static void -vhold_activate(struct vnode *vp) -{ - - ASSERT_VI_LOCKED(vp, __func__); - VNASSERT(vp->v_holdcnt == 0, vp, - ("%s: wrong hold count", __func__)); - VNASSERT(vp->v_op != NULL, vp, - ("%s: vnode already reclaimed.", __func__)); - atomic_subtract_long(&freevnodes, 1); - refcount_acquire(&vp->v_holdcnt); -} - void vhold(struct vnode *vp) { + struct vdbatch *vd; + int old; - ASSERT_VI_UNLOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); - if (refcount_acquire_if_not_zero(&vp->v_holdcnt)) + old = atomic_fetchadd_int(&vp->v_holdcnt, 1); + VNASSERT(old >= 0, vp, ("%s: wrong hold count %d", __func__, old)); + if (old != 0) return; - VI_LOCK(vp); - vholdl(vp); - VI_UNLOCK(vp); + critical_enter(); + vd = DPCPU_PTR(vd); + vd->freevnodes--; + critical_exit(); } void vholdl(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); - if (vp->v_holdcnt > 0) { - refcount_acquire(&vp->v_holdcnt); - return; - } - vhold_activate(vp); + vhold(vp); } void vholdnz(struct vnode *vp) { CTR2(KTR_VFS, "%s: vp %p", __func__, vp); #ifdef INVARIANTS int old = atomic_fetchadd_int(&vp->v_holdcnt, 1); VNASSERT(old > 0, vp, ("%s: wrong hold count %d", __func__, old)); #else atomic_add_int(&vp->v_holdcnt, 1); #endif } static void __noinline vdbatch_process(struct vdbatch *vd) { struct vnode *vp; int i; mtx_assert(&vd->lock, MA_OWNED); + MPASS(curthread->td_pinned > 0); MPASS(vd->index == VDBATCH_SIZE); mtx_lock(&vnode_list_mtx); + critical_enter(); + freevnodes += vd->freevnodes; for (i = 0; i < VDBATCH_SIZE; i++) { vp = vd->tab[i]; TAILQ_REMOVE(&vnode_list, vp, v_vnodelist); TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist); MPASS(vp->v_dbatchcpu != NOCPU); vp->v_dbatchcpu = NOCPU; } mtx_unlock(&vnode_list_mtx); + critical_exit(); + vd->freevnodes = 0; bzero(vd->tab, sizeof(vd->tab)); vd->index = 0; } static void vdbatch_enqueue(struct vnode *vp) { struct vdbatch *vd; ASSERT_VI_LOCKED(vp, __func__); VNASSERT(!VN_IS_DOOMED(vp), vp, ("%s: deferring requeue of a doomed vnode", __func__)); + critical_enter(); + vd = DPCPU_PTR(vd); + vd->freevnodes++; if (vp->v_dbatchcpu != NOCPU) { VI_UNLOCK(vp); + critical_exit(); return; } - /* - * A hack: pin us to the current CPU so that we know what to put in - * ->v_dbatchcpu. - */ sched_pin(); - vd = DPCPU_PTR(vd); + critical_exit(); mtx_lock(&vd->lock); MPASS(vd->index < VDBATCH_SIZE); MPASS(vd->tab[vd->index] == NULL); + /* + * A hack: we depend on being pinned so that we know what to put in + * ->v_dbatchcpu. + */ vp->v_dbatchcpu = curcpu; vd->tab[vd->index] = vp; vd->index++; VI_UNLOCK(vp); if (vd->index == VDBATCH_SIZE) vdbatch_process(vd); mtx_unlock(&vd->lock); sched_unpin(); } /* * This routine must only be called for vnodes which are about to be * deallocated. Supporting dequeue for arbitrary vndoes would require * validating that the locked batch matches. */ static void vdbatch_dequeue(struct vnode *vp) { struct vdbatch *vd; int i; short cpu; VNASSERT(vp->v_type == VBAD || vp->v_type == VNON, vp, ("%s: called for a used vnode\n", __func__)); cpu = vp->v_dbatchcpu; if (cpu == NOCPU) return; vd = DPCPU_ID_PTR(cpu, vd); mtx_lock(&vd->lock); for (i = 0; i < vd->index; i++) { if (vd->tab[i] != vp) continue; vp->v_dbatchcpu = NOCPU; vd->index--; vd->tab[i] = vd->tab[vd->index]; vd->tab[vd->index] = NULL; break; } mtx_unlock(&vd->lock); /* * Either we dequeued the vnode above or the target CPU beat us to it. */ MPASS(vp->v_dbatchcpu == NOCPU); } /* * Drop the hold count of the vnode. If this is the last reference to * the vnode we place it on the free list unless it has been vgone'd * (marked VIRF_DOOMED) in which case we will free it. * * Because the vnode vm object keeps a hold reference on the vnode if * there is at least one resident non-cached page, the vnode cannot * leave the active list without the page cleanup done. */ static void vdrop_deactivate(struct vnode *vp) { struct mount *mp; ASSERT_VI_LOCKED(vp, __func__); /* * Mark a vnode as free: remove it from its active list * and put it up for recycling on the freelist. */ VNASSERT(!VN_IS_DOOMED(vp), vp, ("vdrop: returning doomed vnode")); VNASSERT(vp->v_op != NULL, vp, ("vdrop: vnode already reclaimed.")); - VNASSERT(vp->v_holdcnt == 0, vp, - ("vdrop: freeing when we shouldn't")); VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp, ("vnode with VI_OWEINACT set")); VNASSERT((vp->v_iflag & VI_DEFINACT) == 0, vp, ("vnode with VI_DEFINACT set")); if (vp->v_mflag & VMP_LAZYLIST) { mp = vp->v_mount; mtx_lock(&mp->mnt_listmtx); - vp->v_mflag &= ~VMP_LAZYLIST; - TAILQ_REMOVE(&mp->mnt_lazyvnodelist, vp, v_lazylist); - mp->mnt_lazyvnodelistsize--; + VNASSERT(vp->v_mflag & VMP_LAZYLIST, vp, ("lost VMP_LAZYLIST")); + /* + * Don't remove the vnode from the lazy list if another thread + * has increased the hold count. It may have re-enqueued the + * vnode to the lazy list and is now responsible for its + * removal. + */ + if (vp->v_holdcnt == 0) { + vp->v_mflag &= ~VMP_LAZYLIST; + TAILQ_REMOVE(&mp->mnt_lazyvnodelist, vp, v_lazylist); + mp->mnt_lazyvnodelistsize--; + } mtx_unlock(&mp->mnt_listmtx); } - atomic_add_long(&freevnodes, 1); vdbatch_enqueue(vp); } void vdrop(struct vnode *vp) { ASSERT_VI_UNLOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (refcount_release_if_not_last(&vp->v_holdcnt)) return; VI_LOCK(vp); vdropl(vp); } void vdropl(struct vnode *vp) { ASSERT_VI_LOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (!refcount_release(&vp->v_holdcnt)) { VI_UNLOCK(vp); return; } if (VN_IS_DOOMED(vp)) { freevnode(vp); return; } vdrop_deactivate(vp); } /* * Call VOP_INACTIVE on the vnode and manage the DOINGINACT and OWEINACT * flags. DOINGINACT prevents us from recursing in calls to vinactive. * OWEINACT tracks whether a vnode missed a call to inactive due to a * failed lock upgrade. */ void vinactive(struct vnode *vp) { struct vm_object *obj; ASSERT_VOP_ELOCKED(vp, "vinactive"); ASSERT_VI_LOCKED(vp, "vinactive"); VNASSERT((vp->v_iflag & VI_DOINGINACT) == 0, vp, ("vinactive: recursed on VI_DOINGINACT")); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); vp->v_iflag |= VI_DOINGINACT; vp->v_iflag &= ~VI_OWEINACT; VI_UNLOCK(vp); /* * Before moving off the active list, we must be sure that any * modified pages are converted into the vnode's dirty * buffers, since these will no longer be checked once the * vnode is on the inactive list. * * The write-out of the dirty pages is asynchronous. At the * point that VOP_INACTIVE() is called, there could still be * pending I/O and dirty pages in the object. */ if ((obj = vp->v_object) != NULL && (vp->v_vflag & VV_NOSYNC) == 0 && vm_object_mightbedirty(obj)) { VM_OBJECT_WLOCK(obj); vm_object_page_clean(obj, 0, 0, 0); VM_OBJECT_WUNLOCK(obj); } VOP_INACTIVE(vp, curthread); VI_LOCK(vp); VNASSERT(vp->v_iflag & VI_DOINGINACT, vp, ("vinactive: lost VI_DOINGINACT")); vp->v_iflag &= ~VI_DOINGINACT; } /* * Remove any vnodes in the vnode table belonging to mount point mp. * * If FORCECLOSE is not specified, there should not be any active ones, * return error if any are found (nb: this is a user error, not a * system error). If FORCECLOSE is specified, detach any active vnodes * that are found. * * If WRITECLOSE is set, only flush out regular file vnodes open for * writing. * * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped. * * `rootrefs' specifies the base reference count for the root vnode * of this filesystem. The root vnode is considered busy if its * v_usecount exceeds this value. On a successful return, vflush(, td) * will call vrele() on the root vnode exactly rootrefs times. * If the SKIPSYSTEM or WRITECLOSE flags are specified, rootrefs must * be zero. */ #ifdef DIAGNOSTIC static int busyprt = 0; /* print out busy vnodes */ SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "Print out busy vnodes"); #endif int vflush(struct mount *mp, int rootrefs, int flags, struct thread *td) { struct vnode *vp, *mvp, *rootvp = NULL; struct vattr vattr; int busy = 0, error; CTR4(KTR_VFS, "%s: mp %p with rootrefs %d and flags %d", __func__, mp, rootrefs, flags); if (rootrefs > 0) { KASSERT((flags & (SKIPSYSTEM | WRITECLOSE)) == 0, ("vflush: bad args")); /* * Get the filesystem root vnode. We can vput() it * immediately, since with rootrefs > 0, it won't go away. */ if ((error = VFS_ROOT(mp, LK_EXCLUSIVE, &rootvp)) != 0) { CTR2(KTR_VFS, "%s: vfs_root lookup failed with %d", __func__, error); return (error); } vput(rootvp); } loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { vholdl(vp); error = vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE); if (error) { vdrop(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } /* * Skip over a vnodes marked VV_SYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) { VOP_UNLOCK(vp); vdrop(vp); continue; } /* * If WRITECLOSE is set, flush out unlinked but still open * files (even if open only for reading) and regular file * vnodes open for writing. */ if (flags & WRITECLOSE) { if (vp->v_object != NULL) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, 0); VM_OBJECT_WUNLOCK(vp->v_object); } error = VOP_FSYNC(vp, MNT_WAIT, td); if (error != 0) { VOP_UNLOCK(vp); vdrop(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); return (error); } error = VOP_GETATTR(vp, &vattr, td->td_ucred); VI_LOCK(vp); if ((vp->v_type == VNON || (error == 0 && vattr.va_nlink > 0)) && (vp->v_writecount <= 0 || vp->v_type != VREG)) { VOP_UNLOCK(vp); vdropl(vp); continue; } } else VI_LOCK(vp); /* * With v_usecount == 0, all we need to do is clear out the * vnode data structures and we are done. * * If FORCECLOSE is set, forcibly close the vnode. */ if (vp->v_usecount == 0 || (flags & FORCECLOSE)) { vgonel(vp); } else { busy++; #ifdef DIAGNOSTIC if (busyprt) vn_printf(vp, "vflush: busy vnode "); #endif } VOP_UNLOCK(vp); vdropl(vp); } if (rootrefs > 0 && (flags & FORCECLOSE) == 0) { /* * If just the root vnode is busy, and if its refcount * is equal to `rootrefs', then go ahead and kill it. */ VI_LOCK(rootvp); KASSERT(busy > 0, ("vflush: not busy")); VNASSERT(rootvp->v_usecount >= rootrefs, rootvp, ("vflush: usecount %d < rootrefs %d", rootvp->v_usecount, rootrefs)); if (busy == 1 && rootvp->v_usecount == rootrefs) { VOP_LOCK(rootvp, LK_EXCLUSIVE|LK_INTERLOCK); vgone(rootvp); VOP_UNLOCK(rootvp); busy = 0; } else VI_UNLOCK(rootvp); } if (busy) { CTR2(KTR_VFS, "%s: failing as %d vnodes are busy", __func__, busy); return (EBUSY); } for (; rootrefs > 0; rootrefs--) vrele(rootvp); return (0); } /* * Recycle an unused vnode to the front of the free list. */ int vrecycle(struct vnode *vp) { int recycled; VI_LOCK(vp); recycled = vrecyclel(vp); VI_UNLOCK(vp); return (recycled); } /* * vrecycle, with the vp interlock held. */ int vrecyclel(struct vnode *vp) { int recycled; ASSERT_VOP_ELOCKED(vp, __func__); ASSERT_VI_LOCKED(vp, __func__); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); recycled = 0; if (vp->v_usecount == 0) { recycled = 1; vgonel(vp); } return (recycled); } /* * Eliminate all activity associated with a vnode * in preparation for reuse. */ void vgone(struct vnode *vp) { VI_LOCK(vp); vgonel(vp); VI_UNLOCK(vp); } static void notify_lowervp_vfs_dummy(struct mount *mp __unused, struct vnode *lowervp __unused) { } /* * Notify upper mounts about reclaimed or unlinked vnode. */ void vfs_notify_upper(struct vnode *vp, int event) { static struct vfsops vgonel_vfsops = { .vfs_reclaim_lowervp = notify_lowervp_vfs_dummy, .vfs_unlink_lowervp = notify_lowervp_vfs_dummy, }; struct mount *mp, *ump, *mmp; mp = vp->v_mount; if (mp == NULL) return; if (TAILQ_EMPTY(&mp->mnt_uppers)) return; mmp = malloc(sizeof(struct mount), M_TEMP, M_WAITOK | M_ZERO); mmp->mnt_op = &vgonel_vfsops; mmp->mnt_kern_flag |= MNTK_MARKER; MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_VGONE_UPPER; for (ump = TAILQ_FIRST(&mp->mnt_uppers); ump != NULL;) { if ((ump->mnt_kern_flag & MNTK_MARKER) != 0) { ump = TAILQ_NEXT(ump, mnt_upper_link); continue; } TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link); MNT_IUNLOCK(mp); switch (event) { case VFS_NOTIFY_UPPER_RECLAIM: VFS_RECLAIM_LOWERVP(ump, vp); break; case VFS_NOTIFY_UPPER_UNLINK: VFS_UNLINK_LOWERVP(ump, vp); break; default: KASSERT(0, ("invalid event %d", event)); break; } MNT_ILOCK(mp); ump = TAILQ_NEXT(mmp, mnt_upper_link); TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link); } free(mmp, M_TEMP); mp->mnt_kern_flag &= ~MNTK_VGONE_UPPER; if ((mp->mnt_kern_flag & MNTK_VGONE_WAITER) != 0) { mp->mnt_kern_flag &= ~MNTK_VGONE_WAITER; wakeup(&mp->mnt_uppers); } MNT_IUNLOCK(mp); } /* * vgone, with the vp interlock held. */ static void vgonel(struct vnode *vp) { struct thread *td; struct mount *mp; vm_object_t object; bool active, oweinact; ASSERT_VOP_ELOCKED(vp, "vgonel"); ASSERT_VI_LOCKED(vp, "vgonel"); VNASSERT(vp->v_holdcnt, vp, ("vgonel: vp %p has no reference.", vp)); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); td = curthread; /* * Don't vgonel if we're already doomed. */ if (vp->v_irflag & VIRF_DOOMED) return; vp->v_irflag |= VIRF_DOOMED; /* * Check to see if the vnode is in use. If so, we have to call * VOP_CLOSE() and VOP_INACTIVE(). */ active = vp->v_usecount > 0; oweinact = (vp->v_iflag & VI_OWEINACT) != 0; /* * If we need to do inactive VI_OWEINACT will be set. */ if (vp->v_iflag & VI_DEFINACT) { VNASSERT(vp->v_holdcnt > 1, vp, ("lost hold count")); vp->v_iflag &= ~VI_DEFINACT; vdropl(vp); } else { VNASSERT(vp->v_holdcnt > 0, vp, ("vnode without hold count")); VI_UNLOCK(vp); } vfs_notify_upper(vp, VFS_NOTIFY_UPPER_RECLAIM); /* * If purging an active vnode, it must be closed and * deactivated before being reclaimed. */ if (active) VOP_CLOSE(vp, FNONBLOCK, NOCRED, td); if (oweinact || active) { VI_LOCK(vp); if ((vp->v_iflag & VI_DOINGINACT) == 0) vinactive(vp); VI_UNLOCK(vp); } if (vp->v_type == VSOCK) vfs_unp_reclaim(vp); /* * Clean out any buffers associated with the vnode. * If the flush fails, just toss the buffers. */ mp = NULL; if (!TAILQ_EMPTY(&vp->v_bufobj.bo_dirty.bv_hd)) (void) vn_start_secondary_write(vp, &mp, V_WAIT); if (vinvalbuf(vp, V_SAVE, 0, 0) != 0) { while (vinvalbuf(vp, 0, 0, 0) != 0) ; } BO_LOCK(&vp->v_bufobj); KASSERT(TAILQ_EMPTY(&vp->v_bufobj.bo_dirty.bv_hd) && vp->v_bufobj.bo_dirty.bv_cnt == 0 && TAILQ_EMPTY(&vp->v_bufobj.bo_clean.bv_hd) && vp->v_bufobj.bo_clean.bv_cnt == 0, ("vp %p bufobj not invalidated", vp)); /* * For VMIO bufobj, BO_DEAD is set later, or in * vm_object_terminate() after the object's page queue is * flushed. */ object = vp->v_bufobj.bo_object; if (object == NULL) vp->v_bufobj.bo_flag |= BO_DEAD; BO_UNLOCK(&vp->v_bufobj); /* * Handle the VM part. Tmpfs handles v_object on its own (the * OBJT_VNODE check). Nullfs or other bypassing filesystems * should not touch the object borrowed from the lower vnode * (the handle check). */ if (object != NULL && object->type == OBJT_VNODE && object->handle == vp) vnode_destroy_vobject(vp); /* * Reclaim the vnode. */ if (VOP_RECLAIM(vp, td)) panic("vgone: cannot reclaim"); if (mp != NULL) vn_finished_secondary_write(mp); VNASSERT(vp->v_object == NULL, vp, ("vop_reclaim left v_object vp=%p", vp)); /* * Clear the advisory locks and wake up waiting threads. */ (void)VOP_ADVLOCKPURGE(vp); vp->v_lockf = NULL; /* * Delete from old mount point vnode list. */ delmntque(vp); cache_purge(vp); /* * Done with purge, reset to the standard lock and invalidate * the vnode. */ VI_LOCK(vp); vp->v_vnlock = &vp->v_lock; vp->v_op = &dead_vnodeops; vp->v_type = VBAD; } /* * Calculate the total number of references to a special device. */ int vcount(struct vnode *vp) { int count; dev_lock(); count = vp->v_rdev->si_usecount; dev_unlock(); return (count); } /* * Print out a description of a vnode. */ static char *typename[] = {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD", "VMARKER"}; void vn_printf(struct vnode *vp, const char *fmt, ...) { va_list ap; char buf[256], buf2[16]; u_long flags; va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("%p: ", (void *)vp); printf("type %s\n", typename[vp->v_type]); printf(" usecount %d, writecount %d, refcount %d", vp->v_usecount, vp->v_writecount, vp->v_holdcnt); switch (vp->v_type) { case VDIR: printf(" mountedhere %p\n", vp->v_mountedhere); break; case VCHR: printf(" rdev %p\n", vp->v_rdev); break; case VSOCK: printf(" socket %p\n", vp->v_unpcb); break; case VFIFO: printf(" fifoinfo %p\n", vp->v_fifoinfo); break; default: printf("\n"); break; } buf[0] = '\0'; buf[1] = '\0'; if (vp->v_irflag & VIRF_DOOMED) strlcat(buf, "|VIRF_DOOMED", sizeof(buf)); flags = vp->v_irflag & ~(VIRF_DOOMED); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VIRF(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } if (vp->v_vflag & VV_ROOT) strlcat(buf, "|VV_ROOT", sizeof(buf)); if (vp->v_vflag & VV_ISTTY) strlcat(buf, "|VV_ISTTY", sizeof(buf)); if (vp->v_vflag & VV_NOSYNC) strlcat(buf, "|VV_NOSYNC", sizeof(buf)); if (vp->v_vflag & VV_ETERNALDEV) strlcat(buf, "|VV_ETERNALDEV", sizeof(buf)); if (vp->v_vflag & VV_CACHEDLABEL) strlcat(buf, "|VV_CACHEDLABEL", sizeof(buf)); if (vp->v_vflag & VV_VMSIZEVNLOCK) strlcat(buf, "|VV_VMSIZEVNLOCK", sizeof(buf)); if (vp->v_vflag & VV_COPYONWRITE) strlcat(buf, "|VV_COPYONWRITE", sizeof(buf)); if (vp->v_vflag & VV_SYSTEM) strlcat(buf, "|VV_SYSTEM", sizeof(buf)); if (vp->v_vflag & VV_PROCDEP) strlcat(buf, "|VV_PROCDEP", sizeof(buf)); if (vp->v_vflag & VV_NOKNOTE) strlcat(buf, "|VV_NOKNOTE", sizeof(buf)); if (vp->v_vflag & VV_DELETED) strlcat(buf, "|VV_DELETED", sizeof(buf)); if (vp->v_vflag & VV_MD) strlcat(buf, "|VV_MD", sizeof(buf)); if (vp->v_vflag & VV_FORCEINSMQ) strlcat(buf, "|VV_FORCEINSMQ", sizeof(buf)); if (vp->v_vflag & VV_READLINK) strlcat(buf, "|VV_READLINK", sizeof(buf)); flags = vp->v_vflag & ~(VV_ROOT | VV_ISTTY | VV_NOSYNC | VV_ETERNALDEV | VV_CACHEDLABEL | VV_COPYONWRITE | VV_SYSTEM | VV_PROCDEP | VV_NOKNOTE | VV_DELETED | VV_MD | VV_FORCEINSMQ); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VV(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } if (vp->v_iflag & VI_TEXT_REF) strlcat(buf, "|VI_TEXT_REF", sizeof(buf)); if (vp->v_iflag & VI_MOUNT) strlcat(buf, "|VI_MOUNT", sizeof(buf)); if (vp->v_iflag & VI_DOINGINACT) strlcat(buf, "|VI_DOINGINACT", sizeof(buf)); if (vp->v_iflag & VI_OWEINACT) strlcat(buf, "|VI_OWEINACT", sizeof(buf)); if (vp->v_iflag & VI_DEFINACT) strlcat(buf, "|VI_DEFINACT", sizeof(buf)); flags = vp->v_iflag & ~(VI_TEXT_REF | VI_MOUNT | VI_DOINGINACT | VI_OWEINACT | VI_DEFINACT); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VI(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } if (vp->v_mflag & VMP_LAZYLIST) strlcat(buf, "|VMP_LAZYLIST", sizeof(buf)); flags = vp->v_mflag & ~(VMP_LAZYLIST); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VMP(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } printf(" flags (%s)\n", buf + 1); if (mtx_owned(VI_MTX(vp))) printf(" VI_LOCKed"); if (vp->v_object != NULL) printf(" v_object %p ref %d pages %d " "cleanbuf %d dirtybuf %d\n", vp->v_object, vp->v_object->ref_count, vp->v_object->resident_page_count, vp->v_bufobj.bo_clean.bv_cnt, vp->v_bufobj.bo_dirty.bv_cnt); printf(" "); lockmgr_printinfo(vp->v_vnlock); if (vp->v_data != NULL) VOP_PRINT(vp); } #ifdef DDB /* * List all of the locked vnodes in the system. * Called when debugging the kernel. */ DB_SHOW_COMMAND(lockedvnods, lockedvnodes) { struct mount *mp; struct vnode *vp; /* * Note: because this is DDB, we can't obey the locking semantics * for these structures, which means we could catch an inconsistent * state and dereference a nasty pointer. Not much to be done * about that. */ db_printf("Locked vnodes\n"); TAILQ_FOREACH(mp, &mountlist, mnt_list) { TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER && VOP_ISLOCKED(vp)) vn_printf(vp, "vnode "); } } } /* * Show details about the given vnode. */ DB_SHOW_COMMAND(vnode, db_show_vnode) { struct vnode *vp; if (!have_addr) return; vp = (struct vnode *)addr; vn_printf(vp, "vnode "); } /* * Show details about the given mount point. */ DB_SHOW_COMMAND(mount, db_show_mount) { struct mount *mp; struct vfsopt *opt; struct statfs *sp; struct vnode *vp; char buf[512]; uint64_t mflags; u_int flags; if (!have_addr) { /* No address given, print short info about all mount points. */ TAILQ_FOREACH(mp, &mountlist, mnt_list) { db_printf("%p %s on %s (%s)\n", mp, mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename); if (db_pager_quit) break; } db_printf("\nMore info: show mount \n"); return; } mp = (struct mount *)addr; db_printf("%p %s on %s (%s)\n", mp, mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename); buf[0] = '\0'; mflags = mp->mnt_flag; #define MNT_FLAG(flag) do { \ if (mflags & (flag)) { \ if (buf[0] != '\0') \ strlcat(buf, ", ", sizeof(buf)); \ strlcat(buf, (#flag) + 4, sizeof(buf)); \ mflags &= ~(flag); \ } \ } while (0) MNT_FLAG(MNT_RDONLY); MNT_FLAG(MNT_SYNCHRONOUS); MNT_FLAG(MNT_NOEXEC); MNT_FLAG(MNT_NOSUID); MNT_FLAG(MNT_NFS4ACLS); MNT_FLAG(MNT_UNION); MNT_FLAG(MNT_ASYNC); MNT_FLAG(MNT_SUIDDIR); MNT_FLAG(MNT_SOFTDEP); MNT_FLAG(MNT_NOSYMFOLLOW); MNT_FLAG(MNT_GJOURNAL); MNT_FLAG(MNT_MULTILABEL); MNT_FLAG(MNT_ACLS); MNT_FLAG(MNT_NOATIME); MNT_FLAG(MNT_NOCLUSTERR); MNT_FLAG(MNT_NOCLUSTERW); MNT_FLAG(MNT_SUJ); MNT_FLAG(MNT_EXRDONLY); MNT_FLAG(MNT_EXPORTED); MNT_FLAG(MNT_DEFEXPORTED); MNT_FLAG(MNT_EXPORTANON); MNT_FLAG(MNT_EXKERB); MNT_FLAG(MNT_EXPUBLIC); MNT_FLAG(MNT_LOCAL); MNT_FLAG(MNT_QUOTA); MNT_FLAG(MNT_ROOTFS); MNT_FLAG(MNT_USER); MNT_FLAG(MNT_IGNORE); MNT_FLAG(MNT_UPDATE); MNT_FLAG(MNT_DELEXPORT); MNT_FLAG(MNT_RELOAD); MNT_FLAG(MNT_FORCE); MNT_FLAG(MNT_SNAPSHOT); MNT_FLAG(MNT_BYFSID); #undef MNT_FLAG if (mflags != 0) { if (buf[0] != '\0') strlcat(buf, ", ", sizeof(buf)); snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "0x%016jx", mflags); } db_printf(" mnt_flag = %s\n", buf); buf[0] = '\0'; flags = mp->mnt_kern_flag; #define MNT_KERN_FLAG(flag) do { \ if (flags & (flag)) { \ if (buf[0] != '\0') \ strlcat(buf, ", ", sizeof(buf)); \ strlcat(buf, (#flag) + 5, sizeof(buf)); \ flags &= ~(flag); \ } \ } while (0) MNT_KERN_FLAG(MNTK_UNMOUNTF); MNT_KERN_FLAG(MNTK_ASYNC); MNT_KERN_FLAG(MNTK_SOFTDEP); MNT_KERN_FLAG(MNTK_DRAINING); MNT_KERN_FLAG(MNTK_REFEXPIRE); MNT_KERN_FLAG(MNTK_EXTENDED_SHARED); MNT_KERN_FLAG(MNTK_SHARED_WRITES); MNT_KERN_FLAG(MNTK_NO_IOPF); MNT_KERN_FLAG(MNTK_VGONE_UPPER); MNT_KERN_FLAG(MNTK_VGONE_WAITER); MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT); MNT_KERN_FLAG(MNTK_MARKER); MNT_KERN_FLAG(MNTK_USES_BCACHE); MNT_KERN_FLAG(MNTK_NOASYNC); MNT_KERN_FLAG(MNTK_UNMOUNT); MNT_KERN_FLAG(MNTK_MWAIT); MNT_KERN_FLAG(MNTK_SUSPEND); MNT_KERN_FLAG(MNTK_SUSPEND2); MNT_KERN_FLAG(MNTK_SUSPENDED); MNT_KERN_FLAG(MNTK_LOOKUP_SHARED); MNT_KERN_FLAG(MNTK_NOKNOTE); #undef MNT_KERN_FLAG if (flags != 0) { if (buf[0] != '\0') strlcat(buf, ", ", sizeof(buf)); snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "0x%08x", flags); } db_printf(" mnt_kern_flag = %s\n", buf); db_printf(" mnt_opt = "); opt = TAILQ_FIRST(mp->mnt_opt); if (opt != NULL) { db_printf("%s", opt->name); opt = TAILQ_NEXT(opt, link); while (opt != NULL) { db_printf(", %s", opt->name); opt = TAILQ_NEXT(opt, link); } } db_printf("\n"); sp = &mp->mnt_stat; db_printf(" mnt_stat = { version=%u type=%u flags=0x%016jx " "bsize=%ju iosize=%ju blocks=%ju bfree=%ju bavail=%jd files=%ju " "ffree=%jd syncwrites=%ju asyncwrites=%ju syncreads=%ju " "asyncreads=%ju namemax=%u owner=%u fsid=[%d, %d] }\n", (u_int)sp->f_version, (u_int)sp->f_type, (uintmax_t)sp->f_flags, (uintmax_t)sp->f_bsize, (uintmax_t)sp->f_iosize, (uintmax_t)sp->f_blocks, (uintmax_t)sp->f_bfree, (intmax_t)sp->f_bavail, (uintmax_t)sp->f_files, (intmax_t)sp->f_ffree, (uintmax_t)sp->f_syncwrites, (uintmax_t)sp->f_asyncwrites, (uintmax_t)sp->f_syncreads, (uintmax_t)sp->f_asyncreads, (u_int)sp->f_namemax, (u_int)sp->f_owner, (int)sp->f_fsid.val[0], (int)sp->f_fsid.val[1]); db_printf(" mnt_cred = { uid=%u ruid=%u", (u_int)mp->mnt_cred->cr_uid, (u_int)mp->mnt_cred->cr_ruid); if (jailed(mp->mnt_cred)) db_printf(", jail=%d", mp->mnt_cred->cr_prison->pr_id); db_printf(" }\n"); db_printf(" mnt_ref = %d (with %d in the struct)\n", vfs_mount_fetch_counter(mp, MNT_COUNT_REF), mp->mnt_ref); db_printf(" mnt_gen = %d\n", mp->mnt_gen); db_printf(" mnt_nvnodelistsize = %d\n", mp->mnt_nvnodelistsize); db_printf(" mnt_lazyvnodelistsize = %d\n", mp->mnt_lazyvnodelistsize); db_printf(" mnt_writeopcount = %d (with %d in the struct)\n", vfs_mount_fetch_counter(mp, MNT_COUNT_WRITEOPCOUNT), mp->mnt_writeopcount); db_printf(" mnt_maxsymlinklen = %d\n", mp->mnt_maxsymlinklen); db_printf(" mnt_iosize_max = %d\n", mp->mnt_iosize_max); db_printf(" mnt_hashseed = %u\n", mp->mnt_hashseed); db_printf(" mnt_lockref = %d (with %d in the struct)\n", vfs_mount_fetch_counter(mp, MNT_COUNT_LOCKREF), mp->mnt_lockref); db_printf(" mnt_secondary_writes = %d\n", mp->mnt_secondary_writes); db_printf(" mnt_secondary_accwrites = %d\n", mp->mnt_secondary_accwrites); db_printf(" mnt_gjprovider = %s\n", mp->mnt_gjprovider != NULL ? mp->mnt_gjprovider : "NULL"); db_printf(" mnt_vfs_ops = %d\n", mp->mnt_vfs_ops); db_printf("\n\nList of active vnodes\n"); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER && vp->v_holdcnt > 0) { vn_printf(vp, "vnode "); if (db_pager_quit) break; } } db_printf("\n\nList of inactive vnodes\n"); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER && vp->v_holdcnt == 0) { vn_printf(vp, "vnode "); if (db_pager_quit) break; } } } #endif /* DDB */ /* * Fill in a struct xvfsconf based on a struct vfsconf. */ static int vfsconf2x(struct sysctl_req *req, struct vfsconf *vfsp) { struct xvfsconf xvfsp; bzero(&xvfsp, sizeof(xvfsp)); strcpy(xvfsp.vfc_name, vfsp->vfc_name); xvfsp.vfc_typenum = vfsp->vfc_typenum; xvfsp.vfc_refcount = vfsp->vfc_refcount; xvfsp.vfc_flags = vfsp->vfc_flags; /* * These are unused in userland, we keep them * to not break binary compatibility. */ xvfsp.vfc_vfsops = NULL; xvfsp.vfc_next = NULL; return (SYSCTL_OUT(req, &xvfsp, sizeof(xvfsp))); } #ifdef COMPAT_FREEBSD32 struct xvfsconf32 { uint32_t vfc_vfsops; char vfc_name[MFSNAMELEN]; int32_t vfc_typenum; int32_t vfc_refcount; int32_t vfc_flags; uint32_t vfc_next; }; static int vfsconf2x32(struct sysctl_req *req, struct vfsconf *vfsp) { struct xvfsconf32 xvfsp; bzero(&xvfsp, sizeof(xvfsp)); strcpy(xvfsp.vfc_name, vfsp->vfc_name); xvfsp.vfc_typenum = vfsp->vfc_typenum; xvfsp.vfc_refcount = vfsp->vfc_refcount; xvfsp.vfc_flags = vfsp->vfc_flags; return (SYSCTL_OUT(req, &xvfsp, sizeof(xvfsp))); } #endif /* * Top level filesystem related information gathering. */ static int sysctl_vfs_conflist(SYSCTL_HANDLER_ARGS) { struct vfsconf *vfsp; int error; error = 0; vfsconf_slock(); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { #ifdef COMPAT_FREEBSD32 if (req->flags & SCTL_MASK32) error = vfsconf2x32(req, vfsp); else #endif error = vfsconf2x(req, vfsp); if (error) break; } vfsconf_sunlock(); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, conflist, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_vfs_conflist, "S,xvfsconf", "List of all configured filesystems"); #ifndef BURN_BRIDGES static int sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS); static int vfs_sysctl(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1 - 1; /* XXX */ u_int namelen = arg2 + 1; /* XXX */ struct vfsconf *vfsp; log(LOG_WARNING, "userland calling deprecated sysctl, " "please rebuild world\n"); #if 1 || defined(COMPAT_PRELITE2) /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */ if (namelen == 1) return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); #endif switch (name[1]) { case VFS_MAXTYPENUM: if (namelen != 2) return (ENOTDIR); return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); case VFS_CONF: if (namelen != 3) return (ENOTDIR); /* overloaded */ vfsconf_slock(); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { if (vfsp->vfc_typenum == name[2]) break; } vfsconf_sunlock(); if (vfsp == NULL) return (EOPNOTSUPP); #ifdef COMPAT_FREEBSD32 if (req->flags & SCTL_MASK32) return (vfsconf2x32(req, vfsp)); else #endif return (vfsconf2x(req, vfsp)); } return (EOPNOTSUPP); } static SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, vfs_sysctl, "Generic filesystem"); #if 1 || defined(COMPAT_PRELITE2) static int sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS) { int error; struct vfsconf *vfsp; struct ovfsconf ovfs; vfsconf_slock(); TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { bzero(&ovfs, sizeof(ovfs)); ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ strcpy(ovfs.vfc_name, vfsp->vfc_name); ovfs.vfc_index = vfsp->vfc_typenum; ovfs.vfc_refcount = vfsp->vfc_refcount; ovfs.vfc_flags = vfsp->vfc_flags; error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); if (error != 0) { vfsconf_sunlock(); return (error); } } vfsconf_sunlock(); return (0); } #endif /* 1 || COMPAT_PRELITE2 */ #endif /* !BURN_BRIDGES */ #define KINFO_VNODESLOP 10 #ifdef notyet /* * Dump vnode list (via sysctl). */ /* ARGSUSED */ static int sysctl_vnode(SYSCTL_HANDLER_ARGS) { struct xvnode *xvn; struct mount *mp; struct vnode *vp; int error, len, n; /* * Stale numvnodes access is not fatal here. */ req->lock = 0; len = (numvnodes + KINFO_VNODESLOP) * sizeof *xvn; if (!req->oldptr) /* Make an estimate */ return (SYSCTL_OUT(req, 0, len)); error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); xvn = malloc(len, M_TEMP, M_ZERO | M_WAITOK); n = 0; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) continue; MNT_ILOCK(mp); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (n == len) break; vref(vp); xvn[n].xv_size = sizeof *xvn; xvn[n].xv_vnode = vp; xvn[n].xv_id = 0; /* XXX compat */ #define XV_COPY(field) xvn[n].xv_##field = vp->v_##field XV_COPY(usecount); XV_COPY(writecount); XV_COPY(holdcnt); XV_COPY(mount); XV_COPY(numoutput); XV_COPY(type); #undef XV_COPY xvn[n].xv_flag = vp->v_vflag; switch (vp->v_type) { case VREG: case VDIR: case VLNK: break; case VBLK: case VCHR: if (vp->v_rdev == NULL) { vrele(vp); continue; } xvn[n].xv_dev = dev2udev(vp->v_rdev); break; case VSOCK: xvn[n].xv_socket = vp->v_socket; break; case VFIFO: xvn[n].xv_fifo = vp->v_fifoinfo; break; case VNON: case VBAD: default: /* shouldn't happen? */ vrele(vp); continue; } vrele(vp); ++n; } MNT_IUNLOCK(mp); mtx_lock(&mountlist_mtx); vfs_unbusy(mp); if (n == len) break; } mtx_unlock(&mountlist_mtx); error = SYSCTL_OUT(req, xvn, n * sizeof *xvn); free(xvn, M_TEMP); return (error); } SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, sysctl_vnode, "S,xvnode", ""); #endif static void unmount_or_warn(struct mount *mp) { int error; error = dounmount(mp, MNT_FORCE, curthread); if (error != 0) { printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } } /* * Unmount all filesystems. The list is traversed in reverse order * of mounting to avoid dependencies. */ void vfs_unmountall(void) { struct mount *mp, *tmp; CTR1(KTR_VFS, "%s: unmounting all filesystems", __func__); /* * Since this only runs when rebooting, it is not interlocked. */ TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, tmp) { vfs_ref(mp); /* * Forcibly unmounting "/dev" before "/" would prevent clean * unmount of the latter. */ if (mp == rootdevmp) continue; unmount_or_warn(mp); } if (rootdevmp != NULL) unmount_or_warn(rootdevmp); } static void vfs_deferred_inactive(struct vnode *vp, int lkflags) { ASSERT_VI_LOCKED(vp, __func__); VNASSERT((vp->v_iflag & VI_DEFINACT) == 0, vp, ("VI_DEFINACT still set")); if ((vp->v_iflag & VI_OWEINACT) == 0) { vdropl(vp); return; } if (vn_lock(vp, lkflags) == 0) { VI_LOCK(vp); if ((vp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == VI_OWEINACT) vinactive(vp); VOP_UNLOCK(vp); vdropl(vp); return; } vdefer_inactive_cond(vp); } static int vfs_periodic_inactive_filter(struct vnode *vp, void *arg) { return (vp->v_iflag & VI_DEFINACT); } static void __noinline vfs_periodic_inactive(struct mount *mp, int flags) { struct vnode *vp, *mvp; int lkflags; lkflags = LK_EXCLUSIVE | LK_INTERLOCK; if (flags != MNT_WAIT) lkflags |= LK_NOWAIT; MNT_VNODE_FOREACH_LAZY(vp, mp, mvp, vfs_periodic_inactive_filter, NULL) { if ((vp->v_iflag & VI_DEFINACT) == 0) { VI_UNLOCK(vp); continue; } vp->v_iflag &= ~VI_DEFINACT; vfs_deferred_inactive(vp, lkflags); } } static inline bool vfs_want_msync(struct vnode *vp) { struct vm_object *obj; /* * This test may be performed without any locks held. * We rely on vm_object's type stability. */ if (vp->v_vflag & VV_NOSYNC) return (false); obj = vp->v_object; return (obj != NULL && vm_object_mightbedirty(obj)); } static int vfs_periodic_msync_inactive_filter(struct vnode *vp, void *arg __unused) { if (vp->v_vflag & VV_NOSYNC) return (false); if (vp->v_iflag & VI_DEFINACT) return (true); return (vfs_want_msync(vp)); } static void __noinline vfs_periodic_msync_inactive(struct mount *mp, int flags) { struct vnode *vp, *mvp; struct vm_object *obj; struct thread *td; int lkflags, objflags; bool seen_defer; td = curthread; lkflags = LK_EXCLUSIVE | LK_INTERLOCK; if (flags != MNT_WAIT) { lkflags |= LK_NOWAIT; objflags = OBJPC_NOSYNC; } else { objflags = OBJPC_SYNC; } MNT_VNODE_FOREACH_LAZY(vp, mp, mvp, vfs_periodic_msync_inactive_filter, NULL) { seen_defer = false; if (vp->v_iflag & VI_DEFINACT) { vp->v_iflag &= ~VI_DEFINACT; seen_defer = true; } if (!vfs_want_msync(vp)) { if (seen_defer) vfs_deferred_inactive(vp, lkflags); else VI_UNLOCK(vp); continue; } if (vget(vp, lkflags, td) == 0) { obj = vp->v_object; if (obj != NULL && (vp->v_vflag & VV_NOSYNC) == 0) { VM_OBJECT_WLOCK(obj); vm_object_page_clean(obj, 0, 0, objflags); VM_OBJECT_WUNLOCK(obj); } vput(vp); if (seen_defer) vdrop(vp); } else { if (seen_defer) vdefer_inactive_cond(vp); } } } void vfs_periodic(struct mount *mp, int flags) { CTR2(KTR_VFS, "%s: mp %p", __func__, mp); if ((mp->mnt_kern_flag & MNTK_NOMSYNC) != 0) vfs_periodic_inactive(mp, flags); else vfs_periodic_msync_inactive(mp, flags); } static void destroy_vpollinfo_free(struct vpollinfo *vi) { knlist_destroy(&vi->vpi_selinfo.si_note); mtx_destroy(&vi->vpi_lock); uma_zfree(vnodepoll_zone, vi); } static void destroy_vpollinfo(struct vpollinfo *vi) { knlist_clear(&vi->vpi_selinfo.si_note, 1); seldrain(&vi->vpi_selinfo); destroy_vpollinfo_free(vi); } /* * Initialize per-vnode helper structure to hold poll-related state. */ void v_addpollinfo(struct vnode *vp) { struct vpollinfo *vi; if (vp->v_pollinfo != NULL) return; vi = uma_zalloc(vnodepoll_zone, M_WAITOK | M_ZERO); mtx_init(&vi->vpi_lock, "vnode pollinfo", NULL, MTX_DEF); knlist_init(&vi->vpi_selinfo.si_note, vp, vfs_knllock, vfs_knlunlock, vfs_knl_assert_locked, vfs_knl_assert_unlocked); VI_LOCK(vp); if (vp->v_pollinfo != NULL) { VI_UNLOCK(vp); destroy_vpollinfo_free(vi); return; } vp->v_pollinfo = vi; VI_UNLOCK(vp); } /* * Record a process's interest in events which might happen to * a vnode. Because poll uses the historic select-style interface * internally, this routine serves as both the ``check for any * pending events'' and the ``record my interest in future events'' * functions. (These are done together, while the lock is held, * to avoid race conditions.) */ int vn_pollrecord(struct vnode *vp, struct thread *td, int events) { v_addpollinfo(vp); mtx_lock(&vp->v_pollinfo->vpi_lock); if (vp->v_pollinfo->vpi_revents & events) { /* * This leaves events we are not interested * in available for the other process which * which presumably had requested them * (otherwise they would never have been * recorded). */ events &= vp->v_pollinfo->vpi_revents; vp->v_pollinfo->vpi_revents &= ~events; mtx_unlock(&vp->v_pollinfo->vpi_lock); return (events); } vp->v_pollinfo->vpi_events |= events; selrecord(td, &vp->v_pollinfo->vpi_selinfo); mtx_unlock(&vp->v_pollinfo->vpi_lock); return (0); } /* * Routine to create and manage a filesystem syncer vnode. */ #define sync_close ((int (*)(struct vop_close_args *))nullop) static int sync_fsync(struct vop_fsync_args *); static int sync_inactive(struct vop_inactive_args *); static int sync_reclaim(struct vop_reclaim_args *); static struct vop_vector sync_vnodeops = { .vop_bypass = VOP_EOPNOTSUPP, .vop_close = sync_close, /* close */ .vop_fsync = sync_fsync, /* fsync */ .vop_inactive = sync_inactive, /* inactive */ .vop_need_inactive = vop_stdneed_inactive, /* need_inactive */ .vop_reclaim = sync_reclaim, /* reclaim */ .vop_lock1 = vop_stdlock, /* lock */ .vop_unlock = vop_stdunlock, /* unlock */ .vop_islocked = vop_stdislocked, /* islocked */ }; VFS_VOP_VECTOR_REGISTER(sync_vnodeops); /* * Create a new filesystem syncer vnode for the specified mount point. */ void vfs_allocate_syncvnode(struct mount *mp) { struct vnode *vp; struct bufobj *bo; static long start, incr, next; int error; /* Allocate a new vnode */ error = getnewvnode("syncer", mp, &sync_vnodeops, &vp); if (error != 0) panic("vfs_allocate_syncvnode: getnewvnode() failed"); vp->v_type = VNON; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vp->v_vflag |= VV_FORCEINSMQ; error = insmntque(vp, mp); if (error != 0) panic("vfs_allocate_syncvnode: insmntque() failed"); vp->v_vflag &= ~VV_FORCEINSMQ; VOP_UNLOCK(vp); /* * Place the vnode onto the syncer worklist. We attempt to * scatter them about on the list so that they will go off * at evenly distributed times even if all the filesystems * are mounted at once. */ next += incr; if (next == 0 || next > syncer_maxdelay) { start /= 2; incr /= 2; if (start == 0) { start = syncer_maxdelay / 2; incr = syncer_maxdelay; } next = start; } bo = &vp->v_bufobj; BO_LOCK(bo); vn_syncer_add_to_worklist(bo, syncdelay > 0 ? next % syncdelay : 0); /* XXX - vn_syncer_add_to_worklist() also grabs and drops sync_mtx. */ mtx_lock(&sync_mtx); sync_vnode_count++; if (mp->mnt_syncer == NULL) { mp->mnt_syncer = vp; vp = NULL; } mtx_unlock(&sync_mtx); BO_UNLOCK(bo); if (vp != NULL) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vgone(vp); vput(vp); } } void vfs_deallocate_syncvnode(struct mount *mp) { struct vnode *vp; mtx_lock(&sync_mtx); vp = mp->mnt_syncer; if (vp != NULL) mp->mnt_syncer = NULL; mtx_unlock(&sync_mtx); if (vp != NULL) vrele(vp); } /* * Do a lazy sync of the filesystem. */ static int sync_fsync(struct vop_fsync_args *ap) { struct vnode *syncvp = ap->a_vp; struct mount *mp = syncvp->v_mount; int error, save; struct bufobj *bo; /* * We only need to do something if this is a lazy evaluation. */ if (ap->a_waitfor != MNT_LAZY) return (0); /* * Move ourselves to the back of the sync list. */ bo = &syncvp->v_bufobj; BO_LOCK(bo); vn_syncer_add_to_worklist(bo, syncdelay); BO_UNLOCK(bo); /* * Walk the list of vnodes pushing all that are dirty and * not already on the sync list. */ if (vfs_busy(mp, MBF_NOWAIT) != 0) return (0); if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) { vfs_unbusy(mp); return (0); } save = curthread_pflags_set(TDP_SYNCIO); /* * The filesystem at hand may be idle with free vnodes stored in the * batch. Return them instead of letting them stay there indefinitely. */ vfs_periodic(mp, MNT_NOWAIT); error = VFS_SYNC(mp, MNT_LAZY); curthread_pflags_restore(save); vn_finished_write(mp); vfs_unbusy(mp); return (error); } /* * The syncer vnode is no referenced. */ static int sync_inactive(struct vop_inactive_args *ap) { vgone(ap->a_vp); return (0); } /* * The syncer vnode is no longer needed and is being decommissioned. * * Modifications to the worklist must be protected by sync_mtx. */ static int sync_reclaim(struct vop_reclaim_args *ap) { struct vnode *vp = ap->a_vp; struct bufobj *bo; bo = &vp->v_bufobj; BO_LOCK(bo); mtx_lock(&sync_mtx); if (vp->v_mount->mnt_syncer == vp) vp->v_mount->mnt_syncer = NULL; if (bo->bo_flag & BO_ONWORKLST) { LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; sync_vnode_count--; bo->bo_flag &= ~BO_ONWORKLST; } mtx_unlock(&sync_mtx); BO_UNLOCK(bo); return (0); } int vn_need_pageq_flush(struct vnode *vp) { struct vm_object *obj; int need; MPASS(mtx_owned(VI_MTX(vp))); need = 0; if ((obj = vp->v_object) != NULL && (vp->v_vflag & VV_NOSYNC) == 0 && vm_object_mightbedirty(obj)) need = 1; return (need); } /* * Check if vnode represents a disk device */ int vn_isdisk(struct vnode *vp, int *errp) { int error; if (vp->v_type != VCHR) { error = ENOTBLK; goto out; } error = 0; dev_lock(); if (vp->v_rdev == NULL) error = ENXIO; else if (vp->v_rdev->si_devsw == NULL) error = ENXIO; else if (!(vp->v_rdev->si_devsw->d_flags & D_DISK)) error = ENOTBLK; dev_unlock(); out: if (errp != NULL) *errp = error; return (error == 0); } /* * Common filesystem object access control check routine. Accepts a * vnode's type, "mode", uid and gid, requested access mode, credentials, * and optional call-by-reference privused argument allowing vaccess() * to indicate to the caller whether privilege was used to satisfy the * request (obsoleted). Returns 0 on success, or an errno on failure. */ int vaccess(enum vtype type, mode_t file_mode, uid_t file_uid, gid_t file_gid, accmode_t accmode, struct ucred *cred, int *privused) { accmode_t dac_granted; accmode_t priv_granted; KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0, ("invalid bit in accmode")); KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE), ("VAPPEND without VWRITE")); /* * Look for a normal, non-privileged way to access the file/directory * as requested. If it exists, go with that. */ if (privused != NULL) *privused = 0; dac_granted = 0; /* Check the owner. */ if (cred->cr_uid == file_uid) { dac_granted |= VADMIN; if (file_mode & S_IXUSR) dac_granted |= VEXEC; if (file_mode & S_IRUSR) dac_granted |= VREAD; if (file_mode & S_IWUSR) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); goto privcheck; } /* Otherwise, check the groups (first match) */ if (groupmember(file_gid, cred)) { if (file_mode & S_IXGRP) dac_granted |= VEXEC; if (file_mode & S_IRGRP) dac_granted |= VREAD; if (file_mode & S_IWGRP) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); goto privcheck; } /* Otherwise, check everyone else. */ if (file_mode & S_IXOTH) dac_granted |= VEXEC; if (file_mode & S_IROTH) dac_granted |= VREAD; if (file_mode & S_IWOTH) dac_granted |= (VWRITE | VAPPEND); if ((accmode & dac_granted) == accmode) return (0); privcheck: /* * Build a privilege mask to determine if the set of privileges * satisfies the requirements when combined with the granted mask * from above. For each privilege, if the privilege is required, * bitwise or the request type onto the priv_granted mask. */ priv_granted = 0; if (type == VDIR) { /* * For directories, use PRIV_VFS_LOOKUP to satisfy VEXEC * requests, instead of PRIV_VFS_EXEC. */ if ((accmode & VEXEC) && ((dac_granted & VEXEC) == 0) && !priv_check_cred(cred, PRIV_VFS_LOOKUP)) priv_granted |= VEXEC; } else { /* * Ensure that at least one execute bit is on. Otherwise, * a privileged user will always succeed, and we don't want * this to happen unless the file really is executable. */ if ((accmode & VEXEC) && ((dac_granted & VEXEC) == 0) && (file_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) != 0 && !priv_check_cred(cred, PRIV_VFS_EXEC)) priv_granted |= VEXEC; } if ((accmode & VREAD) && ((dac_granted & VREAD) == 0) && !priv_check_cred(cred, PRIV_VFS_READ)) priv_granted |= VREAD; if ((accmode & VWRITE) && ((dac_granted & VWRITE) == 0) && !priv_check_cred(cred, PRIV_VFS_WRITE)) priv_granted |= (VWRITE | VAPPEND); if ((accmode & VADMIN) && ((dac_granted & VADMIN) == 0) && !priv_check_cred(cred, PRIV_VFS_ADMIN)) priv_granted |= VADMIN; if ((accmode & (priv_granted | dac_granted)) == accmode) { /* XXX audit: privilege used */ if (privused != NULL) *privused = 1; return (0); } return ((accmode & VADMIN) ? EPERM : EACCES); } /* * Credential check based on process requesting service, and per-attribute * permissions. */ int extattr_check_cred(struct vnode *vp, int attrnamespace, struct ucred *cred, struct thread *td, accmode_t accmode) { /* * Kernel-invoked always succeeds. */ if (cred == NOCRED) return (0); /* * Do not allow privileged processes in jail to directly manipulate * system attributes. */ switch (attrnamespace) { case EXTATTR_NAMESPACE_SYSTEM: /* Potentially should be: return (EPERM); */ return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM)); case EXTATTR_NAMESPACE_USER: return (VOP_ACCESS(vp, accmode, cred, td)); default: return (EPERM); } } #ifdef DEBUG_VFS_LOCKS /* * This only exists to suppress warnings from unlocked specfs accesses. It is * no longer ok to have an unlocked VFS. */ #define IGNORE_LOCK(vp) (KERNEL_PANICKED() || (vp) == NULL || \ (vp)->v_type == VCHR || (vp)->v_type == VBAD) int vfs_badlock_ddb = 1; /* Drop into debugger on violation. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_ddb, CTLFLAG_RW, &vfs_badlock_ddb, 0, "Drop into debugger on lock violation"); int vfs_badlock_mutex = 1; /* Check for interlock across VOPs. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_mutex, CTLFLAG_RW, &vfs_badlock_mutex, 0, "Check for interlock across VOPs"); int vfs_badlock_print = 1; /* Print lock violations. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_print, CTLFLAG_RW, &vfs_badlock_print, 0, "Print lock violations"); int vfs_badlock_vnode = 1; /* Print vnode details on lock violations. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_vnode, CTLFLAG_RW, &vfs_badlock_vnode, 0, "Print vnode details on lock violations"); #ifdef KDB int vfs_badlock_backtrace = 1; /* Print backtrace at lock violations. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_backtrace, CTLFLAG_RW, &vfs_badlock_backtrace, 0, "Print backtrace at lock violations"); #endif static void vfs_badlock(const char *msg, const char *str, struct vnode *vp) { #ifdef KDB if (vfs_badlock_backtrace) kdb_backtrace(); #endif if (vfs_badlock_vnode) vn_printf(vp, "vnode "); if (vfs_badlock_print) printf("%s: %p %s\n", str, (void *)vp, msg); if (vfs_badlock_ddb) kdb_enter(KDB_WHY_VFSLOCK, "lock violation"); } void assert_vi_locked(struct vnode *vp, const char *str) { if (vfs_badlock_mutex && !mtx_owned(VI_MTX(vp))) vfs_badlock("interlock is not locked but should be", str, vp); } void assert_vi_unlocked(struct vnode *vp, const char *str) { if (vfs_badlock_mutex && mtx_owned(VI_MTX(vp))) vfs_badlock("interlock is locked but should not be", str, vp); } void assert_vop_locked(struct vnode *vp, const char *str) { int locked; if (!IGNORE_LOCK(vp)) { locked = VOP_ISLOCKED(vp); if (locked == 0 || locked == LK_EXCLOTHER) vfs_badlock("is not locked but should be", str, vp); } } void assert_vop_unlocked(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) vfs_badlock("is locked but should not be", str, vp); } void assert_vop_elocked(struct vnode *vp, const char *str) { if (!IGNORE_LOCK(vp) && VOP_ISLOCKED(vp) != LK_EXCLUSIVE) vfs_badlock("is not exclusive locked but should be", str, vp); } #endif /* DEBUG_VFS_LOCKS */ void vop_rename_fail(struct vop_rename_args *ap) { if (ap->a_tvp != NULL) vput(ap->a_tvp); if (ap->a_tdvp == ap->a_tvp) vrele(ap->a_tdvp); else vput(ap->a_tdvp); vrele(ap->a_fdvp); vrele(ap->a_fvp); } void vop_rename_pre(void *ap) { struct vop_rename_args *a = ap; #ifdef DEBUG_VFS_LOCKS if (a->a_tvp) ASSERT_VI_UNLOCKED(a->a_tvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_tdvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_fvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_fdvp, "VOP_RENAME"); /* Check the source (from). */ if (a->a_tdvp->v_vnlock != a->a_fdvp->v_vnlock && (a->a_tvp == NULL || a->a_tvp->v_vnlock != a->a_fdvp->v_vnlock)) ASSERT_VOP_UNLOCKED(a->a_fdvp, "vop_rename: fdvp locked"); if (a->a_tvp == NULL || a->a_tvp->v_vnlock != a->a_fvp->v_vnlock) ASSERT_VOP_UNLOCKED(a->a_fvp, "vop_rename: fvp locked"); /* Check the target. */ if (a->a_tvp) ASSERT_VOP_LOCKED(a->a_tvp, "vop_rename: tvp not locked"); ASSERT_VOP_LOCKED(a->a_tdvp, "vop_rename: tdvp not locked"); #endif if (a->a_tdvp != a->a_fdvp) vhold(a->a_fdvp); if (a->a_tvp != a->a_fvp) vhold(a->a_fvp); vhold(a->a_tdvp); if (a->a_tvp) vhold(a->a_tvp); } #ifdef DEBUG_VFS_LOCKS void vop_strategy_pre(void *ap) { struct vop_strategy_args *a; struct buf *bp; a = ap; bp = a->a_bp; /* * Cluster ops lock their component buffers but not the IO container. */ if ((bp->b_flags & B_CLUSTER) != 0) return; if (!KERNEL_PANICKED() && !BUF_ISLOCKED(bp)) { if (vfs_badlock_print) printf( "VOP_STRATEGY: bp is not locked but should be\n"); if (vfs_badlock_ddb) kdb_enter(KDB_WHY_VFSLOCK, "lock violation"); } } void vop_lock_pre(void *ap) { struct vop_lock1_args *a = ap; if ((a->a_flags & LK_INTERLOCK) == 0) ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK"); else ASSERT_VI_LOCKED(a->a_vp, "VOP_LOCK"); } void vop_lock_post(void *ap, int rc) { struct vop_lock1_args *a = ap; ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK"); if (rc == 0 && (a->a_flags & LK_EXCLOTHER) == 0) ASSERT_VOP_LOCKED(a->a_vp, "VOP_LOCK"); } void vop_unlock_pre(void *ap) { struct vop_unlock_args *a = ap; ASSERT_VOP_LOCKED(a->a_vp, "VOP_UNLOCK"); } void vop_unlock_post(void *ap, int rc) { return; } void vop_need_inactive_pre(void *ap) { struct vop_need_inactive_args *a = ap; ASSERT_VI_LOCKED(a->a_vp, "VOP_NEED_INACTIVE"); } void vop_need_inactive_post(void *ap, int rc) { struct vop_need_inactive_args *a = ap; ASSERT_VI_LOCKED(a->a_vp, "VOP_NEED_INACTIVE"); } #endif void vop_create_post(void *ap, int rc) { struct vop_create_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_deleteextattr_post(void *ap, int rc) { struct vop_deleteextattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_link_post(void *ap, int rc) { struct vop_link_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_LINK); VFS_KNOTE_LOCKED(a->a_tdvp, NOTE_WRITE); } } void vop_mkdir_post(void *ap, int rc) { struct vop_mkdir_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); } void vop_mknod_post(void *ap, int rc) { struct vop_mknod_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_reclaim_post(void *ap, int rc) { struct vop_reclaim_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_REVOKE); } void vop_remove_post(void *ap, int rc) { struct vop_remove_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); } } void vop_rename_post(void *ap, int rc) { struct vop_rename_args *a = ap; long hint; if (!rc) { hint = NOTE_WRITE; if (a->a_fdvp == a->a_tdvp) { if (a->a_tvp != NULL && a->a_tvp->v_type == VDIR) hint |= NOTE_LINK; VFS_KNOTE_UNLOCKED(a->a_fdvp, hint); VFS_KNOTE_UNLOCKED(a->a_tdvp, hint); } else { hint |= NOTE_EXTEND; if (a->a_fvp->v_type == VDIR) hint |= NOTE_LINK; VFS_KNOTE_UNLOCKED(a->a_fdvp, hint); if (a->a_fvp->v_type == VDIR && a->a_tvp != NULL && a->a_tvp->v_type == VDIR) hint &= ~NOTE_LINK; VFS_KNOTE_UNLOCKED(a->a_tdvp, hint); } VFS_KNOTE_UNLOCKED(a->a_fvp, NOTE_RENAME); if (a->a_tvp) VFS_KNOTE_UNLOCKED(a->a_tvp, NOTE_DELETE); } if (a->a_tdvp != a->a_fdvp) vdrop(a->a_fdvp); if (a->a_tvp != a->a_fvp) vdrop(a->a_fvp); vdrop(a->a_tdvp); if (a->a_tvp) vdrop(a->a_tvp); } void vop_rmdir_post(void *ap, int rc) { struct vop_rmdir_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); } } void vop_setattr_post(void *ap, int rc) { struct vop_setattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_setextattr_post(void *ap, int rc) { struct vop_setextattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_symlink_post(void *ap, int rc) { struct vop_symlink_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_open_post(void *ap, int rc) { struct vop_open_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_OPEN); } void vop_close_post(void *ap, int rc) { struct vop_close_args *a = ap; if (!rc && (a->a_cred != NOCRED || /* filter out revokes */ !VN_IS_DOOMED(a->a_vp))) { VFS_KNOTE_LOCKED(a->a_vp, (a->a_fflag & FWRITE) != 0 ? NOTE_CLOSE_WRITE : NOTE_CLOSE); } } void vop_read_post(void *ap, int rc) { struct vop_read_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ); } void vop_readdir_post(void *ap, int rc) { struct vop_readdir_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ); } static struct knlist fs_knlist; static void vfs_event_init(void *arg) { knlist_init_mtx(&fs_knlist, NULL); } /* XXX - correct order? */ SYSINIT(vfs_knlist, SI_SUB_VFS, SI_ORDER_ANY, vfs_event_init, NULL); void vfs_event_signal(fsid_t *fsid, uint32_t event, intptr_t data __unused) { KNOTE_UNLOCKED(&fs_knlist, event); } static int filt_fsattach(struct knote *kn); static void filt_fsdetach(struct knote *kn); static int filt_fsevent(struct knote *kn, long hint); struct filterops fs_filtops = { .f_isfd = 0, .f_attach = filt_fsattach, .f_detach = filt_fsdetach, .f_event = filt_fsevent }; static int filt_fsattach(struct knote *kn) { kn->kn_flags |= EV_CLEAR; knlist_add(&fs_knlist, kn, 0); return (0); } static void filt_fsdetach(struct knote *kn) { knlist_remove(&fs_knlist, kn, 0); } static int filt_fsevent(struct knote *kn, long hint) { kn->kn_fflags |= hint; return (kn->kn_fflags != 0); } static int sysctl_vfs_ctl(SYSCTL_HANDLER_ARGS) { struct vfsidctl vc; int error; struct mount *mp; error = SYSCTL_IN(req, &vc, sizeof(vc)); if (error) return (error); if (vc.vc_vers != VFS_CTL_VERS1) return (EINVAL); mp = vfs_getvfs(&vc.vc_fsid); if (mp == NULL) return (ENOENT); /* ensure that a specific sysctl goes to the right filesystem. */ if (strcmp(vc.vc_fstypename, "*") != 0 && strcmp(vc.vc_fstypename, mp->mnt_vfc->vfc_name) != 0) { vfs_rel(mp); return (EINVAL); } VCTLTOREQ(&vc, req); error = VFS_SYSCTL(mp, vc.vc_op, req); vfs_rel(mp); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, ctl, CTLTYPE_OPAQUE | CTLFLAG_MPSAFE | CTLFLAG_WR, NULL, 0, sysctl_vfs_ctl, "", "Sysctl by fsid"); /* * Function to initialize a va_filerev field sensibly. * XXX: Wouldn't a random number make a lot more sense ?? */ u_quad_t init_va_filerev(void) { struct bintime bt; getbinuptime(&bt); return (((u_quad_t)bt.sec << 32LL) | (bt.frac >> 32LL)); } static int filt_vfsread(struct knote *kn, long hint); static int filt_vfswrite(struct knote *kn, long hint); static int filt_vfsvnode(struct knote *kn, long hint); static void filt_vfsdetach(struct knote *kn); static struct filterops vfsread_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfsread }; static struct filterops vfswrite_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfswrite }; static struct filterops vfsvnode_filtops = { .f_isfd = 1, .f_detach = filt_vfsdetach, .f_event = filt_vfsvnode }; static void vfs_knllock(void *arg) { struct vnode *vp = arg; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } static void vfs_knlunlock(void *arg) { struct vnode *vp = arg; VOP_UNLOCK(vp); } static void vfs_knl_assert_locked(void *arg) { #ifdef DEBUG_VFS_LOCKS struct vnode *vp = arg; ASSERT_VOP_LOCKED(vp, "vfs_knl_assert_locked"); #endif } static void vfs_knl_assert_unlocked(void *arg) { #ifdef DEBUG_VFS_LOCKS struct vnode *vp = arg; ASSERT_VOP_UNLOCKED(vp, "vfs_knl_assert_unlocked"); #endif } int vfs_kqfilter(struct vop_kqfilter_args *ap) { struct vnode *vp = ap->a_vp; struct knote *kn = ap->a_kn; struct knlist *knl; switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &vfsread_filtops; break; case EVFILT_WRITE: kn->kn_fop = &vfswrite_filtops; break; case EVFILT_VNODE: kn->kn_fop = &vfsvnode_filtops; break; default: return (EINVAL); } kn->kn_hook = (caddr_t)vp; v_addpollinfo(vp); if (vp->v_pollinfo == NULL) return (ENOMEM); knl = &vp->v_pollinfo->vpi_selinfo.si_note; vhold(vp); knlist_add(knl, kn, 0); return (0); } /* * Detach knote from vnode */ static void filt_vfsdetach(struct knote *kn) { struct vnode *vp = (struct vnode *)kn->kn_hook; KASSERT(vp->v_pollinfo != NULL, ("Missing v_pollinfo")); knlist_remove(&vp->v_pollinfo->vpi_selinfo.si_note, kn, 0); vdrop(vp); } /*ARGSUSED*/ static int filt_vfsread(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; struct vattr va; int res; /* * filesystem is gone, so set the EOF flag and schedule * the knote for deletion. */ if (hint == NOTE_REVOKE || (hint == 0 && vp->v_type == VBAD)) { VI_LOCK(vp); kn->kn_flags |= (EV_EOF | EV_ONESHOT); VI_UNLOCK(vp); return (1); } if (VOP_GETATTR(vp, &va, curthread->td_ucred)) return (0); VI_LOCK(vp); kn->kn_data = va.va_size - kn->kn_fp->f_offset; res = (kn->kn_sfflags & NOTE_FILE_POLL) != 0 || kn->kn_data != 0; VI_UNLOCK(vp); return (res); } /*ARGSUSED*/ static int filt_vfswrite(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; VI_LOCK(vp); /* * filesystem is gone, so set the EOF flag and schedule * the knote for deletion. */ if (hint == NOTE_REVOKE || (hint == 0 && vp->v_type == VBAD)) kn->kn_flags |= (EV_EOF | EV_ONESHOT); kn->kn_data = 0; VI_UNLOCK(vp); return (1); } static int filt_vfsvnode(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; int res; VI_LOCK(vp); if (kn->kn_sfflags & hint) kn->kn_fflags |= hint; if (hint == NOTE_REVOKE || (hint == 0 && vp->v_type == VBAD)) { kn->kn_flags |= EV_EOF; VI_UNLOCK(vp); return (1); } res = (kn->kn_fflags != 0); VI_UNLOCK(vp); return (res); } /* * Returns whether the directory is empty or not. * If it is empty, the return value is 0; otherwise * the return value is an error value (which may * be ENOTEMPTY). */ int vfs_emptydir(struct vnode *vp) { struct uio uio; struct iovec iov; struct dirent *dirent, *dp, *endp; int error, eof; error = 0; eof = 0; ASSERT_VOP_LOCKED(vp, "vfs_emptydir"); dirent = malloc(sizeof(struct dirent), M_TEMP, M_WAITOK); iov.iov_base = dirent; iov.iov_len = sizeof(struct dirent); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = 0; uio.uio_resid = sizeof(struct dirent); uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_td = curthread; while (eof == 0 && error == 0) { error = VOP_READDIR(vp, &uio, curthread->td_ucred, &eof, NULL, NULL); if (error != 0) break; endp = (void *)((uint8_t *)dirent + sizeof(struct dirent) - uio.uio_resid); for (dp = dirent; dp < endp; dp = (void *)((uint8_t *)dp + GENERIC_DIRSIZ(dp))) { if (dp->d_type == DT_WHT) continue; if (dp->d_namlen == 0) continue; if (dp->d_type != DT_DIR && dp->d_type != DT_UNKNOWN) { error = ENOTEMPTY; break; } if (dp->d_namlen > 2) { error = ENOTEMPTY; break; } if (dp->d_namlen == 1 && dp->d_name[0] != '.') { error = ENOTEMPTY; break; } if (dp->d_namlen == 2 && dp->d_name[1] != '.') { error = ENOTEMPTY; break; } uio.uio_resid = sizeof(struct dirent); } } free(dirent, M_TEMP); return (error); } int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off) { int error; if (dp->d_reclen > ap->a_uio->uio_resid) return (ENAMETOOLONG); error = uiomove(dp, dp->d_reclen, ap->a_uio); if (error) { if (ap->a_ncookies != NULL) { if (ap->a_cookies != NULL) free(ap->a_cookies, M_TEMP); ap->a_cookies = NULL; *ap->a_ncookies = 0; } return (error); } if (ap->a_ncookies == NULL) return (0); KASSERT(ap->a_cookies, ("NULL ap->a_cookies value with non-NULL ap->a_ncookies!")); *ap->a_cookies = realloc(*ap->a_cookies, (*ap->a_ncookies + 1) * sizeof(u_long), M_TEMP, M_WAITOK | M_ZERO); (*ap->a_cookies)[*ap->a_ncookies] = off; *ap->a_ncookies += 1; return (0); } /* * Mark for update the access time of the file if the filesystem * supports VOP_MARKATIME. This functionality is used by execve and * mmap, so we want to avoid the I/O implied by directly setting * va_atime for the sake of efficiency. */ void vfs_mark_atime(struct vnode *vp, struct ucred *cred) { struct mount *mp; mp = vp->v_mount; ASSERT_VOP_LOCKED(vp, "vfs_mark_atime"); if (mp != NULL && (mp->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0) (void)VOP_MARKATIME(vp); } /* * The purpose of this routine is to remove granularity from accmode_t, * reducing it into standard unix access bits - VEXEC, VREAD, VWRITE, * VADMIN and VAPPEND. * * If it returns 0, the caller is supposed to continue with the usual * access checks using 'accmode' as modified by this routine. If it * returns nonzero value, the caller is supposed to return that value * as errno. * * Note that after this routine runs, accmode may be zero. */ int vfs_unixify_accmode(accmode_t *accmode) { /* * There is no way to specify explicit "deny" rule using * file mode or POSIX.1e ACLs. */ if (*accmode & VEXPLICIT_DENY) { *accmode = 0; return (0); } /* * None of these can be translated into usual access bits. * Also, the common case for NFSv4 ACLs is to not contain * either of these bits. Caller should check for VWRITE * on the containing directory instead. */ if (*accmode & (VDELETE_CHILD | VDELETE)) return (EPERM); if (*accmode & VADMIN_PERMS) { *accmode &= ~VADMIN_PERMS; *accmode |= VADMIN; } /* * There is no way to deny VREAD_ATTRIBUTES, VREAD_ACL * or VSYNCHRONIZE using file mode or POSIX.1e ACL. */ *accmode &= ~(VSTAT_PERMS | VSYNCHRONIZE); return (0); } /* * Clear out a doomed vnode (if any) and replace it with a new one as long * as the fs is not being unmounted. Return the root vnode to the caller. */ static int __noinline vfs_cache_root_fallback(struct mount *mp, int flags, struct vnode **vpp) { struct vnode *vp; int error; restart: if (mp->mnt_rootvnode != NULL) { MNT_ILOCK(mp); vp = mp->mnt_rootvnode; if (vp != NULL) { if (!VN_IS_DOOMED(vp)) { vrefact(vp); MNT_IUNLOCK(mp); error = vn_lock(vp, flags); if (error == 0) { *vpp = vp; return (0); } vrele(vp); goto restart; } /* * Clear the old one. */ mp->mnt_rootvnode = NULL; } MNT_IUNLOCK(mp); if (vp != NULL) { /* * Paired with a fence in vfs_op_thread_exit(). */ atomic_thread_fence_acq(); vfs_op_barrier_wait(mp); vrele(vp); } } error = VFS_CACHEDROOT(mp, flags, vpp); if (error != 0) return (error); if (mp->mnt_vfs_ops == 0) { MNT_ILOCK(mp); if (mp->mnt_vfs_ops != 0) { MNT_IUNLOCK(mp); return (0); } if (mp->mnt_rootvnode == NULL) { vrefact(*vpp); mp->mnt_rootvnode = *vpp; } else { if (mp->mnt_rootvnode != *vpp) { if (!VN_IS_DOOMED(mp->mnt_rootvnode)) { panic("%s: mismatch between vnode returned " " by VFS_CACHEDROOT and the one cached " " (%p != %p)", __func__, *vpp, mp->mnt_rootvnode); } } } MNT_IUNLOCK(mp); } return (0); } int vfs_cache_root(struct mount *mp, int flags, struct vnode **vpp) { struct vnode *vp; int error; if (!vfs_op_thread_enter(mp)) return (vfs_cache_root_fallback(mp, flags, vpp)); vp = (struct vnode *)atomic_load_ptr(&mp->mnt_rootvnode); if (vp == NULL || VN_IS_DOOMED(vp)) { vfs_op_thread_exit(mp); return (vfs_cache_root_fallback(mp, flags, vpp)); } vrefact(vp); vfs_op_thread_exit(mp); error = vn_lock(vp, flags); if (error != 0) { vrele(vp); return (vfs_cache_root_fallback(mp, flags, vpp)); } *vpp = vp; return (0); } struct vnode * vfs_cache_root_clear(struct mount *mp) { struct vnode *vp; /* * ops > 0 guarantees there is nobody who can see this vnode */ MPASS(mp->mnt_vfs_ops > 0); vp = mp->mnt_rootvnode; mp->mnt_rootvnode = NULL; return (vp); } void vfs_cache_root_set(struct mount *mp, struct vnode *vp) { MPASS(mp->mnt_vfs_ops > 0); vrefact(vp); mp->mnt_rootvnode = vp; } /* * These are helper functions for filesystems to traverse all * their vnodes. See MNT_VNODE_FOREACH_ALL() in sys/mount.h. * * This interface replaces MNT_VNODE_FOREACH. */ struct vnode * __mnt_vnode_next_all(struct vnode **mvp, struct mount *mp) { struct vnode *vp; if (should_yield()) kern_yield(PRI_USER); MNT_ILOCK(mp); KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); for (vp = TAILQ_NEXT(*mvp, v_nmntvnodes); vp != NULL; vp = TAILQ_NEXT(vp, v_nmntvnodes)) { /* Allow a racy peek at VIRF_DOOMED to save a lock acquisition. */ if (vp->v_type == VMARKER || VN_IS_DOOMED(vp)) continue; VI_LOCK(vp); if (VN_IS_DOOMED(vp)) { VI_UNLOCK(vp); continue; } break; } if (vp == NULL) { __mnt_vnode_markerfree_all(mvp, mp); /* MNT_IUNLOCK(mp); -- done in above function */ mtx_assert(MNT_MTX(mp), MA_NOTOWNED); return (NULL); } TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); MNT_IUNLOCK(mp); return (vp); } struct vnode * __mnt_vnode_first_all(struct vnode **mvp, struct mount *mp) { struct vnode *vp; *mvp = vn_alloc_marker(mp); MNT_ILOCK(mp); MNT_REF(mp); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { /* Allow a racy peek at VIRF_DOOMED to save a lock acquisition. */ if (vp->v_type == VMARKER || VN_IS_DOOMED(vp)) continue; VI_LOCK(vp); if (VN_IS_DOOMED(vp)) { VI_UNLOCK(vp); continue; } break; } if (vp == NULL) { MNT_REL(mp); MNT_IUNLOCK(mp); vn_free_marker(*mvp); *mvp = NULL; return (NULL); } TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes); MNT_IUNLOCK(mp); return (vp); } void __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp) { if (*mvp == NULL) { MNT_IUNLOCK(mp); return; } mtx_assert(MNT_MTX(mp), MA_OWNED); KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes); MNT_REL(mp); MNT_IUNLOCK(mp); vn_free_marker(*mvp); *mvp = NULL; } /* * These are helper functions for filesystems to traverse their * lazy vnodes. See MNT_VNODE_FOREACH_LAZY() in sys/mount.h */ static void mnt_vnode_markerfree_lazy(struct vnode **mvp, struct mount *mp) { KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); MNT_ILOCK(mp); MNT_REL(mp); MNT_IUNLOCK(mp); vn_free_marker(*mvp); *mvp = NULL; } /* * Relock the mp mount vnode list lock with the vp vnode interlock in the * conventional lock order during mnt_vnode_next_lazy iteration. * * On entry, the mount vnode list lock is held and the vnode interlock is not. * The list lock is dropped and reacquired. On success, both locks are held. * On failure, the mount vnode list lock is held but the vnode interlock is * not, and the procedure may have yielded. */ static bool mnt_vnode_next_lazy_relock(struct vnode *mvp, struct mount *mp, struct vnode *vp) { const struct vnode *tmp; bool held, ret; VNASSERT(mvp->v_mount == mp && mvp->v_type == VMARKER && TAILQ_NEXT(mvp, v_lazylist) != NULL, mvp, ("%s: bad marker", __func__)); VNASSERT(vp->v_mount == mp && vp->v_type != VMARKER, vp, ("%s: inappropriate vnode", __func__)); ASSERT_VI_UNLOCKED(vp, __func__); mtx_assert(&mp->mnt_listmtx, MA_OWNED); ret = false; TAILQ_REMOVE(&mp->mnt_lazyvnodelist, mvp, v_lazylist); TAILQ_INSERT_BEFORE(vp, mvp, v_lazylist); /* * Use a hold to prevent vp from disappearing while the mount vnode * list lock is dropped and reacquired. Normally a hold would be * acquired with vhold(), but that might try to acquire the vnode * interlock, which would be a LOR with the mount vnode list lock. */ held = refcount_acquire_if_not_zero(&vp->v_holdcnt); mtx_unlock(&mp->mnt_listmtx); if (!held) goto abort; VI_LOCK(vp); if (!refcount_release_if_not_last(&vp->v_holdcnt)) { vdropl(vp); goto abort; } mtx_lock(&mp->mnt_listmtx); /* * Determine whether the vnode is still the next one after the marker, * excepting any other markers. If the vnode has not been doomed by * vgone() then the hold should have ensured that it remained on the * lazy list. If it has been doomed but is still on the lazy list, * don't abort, but rather skip over it (avoid spinning on doomed * vnodes). */ tmp = mvp; do { tmp = TAILQ_NEXT(tmp, v_lazylist); } while (tmp != NULL && tmp->v_type == VMARKER); if (tmp != vp) { mtx_unlock(&mp->mnt_listmtx); VI_UNLOCK(vp); goto abort; } ret = true; goto out; abort: maybe_yield(); mtx_lock(&mp->mnt_listmtx); out: if (ret) ASSERT_VI_LOCKED(vp, __func__); else ASSERT_VI_UNLOCKED(vp, __func__); mtx_assert(&mp->mnt_listmtx, MA_OWNED); return (ret); } static struct vnode * mnt_vnode_next_lazy(struct vnode **mvp, struct mount *mp, mnt_lazy_cb_t *cb, void *cbarg) { struct vnode *vp, *nvp; mtx_assert(&mp->mnt_listmtx, MA_OWNED); KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); restart: vp = TAILQ_NEXT(*mvp, v_lazylist); while (vp != NULL) { if (vp->v_type == VMARKER) { vp = TAILQ_NEXT(vp, v_lazylist); continue; } /* * See if we want to process the vnode. Note we may encounter a * long string of vnodes we don't care about and hog the list * as a result. Check for it and requeue the marker. */ if (VN_IS_DOOMED(vp) || !cb(vp, cbarg)) { if (!should_yield()) { vp = TAILQ_NEXT(vp, v_lazylist); continue; } TAILQ_REMOVE(&mp->mnt_lazyvnodelist, *mvp, v_lazylist); TAILQ_INSERT_AFTER(&mp->mnt_lazyvnodelist, vp, *mvp, v_lazylist); mtx_unlock(&mp->mnt_listmtx); kern_yield(PRI_USER); mtx_lock(&mp->mnt_listmtx); goto restart; } /* * Try-lock because this is the wrong lock order. If that does * not succeed, drop the mount vnode list lock and try to * reacquire it and the vnode interlock in the right order. */ if (!VI_TRYLOCK(vp) && !mnt_vnode_next_lazy_relock(*mvp, mp, vp)) goto restart; KASSERT(vp->v_type != VMARKER, ("locked marker %p", vp)); KASSERT(vp->v_mount == mp || vp->v_mount == NULL, ("alien vnode on the lazy list %p %p", vp, mp)); if (vp->v_mount == mp && !VN_IS_DOOMED(vp)) break; nvp = TAILQ_NEXT(vp, v_lazylist); VI_UNLOCK(vp); vp = nvp; } TAILQ_REMOVE(&mp->mnt_lazyvnodelist, *mvp, v_lazylist); /* Check if we are done */ if (vp == NULL) { mtx_unlock(&mp->mnt_listmtx); mnt_vnode_markerfree_lazy(mvp, mp); return (NULL); } TAILQ_INSERT_AFTER(&mp->mnt_lazyvnodelist, vp, *mvp, v_lazylist); mtx_unlock(&mp->mnt_listmtx); ASSERT_VI_LOCKED(vp, "lazy iter"); return (vp); } struct vnode * __mnt_vnode_next_lazy(struct vnode **mvp, struct mount *mp, mnt_lazy_cb_t *cb, void *cbarg) { if (should_yield()) kern_yield(PRI_USER); mtx_lock(&mp->mnt_listmtx); return (mnt_vnode_next_lazy(mvp, mp, cb, cbarg)); } struct vnode * __mnt_vnode_first_lazy(struct vnode **mvp, struct mount *mp, mnt_lazy_cb_t *cb, void *cbarg) { struct vnode *vp; *mvp = vn_alloc_marker(mp); MNT_ILOCK(mp); MNT_REF(mp); MNT_IUNLOCK(mp); mtx_lock(&mp->mnt_listmtx); vp = TAILQ_FIRST(&mp->mnt_lazyvnodelist); if (vp == NULL) { mtx_unlock(&mp->mnt_listmtx); mnt_vnode_markerfree_lazy(mvp, mp); return (NULL); } TAILQ_INSERT_BEFORE(vp, *mvp, v_lazylist); return (mnt_vnode_next_lazy(mvp, mp, cb, cbarg)); } void __mnt_vnode_markerfree_lazy(struct vnode **mvp, struct mount *mp) { if (*mvp == NULL) return; mtx_lock(&mp->mnt_listmtx); TAILQ_REMOVE(&mp->mnt_lazyvnodelist, *mvp, v_lazylist); mtx_unlock(&mp->mnt_listmtx); mnt_vnode_markerfree_lazy(mvp, mp); } Index: projects/clang1000-import/sys/net/if.c =================================================================== --- projects/clang1000-import/sys/net/if.c (revision 356919) +++ projects/clang1000-import/sys/net/if.c (revision 356920) @@ -1,4556 +1,4559 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if.c 8.5 (Berkeley) 1/9/95 * $FreeBSD$ */ #include "opt_bpf.h" #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #ifdef INET #include #include #endif /* INET */ #ifdef INET6 #include #include #endif /* INET6 */ #endif /* INET || INET6 */ #include /* * Consumers of struct ifreq such as tcpdump assume no pad between ifr_name * and ifr_ifru when it is used in SIOCGIFCONF. */ _Static_assert(sizeof(((struct ifreq *)0)->ifr_name) == offsetof(struct ifreq, ifr_ifru), "gap between ifr_name and ifr_ifru"); __read_mostly epoch_t net_epoch_preempt; #ifdef COMPAT_FREEBSD32 #include #include struct ifreq_buffer32 { uint32_t length; /* (size_t) */ uint32_t buffer; /* (void *) */ }; /* * Interface request structure used for socket * ioctl's. All interface ioctl's must have parameter * definitions which begin with ifr_name. The * remainder may be interface specific. */ struct ifreq32 { char ifr_name[IFNAMSIZ]; /* if name, e.g. "en0" */ union { struct sockaddr ifru_addr; struct sockaddr ifru_dstaddr; struct sockaddr ifru_broadaddr; struct ifreq_buffer32 ifru_buffer; short ifru_flags[2]; short ifru_index; int ifru_jid; int ifru_metric; int ifru_mtu; int ifru_phys; int ifru_media; uint32_t ifru_data; int ifru_cap[2]; u_int ifru_fib; u_char ifru_vlan_pcp; } ifr_ifru; }; CTASSERT(sizeof(struct ifreq) == sizeof(struct ifreq32)); CTASSERT(__offsetof(struct ifreq, ifr_ifru) == __offsetof(struct ifreq32, ifr_ifru)); struct ifgroupreq32 { char ifgr_name[IFNAMSIZ]; u_int ifgr_len; union { char ifgru_group[IFNAMSIZ]; uint32_t ifgru_groups; } ifgr_ifgru; }; struct ifmediareq32 { char ifm_name[IFNAMSIZ]; int ifm_current; int ifm_mask; int ifm_status; int ifm_active; int ifm_count; uint32_t ifm_ulist; /* (int *) */ }; #define SIOCGIFMEDIA32 _IOC_NEWTYPE(SIOCGIFMEDIA, struct ifmediareq32) #define SIOCGIFXMEDIA32 _IOC_NEWTYPE(SIOCGIFXMEDIA, struct ifmediareq32) #define _CASE_IOC_IFGROUPREQ_32(cmd) \ _IOC_NEWTYPE((cmd), struct ifgroupreq32): case #else /* !COMPAT_FREEBSD32 */ #define _CASE_IOC_IFGROUPREQ_32(cmd) #endif /* !COMPAT_FREEBSD32 */ #define CASE_IOC_IFGROUPREQ(cmd) \ _CASE_IOC_IFGROUPREQ_32(cmd) \ (cmd) union ifreq_union { struct ifreq ifr; #ifdef COMPAT_FREEBSD32 struct ifreq32 ifr32; #endif }; union ifgroupreq_union { struct ifgroupreq ifgr; #ifdef COMPAT_FREEBSD32 struct ifgroupreq32 ifgr32; #endif }; SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers"); SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management"); SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN, &ifqmaxlen, 0, "max send queue size"); /* Log link state change events */ static int log_link_state_change = 1; SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW, &log_link_state_change, 0, "log interface link state change events"); /* Log promiscuous mode change events */ static int log_promisc_mode_change = 1; SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN, &log_promisc_mode_change, 1, "log promiscuous mode change events"); /* Interface description */ static unsigned int ifdescr_maxlen = 1024; SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW, &ifdescr_maxlen, 0, "administrative maximum length for interface description"); static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions"); /* global sx for non-critical path ifdescr */ static struct sx ifdescr_sx; SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr"); void (*ng_ether_link_state_p)(struct ifnet *ifp, int state); void (*lagg_linkstate_p)(struct ifnet *ifp, int state); /* These are external hooks for CARP. */ void (*carp_linkstate_p)(struct ifnet *ifp); void (*carp_demote_adj_p)(int, char *); int (*carp_master_p)(struct ifaddr *); #if defined(INET) || defined(INET6) int (*carp_forus_p)(struct ifnet *ifp, u_char *dhost); int (*carp_output_p)(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa); int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *); int (*carp_attach_p)(struct ifaddr *, int); void (*carp_detach_p)(struct ifaddr *, bool); #endif #ifdef INET int (*carp_iamatch_p)(struct ifaddr *, uint8_t **); #endif #ifdef INET6 struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6); caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr); #endif struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL; /* * XXX: Style; these should be sorted alphabetically, and unprototyped * static functions should be prototyped. Currently they are sorted by * declaration order. */ static void if_attachdomain(void *); static void if_attachdomain1(struct ifnet *); static int ifconf(u_long, caddr_t); static void *if_grow(void); static void if_input_default(struct ifnet *, struct mbuf *); static int if_requestencap_default(struct ifnet *, struct if_encap_req *); static void if_route(struct ifnet *, int flag, int fam); static int if_setflag(struct ifnet *, int, int, int *, int); static int if_transmit(struct ifnet *ifp, struct mbuf *m); static void if_unroute(struct ifnet *, int flag, int fam); static int if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int); static void do_link_state_change(void *, int); static int if_getgroup(struct ifgroupreq *, struct ifnet *); static int if_getgroupmembers(struct ifgroupreq *); static void if_delgroups(struct ifnet *); static void if_attach_internal(struct ifnet *, int, struct if_clone *); static int if_detach_internal(struct ifnet *, int, struct if_clone **); static void if_siocaddmulti(void *, int); #ifdef VIMAGE static int if_vmove(struct ifnet *, struct vnet *); #endif #ifdef INET6 /* * XXX: declare here to avoid to include many inet6 related files.. * should be more generalized? */ extern void nd6_setmtu(struct ifnet *); #endif /* ipsec helper hooks */ VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]); VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]); VNET_DEFINE(int, if_index); int ifqmaxlen = IFQ_MAXLEN; VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */ VNET_DEFINE(struct ifgrouphead, ifg_head); VNET_DEFINE_STATIC(int, if_indexlim) = 8; /* Table of ifnet by index. */ VNET_DEFINE(struct ifnet **, ifindex_table); #define V_if_indexlim VNET(if_indexlim) #define V_ifindex_table VNET(ifindex_table) /* * The global network interface list (V_ifnet) and related state (such as * if_index, if_indexlim, and ifindex_table) are protected by an sxlock and * an rwlock. Either may be acquired shared to stablize the list, but both * must be acquired writable to modify the list. This model allows us to * both stablize the interface list during interrupt thread processing, but * also to stablize it over long-running ioctls, without introducing priority * inversions and deadlocks. */ struct rwlock ifnet_rwlock; RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE); struct sx ifnet_sxlock; SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE); /* * The allocation of network interfaces is a rather non-atomic affair; we * need to select an index before we are ready to expose the interface for * use, so will use this pointer value to indicate reservation. */ #define IFNET_HOLD (void *)(uintptr_t)(-1) static if_com_alloc_t *if_com_alloc[256]; static if_com_free_t *if_com_free[256]; static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals"); MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); struct ifnet * ifnet_byindex(u_short idx) { struct ifnet *ifp; if (__predict_false(idx > V_if_index)) return (NULL); ifp = *(struct ifnet * const volatile *)(V_ifindex_table + idx); return (__predict_false(ifp == IFNET_HOLD) ? NULL : ifp); } struct ifnet * ifnet_byindex_ref(u_short idx) { struct ifnet *ifp; NET_EPOCH_ASSERT(); ifp = ifnet_byindex(idx); if (ifp == NULL || (ifp->if_flags & IFF_DYING)) return (NULL); if_ref(ifp); return (ifp); } /* * Allocate an ifindex array entry; return 0 on success or an error on * failure. */ static u_short ifindex_alloc(void **old) { u_short idx; IFNET_WLOCK_ASSERT(); /* * Try to find an empty slot below V_if_index. If we fail, take the * next slot. */ for (idx = 1; idx <= V_if_index; idx++) { if (V_ifindex_table[idx] == NULL) break; } /* Catch if_index overflow. */ if (idx >= V_if_indexlim) { *old = if_grow(); return (USHRT_MAX); } if (idx > V_if_index) V_if_index = idx; return (idx); } static void ifindex_free_locked(u_short idx) { IFNET_WLOCK_ASSERT(); V_ifindex_table[idx] = NULL; while (V_if_index > 0 && V_ifindex_table[V_if_index] == NULL) V_if_index--; } static void ifindex_free(u_short idx) { IFNET_WLOCK(); ifindex_free_locked(idx); IFNET_WUNLOCK(); } static void ifnet_setbyindex(u_short idx, struct ifnet *ifp) { V_ifindex_table[idx] = ifp; } struct ifaddr * ifaddr_byindex(u_short idx) { struct ifnet *ifp; struct ifaddr *ifa = NULL; NET_EPOCH_ASSERT(); ifp = ifnet_byindex(idx); if (ifp != NULL && (ifa = ifp->if_addr) != NULL) ifa_ref(ifa); return (ifa); } /* * Network interface utility routines. * * Routines with ifa_ifwith* names take sockaddr *'s as * parameters. */ static void vnet_if_init(const void *unused __unused) { void *old; CK_STAILQ_INIT(&V_ifnet); CK_STAILQ_INIT(&V_ifg_head); IFNET_WLOCK(); old = if_grow(); /* create initial table */ IFNET_WUNLOCK(); epoch_wait_preempt(net_epoch_preempt); free(old, M_IFNET); vnet_if_clone_init(); } VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init, NULL); #ifdef VIMAGE static void vnet_if_uninit(const void *unused __unused) { VNET_ASSERT(CK_STAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p " "not empty", __func__, __LINE__, &V_ifnet)); VNET_ASSERT(CK_STAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p " "not empty", __func__, __LINE__, &V_ifg_head)); free((caddr_t)V_ifindex_table, M_IFNET); } VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_if_uninit, NULL); static void vnet_if_return(const void *unused __unused) { struct ifnet *ifp, *nifp; /* Return all inherited interfaces to their parent vnets. */ CK_STAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) { if (ifp->if_home_vnet != ifp->if_vnet) if_vmove(ifp, ifp->if_home_vnet); } } VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY, vnet_if_return, NULL); #endif static void * if_grow(void) { int oldlim; u_int n; struct ifnet **e; void *old; old = NULL; IFNET_WLOCK_ASSERT(); oldlim = V_if_indexlim; IFNET_WUNLOCK(); n = (oldlim << 1) * sizeof(*e); e = malloc(n, M_IFNET, M_WAITOK | M_ZERO); IFNET_WLOCK(); if (V_if_indexlim != oldlim) { free(e, M_IFNET); return (NULL); } if (V_ifindex_table != NULL) { memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2); old = V_ifindex_table; } V_if_indexlim <<= 1; V_ifindex_table = e; return (old); } /* * Allocate a struct ifnet and an index for an interface. A layer 2 * common structure will also be allocated if an allocation routine is * registered for the passed type. */ struct ifnet * if_alloc_domain(u_char type, int numa_domain) { struct ifnet *ifp; u_short idx; void *old; KASSERT(numa_domain <= IF_NODOM, ("numa_domain too large")); if (numa_domain == IF_NODOM) ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK | M_ZERO); else ifp = malloc_domainset(sizeof(struct ifnet), M_IFNET, DOMAINSET_PREF(numa_domain), M_WAITOK | M_ZERO); restart: IFNET_WLOCK(); idx = ifindex_alloc(&old); if (__predict_false(idx == USHRT_MAX)) { IFNET_WUNLOCK(); epoch_wait_preempt(net_epoch_preempt); free(old, M_IFNET); goto restart; } ifnet_setbyindex(idx, IFNET_HOLD); IFNET_WUNLOCK(); ifp->if_index = idx; ifp->if_type = type; ifp->if_alloctype = type; ifp->if_numa_domain = numa_domain; #ifdef VIMAGE ifp->if_vnet = curvnet; #endif if (if_com_alloc[type] != NULL) { ifp->if_l2com = if_com_alloc[type](type, ifp); if (ifp->if_l2com == NULL) { free(ifp, M_IFNET); ifindex_free(idx); return (NULL); } } IF_ADDR_LOCK_INIT(ifp); TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp); TASK_INIT(&ifp->if_addmultitask, 0, if_siocaddmulti, ifp); ifp->if_afdata_initialized = 0; IF_AFDATA_LOCK_INIT(ifp); CK_STAILQ_INIT(&ifp->if_addrhead); CK_STAILQ_INIT(&ifp->if_multiaddrs); CK_STAILQ_INIT(&ifp->if_groups); #ifdef MAC mac_ifnet_init(ifp); #endif ifq_init(&ifp->if_snd, ifp); refcount_init(&ifp->if_refcount, 1); /* Index reference. */ for (int i = 0; i < IFCOUNTERS; i++) ifp->if_counters[i] = counter_u64_alloc(M_WAITOK); ifp->if_get_counter = if_get_counter_default; ifp->if_pcp = IFNET_PCP_NONE; ifnet_setbyindex(ifp->if_index, ifp); return (ifp); } struct ifnet * if_alloc_dev(u_char type, device_t dev) { int numa_domain; if (dev == NULL || bus_get_domain(dev, &numa_domain) != 0) return (if_alloc_domain(type, IF_NODOM)); return (if_alloc_domain(type, numa_domain)); } struct ifnet * if_alloc(u_char type) { return (if_alloc_domain(type, IF_NODOM)); } /* * Do the actual work of freeing a struct ifnet, and layer 2 common * structure. This call is made when the last reference to an * interface is released. */ static void if_free_internal(struct ifnet *ifp) { KASSERT((ifp->if_flags & IFF_DYING), ("if_free_internal: interface not dying")); if (if_com_free[ifp->if_alloctype] != NULL) if_com_free[ifp->if_alloctype](ifp->if_l2com, ifp->if_alloctype); #ifdef MAC mac_ifnet_destroy(ifp); #endif /* MAC */ IF_AFDATA_DESTROY(ifp); IF_ADDR_LOCK_DESTROY(ifp); ifq_delete(&ifp->if_snd); for (int i = 0; i < IFCOUNTERS; i++) counter_u64_free(ifp->if_counters[i]); free(ifp->if_description, M_IFDESCR); free(ifp->if_hw_addr, M_IFADDR); if (ifp->if_numa_domain == IF_NODOM) free(ifp, M_IFNET); else free_domain(ifp, M_IFNET); } static void if_destroy(epoch_context_t ctx) { struct ifnet *ifp; ifp = __containerof(ctx, struct ifnet, if_epoch_ctx); if_free_internal(ifp); } /* * Deregister an interface and free the associated storage. */ void if_free(struct ifnet *ifp) { ifp->if_flags |= IFF_DYING; /* XXX: Locking */ CURVNET_SET_QUIET(ifp->if_vnet); IFNET_WLOCK(); KASSERT(ifp == ifnet_byindex(ifp->if_index), ("%s: freeing unallocated ifnet", ifp->if_xname)); ifindex_free_locked(ifp->if_index); IFNET_WUNLOCK(); if (refcount_release(&ifp->if_refcount)) NET_EPOCH_CALL(if_destroy, &ifp->if_epoch_ctx); CURVNET_RESTORE(); } /* * Interfaces to keep an ifnet type-stable despite the possibility of the * driver calling if_free(). If there are additional references, we defer * freeing the underlying data structure. */ void if_ref(struct ifnet *ifp) { /* We don't assert the ifnet list lock here, but arguably should. */ refcount_acquire(&ifp->if_refcount); } void if_rele(struct ifnet *ifp) { if (!refcount_release(&ifp->if_refcount)) return; NET_EPOCH_CALL(if_destroy, &ifp->if_epoch_ctx); } void ifq_init(struct ifaltq *ifq, struct ifnet *ifp) { mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF); if (ifq->ifq_maxlen == 0) ifq->ifq_maxlen = ifqmaxlen; ifq->altq_type = 0; ifq->altq_disc = NULL; ifq->altq_flags &= ALTQF_CANTCHANGE; ifq->altq_tbr = NULL; ifq->altq_ifp = ifp; } void ifq_delete(struct ifaltq *ifq) { mtx_destroy(&ifq->ifq_mtx); } /* * Perform generic interface initialization tasks and attach the interface * to the list of "active" interfaces. If vmove flag is set on entry * to if_attach_internal(), perform only a limited subset of initialization * tasks, given that we are moving from one vnet to another an ifnet which * has already been fully initialized. * * Note that if_detach_internal() removes group membership unconditionally * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL. * Thus, when if_vmove() is applied to a cloned interface, group membership * is lost while a cloned one always joins a group whose name is * ifc->ifc_name. To recover this after if_detach_internal() and * if_attach_internal(), the cloner should be specified to * if_attach_internal() via ifc. If it is non-NULL, if_attach_internal() * attempts to join a group whose name is ifc->ifc_name. * * XXX: * - The decision to return void and thus require this function to * succeed is questionable. * - We should probably do more sanity checking. For instance we don't * do anything to insure if_xname is unique or non-empty. */ void if_attach(struct ifnet *ifp) { if_attach_internal(ifp, 0, NULL); } /* * Compute the least common TSO limit. */ void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax) { /* * 1) If there is no limit currently, take the limit from * the network adapter. * * 2) If the network adapter has a limit below the current * limit, apply it. */ if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 && ifp->if_hw_tsomax < pmax->tsomaxbytes)) { pmax->tsomaxbytes = ifp->if_hw_tsomax; } if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 && ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) { pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; } if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 && ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) { pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; } } /* * Update TSO limit of a network adapter. * * Returns zero if no change. Else non-zero. */ int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax) { int retval = 0; if (ifp->if_hw_tsomax != pmax->tsomaxbytes) { ifp->if_hw_tsomax = pmax->tsomaxbytes; retval++; } if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) { ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize; retval++; } if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) { ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount; retval++; } return (retval); } static void if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc) { unsigned socksize, ifasize; int namelen, masklen; struct sockaddr_dl *sdl; struct ifaddr *ifa; if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index)) panic ("%s: BUG: if_attach called without if_alloc'd input()\n", ifp->if_xname); #ifdef VIMAGE ifp->if_vnet = curvnet; if (ifp->if_home_vnet == NULL) ifp->if_home_vnet = curvnet; #endif if_addgroup(ifp, IFG_ALL); /* Restore group membership for cloned interfaces. */ if (vmove && ifc != NULL) if_clone_addgroup(ifp, ifc); getmicrotime(&ifp->if_lastchange); ifp->if_epoch = time_uptime; KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) || (ifp->if_transmit != NULL && ifp->if_qflush != NULL), ("transmit and qflush must both either be set or both be NULL")); if (ifp->if_transmit == NULL) { ifp->if_transmit = if_transmit; ifp->if_qflush = if_qflush; } if (ifp->if_input == NULL) ifp->if_input = if_input_default; if (ifp->if_requestencap == NULL) ifp->if_requestencap = if_requestencap_default; if (!vmove) { #ifdef MAC mac_ifnet_create(ifp); #endif /* * Create a Link Level name for this device. */ namelen = strlen(ifp->if_xname); /* * Always save enough space for any possiable name so we * can do a rename in place later. */ masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ; socksize = masklen + ifp->if_addrlen; if (socksize < sizeof(*sdl)) socksize = sizeof(*sdl); socksize = roundup2(socksize, sizeof(long)); ifasize = sizeof(*ifa) + 2 * socksize; ifa = ifa_alloc(ifasize, M_WAITOK); sdl = (struct sockaddr_dl *)(ifa + 1); sdl->sdl_len = socksize; sdl->sdl_family = AF_LINK; bcopy(ifp->if_xname, sdl->sdl_data, namelen); sdl->sdl_nlen = namelen; sdl->sdl_index = ifp->if_index; sdl->sdl_type = ifp->if_type; ifp->if_addr = ifa; ifa->ifa_ifp = ifp; ifa->ifa_addr = (struct sockaddr *)sdl; sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl); ifa->ifa_netmask = (struct sockaddr *)sdl; sdl->sdl_len = masklen; while (namelen != 0) sdl->sdl_data[--namelen] = 0xff; CK_STAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link); /* Reliably crash if used uninitialized. */ ifp->if_broadcastaddr = NULL; if (ifp->if_type == IFT_ETHER) { ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR, M_WAITOK | M_ZERO); } #if defined(INET) || defined(INET6) /* Use defaults for TSO, if nothing is set */ if (ifp->if_hw_tsomax == 0 && ifp->if_hw_tsomaxsegcount == 0 && ifp->if_hw_tsomaxsegsize == 0) { /* * The TSO defaults needs to be such that an * NFS mbuf list of 35 mbufs totalling just * below 64K works and that a chain of mbufs * can be defragged into at most 32 segments: */ ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)); ifp->if_hw_tsomaxsegcount = 35; ifp->if_hw_tsomaxsegsize = 2048; /* 2K */ /* XXX some drivers set IFCAP_TSO after ethernet attach */ if (ifp->if_capabilities & IFCAP_TSO) { if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n", ifp->if_hw_tsomax, ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); } } #endif } #ifdef VIMAGE else { /* * Update the interface index in the link layer address * of the interface. */ for (ifa = ifp->if_addr; ifa != NULL; ifa = CK_STAILQ_NEXT(ifa, ifa_link)) { if (ifa->ifa_addr->sa_family == AF_LINK) { sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_index = ifp->if_index; } } } #endif IFNET_WLOCK(); CK_STAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link); #ifdef VIMAGE curvnet->vnet_ifcnt++; #endif IFNET_WUNLOCK(); if (domain_init_status >= 2) if_attachdomain1(ifp); EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL); /* Announce the interface. */ rt_ifannouncemsg(ifp, IFAN_ARRIVAL); } static void if_epochalloc(void *dummy __unused) { net_epoch_preempt = epoch_alloc("Net preemptible", EPOCH_PREEMPT); } SYSINIT(ifepochalloc, SI_SUB_EPOCH, SI_ORDER_ANY, if_epochalloc, NULL); static void if_attachdomain(void *dummy) { struct ifnet *ifp; CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) if_attachdomain1(ifp); } SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND, if_attachdomain, NULL); static void if_attachdomain1(struct ifnet *ifp) { struct domain *dp; /* * Since dp->dom_ifattach calls malloc() with M_WAITOK, we * cannot lock ifp->if_afdata initialization, entirely. */ IF_AFDATA_LOCK(ifp); if (ifp->if_afdata_initialized >= domain_init_status) { IF_AFDATA_UNLOCK(ifp); log(LOG_WARNING, "%s called more than once on %s\n", __func__, ifp->if_xname); return; } ifp->if_afdata_initialized = domain_init_status; IF_AFDATA_UNLOCK(ifp); /* address family dependent data region */ bzero(ifp->if_afdata, sizeof(ifp->if_afdata)); for (dp = domains; dp; dp = dp->dom_next) { if (dp->dom_ifattach) ifp->if_afdata[dp->dom_family] = (*dp->dom_ifattach)(ifp); } } /* * Remove any unicast or broadcast network addresses from an interface. */ void if_purgeaddrs(struct ifnet *ifp) { struct ifaddr *ifa; while (1) { struct epoch_tracker et; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_LINK) break; } NET_EPOCH_EXIT(et); if (ifa == NULL) break; #ifdef INET /* XXX: Ugly!! ad hoc just for INET */ if (ifa->ifa_addr->sa_family == AF_INET) { struct ifaliasreq ifr; bzero(&ifr, sizeof(ifr)); ifr.ifra_addr = *ifa->ifa_addr; if (ifa->ifa_dstaddr) ifr.ifra_broadaddr = *ifa->ifa_dstaddr; if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp, NULL) == 0) continue; } #endif /* INET */ #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) { in6_purgeaddr(ifa); /* ifp_addrhead is already updated */ continue; } #endif /* INET6 */ IF_ADDR_WLOCK(ifp); CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(ifa); } } /* * Remove any multicast network addresses from an interface when an ifnet * is going away. */ static void if_purgemaddrs(struct ifnet *ifp) { struct ifmultiaddr *ifma; IF_ADDR_WLOCK(ifp); while (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) { ifma = CK_STAILQ_FIRST(&ifp->if_multiaddrs); CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); if_delmulti_locked(ifp, ifma, 1); } IF_ADDR_WUNLOCK(ifp); } /* * Detach an interface, removing it from the list of "active" interfaces. * If vmove flag is set on entry to if_detach_internal(), perform only a * limited subset of cleanup tasks, given that we are moving an ifnet from * one vnet to another, where it must be fully operational. * * XXXRW: There are some significant questions about event ordering, and * how to prevent things from starting to use the interface during detach. */ void if_detach(struct ifnet *ifp) { CURVNET_SET_QUIET(ifp->if_vnet); if_detach_internal(ifp, 0, NULL); CURVNET_RESTORE(); } /* * The vmove flag, if set, indicates that we are called from a callpath * that is moving an interface to a different vnet instance. * * The shutdown flag, if set, indicates that we are called in the * process of shutting down a vnet instance. Currently only the * vnet_if_return SYSUNINIT function sets it. Note: we can be called * on a vnet instance shutdown without this flag being set, e.g., when * the cloned interfaces are destoyed as first thing of teardown. */ static int if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp) { struct ifaddr *ifa; int i; struct domain *dp; struct ifnet *iter; int found = 0; #ifdef VIMAGE bool shutdown; shutdown = ifp->if_vnet->vnet_shutdown; #endif IFNET_WLOCK(); CK_STAILQ_FOREACH(iter, &V_ifnet, if_link) if (iter == ifp) { CK_STAILQ_REMOVE(&V_ifnet, ifp, ifnet, if_link); if (!vmove) ifp->if_flags |= IFF_DYING; found = 1; break; } IFNET_WUNLOCK(); if (!found) { /* * While we would want to panic here, we cannot * guarantee that the interface is indeed still on * the list given we don't hold locks all the way. */ return (ENOENT); #if 0 if (vmove) panic("%s: ifp=%p not on the ifnet tailq %p", __func__, ifp, &V_ifnet); else return; /* XXX this should panic as well? */ #endif } /* * At this point we know the interface still was on the ifnet list * and we removed it so we are in a stable state. */ #ifdef VIMAGE curvnet->vnet_ifcnt--; #endif epoch_wait_preempt(net_epoch_preempt); /* * Ensure all pending EPOCH(9) callbacks have been executed. This * fixes issues about late destruction of multicast options * which lead to leave group calls, which in turn access the * belonging ifnet structure: */ epoch_drain_callbacks(net_epoch_preempt); /* * In any case (destroy or vmove) detach us from the groups * and remove/wait for pending events on the taskq. * XXX-BZ in theory an interface could still enqueue a taskq change? */ if_delgroups(ifp); taskqueue_drain(taskqueue_swi, &ifp->if_linktask); taskqueue_drain(taskqueue_swi, &ifp->if_addmultitask); /* * Check if this is a cloned interface or not. Must do even if * shutting down as a if_vmove_reclaim() would move the ifp and * the if_clone_addgroup() will have a corrupted string overwise * from a gibberish pointer. */ if (vmove && ifcp != NULL) *ifcp = if_clone_findifc(ifp); if_down(ifp); #ifdef VIMAGE /* * On VNET shutdown abort here as the stack teardown will do all * the work top-down for us. */ if (shutdown) { /* Give interface users the chance to clean up. */ EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); /* * In case of a vmove we are done here without error. * If we would signal an error it would lead to the same * abort as if we did not find the ifnet anymore. * if_detach() calls us in void context and does not care * about an early abort notification, so life is splendid :) */ goto finish_vnet_shutdown; } #endif /* * At this point we are not tearing down a VNET and are either * going to destroy or vmove the interface and have to cleanup * accordingly. */ /* * Remove routes and flush queues. */ #ifdef ALTQ if (ALTQ_IS_ENABLED(&ifp->if_snd)) altq_disable(&ifp->if_snd); if (ALTQ_IS_ATTACHED(&ifp->if_snd)) altq_detach(&ifp->if_snd); #endif if_purgeaddrs(ifp); #ifdef INET in_ifdetach(ifp); #endif #ifdef INET6 /* * Remove all IPv6 kernel structs related to ifp. This should be done * before removing routing entries below, since IPv6 interface direct * routes are expected to be removed by the IPv6-specific kernel API. * Otherwise, the kernel will detect some inconsistency and bark it. */ in6_ifdetach(ifp); #endif if_purgemaddrs(ifp); /* Announce that the interface is gone. */ rt_ifannouncemsg(ifp, IFAN_DEPARTURE); EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL); if (!vmove) { /* * Prevent further calls into the device driver via ifnet. */ if_dead(ifp); /* * Clean up all addresses. */ IF_ADDR_WLOCK(ifp); if (!CK_STAILQ_EMPTY(&ifp->if_addrhead)) { ifa = CK_STAILQ_FIRST(&ifp->if_addrhead); CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(ifa); } else IF_ADDR_WUNLOCK(ifp); } rt_flushifroutes(ifp); #ifdef VIMAGE finish_vnet_shutdown: #endif /* * We cannot hold the lock over dom_ifdetach calls as they might * sleep, for example trying to drain a callout, thus open up the * theoretical race with re-attaching. */ IF_AFDATA_LOCK(ifp); i = ifp->if_afdata_initialized; ifp->if_afdata_initialized = 0; IF_AFDATA_UNLOCK(ifp); for (dp = domains; i > 0 && dp; dp = dp->dom_next) { if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) { (*dp->dom_ifdetach)(ifp, ifp->if_afdata[dp->dom_family]); ifp->if_afdata[dp->dom_family] = NULL; } } return (0); } #ifdef VIMAGE /* * if_vmove() performs a limited version of if_detach() in current * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg. * An attempt is made to shrink if_index in current vnet, find an * unused if_index in target vnet and calls if_grow() if necessary, * and finally find an unused if_xname for the target vnet. */ static int if_vmove(struct ifnet *ifp, struct vnet *new_vnet) { struct if_clone *ifc; #ifdef DEV_BPF u_int bif_dlt, bif_hdrlen; #endif void *old; int rc; #ifdef DEV_BPF /* * if_detach_internal() will call the eventhandler to notify * interface departure. That will detach if_bpf. We need to * safe the dlt and hdrlen so we can re-attach it later. */ bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen); #endif /* * Detach from current vnet, but preserve LLADDR info, do not * mark as dead etc. so that the ifnet can be reattached later. * If we cannot find it, we lost the race to someone else. */ rc = if_detach_internal(ifp, 1, &ifc); if (rc != 0) return (rc); /* * Unlink the ifnet from ifindex_table[] in current vnet, and shrink * the if_index for that vnet if possible. * * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized, * or we'd lock on one vnet and unlock on another. */ IFNET_WLOCK(); ifindex_free_locked(ifp->if_index); IFNET_WUNLOCK(); /* * Perform interface-specific reassignment tasks, if provided by * the driver. */ if (ifp->if_reassign != NULL) ifp->if_reassign(ifp, new_vnet, NULL); /* * Switch to the context of the target vnet. */ CURVNET_SET_QUIET(new_vnet); restart: IFNET_WLOCK(); ifp->if_index = ifindex_alloc(&old); if (__predict_false(ifp->if_index == USHRT_MAX)) { IFNET_WUNLOCK(); epoch_wait_preempt(net_epoch_preempt); free(old, M_IFNET); goto restart; } ifnet_setbyindex(ifp->if_index, ifp); IFNET_WUNLOCK(); if_attach_internal(ifp, 1, ifc); #ifdef DEV_BPF if (ifp->if_bpf == NULL) bpfattach(ifp, bif_dlt, bif_hdrlen); #endif CURVNET_RESTORE(); return (0); } /* * Move an ifnet to or from another child prison/vnet, specified by the jail id. */ static int if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid) { struct prison *pr; struct ifnet *difp; int error; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, jid); sx_sunlock(&allprison_lock); if (pr == NULL) return (ENXIO); prison_hold_locked(pr); mtx_unlock(&pr->pr_mtx); /* Do not try to move the iface from and to the same prison. */ if (pr->pr_vnet == ifp->if_vnet) { prison_free(pr); return (EEXIST); } /* Make sure the named iface does not exists in the dst. prison/vnet. */ /* XXX Lock interfaces to avoid races. */ CURVNET_SET_QUIET(pr->pr_vnet); difp = ifunit(ifname); if (difp != NULL) { CURVNET_RESTORE(); prison_free(pr); return (EEXIST); } /* Make sure the VNET is stable. */ if (ifp->if_vnet->vnet_shutdown) { CURVNET_RESTORE(); prison_free(pr); return (EBUSY); } CURVNET_RESTORE(); /* Move the interface into the child jail/vnet. */ error = if_vmove(ifp, pr->pr_vnet); /* Report the new if_xname back to the userland on success. */ if (error == 0) sprintf(ifname, "%s", ifp->if_xname); prison_free(pr); return (error); } static int if_vmove_reclaim(struct thread *td, char *ifname, int jid) { struct prison *pr; struct vnet *vnet_dst; struct ifnet *ifp; int error; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, jid); sx_sunlock(&allprison_lock); if (pr == NULL) return (ENXIO); prison_hold_locked(pr); mtx_unlock(&pr->pr_mtx); /* Make sure the named iface exists in the source prison/vnet. */ CURVNET_SET(pr->pr_vnet); ifp = ifunit(ifname); /* XXX Lock to avoid races. */ if (ifp == NULL) { CURVNET_RESTORE(); prison_free(pr); return (ENXIO); } /* Do not try to move the iface from and to the same prison. */ vnet_dst = TD_TO_VNET(td); if (vnet_dst == ifp->if_vnet) { CURVNET_RESTORE(); prison_free(pr); return (EEXIST); } /* Make sure the VNET is stable. */ if (ifp->if_vnet->vnet_shutdown) { CURVNET_RESTORE(); prison_free(pr); return (EBUSY); } /* Get interface back from child jail/vnet. */ error = if_vmove(ifp, vnet_dst); CURVNET_RESTORE(); /* Report the new if_xname back to the userland on success. */ if (error == 0) sprintf(ifname, "%s", ifp->if_xname); prison_free(pr); return (error); } #endif /* VIMAGE */ /* * Add a group to an interface */ int if_addgroup(struct ifnet *ifp, const char *groupname) { struct ifg_list *ifgl; struct ifg_group *ifg = NULL; struct ifg_member *ifgm; int new = 0; if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' && groupname[strlen(groupname) - 1] <= '9') return (EINVAL); IFNET_WLOCK(); CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) { IFNET_WUNLOCK(); return (EEXIST); } if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL) { IFNET_WUNLOCK(); return (ENOMEM); } if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) { free(ifgl, M_TEMP); IFNET_WUNLOCK(); return (ENOMEM); } CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) if (!strcmp(ifg->ifg_group, groupname)) break; if (ifg == NULL) { if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL) { free(ifgl, M_TEMP); free(ifgm, M_TEMP); IFNET_WUNLOCK(); return (ENOMEM); } strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group)); ifg->ifg_refcnt = 0; CK_STAILQ_INIT(&ifg->ifg_members); CK_STAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next); new = 1; } ifg->ifg_refcnt++; ifgl->ifgl_group = ifg; ifgm->ifgm_ifp = ifp; IF_ADDR_WLOCK(ifp); CK_STAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next); CK_STAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next); IF_ADDR_WUNLOCK(ifp); IFNET_WUNLOCK(); if (new) EVENTHANDLER_INVOKE(group_attach_event, ifg); EVENTHANDLER_INVOKE(group_change_event, groupname); return (0); } /* * Helper function to remove a group out of an interface. Expects the global * ifnet lock to be write-locked, and drops it before returning. */ static void _if_delgroup_locked(struct ifnet *ifp, struct ifg_list *ifgl, const char *groupname) { struct ifg_member *ifgm; bool freeifgl; IFNET_WLOCK_ASSERT(); IF_ADDR_WLOCK(ifp); CK_STAILQ_REMOVE(&ifp->if_groups, ifgl, ifg_list, ifgl_next); IF_ADDR_WUNLOCK(ifp); CK_STAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) { if (ifgm->ifgm_ifp == ifp) { CK_STAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifg_member, ifgm_next); break; } } if (--ifgl->ifgl_group->ifg_refcnt == 0) { CK_STAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_group, ifg_next); freeifgl = true; } else { freeifgl = false; } IFNET_WUNLOCK(); epoch_wait_preempt(net_epoch_preempt); if (freeifgl) { EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); } free(ifgm, M_TEMP); free(ifgl, M_TEMP); EVENTHANDLER_INVOKE(group_change_event, groupname); } /* * Remove a group from an interface */ int if_delgroup(struct ifnet *ifp, const char *groupname) { struct ifg_list *ifgl; IFNET_WLOCK(); CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0) break; if (ifgl == NULL) { IFNET_WUNLOCK(); return (ENOENT); } _if_delgroup_locked(ifp, ifgl, groupname); return (0); } /* * Remove an interface from all groups */ static void if_delgroups(struct ifnet *ifp) { struct ifg_list *ifgl; char groupname[IFNAMSIZ]; IFNET_WLOCK(); while ((ifgl = CK_STAILQ_FIRST(&ifp->if_groups)) != NULL) { strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ); _if_delgroup_locked(ifp, ifgl, groupname); IFNET_WLOCK(); } IFNET_WUNLOCK(); } static char * ifgr_group_get(void *ifgrp) { union ifgroupreq_union *ifgrup; ifgrup = ifgrp; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return (&ifgrup->ifgr32.ifgr_ifgru.ifgru_group[0]); #endif return (&ifgrup->ifgr.ifgr_ifgru.ifgru_group[0]); } static struct ifg_req * ifgr_groups_get(void *ifgrp) { union ifgroupreq_union *ifgrup; ifgrup = ifgrp; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return ((struct ifg_req *)(uintptr_t) ifgrup->ifgr32.ifgr_ifgru.ifgru_groups); #endif return (ifgrup->ifgr.ifgr_ifgru.ifgru_groups); } /* * Stores all groups from an interface in memory pointed to by ifgr. */ static int if_getgroup(struct ifgroupreq *ifgr, struct ifnet *ifp) { int len, error; struct ifg_list *ifgl; struct ifg_req ifgrq, *ifgp; NET_EPOCH_ASSERT(); if (ifgr->ifgr_len == 0) { CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) ifgr->ifgr_len += sizeof(struct ifg_req); return (0); } len = ifgr->ifgr_len; ifgp = ifgr_groups_get(ifgr); /* XXX: wire */ CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { if (len < sizeof(ifgrq)) return (EINVAL); bzero(&ifgrq, sizeof ifgrq); strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group, sizeof(ifgrq.ifgrq_group)); if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) return (error); len -= sizeof(ifgrq); ifgp++; } return (0); } /* * Stores all members of a group in memory pointed to by igfr */ static int if_getgroupmembers(struct ifgroupreq *ifgr) { struct ifg_group *ifg; struct ifg_member *ifgm; struct ifg_req ifgrq, *ifgp; int len, error; IFNET_RLOCK(); CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0) break; if (ifg == NULL) { IFNET_RUNLOCK(); return (ENOENT); } if (ifgr->ifgr_len == 0) { CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) ifgr->ifgr_len += sizeof(ifgrq); IFNET_RUNLOCK(); return (0); } len = ifgr->ifgr_len; ifgp = ifgr_groups_get(ifgr); CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) { if (len < sizeof(ifgrq)) { IFNET_RUNLOCK(); return (EINVAL); } bzero(&ifgrq, sizeof ifgrq); strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname, sizeof(ifgrq.ifgrq_member)); if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) { IFNET_RUNLOCK(); return (error); } len -= sizeof(ifgrq); ifgp++; } IFNET_RUNLOCK(); return (0); } /* * Return counter values from counter(9)s stored in ifnet. */ uint64_t if_get_counter_default(struct ifnet *ifp, ift_counter cnt) { KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); return (counter_u64_fetch(ifp->if_counters[cnt])); } /* * Increase an ifnet counter. Usually used for counters shared * between the stack and a driver, but function supports them all. */ void if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc) { KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); counter_u64_add(ifp->if_counters[cnt], inc); } /* * Copy data from ifnet to userland API structure if_data. */ void if_data_copy(struct ifnet *ifp, struct if_data *ifd) { ifd->ifi_type = ifp->if_type; ifd->ifi_physical = 0; ifd->ifi_addrlen = ifp->if_addrlen; ifd->ifi_hdrlen = ifp->if_hdrlen; ifd->ifi_link_state = ifp->if_link_state; ifd->ifi_vhid = 0; ifd->ifi_datalen = sizeof(struct if_data); ifd->ifi_mtu = ifp->if_mtu; ifd->ifi_metric = ifp->if_metric; ifd->ifi_baudrate = ifp->if_baudrate; ifd->ifi_hwassist = ifp->if_hwassist; ifd->ifi_epoch = ifp->if_epoch; ifd->ifi_lastchange = ifp->if_lastchange; ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS); ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS); ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS); ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS); ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS); ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES); ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES); ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS); ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS); ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS); ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS); ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO); } /* * Initialization, destruction and refcounting functions for ifaddrs. */ struct ifaddr * ifa_alloc(size_t size, int flags) { struct ifaddr *ifa; KASSERT(size >= sizeof(struct ifaddr), ("%s: invalid size %zu", __func__, size)); ifa = malloc(size, M_IFADDR, M_ZERO | flags); if (ifa == NULL) return (NULL); if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL) goto fail; refcount_init(&ifa->ifa_refcnt, 1); return (ifa); fail: /* free(NULL) is okay */ counter_u64_free(ifa->ifa_opackets); counter_u64_free(ifa->ifa_ipackets); counter_u64_free(ifa->ifa_obytes); counter_u64_free(ifa->ifa_ibytes); free(ifa, M_IFADDR); return (NULL); } void ifa_ref(struct ifaddr *ifa) { refcount_acquire(&ifa->ifa_refcnt); } static void ifa_destroy(epoch_context_t ctx) { struct ifaddr *ifa; ifa = __containerof(ctx, struct ifaddr, ifa_epoch_ctx); counter_u64_free(ifa->ifa_opackets); counter_u64_free(ifa->ifa_ipackets); counter_u64_free(ifa->ifa_obytes); counter_u64_free(ifa->ifa_ibytes); free(ifa, M_IFADDR); } void ifa_free(struct ifaddr *ifa) { if (refcount_release(&ifa->ifa_refcnt)) NET_EPOCH_CALL(ifa_destroy, &ifa->ifa_epoch_ctx); } static int ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa, struct sockaddr *ia) { struct epoch_tracker et; int error; struct rt_addrinfo info; struct sockaddr_dl null_sdl; struct ifnet *ifp; struct ifaddr *rti_ifa = NULL; ifp = ifa->ifa_ifp; bzero(&info, sizeof(info)); if (cmd != RTM_DELETE) info.rti_ifp = V_loif; if (cmd == RTM_ADD) { /* explicitly specify (loopback) ifa */ if (info.rti_ifp != NULL) { NET_EPOCH_ENTER(et); rti_ifa = ifaof_ifpforaddr(ifa->ifa_addr, info.rti_ifp); if (rti_ifa != NULL) ifa_ref(rti_ifa); info.rti_ifa = rti_ifa; NET_EPOCH_EXIT(et); } } info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED; info.rti_info[RTAX_DST] = ia; info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl; link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type); error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib); if (rti_ifa != NULL) ifa_free(rti_ifa); - if (error != 0 && - !(cmd == RTM_ADD && error == EEXIST) && - !(cmd == RTM_DELETE && error == ENOENT)) - if_printf(ifp, "%s failed: %d\n", otype, error); + if (error == 0 || + (cmd == RTM_ADD && error == EEXIST) || + (cmd == RTM_DELETE && (error == ENOENT || error == ESRCH))) + return (error); + + log(LOG_DEBUG, "%s: %s failed for interface %s: %u\n", + __func__, otype, if_name(ifp), error); return (error); } int ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { return (ifa_maintain_loopback_route(RTM_ADD, "insertion", ifa, ia)); } int ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { return (ifa_maintain_loopback_route(RTM_DELETE, "deletion", ifa, ia)); } int ifa_switch_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { return (ifa_maintain_loopback_route(RTM_CHANGE, "switch", ifa, ia)); } /* * XXX: Because sockaddr_dl has deeper structure than the sockaddr * structs used to represent other address families, it is necessary * to perform a different comparison. */ #define sa_dl_equal(a1, a2) \ ((((const struct sockaddr_dl *)(a1))->sdl_len == \ ((const struct sockaddr_dl *)(a2))->sdl_len) && \ (bcmp(CLLADDR((const struct sockaddr_dl *)(a1)), \ CLLADDR((const struct sockaddr_dl *)(a2)), \ ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0)) /* * Locate an interface based on a complete address. */ /*ARGSUSED*/ struct ifaddr * ifa_ifwithaddr(const struct sockaddr *addr) { struct ifnet *ifp; struct ifaddr *ifa; NET_EPOCH_ASSERT(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (sa_equal(addr, ifa->ifa_addr)) { goto done; } /* IP6 doesn't have broadcast */ if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && sa_equal(ifa->ifa_broadaddr, addr)) { goto done; } } } ifa = NULL; done: return (ifa); } int ifa_ifwithaddr_check(const struct sockaddr *addr) { struct epoch_tracker et; int rc; NET_EPOCH_ENTER(et); rc = (ifa_ifwithaddr(addr) != NULL); NET_EPOCH_EXIT(et); return (rc); } /* * Locate an interface based on the broadcast address. */ /* ARGSUSED */ struct ifaddr * ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; NET_EPOCH_ASSERT(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && sa_equal(ifa->ifa_broadaddr, addr)) { goto done; } } } ifa = NULL; done: return (ifa); } /* * Locate the point to point interface with a given destination address. */ /*ARGSUSED*/ struct ifaddr * ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; NET_EPOCH_ASSERT(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((ifp->if_flags & IFF_POINTOPOINT) == 0) continue; if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { goto done; } } } ifa = NULL; done: return (ifa); } /* * Find an interface on a specific network. If many, choice * is most specific found. */ struct ifaddr * ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; const char *addr_data = addr->sa_data, *cplim; NET_EPOCH_ASSERT(); /* * AF_LINK addresses can be looked up directly by their index number, * so do that if we can. */ if (af == AF_LINK) { const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr; if (sdl->sdl_index && sdl->sdl_index <= V_if_index) return (ifaddr_byindex(sdl->sdl_index)); } /* * Scan though each interface, looking for ones that have addresses * in this address family and the requested fib. */ CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { const char *cp, *cp2, *cp3; if (ifa->ifa_addr->sa_family != af) next: continue; if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) { /* * This is a bit broken as it doesn't * take into account that the remote end may * be a single node in the network we are * looking for. * The trouble is that we don't know the * netmask for the remote end. */ if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { goto done; } } else { /* * Scan all the bits in the ifa's address. * If a bit dissagrees with what we are * looking for, mask it with the netmask * to see if it really matters. * (A byte at a time) */ if (ifa->ifa_netmask == 0) continue; cp = addr_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; while (cp3 < cplim) if ((*cp++ ^ *cp2++) & *cp3++) goto next; /* next address! */ /* * If the netmask of what we just found * is more specific than what we had before * (if we had one), or if the virtual status * of new prefix is better than of the old one, * then remember the new one before continuing * to search for an even better one. */ if (ifa_maybe == NULL || ifa_preferred(ifa_maybe, ifa) || rn_refines((caddr_t)ifa->ifa_netmask, (caddr_t)ifa_maybe->ifa_netmask)) { ifa_maybe = ifa; } } } } ifa = ifa_maybe; ifa_maybe = NULL; done: return (ifa); } /* * Find an interface address specific to an interface best matching * a given address. */ struct ifaddr * ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp) { struct ifaddr *ifa; const char *cp, *cp2, *cp3; char *cplim; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; if (af >= AF_MAX) return (NULL); NET_EPOCH_ASSERT(); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != af) continue; if (ifa_maybe == NULL) ifa_maybe = ifa; if (ifa->ifa_netmask == 0) { if (sa_equal(addr, ifa->ifa_addr) || (ifa->ifa_dstaddr && sa_equal(addr, ifa->ifa_dstaddr))) goto done; continue; } if (ifp->if_flags & IFF_POINTOPOINT) { if (sa_equal(addr, ifa->ifa_dstaddr)) goto done; } else { cp = addr->sa_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; for (; cp3 < cplim; cp3++) if ((*cp++ ^ *cp2++) & *cp3) break; if (cp3 == cplim) goto done; } } ifa = ifa_maybe; done: return (ifa); } /* * See whether new ifa is better than current one: * 1) A non-virtual one is preferred over virtual. * 2) A virtual in master state preferred over any other state. * * Used in several address selecting functions. */ int ifa_preferred(struct ifaddr *cur, struct ifaddr *next) { return (cur->ifa_carp && (!next->ifa_carp || ((*carp_master_p)(next) && !(*carp_master_p)(cur)))); } struct sockaddr_dl * link_alloc_sdl(size_t size, int flags) { return (malloc(size, M_TEMP, flags)); } void link_free_sdl(struct sockaddr *sa) { free(sa, M_TEMP); } /* * Fills in given sdl with interface basic info. * Returns pointer to filled sdl. */ struct sockaddr_dl * link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)paddr; memset(sdl, 0, sizeof(struct sockaddr_dl)); sdl->sdl_len = sizeof(struct sockaddr_dl); sdl->sdl_family = AF_LINK; sdl->sdl_index = ifp->if_index; sdl->sdl_type = iftype; return (sdl); } /* * Mark an interface down and notify protocols of * the transition. */ static void if_unroute(struct ifnet *ifp, int flag, int fam) { struct ifaddr *ifa; KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP")); ifp->if_flags &= ~flag; getmicrotime(&ifp->if_lastchange); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) pfctlinput(PRC_IFDOWN, ifa->ifa_addr); ifp->if_qflush(ifp); if (ifp->if_carp) (*carp_linkstate_p)(ifp); rt_ifmsg(ifp); } /* * Mark an interface up and notify protocols of * the transition. */ static void if_route(struct ifnet *ifp, int flag, int fam) { struct ifaddr *ifa; KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP")); ifp->if_flags |= flag; getmicrotime(&ifp->if_lastchange); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) pfctlinput(PRC_IFUP, ifa->ifa_addr); if (ifp->if_carp) (*carp_linkstate_p)(ifp); rt_ifmsg(ifp); #ifdef INET6 in6_if_up(ifp); #endif } void (*vlan_link_state_p)(struct ifnet *); /* XXX: private from if_vlan */ void (*vlan_trunk_cap_p)(struct ifnet *); /* XXX: private from if_vlan */ struct ifnet *(*vlan_trunkdev_p)(struct ifnet *); struct ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t); int (*vlan_tag_p)(struct ifnet *, uint16_t *); int (*vlan_pcp_p)(struct ifnet *, uint16_t *); int (*vlan_setcookie_p)(struct ifnet *, void *); void *(*vlan_cookie_p)(struct ifnet *); /* * Handle a change in the interface link state. To avoid LORs * between driver lock and upper layer locks, as well as possible * recursions, we post event to taskqueue, and all job * is done in static do_link_state_change(). */ void if_link_state_change(struct ifnet *ifp, int link_state) { /* Return if state hasn't changed. */ if (ifp->if_link_state == link_state) return; ifp->if_link_state = link_state; /* XXXGL: reference ifp? */ taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask); } static void do_link_state_change(void *arg, int pending) { struct ifnet *ifp; int link_state; ifp = arg; link_state = ifp->if_link_state; CURVNET_SET(ifp->if_vnet); rt_ifmsg(ifp); if (ifp->if_vlantrunk != NULL) (*vlan_link_state_p)(ifp); if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) && ifp->if_l2com != NULL) (*ng_ether_link_state_p)(ifp, link_state); if (ifp->if_carp) (*carp_linkstate_p)(ifp); if (ifp->if_bridge) ifp->if_bridge_linkstate(ifp); if (ifp->if_lagg) (*lagg_linkstate_p)(ifp, link_state); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL); if (pending > 1) if_printf(ifp, "%d link states coalesced\n", pending); if (log_link_state_change) if_printf(ifp, "link state changed to %s\n", (link_state == LINK_STATE_UP) ? "UP" : "DOWN" ); EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state); CURVNET_RESTORE(); } /* * Mark an interface down and notify protocols of * the transition. */ void if_down(struct ifnet *ifp) { EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN); if_unroute(ifp, IFF_UP, AF_UNSPEC); } /* * Mark an interface up and notify protocols of * the transition. */ void if_up(struct ifnet *ifp) { if_route(ifp, IFF_UP, AF_UNSPEC); EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP); } /* * Flush an interface queue. */ void if_qflush(struct ifnet *ifp) { struct mbuf *m, *n; struct ifaltq *ifq; ifq = &ifp->if_snd; IFQ_LOCK(ifq); #ifdef ALTQ if (ALTQ_IS_ENABLED(ifq)) ALTQ_PURGE(ifq); #endif n = ifq->ifq_head; while ((m = n) != NULL) { n = m->m_nextpkt; m_freem(m); } ifq->ifq_head = 0; ifq->ifq_tail = 0; ifq->ifq_len = 0; IFQ_UNLOCK(ifq); } /* * Map interface name to interface structure pointer, with or without * returning a reference. */ struct ifnet * ifunit_ref(const char *name) { struct epoch_tracker et; struct ifnet *ifp; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 && !(ifp->if_flags & IFF_DYING)) break; } if (ifp != NULL) if_ref(ifp); NET_EPOCH_EXIT(et); return (ifp); } struct ifnet * ifunit(const char *name) { struct epoch_tracker et; struct ifnet *ifp; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0) break; } NET_EPOCH_EXIT(et); return (ifp); } static void * ifr_buffer_get_buffer(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return ((void *)(uintptr_t) ifrup->ifr32.ifr_ifru.ifru_buffer.buffer); #endif return (ifrup->ifr.ifr_ifru.ifru_buffer.buffer); } static void ifr_buffer_set_buffer_null(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) ifrup->ifr32.ifr_ifru.ifru_buffer.buffer = 0; else #endif ifrup->ifr.ifr_ifru.ifru_buffer.buffer = NULL; } static size_t ifr_buffer_get_length(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return (ifrup->ifr32.ifr_ifru.ifru_buffer.length); #endif return (ifrup->ifr.ifr_ifru.ifru_buffer.length); } static void ifr_buffer_set_length(void *data, size_t len) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) ifrup->ifr32.ifr_ifru.ifru_buffer.length = len; else #endif ifrup->ifr.ifr_ifru.ifru_buffer.length = len; } void * ifr_data_get_ptr(void *ifrp) { union ifreq_union *ifrup; ifrup = ifrp; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return ((void *)(uintptr_t) ifrup->ifr32.ifr_ifru.ifru_data); #endif return (ifrup->ifr.ifr_ifru.ifru_data); } /* * Hardware specific interface ioctls. */ int ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) { struct ifreq *ifr; int error = 0, do_ifup = 0; int new_flags, temp_flags; size_t namelen, onamelen; size_t descrlen; char *descrbuf, *odescrbuf; char new_name[IFNAMSIZ]; struct ifaddr *ifa; struct sockaddr_dl *sdl; ifr = (struct ifreq *)data; switch (cmd) { case SIOCGIFINDEX: ifr->ifr_index = ifp->if_index; break; case SIOCGIFFLAGS: temp_flags = ifp->if_flags | ifp->if_drv_flags; ifr->ifr_flags = temp_flags & 0xffff; ifr->ifr_flagshigh = temp_flags >> 16; break; case SIOCGIFCAP: ifr->ifr_reqcap = ifp->if_capabilities; ifr->ifr_curcap = ifp->if_capenable; break; #ifdef MAC case SIOCGIFMAC: error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp); break; #endif case SIOCGIFMETRIC: ifr->ifr_metric = ifp->if_metric; break; case SIOCGIFMTU: ifr->ifr_mtu = ifp->if_mtu; break; case SIOCGIFPHYS: /* XXXGL: did this ever worked? */ ifr->ifr_phys = 0; break; case SIOCGIFDESCR: error = 0; sx_slock(&ifdescr_sx); if (ifp->if_description == NULL) error = ENOMSG; else { /* space for terminating nul */ descrlen = strlen(ifp->if_description) + 1; if (ifr_buffer_get_length(ifr) < descrlen) ifr_buffer_set_buffer_null(ifr); else error = copyout(ifp->if_description, ifr_buffer_get_buffer(ifr), descrlen); ifr_buffer_set_length(ifr, descrlen); } sx_sunlock(&ifdescr_sx); break; case SIOCSIFDESCR: error = priv_check(td, PRIV_NET_SETIFDESCR); if (error) return (error); /* * Copy only (length-1) bytes to make sure that * if_description is always nul terminated. The * length parameter is supposed to count the * terminating nul in. */ if (ifr_buffer_get_length(ifr) > ifdescr_maxlen) return (ENAMETOOLONG); else if (ifr_buffer_get_length(ifr) == 0) descrbuf = NULL; else { descrbuf = malloc(ifr_buffer_get_length(ifr), M_IFDESCR, M_WAITOK | M_ZERO); error = copyin(ifr_buffer_get_buffer(ifr), descrbuf, ifr_buffer_get_length(ifr) - 1); if (error) { free(descrbuf, M_IFDESCR); break; } } sx_xlock(&ifdescr_sx); odescrbuf = ifp->if_description; ifp->if_description = descrbuf; sx_xunlock(&ifdescr_sx); getmicrotime(&ifp->if_lastchange); free(odescrbuf, M_IFDESCR); break; case SIOCGIFFIB: ifr->ifr_fib = ifp->if_fib; break; case SIOCSIFFIB: error = priv_check(td, PRIV_NET_SETIFFIB); if (error) return (error); if (ifr->ifr_fib >= rt_numfibs) return (EINVAL); ifp->if_fib = ifr->ifr_fib; break; case SIOCSIFFLAGS: error = priv_check(td, PRIV_NET_SETIFFLAGS); if (error) return (error); /* * Currently, no driver owned flags pass the IFF_CANTCHANGE * check, so we don't need special handling here yet. */ new_flags = (ifr->ifr_flags & 0xffff) | (ifr->ifr_flagshigh << 16); if (ifp->if_flags & IFF_UP && (new_flags & IFF_UP) == 0) { if_down(ifp); } else if (new_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) { do_ifup = 1; } /* See if permanently promiscuous mode bit is about to flip */ if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) { if (new_flags & IFF_PPROMISC) ifp->if_flags |= IFF_PROMISC; else if (ifp->if_pcount == 0) ifp->if_flags &= ~IFF_PROMISC; if (log_promisc_mode_change) if_printf(ifp, "permanently promiscuous mode %s\n", ((new_flags & IFF_PPROMISC) ? "enabled" : "disabled")); } ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) | (new_flags &~ IFF_CANTCHANGE); if (ifp->if_ioctl) { (void) (*ifp->if_ioctl)(ifp, cmd, data); } if (do_ifup) if_up(ifp); getmicrotime(&ifp->if_lastchange); break; case SIOCSIFCAP: error = priv_check(td, PRIV_NET_SETIFCAP); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); if (ifr->ifr_reqcap & ~ifp->if_capabilities) return (EINVAL); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; #ifdef MAC case SIOCSIFMAC: error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp); break; #endif case SIOCSIFNAME: error = priv_check(td, PRIV_NET_SETIFNAME); if (error) return (error); error = copyinstr(ifr_data_get_ptr(ifr), new_name, IFNAMSIZ, NULL); if (error != 0) return (error); if (new_name[0] == '\0') return (EINVAL); if (new_name[IFNAMSIZ-1] != '\0') { new_name[IFNAMSIZ-1] = '\0'; if (strlen(new_name) == IFNAMSIZ-1) return (EINVAL); } if (strcmp(new_name, ifp->if_xname) == 0) break; if (ifunit(new_name) != NULL) return (EEXIST); /* * XXX: Locking. Nothing else seems to lock if_flags, * and there are numerous other races with the * ifunit() checks not being atomic with namespace * changes (renames, vmoves, if_attach, etc). */ ifp->if_flags |= IFF_RENAMING; /* Announce the departure of the interface. */ rt_ifannouncemsg(ifp, IFAN_DEPARTURE); EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); if_printf(ifp, "changing name to '%s'\n", new_name); IF_ADDR_WLOCK(ifp); strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); ifa = ifp->if_addr; sdl = (struct sockaddr_dl *)ifa->ifa_addr; namelen = strlen(new_name); onamelen = sdl->sdl_nlen; /* * Move the address if needed. This is safe because we * allocate space for a name of length IFNAMSIZ when we * create this in if_attach(). */ if (namelen != onamelen) { bcopy(sdl->sdl_data + onamelen, sdl->sdl_data + namelen, sdl->sdl_alen); } bcopy(new_name, sdl->sdl_data, namelen); sdl->sdl_nlen = namelen; sdl = (struct sockaddr_dl *)ifa->ifa_netmask; bzero(sdl->sdl_data, onamelen); while (namelen != 0) sdl->sdl_data[--namelen] = 0xff; IF_ADDR_WUNLOCK(ifp); EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); /* Announce the return of the interface. */ rt_ifannouncemsg(ifp, IFAN_ARRIVAL); ifp->if_flags &= ~IFF_RENAMING; break; #ifdef VIMAGE case SIOCSIFVNET: error = priv_check(td, PRIV_NET_SETIFVNET); if (error) return (error); error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid); break; #endif case SIOCSIFMETRIC: error = priv_check(td, PRIV_NET_SETIFMETRIC); if (error) return (error); ifp->if_metric = ifr->ifr_metric; getmicrotime(&ifp->if_lastchange); break; case SIOCSIFPHYS: error = priv_check(td, PRIV_NET_SETIFPHYS); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCSIFMTU: { u_long oldmtu = ifp->if_mtu; error = priv_check(td, PRIV_NET_SETIFMTU); if (error) return (error); if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) return (EINVAL); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) { getmicrotime(&ifp->if_lastchange); rt_ifmsg(ifp); #ifdef INET DEBUGNET_NOTIFY_MTU(ifp); #endif } /* * If the link MTU changed, do network layer specific procedure. */ if (ifp->if_mtu != oldmtu) { #ifdef INET6 nd6_setmtu(ifp); #endif rt_updatemtu(ifp); } break; } case SIOCADDMULTI: case SIOCDELMULTI: if (cmd == SIOCADDMULTI) error = priv_check(td, PRIV_NET_ADDMULTI); else error = priv_check(td, PRIV_NET_DELMULTI); if (error) return (error); /* Don't allow group membership on non-multicast interfaces. */ if ((ifp->if_flags & IFF_MULTICAST) == 0) return (EOPNOTSUPP); /* Don't let users screw up protocols' entries. */ if (ifr->ifr_addr.sa_family != AF_LINK) return (EINVAL); if (cmd == SIOCADDMULTI) { struct epoch_tracker et; struct ifmultiaddr *ifma; /* * Userland is only permitted to join groups once * via the if_addmulti() KPI, because it cannot hold * struct ifmultiaddr * between calls. It may also * lose a race while we check if the membership * already exists. */ NET_EPOCH_ENTER(et); ifma = if_findmulti(ifp, &ifr->ifr_addr); NET_EPOCH_EXIT(et); if (ifma != NULL) error = EADDRINUSE; else error = if_addmulti(ifp, &ifr->ifr_addr, &ifma); } else { error = if_delmulti(ifp, &ifr->ifr_addr); } if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCSIFPHYADDR: case SIOCDIFPHYADDR: #ifdef INET6 case SIOCSIFPHYADDR_IN6: #endif case SIOCSIFMEDIA: case SIOCSIFGENERIC: error = priv_check(td, PRIV_NET_HWIOCTL); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCGIFSTATUS: case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: case SIOCGIFMEDIA: case SIOCGIFXMEDIA: case SIOCGIFGENERIC: case SIOCGIFRSSKEY: case SIOCGIFRSSHASH: case SIOCGIFDOWNREASON: if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); break; case SIOCSIFLLADDR: error = priv_check(td, PRIV_NET_SETLLADDR); if (error) return (error); error = if_setlladdr(ifp, ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len); break; case SIOCGHWADDR: error = if_gethwaddr(ifp, ifr); break; case CASE_IOC_IFGROUPREQ(SIOCAIFGROUP): error = priv_check(td, PRIV_NET_ADDIFGROUP); if (error) return (error); if ((error = if_addgroup(ifp, ifgr_group_get((struct ifgroupreq *)data)))) return (error); break; case CASE_IOC_IFGROUPREQ(SIOCGIFGROUP): { struct epoch_tracker et; NET_EPOCH_ENTER(et); error = if_getgroup((struct ifgroupreq *)data, ifp); NET_EPOCH_EXIT(et); break; } case CASE_IOC_IFGROUPREQ(SIOCDIFGROUP): error = priv_check(td, PRIV_NET_DELIFGROUP); if (error) return (error); if ((error = if_delgroup(ifp, ifgr_group_get((struct ifgroupreq *)data)))) return (error); break; default: error = ENOIOCTL; break; } return (error); } #ifdef COMPAT_FREEBSD32 struct ifconf32 { int32_t ifc_len; union { uint32_t ifcu_buf; uint32_t ifcu_req; } ifc_ifcu; }; #define SIOCGIFCONF32 _IOWR('i', 36, struct ifconf32) #endif #ifdef COMPAT_FREEBSD32 static void ifmr_init(struct ifmediareq *ifmr, caddr_t data) { struct ifmediareq32 *ifmr32; ifmr32 = (struct ifmediareq32 *)data; memcpy(ifmr->ifm_name, ifmr32->ifm_name, sizeof(ifmr->ifm_name)); ifmr->ifm_current = ifmr32->ifm_current; ifmr->ifm_mask = ifmr32->ifm_mask; ifmr->ifm_status = ifmr32->ifm_status; ifmr->ifm_active = ifmr32->ifm_active; ifmr->ifm_count = ifmr32->ifm_count; ifmr->ifm_ulist = (int *)(uintptr_t)ifmr32->ifm_ulist; } static void ifmr_update(const struct ifmediareq *ifmr, caddr_t data) { struct ifmediareq32 *ifmr32; ifmr32 = (struct ifmediareq32 *)data; ifmr32->ifm_current = ifmr->ifm_current; ifmr32->ifm_mask = ifmr->ifm_mask; ifmr32->ifm_status = ifmr->ifm_status; ifmr32->ifm_active = ifmr->ifm_active; ifmr32->ifm_count = ifmr->ifm_count; } #endif /* * Interface ioctls. */ int ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td) { #ifdef COMPAT_FREEBSD32 caddr_t saved_data = NULL; struct ifmediareq ifmr; struct ifmediareq *ifmrp = NULL; #endif struct ifnet *ifp; struct ifreq *ifr; int error; int oif_flags; CURVNET_SET(so->so_vnet); #ifdef VIMAGE /* Make sure the VNET is stable. */ if (so->so_vnet->vnet_shutdown) { CURVNET_RESTORE(); return (EBUSY); } #endif switch (cmd) { case SIOCGIFCONF: error = ifconf(cmd, data); goto out_noref; #ifdef COMPAT_FREEBSD32 case SIOCGIFCONF32: { struct ifconf32 *ifc32; struct ifconf ifc; ifc32 = (struct ifconf32 *)data; ifc.ifc_len = ifc32->ifc_len; ifc.ifc_buf = PTRIN(ifc32->ifc_buf); error = ifconf(SIOCGIFCONF, (void *)&ifc); if (error == 0) ifc32->ifc_len = ifc.ifc_len; goto out_noref; } #endif } #ifdef COMPAT_FREEBSD32 switch (cmd) { case SIOCGIFMEDIA32: case SIOCGIFXMEDIA32: ifmrp = &ifmr; ifmr_init(ifmrp, data); cmd = _IOC_NEWTYPE(cmd, struct ifmediareq); saved_data = data; data = (caddr_t)ifmrp; } #endif ifr = (struct ifreq *)data; switch (cmd) { #ifdef VIMAGE case SIOCSIFRVNET: error = priv_check(td, PRIV_NET_SETIFVNET); if (error == 0) error = if_vmove_reclaim(td, ifr->ifr_name, ifr->ifr_jid); goto out_noref; #endif case SIOCIFCREATE: case SIOCIFCREATE2: error = priv_check(td, PRIV_NET_IFCREATE); if (error == 0) error = if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ? ifr_data_get_ptr(ifr) : NULL); goto out_noref; case SIOCIFDESTROY: error = priv_check(td, PRIV_NET_IFDESTROY); if (error == 0) error = if_clone_destroy(ifr->ifr_name); goto out_noref; case SIOCIFGCLONERS: error = if_clone_list((struct if_clonereq *)data); goto out_noref; case CASE_IOC_IFGROUPREQ(SIOCGIFGMEMB): error = if_getgroupmembers((struct ifgroupreq *)data); goto out_noref; #if defined(INET) || defined(INET6) case SIOCSVH: case SIOCGVH: if (carp_ioctl_p == NULL) error = EPROTONOSUPPORT; else error = (*carp_ioctl_p)(ifr, cmd, td); goto out_noref; #endif } ifp = ifunit_ref(ifr->ifr_name); if (ifp == NULL) { error = ENXIO; goto out_noref; } error = ifhwioctl(cmd, ifp, data, td); if (error != ENOIOCTL) goto out_ref; oif_flags = ifp->if_flags; if (so->so_proto == NULL) { error = EOPNOTSUPP; goto out_ref; } /* * Pass the request on to the socket control method, and if the * latter returns EOPNOTSUPP, directly to the interface. * * Make an exception for the legacy SIOCSIF* requests. Drivers * trust SIOCSIFADDR et al to come from an already privileged * layer, and do not perform any credentials checks or input * validation. */ error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data, ifp, td)); if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL && cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR && cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK) error = (*ifp->if_ioctl)(ifp, cmd, data); if ((oif_flags ^ ifp->if_flags) & IFF_UP) { #ifdef INET6 if (ifp->if_flags & IFF_UP) in6_if_up(ifp); #endif } out_ref: if_rele(ifp); out_noref: #ifdef COMPAT_FREEBSD32 if (ifmrp != NULL) { KASSERT((cmd == SIOCGIFMEDIA || cmd == SIOCGIFXMEDIA), ("ifmrp non-NULL, but cmd is not an ifmedia req 0x%lx", cmd)); data = saved_data; ifmr_update(ifmrp, data); } #endif CURVNET_RESTORE(); return (error); } /* * The code common to handling reference counted flags, * e.g., in ifpromisc() and if_allmulti(). * The "pflag" argument can specify a permanent mode flag to check, * such as IFF_PPROMISC for promiscuous mode; should be 0 if none. * * Only to be used on stack-owned flags, not driver-owned flags. */ static int if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch) { struct ifreq ifr; int error; int oldflags, oldcount; /* Sanity checks to catch programming errors */ KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0, ("%s: setting driver-owned flag %d", __func__, flag)); if (onswitch) KASSERT(*refcount >= 0, ("%s: increment negative refcount %d for flag %d", __func__, *refcount, flag)); else KASSERT(*refcount > 0, ("%s: decrement non-positive refcount %d for flag %d", __func__, *refcount, flag)); /* In case this mode is permanent, just touch refcount */ if (ifp->if_flags & pflag) { *refcount += onswitch ? 1 : -1; return (0); } /* Save ifnet parameters for if_ioctl() may fail */ oldcount = *refcount; oldflags = ifp->if_flags; /* * See if we aren't the only and touching refcount is enough. * Actually toggle interface flag if we are the first or last. */ if (onswitch) { if ((*refcount)++) return (0); ifp->if_flags |= flag; } else { if (--(*refcount)) return (0); ifp->if_flags &= ~flag; } /* Call down the driver since we've changed interface flags */ if (ifp->if_ioctl == NULL) { error = EOPNOTSUPP; goto recover; } ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); if (error) goto recover; /* Notify userland that interface flags have changed */ rt_ifmsg(ifp); return (0); recover: /* Recover after driver error */ *refcount = oldcount; ifp->if_flags = oldflags; return (error); } /* * Set/clear promiscuous mode on interface ifp based on the truth value * of pswitch. The calls are reference counted so that only the first * "on" request actually has an effect, as does the final "off" request. * Results are undefined if the "off" and "on" requests are not matched. */ int ifpromisc(struct ifnet *ifp, int pswitch) { int error; int oldflags = ifp->if_flags; error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC, &ifp->if_pcount, pswitch); /* If promiscuous mode status has changed, log a message */ if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) && log_promisc_mode_change) if_printf(ifp, "promiscuous mode %s\n", (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled"); return (error); } /* * Return interface configuration * of system. List may be used * in later ioctl's (above) to get * other information. */ /*ARGSUSED*/ static int ifconf(u_long cmd, caddr_t data) { struct ifconf *ifc = (struct ifconf *)data; struct ifnet *ifp; struct ifaddr *ifa; struct ifreq ifr; struct sbuf *sb; int error, full = 0, valid_len, max_len; /* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */ max_len = MAXPHYS - 1; /* Prevent hostile input from being able to crash the system */ if (ifc->ifc_len <= 0) return (EINVAL); again: if (ifc->ifc_len <= max_len) { max_len = ifc->ifc_len; full = 1; } sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN); max_len = 0; valid_len = 0; IFNET_RLOCK(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { struct epoch_tracker et; int addrs; /* * Zero the ifr to make sure we don't disclose the contents * of the stack. */ memset(&ifr, 0, sizeof(ifr)); if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)) >= sizeof(ifr.ifr_name)) { sbuf_delete(sb); IFNET_RUNLOCK(); return (ENAMETOOLONG); } addrs = 0; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa = ifa->ifa_addr; if (prison_if(curthread->td_ucred, sa) != 0) continue; addrs++; if (sa->sa_len <= sizeof(*sa)) { if (sa->sa_len < sizeof(*sa)) { memset(&ifr.ifr_ifru.ifru_addr, 0, sizeof(ifr.ifr_ifru.ifru_addr)); memcpy(&ifr.ifr_ifru.ifru_addr, sa, sa->sa_len); } else ifr.ifr_ifru.ifru_addr = *sa; sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); } else { sbuf_bcat(sb, &ifr, offsetof(struct ifreq, ifr_addr)); max_len += offsetof(struct ifreq, ifr_addr); sbuf_bcat(sb, sa, sa->sa_len); max_len += sa->sa_len; } if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } NET_EPOCH_EXIT(et); if (addrs == 0) { sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } } IFNET_RUNLOCK(); /* * If we didn't allocate enough space (uncommon), try again. If * we have already allocated as much space as we are allowed, * return what we've got. */ if (valid_len != max_len && !full) { sbuf_delete(sb); goto again; } ifc->ifc_len = valid_len; sbuf_finish(sb); error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len); sbuf_delete(sb); return (error); } /* * Just like ifpromisc(), but for all-multicast-reception mode. */ int if_allmulti(struct ifnet *ifp, int onswitch) { return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch)); } struct ifmultiaddr * if_findmulti(struct ifnet *ifp, const struct sockaddr *sa) { struct ifmultiaddr *ifma; IF_ADDR_LOCK_ASSERT(ifp); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (sa->sa_family == AF_LINK) { if (sa_dl_equal(ifma->ifma_addr, sa)) break; } else { if (sa_equal(ifma->ifma_addr, sa)) break; } } return ifma; } /* * Allocate a new ifmultiaddr and initialize based on passed arguments. We * make copies of passed sockaddrs. The ifmultiaddr will not be added to * the ifnet multicast address list here, so the caller must do that and * other setup work (such as notifying the device driver). The reference * count is initialized to 1. */ static struct ifmultiaddr * if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa, int mflags) { struct ifmultiaddr *ifma; struct sockaddr *dupsa; ifma = malloc(sizeof *ifma, M_IFMADDR, mflags | M_ZERO); if (ifma == NULL) return (NULL); dupsa = malloc(sa->sa_len, M_IFMADDR, mflags); if (dupsa == NULL) { free(ifma, M_IFMADDR); return (NULL); } bcopy(sa, dupsa, sa->sa_len); ifma->ifma_addr = dupsa; ifma->ifma_ifp = ifp; ifma->ifma_refcount = 1; ifma->ifma_protospec = NULL; if (llsa == NULL) { ifma->ifma_lladdr = NULL; return (ifma); } dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags); if (dupsa == NULL) { free(ifma->ifma_addr, M_IFMADDR); free(ifma, M_IFMADDR); return (NULL); } bcopy(llsa, dupsa, llsa->sa_len); ifma->ifma_lladdr = dupsa; return (ifma); } /* * if_freemulti: free ifmultiaddr structure and possibly attached related * addresses. The caller is responsible for implementing reference * counting, notifying the driver, handling routing messages, and releasing * any dependent link layer state. */ #ifdef MCAST_VERBOSE extern void kdb_backtrace(void); #endif static void if_freemulti_internal(struct ifmultiaddr *ifma) { KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d", ifma->ifma_refcount)); if (ifma->ifma_lladdr != NULL) free(ifma->ifma_lladdr, M_IFMADDR); #ifdef MCAST_VERBOSE kdb_backtrace(); printf("%s freeing ifma: %p\n", __func__, ifma); #endif free(ifma->ifma_addr, M_IFMADDR); free(ifma, M_IFMADDR); } static void if_destroymulti(epoch_context_t ctx) { struct ifmultiaddr *ifma; ifma = __containerof(ctx, struct ifmultiaddr, ifma_epoch_ctx); if_freemulti_internal(ifma); } void if_freemulti(struct ifmultiaddr *ifma) { KASSERT(ifma->ifma_refcount == 0, ("if_freemulti_epoch: refcount %d", ifma->ifma_refcount)); NET_EPOCH_CALL(if_destroymulti, &ifma->ifma_epoch_ctx); } /* * Register an additional multicast address with a network interface. * * - If the address is already present, bump the reference count on the * address and return. * - If the address is not link-layer, look up a link layer address. * - Allocate address structures for one or both addresses, and attach to the * multicast address list on the interface. If automatically adding a link * layer address, the protocol address will own a reference to the link * layer address, to be freed when it is freed. * - Notify the network device driver of an addition to the multicast address * list. * * 'sa' points to caller-owned memory with the desired multicast address. * * 'retifma' will be used to return a pointer to the resulting multicast * address reference, if desired. */ int if_addmulti(struct ifnet *ifp, struct sockaddr *sa, struct ifmultiaddr **retifma) { struct ifmultiaddr *ifma, *ll_ifma; struct sockaddr *llsa; struct sockaddr_dl sdl; int error; #ifdef INET IN_MULTI_LIST_UNLOCK_ASSERT(); #endif #ifdef INET6 IN6_MULTI_LIST_UNLOCK_ASSERT(); #endif /* * If the address is already present, return a new reference to it; * otherwise, allocate storage and set up a new address. */ IF_ADDR_WLOCK(ifp); ifma = if_findmulti(ifp, sa); if (ifma != NULL) { ifma->ifma_refcount++; if (retifma != NULL) *retifma = ifma; IF_ADDR_WUNLOCK(ifp); return (0); } /* * The address isn't already present; resolve the protocol address * into a link layer address, and then look that up, bump its * refcount or allocate an ifma for that also. * Most link layer resolving functions returns address data which * fits inside default sockaddr_dl structure. However callback * can allocate another sockaddr structure, in that case we need to * free it later. */ llsa = NULL; ll_ifma = NULL; if (ifp->if_resolvemulti != NULL) { /* Provide called function with buffer size information */ sdl.sdl_len = sizeof(sdl); llsa = (struct sockaddr *)&sdl; error = ifp->if_resolvemulti(ifp, &llsa, sa); if (error) goto unlock_out; } /* * Allocate the new address. Don't hook it up yet, as we may also * need to allocate a link layer multicast address. */ ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT); if (ifma == NULL) { error = ENOMEM; goto free_llsa_out; } /* * If a link layer address is found, we'll need to see if it's * already present in the address list, or allocate is as well. * When this block finishes, the link layer address will be on the * list. */ if (llsa != NULL) { ll_ifma = if_findmulti(ifp, llsa); if (ll_ifma == NULL) { ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT); if (ll_ifma == NULL) { --ifma->ifma_refcount; if_freemulti(ifma); error = ENOMEM; goto free_llsa_out; } ll_ifma->ifma_flags |= IFMA_F_ENQUEUED; CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma, ifma_link); } else ll_ifma->ifma_refcount++; ifma->ifma_llifma = ll_ifma; } /* * We now have a new multicast address, ifma, and possibly a new or * referenced link layer address. Add the primary address to the * ifnet address list. */ ifma->ifma_flags |= IFMA_F_ENQUEUED; CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); if (retifma != NULL) *retifma = ifma; /* * Must generate the message while holding the lock so that 'ifma' * pointer is still valid. */ rt_newmaddrmsg(RTM_NEWMADDR, ifma); IF_ADDR_WUNLOCK(ifp); /* * We are certain we have added something, so call down to the * interface to let them know about it. */ if (ifp->if_ioctl != NULL) { if (THREAD_CAN_SLEEP()) (void )(*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0); else taskqueue_enqueue(taskqueue_swi, &ifp->if_addmultitask); } if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl)) link_free_sdl(llsa); return (0); free_llsa_out: if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl)) link_free_sdl(llsa); unlock_out: IF_ADDR_WUNLOCK(ifp); return (error); } static void if_siocaddmulti(void *arg, int pending) { struct ifnet *ifp; ifp = arg; #ifdef DIAGNOSTIC if (pending > 1) if_printf(ifp, "%d SIOCADDMULTI coalesced\n", pending); #endif CURVNET_SET(ifp->if_vnet); (void )(*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0); CURVNET_RESTORE(); } /* * Delete a multicast group membership by network-layer group address. * * Returns ENOENT if the entry could not be found. If ifp no longer * exists, results are undefined. This entry point should only be used * from subsystems which do appropriate locking to hold ifp for the * duration of the call. * Network-layer protocol domains must use if_delmulti_ifma(). */ int if_delmulti(struct ifnet *ifp, struct sockaddr *sa) { struct ifmultiaddr *ifma; int lastref; KASSERT(ifp, ("%s: NULL ifp", __func__)); IF_ADDR_WLOCK(ifp); lastref = 0; ifma = if_findmulti(ifp, sa); if (ifma != NULL) lastref = if_delmulti_locked(ifp, ifma, 0); IF_ADDR_WUNLOCK(ifp); if (ifma == NULL) return (ENOENT); if (lastref && ifp->if_ioctl != NULL) { (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0); } return (0); } /* * Delete all multicast group membership for an interface. * Should be used to quickly flush all multicast filters. */ void if_delallmulti(struct ifnet *ifp) { struct ifmultiaddr *ifma; struct ifmultiaddr *next; IF_ADDR_WLOCK(ifp); CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) if_delmulti_locked(ifp, ifma, 0); IF_ADDR_WUNLOCK(ifp); } void if_delmulti_ifma(struct ifmultiaddr *ifma) { if_delmulti_ifma_flags(ifma, 0); } /* * Delete a multicast group membership by group membership pointer. * Network-layer protocol domains must use this routine. * * It is safe to call this routine if the ifp disappeared. */ void if_delmulti_ifma_flags(struct ifmultiaddr *ifma, int flags) { struct ifnet *ifp; int lastref; MCDPRINTF("%s freeing ifma: %p\n", __func__, ifma); #ifdef INET IN_MULTI_LIST_UNLOCK_ASSERT(); #endif ifp = ifma->ifma_ifp; #ifdef DIAGNOSTIC if (ifp == NULL) { printf("%s: ifma_ifp seems to be detached\n", __func__); } else { struct epoch_tracker et; struct ifnet *oifp; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(oifp, &V_ifnet, if_link) if (ifp == oifp) break; NET_EPOCH_EXIT(et); if (ifp != oifp) ifp = NULL; } #endif /* * If and only if the ifnet instance exists: Acquire the address lock. */ if (ifp != NULL) IF_ADDR_WLOCK(ifp); lastref = if_delmulti_locked(ifp, ifma, flags); if (ifp != NULL) { /* * If and only if the ifnet instance exists: * Release the address lock. * If the group was left: update the hardware hash filter. */ IF_ADDR_WUNLOCK(ifp); if (lastref && ifp->if_ioctl != NULL) { (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0); } } } /* * Perform deletion of network-layer and/or link-layer multicast address. * * Return 0 if the reference count was decremented. * Return 1 if the final reference was released, indicating that the * hardware hash filter should be reprogrammed. */ static int if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching) { struct ifmultiaddr *ll_ifma; if (ifp != NULL && ifma->ifma_ifp != NULL) { KASSERT(ifma->ifma_ifp == ifp, ("%s: inconsistent ifp %p", __func__, ifp)); IF_ADDR_WLOCK_ASSERT(ifp); } ifp = ifma->ifma_ifp; MCDPRINTF("%s freeing %p from %s \n", __func__, ifma, ifp ? ifp->if_xname : ""); /* * If the ifnet is detaching, null out references to ifnet, * so that upper protocol layers will notice, and not attempt * to obtain locks for an ifnet which no longer exists. The * routing socket announcement must happen before the ifnet * instance is detached from the system. */ if (detaching) { #ifdef DIAGNOSTIC printf("%s: detaching ifnet instance %p\n", __func__, ifp); #endif /* * ifp may already be nulled out if we are being reentered * to delete the ll_ifma. */ if (ifp != NULL) { rt_newmaddrmsg(RTM_DELMADDR, ifma); ifma->ifma_ifp = NULL; } } if (--ifma->ifma_refcount > 0) return 0; if (ifp != NULL && detaching == 0 && (ifma->ifma_flags & IFMA_F_ENQUEUED)) { CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); ifma->ifma_flags &= ~IFMA_F_ENQUEUED; } /* * If this ifma is a network-layer ifma, a link-layer ifma may * have been associated with it. Release it first if so. */ ll_ifma = ifma->ifma_llifma; if (ll_ifma != NULL) { KASSERT(ifma->ifma_lladdr != NULL, ("%s: llifma w/o lladdr", __func__)); if (detaching) ll_ifma->ifma_ifp = NULL; /* XXX */ if (--ll_ifma->ifma_refcount == 0) { if (ifp != NULL) { if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) { CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link); ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED; } } if_freemulti(ll_ifma); } } #ifdef INVARIANTS if (ifp) { struct ifmultiaddr *ifmatmp; CK_STAILQ_FOREACH(ifmatmp, &ifp->if_multiaddrs, ifma_link) MPASS(ifma != ifmatmp); } #endif if_freemulti(ifma); /* * The last reference to this instance of struct ifmultiaddr * was released; the hardware should be notified of this change. */ return 1; } /* * Set the link layer address on an interface. * * At this time we only support certain types of interfaces, * and we don't allow the length of the address to change. * * Set noinline to be dtrace-friendly */ __noinline int if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) { struct sockaddr_dl *sdl; struct ifaddr *ifa; struct ifreq ifr; ifa = ifp->if_addr; if (ifa == NULL) return (EINVAL); sdl = (struct sockaddr_dl *)ifa->ifa_addr; if (sdl == NULL) return (EINVAL); if (len != sdl->sdl_alen) /* don't allow length to change */ return (EINVAL); switch (ifp->if_type) { case IFT_ETHER: case IFT_XETHER: case IFT_L2VLAN: case IFT_BRIDGE: case IFT_IEEE8023ADLAG: bcopy(lladdr, LLADDR(sdl), len); break; default: return (ENODEV); } /* * If the interface is already up, we need * to re-init it in order to reprogram its * address filter. */ if ((ifp->if_flags & IFF_UP) != 0) { if (ifp->if_ioctl) { ifp->if_flags &= ~IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); ifp->if_flags |= IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); } } EVENTHANDLER_INVOKE(iflladdr_event, ifp); return (0); } /* * Compat function for handling basic encapsulation requests. * Not converted stacks (FDDI, IB, ..) supports traditional * output model: ARP (and other similar L2 protocols) are handled * inside output routine, arpresolve/nd6_resolve() returns MAC * address instead of full prepend. * * This function creates calculated header==MAC for IPv4/IPv6 and * returns EAFNOSUPPORT (which is then handled in ARP code) for other * address families. */ static int if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req) { if (req->rtype != IFENCAP_LL) return (EOPNOTSUPP); if (req->bufsize < req->lladdr_len) return (ENOMEM); switch (req->family) { case AF_INET: case AF_INET6: break; default: return (EAFNOSUPPORT); } /* Copy lladdr to storage as is */ memmove(req->buf, req->lladdr, req->lladdr_len); req->bufsize = req->lladdr_len; req->lladdr_off = 0; return (0); } /* * Tunnel interfaces can nest, also they may cause infinite recursion * calls when misconfigured. We'll prevent this by detecting loops. * High nesting level may cause stack exhaustion. We'll prevent this * by introducing upper limit. * * Return 0, if tunnel nesting count is equal or less than limit. */ int if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, uint32_t cookie, int limit) { struct m_tag *mtag; int count; count = 1; mtag = NULL; while ((mtag = m_tag_locate(m, cookie, 0, mtag)) != NULL) { if (*(struct ifnet **)(mtag + 1) == ifp) { log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp)); return (EIO); } count++; } if (count > limit) { log(LOG_NOTICE, "%s: if_output recursively called too many times(%d)\n", if_name(ifp), count); return (EIO); } mtag = m_tag_alloc(cookie, 0, sizeof(struct ifnet *), M_NOWAIT); if (mtag == NULL) return (ENOMEM); *(struct ifnet **)(mtag + 1) = ifp; m_tag_prepend(m, mtag); return (0); } /* * Get the link layer address that was read from the hardware at attach. * * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type * their component interfaces as IFT_IEEE8023ADLAG. */ int if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr) { if (ifp->if_hw_addr == NULL) return (ENODEV); switch (ifp->if_type) { case IFT_ETHER: case IFT_IEEE8023ADLAG: bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen); return (0); default: return (ENODEV); } } /* * The name argument must be a pointer to storage which will last as * long as the interface does. For physical devices, the result of * device_get_name(dev) is a good choice and for pseudo-devices a * static string works well. */ void if_initname(struct ifnet *ifp, const char *name, int unit) { ifp->if_dname = name; ifp->if_dunit = unit; if (unit != IF_DUNIT_NONE) snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit); else strlcpy(ifp->if_xname, name, IFNAMSIZ); } int if_printf(struct ifnet *ifp, const char *fmt, ...) { char if_fmt[256]; va_list ap; snprintf(if_fmt, sizeof(if_fmt), "%s: %s", ifp->if_xname, fmt); va_start(ap, fmt); vlog(LOG_INFO, if_fmt, ap); va_end(ap); return (0); } void if_start(struct ifnet *ifp) { (*(ifp)->if_start)(ifp); } /* * Backwards compatibility interface for drivers * that have not implemented it */ static int if_transmit(struct ifnet *ifp, struct mbuf *m) { int error; IFQ_HANDOFF(ifp, m, error); return (error); } static void if_input_default(struct ifnet *ifp __unused, struct mbuf *m) { m_freem(m); } int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust) { int active = 0; IF_LOCK(ifq); if (_IF_QFULL(ifq)) { IF_UNLOCK(ifq); if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); m_freem(m); return (0); } if (ifp != NULL) { if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust); if (m->m_flags & (M_BCAST|M_MCAST)) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); active = ifp->if_drv_flags & IFF_DRV_OACTIVE; } _IF_ENQUEUE(ifq, m); IF_UNLOCK(ifq); if (ifp != NULL && !active) (*(ifp)->if_start)(ifp); return (1); } void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f) { KASSERT(if_com_alloc[type] == NULL, ("if_register_com_alloc: %d already registered", type)); KASSERT(if_com_free[type] == NULL, ("if_register_com_alloc: %d free already registered", type)); if_com_alloc[type] = a; if_com_free[type] = f; } void if_deregister_com_alloc(u_char type) { KASSERT(if_com_alloc[type] != NULL, ("if_deregister_com_alloc: %d not registered", type)); KASSERT(if_com_free[type] != NULL, ("if_deregister_com_alloc: %d free not registered", type)); if_com_alloc[type] = NULL; if_com_free[type] = NULL; } /* API for driver access to network stack owned ifnet.*/ uint64_t if_setbaudrate(struct ifnet *ifp, uint64_t baudrate) { uint64_t oldbrate; oldbrate = ifp->if_baudrate; ifp->if_baudrate = baudrate; return (oldbrate); } uint64_t if_getbaudrate(if_t ifp) { return (((struct ifnet *)ifp)->if_baudrate); } int if_setcapabilities(if_t ifp, int capabilities) { ((struct ifnet *)ifp)->if_capabilities = capabilities; return (0); } int if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit) { ((struct ifnet *)ifp)->if_capabilities |= setbit; ((struct ifnet *)ifp)->if_capabilities &= ~clearbit; return (0); } int if_getcapabilities(if_t ifp) { return ((struct ifnet *)ifp)->if_capabilities; } int if_setcapenable(if_t ifp, int capabilities) { ((struct ifnet *)ifp)->if_capenable = capabilities; return (0); } int if_setcapenablebit(if_t ifp, int setcap, int clearcap) { if(setcap) ((struct ifnet *)ifp)->if_capenable |= setcap; if(clearcap) ((struct ifnet *)ifp)->if_capenable &= ~clearcap; return (0); } const char * if_getdname(if_t ifp) { return ((struct ifnet *)ifp)->if_dname; } int if_togglecapenable(if_t ifp, int togglecap) { ((struct ifnet *)ifp)->if_capenable ^= togglecap; return (0); } int if_getcapenable(if_t ifp) { return ((struct ifnet *)ifp)->if_capenable; } /* * This is largely undesirable because it ties ifnet to a device, but does * provide flexiblity for an embedded product vendor. Should be used with * the understanding that it violates the interface boundaries, and should be * a last resort only. */ int if_setdev(if_t ifp, void *dev) { return (0); } int if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags) { ((struct ifnet *)ifp)->if_drv_flags |= set_flags; ((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags; return (0); } int if_getdrvflags(if_t ifp) { return ((struct ifnet *)ifp)->if_drv_flags; } int if_setdrvflags(if_t ifp, int flags) { ((struct ifnet *)ifp)->if_drv_flags = flags; return (0); } int if_setflags(if_t ifp, int flags) { ((struct ifnet *)ifp)->if_flags = flags; return (0); } int if_setflagbits(if_t ifp, int set, int clear) { ((struct ifnet *)ifp)->if_flags |= set; ((struct ifnet *)ifp)->if_flags &= ~clear; return (0); } int if_getflags(if_t ifp) { return ((struct ifnet *)ifp)->if_flags; } int if_clearhwassist(if_t ifp) { ((struct ifnet *)ifp)->if_hwassist = 0; return (0); } int if_sethwassistbits(if_t ifp, int toset, int toclear) { ((struct ifnet *)ifp)->if_hwassist |= toset; ((struct ifnet *)ifp)->if_hwassist &= ~toclear; return (0); } int if_sethwassist(if_t ifp, int hwassist_bit) { ((struct ifnet *)ifp)->if_hwassist = hwassist_bit; return (0); } int if_gethwassist(if_t ifp) { return ((struct ifnet *)ifp)->if_hwassist; } int if_setmtu(if_t ifp, int mtu) { ((struct ifnet *)ifp)->if_mtu = mtu; return (0); } int if_getmtu(if_t ifp) { return ((struct ifnet *)ifp)->if_mtu; } int if_getmtu_family(if_t ifp, int family) { struct domain *dp; for (dp = domains; dp; dp = dp->dom_next) { if (dp->dom_family == family && dp->dom_ifmtu != NULL) return (dp->dom_ifmtu((struct ifnet *)ifp)); } return (((struct ifnet *)ifp)->if_mtu); } /* * Methods for drivers to access interface unicast and multicast * link level addresses. Driver shall not know 'struct ifaddr' neither * 'struct ifmultiaddr'. */ u_int if_lladdr_count(if_t ifp) { struct epoch_tracker et; struct ifaddr *ifa; u_int count; count = 0; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_LINK) count++; NET_EPOCH_EXIT(et); return (count); } u_int if_foreach_lladdr(if_t ifp, iflladdr_cb_t cb, void *cb_arg) { struct epoch_tracker et; struct ifaddr *ifa; u_int count; MPASS(cb); count = 0; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_LINK) continue; count += (*cb)(cb_arg, (struct sockaddr_dl *)ifa->ifa_addr, count); } NET_EPOCH_EXIT(et); return (count); } u_int if_llmaddr_count(if_t ifp) { struct epoch_tracker et; struct ifmultiaddr *ifma; int count; count = 0; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) if (ifma->ifma_addr->sa_family == AF_LINK) count++; NET_EPOCH_EXIT(et); return (count); } u_int if_foreach_llmaddr(if_t ifp, iflladdr_cb_t cb, void *cb_arg) { struct epoch_tracker et; struct ifmultiaddr *ifma; u_int count; MPASS(cb); count = 0; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; count += (*cb)(cb_arg, (struct sockaddr_dl *)ifma->ifma_addr, count); } NET_EPOCH_EXIT(et); return (count); } int if_setsoftc(if_t ifp, void *softc) { ((struct ifnet *)ifp)->if_softc = softc; return (0); } void * if_getsoftc(if_t ifp) { return ((struct ifnet *)ifp)->if_softc; } void if_setrcvif(struct mbuf *m, if_t ifp) { MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); m->m_pkthdr.rcvif = (struct ifnet *)ifp; } void if_setvtag(struct mbuf *m, uint16_t tag) { m->m_pkthdr.ether_vtag = tag; } uint16_t if_getvtag(struct mbuf *m) { return (m->m_pkthdr.ether_vtag); } int if_sendq_empty(if_t ifp) { return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd); } struct ifaddr * if_getifaddr(if_t ifp) { return ((struct ifnet *)ifp)->if_addr; } int if_getamcount(if_t ifp) { return ((struct ifnet *)ifp)->if_amcount; } int if_setsendqready(if_t ifp) { IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd); return (0); } int if_setsendqlen(if_t ifp, int tx_desc_count) { IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count); ((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count; return (0); } int if_vlantrunkinuse(if_t ifp) { return ((struct ifnet *)ifp)->if_vlantrunk != NULL?1:0; } int if_input(if_t ifp, struct mbuf* sendmp) { (*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, sendmp); return (0); } struct mbuf * if_dequeue(if_t ifp) { struct mbuf *m; IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m); return (m); } int if_sendq_prepend(if_t ifp, struct mbuf *m) { IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m); return (0); } int if_setifheaderlen(if_t ifp, int len) { ((struct ifnet *)ifp)->if_hdrlen = len; return (0); } caddr_t if_getlladdr(if_t ifp) { return (IF_LLADDR((struct ifnet *)ifp)); } void * if_gethandle(u_char type) { return (if_alloc(type)); } void if_bpfmtap(if_t ifh, struct mbuf *m) { struct ifnet *ifp = (struct ifnet *)ifh; BPF_MTAP(ifp, m); } void if_etherbpfmtap(if_t ifh, struct mbuf *m) { struct ifnet *ifp = (struct ifnet *)ifh; ETHER_BPF_MTAP(ifp, m); } void if_vlancap(if_t ifh) { struct ifnet *ifp = (struct ifnet *)ifh; VLAN_CAPABILITIES(ifp); } int if_sethwtsomax(if_t ifp, u_int if_hw_tsomax) { ((struct ifnet *)ifp)->if_hw_tsomax = if_hw_tsomax; return (0); } int if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount) { ((struct ifnet *)ifp)->if_hw_tsomaxsegcount = if_hw_tsomaxsegcount; return (0); } int if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize) { ((struct ifnet *)ifp)->if_hw_tsomaxsegsize = if_hw_tsomaxsegsize; return (0); } u_int if_gethwtsomax(if_t ifp) { return (((struct ifnet *)ifp)->if_hw_tsomax); } u_int if_gethwtsomaxsegcount(if_t ifp) { return (((struct ifnet *)ifp)->if_hw_tsomaxsegcount); } u_int if_gethwtsomaxsegsize(if_t ifp) { return (((struct ifnet *)ifp)->if_hw_tsomaxsegsize); } void if_setinitfn(if_t ifp, void (*init_fn)(void *)) { ((struct ifnet *)ifp)->if_init = init_fn; } void if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t)) { ((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn; } void if_setstartfn(if_t ifp, void (*start_fn)(if_t)) { ((struct ifnet *)ifp)->if_start = (void *)start_fn; } void if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn) { ((struct ifnet *)ifp)->if_transmit = start_fn; } void if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn) { ((struct ifnet *)ifp)->if_qflush = flush_fn; } void if_setgetcounterfn(if_t ifp, if_get_counter_t fn) { ifp->if_get_counter = fn; } /* Revisit these - These are inline functions originally. */ int drbr_inuse_drv(if_t ifh, struct buf_ring *br) { return drbr_inuse(ifh, br); } struct mbuf* drbr_dequeue_drv(if_t ifh, struct buf_ring *br) { return drbr_dequeue(ifh, br); } int drbr_needs_enqueue_drv(if_t ifh, struct buf_ring *br) { return drbr_needs_enqueue(ifh, br); } int drbr_enqueue_drv(if_t ifh, struct buf_ring *br, struct mbuf *m) { return drbr_enqueue(ifh, br, m); } Index: projects/clang1000-import/sys/net80211/ieee80211_amrr.c =================================================================== --- projects/clang1000-import/sys/net80211/ieee80211_amrr.c (revision 356919) +++ projects/clang1000-import/sys/net80211/ieee80211_amrr.c (revision 356920) @@ -1,508 +1,514 @@ /* $OpenBSD: ieee80211_amrr.c,v 1.1 2006/06/17 19:07:19 damien Exp $ */ /*- * Copyright (c) 2010 Rui Paulo * Copyright (c) 2006 * Damien Bergamini * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include __FBSDID("$FreeBSD$"); /*- * Naive implementation of the Adaptive Multi Rate Retry algorithm: * * "IEEE 802.11 Rate Adaptation: A Practical Approach" * Mathieu Lacage, Hossein Manshaei, Thierry Turletti * INRIA Sophia - Projet Planete * http://www-sop.inria.fr/rapports/sophia/RR-5208.html */ #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif #include #include #include #include #define is_success(amn) \ ((amn)->amn_retrycnt < (amn)->amn_txcnt / 10) #define is_failure(amn) \ ((amn)->amn_retrycnt > (amn)->amn_txcnt / 3) #define is_enough(amn) \ ((amn)->amn_txcnt > 10) static void amrr_setinterval(const struct ieee80211vap *, int); static void amrr_init(struct ieee80211vap *); static void amrr_deinit(struct ieee80211vap *); static void amrr_node_init(struct ieee80211_node *); static void amrr_node_deinit(struct ieee80211_node *); static int amrr_update(struct ieee80211_amrr *, struct ieee80211_amrr_node *, struct ieee80211_node *); static int amrr_rate(struct ieee80211_node *, void *, uint32_t); static void amrr_tx_complete(const struct ieee80211_node *, const struct ieee80211_ratectl_tx_status *); static void amrr_tx_update_cb(void *, struct ieee80211_node *); static void amrr_tx_update(struct ieee80211vap *vap, struct ieee80211_ratectl_tx_stats *); static void amrr_sysctlattach(struct ieee80211vap *, struct sysctl_ctx_list *, struct sysctl_oid *); static void amrr_node_stats(struct ieee80211_node *ni, struct sbuf *s); /* number of references from net80211 layer */ static int nrefs = 0; static const struct ieee80211_ratectl amrr = { .ir_name = "amrr", .ir_attach = NULL, .ir_detach = NULL, .ir_init = amrr_init, .ir_deinit = amrr_deinit, .ir_node_init = amrr_node_init, .ir_node_deinit = amrr_node_deinit, .ir_rate = amrr_rate, .ir_tx_complete = amrr_tx_complete, .ir_tx_update = amrr_tx_update, .ir_setinterval = amrr_setinterval, .ir_node_stats = amrr_node_stats, }; IEEE80211_RATECTL_MODULE(amrr, 1); IEEE80211_RATECTL_ALG(amrr, IEEE80211_RATECTL_AMRR, amrr); static void amrr_setinterval(const struct ieee80211vap *vap, int msecs) { struct ieee80211_amrr *amrr = vap->iv_rs; if (!amrr) return; if (msecs < 100) msecs = 100; amrr->amrr_interval = msecs_to_ticks(msecs); } static void amrr_init(struct ieee80211vap *vap) { struct ieee80211_amrr *amrr; KASSERT(vap->iv_rs == NULL, ("%s called multiple times", __func__)); nrefs++; /* XXX locking */ amrr = vap->iv_rs = IEEE80211_MALLOC(sizeof(struct ieee80211_amrr), M_80211_RATECTL, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (amrr == NULL) { if_printf(vap->iv_ifp, "couldn't alloc ratectl structure\n"); return; } amrr->amrr_min_success_threshold = IEEE80211_AMRR_MIN_SUCCESS_THRESHOLD; amrr->amrr_max_success_threshold = IEEE80211_AMRR_MAX_SUCCESS_THRESHOLD; amrr_setinterval(vap, 500 /* ms */); amrr_sysctlattach(vap, vap->iv_sysctl, vap->iv_oid); } static void amrr_deinit(struct ieee80211vap *vap) { IEEE80211_FREE(vap->iv_rs, M_80211_RATECTL); KASSERT(nrefs > 0, ("imbalanced attach/detach")); nrefs--; /* XXX locking */ } /* * Return whether 11n rates are possible. * * Some 11n devices may return HT information but no HT rates. * Thus, we shouldn't treat them as an 11n node. */ static int amrr_node_is_11n(struct ieee80211_node *ni) { if (ni->ni_chan == NULL) return (0); if (ni->ni_chan == IEEE80211_CHAN_ANYC) return (0); if (IEEE80211_IS_CHAN_HT(ni->ni_chan) && ni->ni_htrates.rs_nrates == 0) return (0); return (IEEE80211_IS_CHAN_HT(ni->ni_chan)); } static void amrr_node_init(struct ieee80211_node *ni) { const struct ieee80211_rateset *rs = NULL; struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_amrr *amrr = vap->iv_rs; struct ieee80211_amrr_node *amn; uint8_t rate; if (!amrr) { if_printf(vap->iv_ifp, "ratectl structure was not allocated, " "per-node structure allocation skipped\n"); return; } if (ni->ni_rctls == NULL) { ni->ni_rctls = amn = IEEE80211_MALLOC(sizeof(struct ieee80211_amrr_node), M_80211_RATECTL, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (amn == NULL) { if_printf(vap->iv_ifp, "couldn't alloc per-node ratectl " "structure\n"); return; } } else amn = ni->ni_rctls; amn->amn_amrr = amrr; amn->amn_success = 0; amn->amn_recovery = 0; amn->amn_txcnt = amn->amn_retrycnt = 0; amn->amn_success_threshold = amrr->amrr_min_success_threshold; /* 11n or not? Pick the right rateset */ if (amrr_node_is_11n(ni)) { /* XXX ew */ IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_RATECTL, ni, "%s: 11n node", __func__); rs = (struct ieee80211_rateset *) &ni->ni_htrates; } else { IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_RATECTL, ni, "%s: non-11n node", __func__); rs = &ni->ni_rates; } /* Initial rate - lowest */ rate = rs->rs_rates[0]; /* XXX clear the basic rate flag if it's not 11n */ if (! amrr_node_is_11n(ni)) rate &= IEEE80211_RATE_VAL; /* pick initial rate from the rateset - HT or otherwise */ /* Pick something low that's likely to succeed */ for (amn->amn_rix = rs->rs_nrates - 1; amn->amn_rix > 0; amn->amn_rix--) { /* legacy - anything < 36mbit, stop searching */ /* 11n - stop at MCS4 */ if (amrr_node_is_11n(ni)) { if ((rs->rs_rates[amn->amn_rix] & 0x1f) < 4) break; } else if ((rs->rs_rates[amn->amn_rix] & IEEE80211_RATE_VAL) <= 72) break; } rate = rs->rs_rates[amn->amn_rix] & IEEE80211_RATE_VAL; /* if the rate is an 11n rate, ensure the MCS bit is set */ if (amrr_node_is_11n(ni)) rate |= IEEE80211_RATE_MCS; /* Assign initial rate from the rateset */ ni->ni_txrate = rate; amn->amn_ticks = ticks; /* XXX TODO: we really need a rate-to-string method */ /* XXX TODO: non-11n rate should be divided by two.. */ IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_RATECTL, ni, "AMRR: nrates=%d, initial rate %s%d", rs->rs_nrates, amrr_node_is_11n(ni) ? "MCS " : "", rate & IEEE80211_RATE_VAL); } static void amrr_node_deinit(struct ieee80211_node *ni) { IEEE80211_FREE(ni->ni_rctls, M_80211_RATECTL); } static int amrr_update(struct ieee80211_amrr *amrr, struct ieee80211_amrr_node *amn, struct ieee80211_node *ni) { int rix = amn->amn_rix; const struct ieee80211_rateset *rs = NULL; KASSERT(is_enough(amn), ("txcnt %d", amn->amn_txcnt)); /* 11n or not? Pick the right rateset */ if (amrr_node_is_11n(ni)) { /* XXX ew */ rs = (struct ieee80211_rateset *) &ni->ni_htrates; } else { rs = &ni->ni_rates; } /* XXX TODO: we really need a rate-to-string method */ /* XXX TODO: non-11n rate should be divided by two.. */ IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_RATECTL, ni, "AMRR: current rate %d, txcnt=%d, retrycnt=%d", rs->rs_rates[rix] & IEEE80211_RATE_VAL, amn->amn_txcnt, amn->amn_retrycnt); /* * XXX This is totally bogus for 11n, as although high MCS * rates for each stream may be failing, the next stream * should be checked. * * Eg, if MCS5 is ok but MCS6/7 isn't, and we can go up to * MCS23, we should skip 6/7 and try 8 onwards. */ if (is_success(amn)) { amn->amn_success++; if (amn->amn_success >= amn->amn_success_threshold && rix + 1 < rs->rs_nrates) { amn->amn_recovery = 1; amn->amn_success = 0; rix++; /* XXX TODO: we really need a rate-to-string method */ /* XXX TODO: non-11n rate should be divided by two.. */ IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_RATECTL, ni, "AMRR increasing rate %d (txcnt=%d retrycnt=%d)", rs->rs_rates[rix] & IEEE80211_RATE_VAL, amn->amn_txcnt, amn->amn_retrycnt); } else { amn->amn_recovery = 0; } } else if (is_failure(amn)) { amn->amn_success = 0; if (rix > 0) { if (amn->amn_recovery) { amn->amn_success_threshold *= 2; if (amn->amn_success_threshold > amrr->amrr_max_success_threshold) amn->amn_success_threshold = amrr->amrr_max_success_threshold; } else { amn->amn_success_threshold = amrr->amrr_min_success_threshold; } rix--; /* XXX TODO: we really need a rate-to-string method */ /* XXX TODO: non-11n rate should be divided by two.. */ IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_RATECTL, ni, "AMRR decreasing rate %d (txcnt=%d retrycnt=%d)", rs->rs_rates[rix] & IEEE80211_RATE_VAL, amn->amn_txcnt, amn->amn_retrycnt); } amn->amn_recovery = 0; } /* reset counters */ amn->amn_txcnt = 0; amn->amn_retrycnt = 0; return rix; } /* * Return the rate index to use in sending a data frame. * Update our internal state if it's been long enough. * If the rate changes we also update ni_txrate to match. */ static int amrr_rate(struct ieee80211_node *ni, void *arg __unused, uint32_t iarg __unused) { struct ieee80211_amrr_node *amn = ni->ni_rctls; struct ieee80211_amrr *amrr; const struct ieee80211_rateset *rs = NULL; int rix; /* XXX should return -1 here, but drivers may not expect this... */ if (!amn) { ni->ni_txrate = ni->ni_rates.rs_rates[0]; return 0; } amrr = amn->amn_amrr; /* 11n or not? Pick the right rateset */ if (amrr_node_is_11n(ni)) { /* XXX ew */ rs = (struct ieee80211_rateset *) &ni->ni_htrates; } else { rs = &ni->ni_rates; } if (is_enough(amn) && (ticks - amn->amn_ticks) > amrr->amrr_interval) { rix = amrr_update(amrr, amn, ni); if (rix != amn->amn_rix) { /* update public rate */ ni->ni_txrate = rs->rs_rates[rix]; /* XXX strip basic rate flag from txrate, if non-11n */ if (amrr_node_is_11n(ni)) ni->ni_txrate |= IEEE80211_RATE_MCS; else ni->ni_txrate &= IEEE80211_RATE_VAL; amn->amn_rix = rix; } amn->amn_ticks = ticks; } else rix = amn->amn_rix; return rix; } /* * Update statistics with tx complete status. Ok is non-zero * if the packet is known to be ACK'd. Retries has the number * retransmissions (i.e. xmit attempts - 1). */ static void amrr_tx_complete(const struct ieee80211_node *ni, const struct ieee80211_ratectl_tx_status *status) { struct ieee80211_amrr_node *amn = ni->ni_rctls; int retries; if (!amn) return; retries = 0; if (status->flags & IEEE80211_RATECTL_STATUS_LONG_RETRY) retries = status->long_retries; amn->amn_txcnt++; if (status->status == IEEE80211_RATECTL_TX_SUCCESS) amn->amn_success++; amn->amn_retrycnt += retries; } static void amrr_tx_update_cb(void *arg, struct ieee80211_node *ni) { struct ieee80211_ratectl_tx_stats *stats = arg; struct ieee80211_amrr_node *amn = ni->ni_rctls; int txcnt, success, retrycnt; if (!amn) return; txcnt = stats->nframes; success = stats->nsuccess; retrycnt = 0; if (stats->flags & IEEE80211_RATECTL_TX_STATS_RETRIES) retrycnt = stats->nretries; amn->amn_txcnt += txcnt; amn->amn_success += success; amn->amn_retrycnt += retrycnt; } /* * Set tx count/retry statistics explicitly. Intended for * drivers that poll the device for statistics maintained * in the device. */ static void amrr_tx_update(struct ieee80211vap *vap, struct ieee80211_ratectl_tx_stats *stats) { if (stats->flags & IEEE80211_RATECTL_TX_STATS_NODE) amrr_tx_update_cb(stats, stats->ni); else { ieee80211_iterate_nodes_vap(&vap->iv_ic->ic_sta, vap, amrr_tx_update_cb, stats); } } static int amrr_sysctl_interval(SYSCTL_HANDLER_ARGS) { struct ieee80211vap *vap = arg1; struct ieee80211_amrr *amrr = vap->iv_rs; int msecs, error; if (!amrr) return ENOMEM; msecs = ticks_to_msecs(amrr->amrr_interval); error = sysctl_handle_int(oidp, &msecs, 0, req); if (error || !req->newptr) return error; amrr_setinterval(vap, msecs); return 0; } static void amrr_sysctlattach(struct ieee80211vap *vap, struct sysctl_ctx_list *ctx, struct sysctl_oid *tree) { struct ieee80211_amrr *amrr = vap->iv_rs; if (!amrr) return; SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "amrr_rate_interval", CTLTYPE_INT | CTLFLAG_RW, vap, 0, amrr_sysctl_interval, "I", "amrr operation interval (ms)"); /* XXX bounds check values */ SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "amrr_max_sucess_threshold", CTLFLAG_RW, &amrr->amrr_max_success_threshold, 0, ""); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "amrr_min_sucess_threshold", CTLFLAG_RW, &amrr->amrr_min_success_threshold, 0, ""); } static void -amrr_node_stats(struct ieee80211_node *ni, struct sbuf *s) +amrr_print_node_rate(struct ieee80211_amrr_node *amn, + struct ieee80211_node *ni, struct sbuf *s) { int rate; - struct ieee80211_amrr_node *amn = ni->ni_rctls; struct ieee80211_rateset *rs; - /* XXX TODO: check locking? */ - - if (!amn) - return; - - /* XXX TODO: this should be a method */ if (amrr_node_is_11n(ni)) { rs = (struct ieee80211_rateset *) &ni->ni_htrates; rate = rs->rs_rates[amn->amn_rix] & IEEE80211_RATE_VAL; sbuf_printf(s, "rate: MCS %d\n", rate); } else { rs = &ni->ni_rates; rate = rs->rs_rates[amn->amn_rix] & IEEE80211_RATE_VAL; sbuf_printf(s, "rate: %d Mbit\n", rate / 2); } +} +static void +amrr_node_stats(struct ieee80211_node *ni, struct sbuf *s) +{ + struct ieee80211_amrr_node *amn = ni->ni_rctls; + + /* XXX TODO: check locking? */ + + if (!amn) + return; + + amrr_print_node_rate(amn, ni, s); sbuf_printf(s, "ticks: %d\n", amn->amn_ticks); sbuf_printf(s, "txcnt: %u\n", amn->amn_txcnt); sbuf_printf(s, "success: %u\n", amn->amn_success); sbuf_printf(s, "success_threshold: %u\n", amn->amn_success_threshold); sbuf_printf(s, "recovery: %u\n", amn->amn_recovery); sbuf_printf(s, "retry_cnt: %u\n", amn->amn_retrycnt); } Index: projects/clang1000-import/sys/powerpc/aim/aim_machdep.c =================================================================== --- projects/clang1000-import/sys/powerpc/aim/aim_machdep.c (revision 356919) +++ projects/clang1000-import/sys/powerpc/aim/aim_machdep.c (revision 356920) @@ -1,695 +1,697 @@ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 2001 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef __powerpc64__ #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __powerpc64__ #include "mmu_oea64.h" #endif #ifndef __powerpc64__ struct bat battable[16]; #endif #ifndef __powerpc64__ /* Bits for running on 64-bit systems in 32-bit mode. */ extern void *testppc64, *testppc64size; extern void *restorebridge, *restorebridgesize; extern void *rfid_patch, *rfi_patch1, *rfi_patch2; extern void *trapcode64; extern Elf_Addr _GLOBAL_OFFSET_TABLE_[]; #endif extern void *rstcode, *rstcodeend; extern void *trapcode, *trapcodeend; extern void *hypertrapcode, *hypertrapcodeend; extern void *generictrap, *generictrap64; extern void *alitrap, *aliend; extern void *dsitrap, *dsiend; extern void *decrint, *decrsize; extern void *extint, *extsize; extern void *dblow, *dbend; extern void *imisstrap, *imisssize; extern void *dlmisstrap, *dlmisssize; extern void *dsmisstrap, *dsmisssize; extern void *ap_pcpu; extern void __restartkernel(vm_offset_t, vm_offset_t, vm_offset_t, void *, uint32_t, register_t offset, register_t msr); void aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp, uint32_t mdp_cookie); void aim_cpu_init(vm_offset_t toc); void aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp, uint32_t mdp_cookie) { register_t scratch; /* * If running from an FDT, make sure we are in real mode to avoid * tromping on firmware page tables. Everything in the kernel assumes * 1:1 mappings out of firmware, so this won't break anything not * already broken. This doesn't work if there is live OF, since OF * may internally use non-1:1 mappings. */ if (ofentry == 0) mtmsr(mfmsr() & ~(PSL_IR | PSL_DR)); #ifdef __powerpc64__ /* * If in real mode, relocate to high memory so that the kernel * can execute from the direct map. */ if (!(mfmsr() & PSL_DR) && (vm_offset_t)&aim_early_init < DMAP_BASE_ADDRESS) __restartkernel(fdt, 0, ofentry, mdp, mdp_cookie, DMAP_BASE_ADDRESS, mfmsr()); #endif /* Various very early CPU fix ups */ switch (mfpvr() >> 16) { /* * PowerPC 970 CPUs have a misfeature requested by Apple that * makes them pretend they have a 32-byte cacheline. Turn this * off before we measure the cacheline size. */ case IBM970: case IBM970FX: case IBM970MP: case IBM970GX: scratch = mfspr(SPR_HID5); scratch &= ~HID5_970_DCBZ_SIZE_HI; mtspr(SPR_HID5, scratch); break; #ifdef __powerpc64__ case IBMPOWER7: case IBMPOWER7PLUS: case IBMPOWER8: case IBMPOWER8E: case IBMPOWER8NVL: case IBMPOWER9: /* XXX: get from ibm,slb-size in device tree */ n_slbs = 32; break; #endif } } void aim_cpu_init(vm_offset_t toc) { size_t trap_offset, trapsize; vm_offset_t trap; register_t msr; uint8_t *cache_check; int cacheline_warn; #ifndef __powerpc64__ register_t scratch; int ppc64; #endif trap_offset = 0; cacheline_warn = 0; /* General setup for AIM CPUs */ psl_kernset = PSL_EE | PSL_ME | PSL_IR | PSL_DR | PSL_RI; #ifdef __powerpc64__ psl_kernset |= PSL_SF; if (mfmsr() & PSL_HV) psl_kernset |= PSL_HV; #endif psl_userset = psl_kernset | PSL_PR; #ifdef __powerpc64__ psl_userset32 = psl_userset & ~PSL_SF; #endif /* Bits that users aren't allowed to change */ psl_userstatic = ~(PSL_VEC | PSL_FP | PSL_FE0 | PSL_FE1); /* * Mask bits from the SRR1 that aren't really the MSR: * Bits 1-4, 10-15 (ppc32), 33-36, 42-47 (ppc64) */ psl_userstatic &= ~0x783f0000UL; /* * Initialize the interrupt tables and figure out our cache line * size and whether or not we need the 64-bit bridge code. */ /* * Disable translation in case the vector area hasn't been * mapped (G5). Note that no OFW calls can be made until * translation is re-enabled. */ msr = mfmsr(); mtmsr((msr & ~(PSL_IR | PSL_DR)) | PSL_RI); /* * Measure the cacheline size using dcbz * * Use EXC_PGM as a playground. We are about to overwrite it * anyway, we know it exists, and we know it is cache-aligned. */ cache_check = (void *)EXC_PGM; for (cacheline_size = 0; cacheline_size < 0x100; cacheline_size++) cache_check[cacheline_size] = 0xff; __asm __volatile("dcbz 0,%0":: "r" (cache_check) : "memory"); /* Find the first byte dcbz did not zero to get the cache line size */ for (cacheline_size = 0; cacheline_size < 0x100 && cache_check[cacheline_size] == 0; cacheline_size++); /* Work around psim bug */ if (cacheline_size == 0) { cacheline_warn = 1; cacheline_size = 32; } #ifndef __powerpc64__ /* * Figure out whether we need to use the 64 bit PMAP. This works by * executing an instruction that is only legal on 64-bit PPC (mtmsrd), * and setting ppc64 = 0 if that causes a trap. */ ppc64 = 1; bcopy(&testppc64, (void *)EXC_PGM, (size_t)&testppc64size); __syncicache((void *)EXC_PGM, (size_t)&testppc64size); __asm __volatile("\ mfmsr %0; \ mtsprg2 %1; \ \ mtmsrd %0; \ mfsprg2 %1;" : "=r"(scratch), "=r"(ppc64)); if (ppc64) cpu_features |= PPC_FEATURE_64; /* * Now copy restorebridge into all the handlers, if necessary, * and set up the trap tables. */ if (cpu_features & PPC_FEATURE_64) { /* Patch the two instances of rfi -> rfid */ bcopy(&rfid_patch,&rfi_patch1,4); #ifdef KDB /* rfi_patch2 is at the end of dbleave */ bcopy(&rfid_patch,&rfi_patch2,4); #endif } #else /* powerpc64 */ cpu_features |= PPC_FEATURE_64; #endif trapsize = (size_t)&trapcodeend - (size_t)&trapcode; /* * Copy generic handler into every possible trap. Special cases will get * different ones in a minute. */ for (trap = EXC_RST; trap < EXC_LAST; trap += 0x20) bcopy(&trapcode, (void *)trap, trapsize); #ifndef __powerpc64__ if (cpu_features & PPC_FEATURE_64) { /* * Copy a code snippet to restore 32-bit bridge mode * to the top of every non-generic trap handler */ trap_offset += (size_t)&restorebridgesize; bcopy(&restorebridge, (void *)EXC_RST, trap_offset); bcopy(&restorebridge, (void *)EXC_DSI, trap_offset); bcopy(&restorebridge, (void *)EXC_ALI, trap_offset); bcopy(&restorebridge, (void *)EXC_PGM, trap_offset); bcopy(&restorebridge, (void *)EXC_MCHK, trap_offset); bcopy(&restorebridge, (void *)EXC_TRC, trap_offset); bcopy(&restorebridge, (void *)EXC_BPT, trap_offset); } #else trapsize = (size_t)&hypertrapcodeend - (size_t)&hypertrapcode; bcopy(&hypertrapcode, (void *)(EXC_HEA + trap_offset), trapsize); bcopy(&hypertrapcode, (void *)(EXC_HMI + trap_offset), trapsize); bcopy(&hypertrapcode, (void *)(EXC_HVI + trap_offset), trapsize); bcopy(&hypertrapcode, (void *)(EXC_SOFT_PATCH + trap_offset), trapsize); #endif bcopy(&rstcode, (void *)(EXC_RST + trap_offset), (size_t)&rstcodeend - (size_t)&rstcode); #ifdef KDB bcopy(&dblow, (void *)(EXC_MCHK + trap_offset), (size_t)&dbend - (size_t)&dblow); bcopy(&dblow, (void *)(EXC_PGM + trap_offset), (size_t)&dbend - (size_t)&dblow); bcopy(&dblow, (void *)(EXC_TRC + trap_offset), (size_t)&dbend - (size_t)&dblow); bcopy(&dblow, (void *)(EXC_BPT + trap_offset), (size_t)&dbend - (size_t)&dblow); #endif bcopy(&alitrap, (void *)(EXC_ALI + trap_offset), (size_t)&aliend - (size_t)&alitrap); bcopy(&dsitrap, (void *)(EXC_DSI + trap_offset), (size_t)&dsiend - (size_t)&dsitrap); + /* Set address of generictrap for self-reloc calculations */ + *((void **)TRAP_GENTRAP) = &generictrap; #ifdef __powerpc64__ /* Set TOC base so that the interrupt code can get at it */ - *((void **)TRAP_GENTRAP) = &generictrap; + *((void **)TRAP_ENTRY) = &generictrap; *((register_t *)TRAP_TOCBASE) = toc; #else /* Set branch address for trap code */ if (cpu_features & PPC_FEATURE_64) - *((void **)TRAP_GENTRAP) = &generictrap64; + *((void **)TRAP_ENTRY) = &generictrap64; else - *((void **)TRAP_GENTRAP) = &generictrap; + *((void **)TRAP_ENTRY) = &generictrap; *((void **)TRAP_TOCBASE) = _GLOBAL_OFFSET_TABLE_; /* G2-specific TLB miss helper handlers */ bcopy(&imisstrap, (void *)EXC_IMISS, (size_t)&imisssize); bcopy(&dlmisstrap, (void *)EXC_DLMISS, (size_t)&dlmisssize); bcopy(&dsmisstrap, (void *)EXC_DSMISS, (size_t)&dsmisssize); #endif __syncicache(EXC_RSVD, EXC_LAST - EXC_RSVD); /* * Restore MSR */ mtmsr(msr); /* Warn if cachline size was not determined */ if (cacheline_warn == 1) { printf("WARNING: cacheline size undetermined, setting to 32\n"); } /* * Initialise virtual memory. Use BUS_PROBE_GENERIC priority * in case the platform module had a better idea of what we * should do. */ if (cpu_features & PPC_FEATURE_64) pmap_mmu_install(MMU_TYPE_G5, BUS_PROBE_GENERIC); else pmap_mmu_install(MMU_TYPE_OEA, BUS_PROBE_GENERIC); } /* * Shutdown the CPU as much as possible. */ void cpu_halt(void) { OF_exit(); } int ptrace_single_step(struct thread *td) { struct trapframe *tf; tf = td->td_frame; tf->srr1 |= PSL_SE; return (0); } int ptrace_clear_single_step(struct thread *td) { struct trapframe *tf; tf = td->td_frame; tf->srr1 &= ~PSL_SE; return (0); } void kdb_cpu_clear_singlestep(void) { kdb_frame->srr1 &= ~PSL_SE; } void kdb_cpu_set_singlestep(void) { kdb_frame->srr1 |= PSL_SE; } /* * Initialise a struct pcpu. */ void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t sz) { #ifdef __powerpc64__ /* Copy the SLB contents from the current CPU */ memcpy(pcpu->pc_aim.slb, PCPU_GET(aim.slb), sizeof(pcpu->pc_aim.slb)); #endif } #ifndef __powerpc64__ uint64_t va_to_vsid(pmap_t pm, vm_offset_t va) { return ((pm->pm_sr[(uintptr_t)va >> ADDR_SR_SHFT]) & SR_VSID_MASK); } #endif /* * These functions need to provide addresses that both (a) work in real mode * (or whatever mode/circumstances the kernel is in in early boot (now)) and * (b) can still, in principle, work once the kernel is going. Because these * rely on existing mappings/real mode, unmap is a no-op. */ vm_offset_t pmap_early_io_map(vm_paddr_t pa, vm_size_t size) { KASSERT(!pmap_bootstrapped, ("Not available after PMAP started!")); /* * If we have the MMU up in early boot, assume it is 1:1. Otherwise, * try to get the address in a memory region compatible with the * direct map for efficiency later. */ if (mfmsr() & PSL_DR) return (pa); else return (DMAP_BASE_ADDRESS + pa); } void pmap_early_io_unmap(vm_offset_t va, vm_size_t size) { KASSERT(!pmap_bootstrapped, ("Not available after PMAP started!")); } /* From p3-53 of the MPC7450 RISC Microprocessor Family Reference Manual */ void flush_disable_caches(void) { register_t msr; register_t msscr0; register_t cache_reg; volatile uint32_t *memp; uint32_t temp; int i; int x; msr = mfmsr(); powerpc_sync(); mtmsr(msr & ~(PSL_EE | PSL_DR)); msscr0 = mfspr(SPR_MSSCR0); msscr0 &= ~MSSCR0_L2PFE; mtspr(SPR_MSSCR0, msscr0); powerpc_sync(); isync(); __asm__ __volatile__("dssall; sync"); powerpc_sync(); isync(); __asm__ __volatile__("dcbf 0,%0" :: "r"(0)); __asm__ __volatile__("dcbf 0,%0" :: "r"(0)); __asm__ __volatile__("dcbf 0,%0" :: "r"(0)); /* Lock the L1 Data cache. */ mtspr(SPR_LDSTCR, mfspr(SPR_LDSTCR) | 0xFF); powerpc_sync(); isync(); mtspr(SPR_LDSTCR, 0); /* * Perform this in two stages: Flush the cache starting in RAM, then do it * from ROM. */ memp = (volatile uint32_t *)0x00000000; for (i = 0; i < 128 * 1024; i++) { temp = *memp; __asm__ __volatile__("dcbf 0,%0" :: "r"(memp)); memp += 32/sizeof(*memp); } memp = (volatile uint32_t *)0xfff00000; x = 0xfe; for (; x != 0xff;) { mtspr(SPR_LDSTCR, x); for (i = 0; i < 128; i++) { temp = *memp; __asm__ __volatile__("dcbf 0,%0" :: "r"(memp)); memp += 32/sizeof(*memp); } x = ((x << 1) | 1) & 0xff; } mtspr(SPR_LDSTCR, 0); cache_reg = mfspr(SPR_L2CR); if (cache_reg & L2CR_L2E) { cache_reg &= ~(L2CR_L2IO_7450 | L2CR_L2DO_7450); mtspr(SPR_L2CR, cache_reg); powerpc_sync(); mtspr(SPR_L2CR, cache_reg | L2CR_L2HWF); while (mfspr(SPR_L2CR) & L2CR_L2HWF) ; /* Busy wait for cache to flush */ powerpc_sync(); cache_reg &= ~L2CR_L2E; mtspr(SPR_L2CR, cache_reg); powerpc_sync(); mtspr(SPR_L2CR, cache_reg | L2CR_L2I); powerpc_sync(); while (mfspr(SPR_L2CR) & L2CR_L2I) ; /* Busy wait for L2 cache invalidate */ powerpc_sync(); } cache_reg = mfspr(SPR_L3CR); if (cache_reg & L3CR_L3E) { cache_reg &= ~(L3CR_L3IO | L3CR_L3DO); mtspr(SPR_L3CR, cache_reg); powerpc_sync(); mtspr(SPR_L3CR, cache_reg | L3CR_L3HWF); while (mfspr(SPR_L3CR) & L3CR_L3HWF) ; /* Busy wait for cache to flush */ powerpc_sync(); cache_reg &= ~L3CR_L3E; mtspr(SPR_L3CR, cache_reg); powerpc_sync(); mtspr(SPR_L3CR, cache_reg | L3CR_L3I); powerpc_sync(); while (mfspr(SPR_L3CR) & L3CR_L3I) ; /* Busy wait for L3 cache invalidate */ powerpc_sync(); } mtspr(SPR_HID0, mfspr(SPR_HID0) & ~HID0_DCE); powerpc_sync(); isync(); mtmsr(msr); } void cpu_sleep() { static u_quad_t timebase = 0; static register_t sprgs[4]; static register_t srrs[2]; jmp_buf resetjb; struct thread *fputd; struct thread *vectd; register_t hid0; register_t msr; register_t saved_msr; ap_pcpu = pcpup; PCPU_SET(restore, &resetjb); saved_msr = mfmsr(); fputd = PCPU_GET(fputhread); vectd = PCPU_GET(vecthread); if (fputd != NULL) save_fpu(fputd); if (vectd != NULL) save_vec(vectd); if (setjmp(resetjb) == 0) { sprgs[0] = mfspr(SPR_SPRG0); sprgs[1] = mfspr(SPR_SPRG1); sprgs[2] = mfspr(SPR_SPRG2); sprgs[3] = mfspr(SPR_SPRG3); srrs[0] = mfspr(SPR_SRR0); srrs[1] = mfspr(SPR_SRR1); timebase = mftb(); powerpc_sync(); flush_disable_caches(); hid0 = mfspr(SPR_HID0); hid0 = (hid0 & ~(HID0_DOZE | HID0_NAP)) | HID0_SLEEP; powerpc_sync(); isync(); msr = mfmsr() | PSL_POW; mtspr(SPR_HID0, hid0); powerpc_sync(); while (1) mtmsr(msr); } platform_smp_timebase_sync(timebase, 0); PCPU_SET(curthread, curthread); PCPU_SET(curpcb, curthread->td_pcb); pmap_activate(curthread); powerpc_sync(); mtspr(SPR_SPRG0, sprgs[0]); mtspr(SPR_SPRG1, sprgs[1]); mtspr(SPR_SPRG2, sprgs[2]); mtspr(SPR_SPRG3, sprgs[3]); mtspr(SPR_SRR0, srrs[0]); mtspr(SPR_SRR1, srrs[1]); mtmsr(saved_msr); if (fputd == curthread) enable_fpu(curthread); if (vectd == curthread) enable_vec(curthread); powerpc_sync(); } Index: projects/clang1000-import/sys/powerpc/aim/mmu_oea64.c =================================================================== --- projects/clang1000-import/sys/powerpc/aim/mmu_oea64.c (revision 356919) +++ projects/clang1000-import/sys/powerpc/aim/mmu_oea64.c (revision 356920) @@ -1,3127 +1,3129 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2008-2015 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is also stored by the * logical address mapping module, this module may throw away valid virtual * to physical mappings at almost any time. However, invalidations of * mappings must be done as requested. * * In order to cope with hardware architectures which make virtual to * physical map invalidates expensive, this module may delay invalidate * reduced protection operations until such time as they are actually * necessary. This module is given full information as to which processors * are currently using which maps, and to when physical maps must be made * correct. */ #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmu_oea64.h" #include "mmu_if.h" #include "moea64_if.h" void moea64_release_vsid(uint64_t vsid); uintptr_t moea64_get_unique_vsid(void); #define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR) #define ENABLE_TRANS(msr) mtmsr(msr) #define VSID_MAKE(sr, hash) ((sr) | (((hash) & 0xfffff) << 4)) #define VSID_TO_HASH(vsid) (((vsid) >> 4) & 0xfffff) #define VSID_HASH_MASK 0x0000007fffffffffULL /* * Locking semantics: * * There are two locks of interest: the page locks and the pmap locks, which * protect their individual PVO lists and are locked in that order. The contents * of all PVO entries are protected by the locks of their respective pmaps. * The pmap of any PVO is guaranteed not to change so long as the PVO is linked * into any list. * */ #define PV_LOCK_PER_DOM (PA_LOCK_COUNT * 3) #define PV_LOCK_COUNT (PV_LOCK_PER_DOM * MAXMEMDOM) static struct mtx_padalign pv_lock[PV_LOCK_COUNT]; /* * Cheap NUMA-izing of the pv locks, to reduce contention across domains. * NUMA domains on POWER9 appear to be indexed as sparse memory spaces, with the * index at (N << 45). */ #ifdef __powerpc64__ #define PV_LOCK_IDX(pa) (pa_index(pa) % PV_LOCK_PER_DOM + \ (((pa) >> 45) % MAXMEMDOM) * PV_LOCK_PER_DOM) #else #define PV_LOCK_IDX(pa) (pa_index(pa) % PV_LOCK_COUNT) #endif #define PV_LOCKPTR(pa) ((struct mtx *)(&pv_lock[PV_LOCK_IDX(pa)])) #define PV_LOCK(pa) mtx_lock(PV_LOCKPTR(pa)) #define PV_UNLOCK(pa) mtx_unlock(PV_LOCKPTR(pa)) #define PV_LOCKASSERT(pa) mtx_assert(PV_LOCKPTR(pa), MA_OWNED) #define PV_PAGE_LOCK(m) PV_LOCK(VM_PAGE_TO_PHYS(m)) #define PV_PAGE_UNLOCK(m) PV_UNLOCK(VM_PAGE_TO_PHYS(m)) #define PV_PAGE_LOCKASSERT(m) PV_LOCKASSERT(VM_PAGE_TO_PHYS(m)) struct ofw_map { cell_t om_va; cell_t om_len; uint64_t om_pa; cell_t om_mode; }; extern unsigned char _etext[]; extern unsigned char _end[]; extern void *slbtrap, *slbtrapend; /* * Map of physical memory regions. */ static struct mem_region *regions; static struct mem_region *pregions; static struct numa_mem_region *numa_pregions; static u_int phys_avail_count; static int regions_sz, pregions_sz, numapregions_sz; extern void bs_remap_earlyboot(void); /* * Lock for the SLB tables. */ struct mtx moea64_slb_mutex; /* * PTEG data. */ u_long moea64_pteg_count; u_long moea64_pteg_mask; /* * PVO data. */ uma_zone_t moea64_pvo_zone; /* zone for pvo entries */ static struct pvo_entry *moea64_bpvo_pool; static int moea64_bpvo_pool_index = 0; static int moea64_bpvo_pool_size = 327680; -TUNABLE_INT("machdep.moea64_bpvo_pool_size", &moea64_bpvo_pool_size); SYSCTL_INT(_machdep, OID_AUTO, moea64_allocated_bpvo_entries, CTLFLAG_RD, &moea64_bpvo_pool_index, 0, ""); #define VSID_NBPW (sizeof(u_int32_t) * 8) #ifdef __powerpc64__ #define NVSIDS (NPMAPS * 16) #define VSID_HASHMASK 0xffffffffUL #else #define NVSIDS NPMAPS #define VSID_HASHMASK 0xfffffUL #endif static u_int moea64_vsid_bitmap[NVSIDS / VSID_NBPW]; static boolean_t moea64_initialized = FALSE; #ifdef MOEA64_STATS /* * Statistics. */ u_int moea64_pte_valid = 0; u_int moea64_pte_overflow = 0; u_int moea64_pvo_entries = 0; u_int moea64_pvo_enter_calls = 0; u_int moea64_pvo_remove_calls = 0; SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_valid, CTLFLAG_RD, &moea64_pte_valid, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_overflow, CTLFLAG_RD, &moea64_pte_overflow, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_entries, CTLFLAG_RD, &moea64_pvo_entries, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_enter_calls, CTLFLAG_RD, &moea64_pvo_enter_calls, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_remove_calls, CTLFLAG_RD, &moea64_pvo_remove_calls, 0, ""); #endif vm_offset_t moea64_scratchpage_va[2]; struct pvo_entry *moea64_scratchpage_pvo[2]; struct mtx moea64_scratchpage_mtx; uint64_t moea64_large_page_mask = 0; uint64_t moea64_large_page_size = 0; int moea64_large_page_shift = 0; /* * PVO calls. */ static int moea64_pvo_enter(mmu_t mmu, struct pvo_entry *pvo, struct pvo_head *pvo_head, struct pvo_entry **oldpvo); static void moea64_pvo_remove_from_pmap(mmu_t mmu, struct pvo_entry *pvo); static void moea64_pvo_remove_from_page(mmu_t mmu, struct pvo_entry *pvo); static void moea64_pvo_remove_from_page_locked(mmu_t mmu, struct pvo_entry *pvo, vm_page_t m); static struct pvo_entry *moea64_pvo_find_va(pmap_t, vm_offset_t); /* * Utility routines. */ static boolean_t moea64_query_bit(mmu_t, vm_page_t, uint64_t); static u_int moea64_clear_bit(mmu_t, vm_page_t, uint64_t); static void moea64_kremove(mmu_t, vm_offset_t); static void moea64_syncicache(mmu_t, pmap_t pmap, vm_offset_t va, vm_paddr_t pa, vm_size_t sz); static void moea64_pmap_init_qpages(void); /* * Kernel MMU interface */ void moea64_clear_modify(mmu_t, vm_page_t); void moea64_copy_page(mmu_t, vm_page_t, vm_page_t); void moea64_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, vm_page_t *mb, vm_offset_t b_offset, int xfersize); int moea64_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t, u_int flags, int8_t psind); void moea64_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_page_t, vm_prot_t); void moea64_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t); vm_paddr_t moea64_extract(mmu_t, pmap_t, vm_offset_t); vm_page_t moea64_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t); void moea64_init(mmu_t); boolean_t moea64_is_modified(mmu_t, vm_page_t); boolean_t moea64_is_prefaultable(mmu_t, pmap_t, vm_offset_t); boolean_t moea64_is_referenced(mmu_t, vm_page_t); int moea64_ts_referenced(mmu_t, vm_page_t); vm_offset_t moea64_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t, int); boolean_t moea64_page_exists_quick(mmu_t, pmap_t, vm_page_t); void moea64_page_init(mmu_t, vm_page_t); int moea64_page_wired_mappings(mmu_t, vm_page_t); void moea64_pinit(mmu_t, pmap_t); void moea64_pinit0(mmu_t, pmap_t); void moea64_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_prot_t); void moea64_qenter(mmu_t, vm_offset_t, vm_page_t *, int); void moea64_qremove(mmu_t, vm_offset_t, int); void moea64_release(mmu_t, pmap_t); void moea64_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); void moea64_remove_pages(mmu_t, pmap_t); void moea64_remove_all(mmu_t, vm_page_t); void moea64_remove_write(mmu_t, vm_page_t); void moea64_unwire(mmu_t, pmap_t, vm_offset_t, vm_offset_t); void moea64_zero_page(mmu_t, vm_page_t); void moea64_zero_page_area(mmu_t, vm_page_t, int, int); void moea64_activate(mmu_t, struct thread *); void moea64_deactivate(mmu_t, struct thread *); void *moea64_mapdev(mmu_t, vm_paddr_t, vm_size_t); void *moea64_mapdev_attr(mmu_t, vm_paddr_t, vm_size_t, vm_memattr_t); void moea64_unmapdev(mmu_t, vm_offset_t, vm_size_t); vm_paddr_t moea64_kextract(mmu_t, vm_offset_t); void moea64_page_set_memattr(mmu_t, vm_page_t m, vm_memattr_t ma); void moea64_kenter_attr(mmu_t, vm_offset_t, vm_paddr_t, vm_memattr_t ma); void moea64_kenter(mmu_t, vm_offset_t, vm_paddr_t); boolean_t moea64_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t); static void moea64_sync_icache(mmu_t, pmap_t, vm_offset_t, vm_size_t); void moea64_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, void **va); void moea64_scan_init(mmu_t mmu); vm_offset_t moea64_quick_enter_page(mmu_t mmu, vm_page_t m); void moea64_quick_remove_page(mmu_t mmu, vm_offset_t addr); static int moea64_map_user_ptr(mmu_t mmu, pmap_t pm, volatile const void *uaddr, void **kaddr, size_t ulen, size_t *klen); static int moea64_decode_kernel_ptr(mmu_t mmu, vm_offset_t addr, int *is_user, vm_offset_t *decoded_addr); static size_t moea64_scan_pmap(mmu_t mmu); static void *moea64_dump_pmap_init(mmu_t mmu, unsigned blkpgs); #ifdef __powerpc64__ static void moea64_page_array_startup(mmu_t, long); #endif static mmu_method_t moea64_methods[] = { MMUMETHOD(mmu_clear_modify, moea64_clear_modify), MMUMETHOD(mmu_copy_page, moea64_copy_page), MMUMETHOD(mmu_copy_pages, moea64_copy_pages), MMUMETHOD(mmu_enter, moea64_enter), MMUMETHOD(mmu_enter_object, moea64_enter_object), MMUMETHOD(mmu_enter_quick, moea64_enter_quick), MMUMETHOD(mmu_extract, moea64_extract), MMUMETHOD(mmu_extract_and_hold, moea64_extract_and_hold), MMUMETHOD(mmu_init, moea64_init), MMUMETHOD(mmu_is_modified, moea64_is_modified), MMUMETHOD(mmu_is_prefaultable, moea64_is_prefaultable), MMUMETHOD(mmu_is_referenced, moea64_is_referenced), MMUMETHOD(mmu_ts_referenced, moea64_ts_referenced), MMUMETHOD(mmu_map, moea64_map), MMUMETHOD(mmu_page_exists_quick,moea64_page_exists_quick), MMUMETHOD(mmu_page_init, moea64_page_init), MMUMETHOD(mmu_page_wired_mappings,moea64_page_wired_mappings), MMUMETHOD(mmu_pinit, moea64_pinit), MMUMETHOD(mmu_pinit0, moea64_pinit0), MMUMETHOD(mmu_protect, moea64_protect), MMUMETHOD(mmu_qenter, moea64_qenter), MMUMETHOD(mmu_qremove, moea64_qremove), MMUMETHOD(mmu_release, moea64_release), MMUMETHOD(mmu_remove, moea64_remove), MMUMETHOD(mmu_remove_pages, moea64_remove_pages), MMUMETHOD(mmu_remove_all, moea64_remove_all), MMUMETHOD(mmu_remove_write, moea64_remove_write), MMUMETHOD(mmu_sync_icache, moea64_sync_icache), MMUMETHOD(mmu_unwire, moea64_unwire), MMUMETHOD(mmu_zero_page, moea64_zero_page), MMUMETHOD(mmu_zero_page_area, moea64_zero_page_area), MMUMETHOD(mmu_activate, moea64_activate), MMUMETHOD(mmu_deactivate, moea64_deactivate), MMUMETHOD(mmu_page_set_memattr, moea64_page_set_memattr), MMUMETHOD(mmu_quick_enter_page, moea64_quick_enter_page), MMUMETHOD(mmu_quick_remove_page, moea64_quick_remove_page), #ifdef __powerpc64__ MMUMETHOD(mmu_page_array_startup, moea64_page_array_startup), #endif /* Internal interfaces */ MMUMETHOD(mmu_mapdev, moea64_mapdev), MMUMETHOD(mmu_mapdev_attr, moea64_mapdev_attr), MMUMETHOD(mmu_unmapdev, moea64_unmapdev), MMUMETHOD(mmu_kextract, moea64_kextract), MMUMETHOD(mmu_kenter, moea64_kenter), MMUMETHOD(mmu_kenter_attr, moea64_kenter_attr), MMUMETHOD(mmu_dev_direct_mapped,moea64_dev_direct_mapped), MMUMETHOD(mmu_scan_init, moea64_scan_init), MMUMETHOD(mmu_scan_pmap, moea64_scan_pmap), MMUMETHOD(mmu_dump_pmap_init, moea64_dump_pmap_init), MMUMETHOD(mmu_dumpsys_map, moea64_dumpsys_map), MMUMETHOD(mmu_map_user_ptr, moea64_map_user_ptr), MMUMETHOD(mmu_decode_kernel_ptr, moea64_decode_kernel_ptr), { 0, 0 } }; MMU_DEF(oea64_mmu, "mmu_oea64_base", moea64_methods, 0); static struct pvo_head * vm_page_to_pvoh(vm_page_t m) { mtx_assert(PV_LOCKPTR(VM_PAGE_TO_PHYS(m)), MA_OWNED); return (&m->md.mdpg_pvoh); } static struct pvo_entry * alloc_pvo_entry(int bootstrap) { struct pvo_entry *pvo; if (!moea64_initialized || bootstrap) { if (moea64_bpvo_pool_index >= moea64_bpvo_pool_size) { - panic("moea64_enter: bpvo pool exhausted, %d, %d, %zd", - moea64_bpvo_pool_index, moea64_bpvo_pool_size, - moea64_bpvo_pool_size * sizeof(struct pvo_entry)); + panic("%s: bpvo pool exhausted, index=%d, size=%d, bytes=%zd." + "Try setting machdep.moea64_bpvo_pool_size tunable", + __func__, moea64_bpvo_pool_index, + moea64_bpvo_pool_size, + moea64_bpvo_pool_size * sizeof(struct pvo_entry)); } pvo = &moea64_bpvo_pool[ atomic_fetchadd_int(&moea64_bpvo_pool_index, 1)]; bzero(pvo, sizeof(*pvo)); pvo->pvo_vaddr = PVO_BOOTSTRAP; } else pvo = uma_zalloc(moea64_pvo_zone, M_NOWAIT | M_ZERO); return (pvo); } static void init_pvo_entry(struct pvo_entry *pvo, pmap_t pmap, vm_offset_t va) { uint64_t vsid; uint64_t hash; int shift; PMAP_LOCK_ASSERT(pmap, MA_OWNED); pvo->pvo_pmap = pmap; va &= ~ADDR_POFF; pvo->pvo_vaddr |= va; vsid = va_to_vsid(pmap, va); pvo->pvo_vpn = (uint64_t)((va & ADDR_PIDX) >> ADDR_PIDX_SHFT) | (vsid << 16); shift = (pvo->pvo_vaddr & PVO_LARGE) ? moea64_large_page_shift : ADDR_PIDX_SHFT; hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)va & ADDR_PIDX) >> shift); pvo->pvo_pte.slot = (hash & moea64_pteg_mask) << 3; } static void free_pvo_entry(struct pvo_entry *pvo) { if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP)) uma_zfree(moea64_pvo_zone, pvo); } void moea64_pte_from_pvo(const struct pvo_entry *pvo, struct lpte *lpte) { lpte->pte_hi = moea64_pte_vpn_from_pvo_vpn(pvo); lpte->pte_hi |= LPTE_VALID; if (pvo->pvo_vaddr & PVO_LARGE) lpte->pte_hi |= LPTE_BIG; if (pvo->pvo_vaddr & PVO_WIRED) lpte->pte_hi |= LPTE_WIRED; if (pvo->pvo_vaddr & PVO_HID) lpte->pte_hi |= LPTE_HID; lpte->pte_lo = pvo->pvo_pte.pa; /* Includes WIMG bits */ if (pvo->pvo_pte.prot & VM_PROT_WRITE) lpte->pte_lo |= LPTE_BW; else lpte->pte_lo |= LPTE_BR; if (!(pvo->pvo_pte.prot & VM_PROT_EXECUTE)) lpte->pte_lo |= LPTE_NOEXEC; } static __inline uint64_t moea64_calc_wimg(vm_paddr_t pa, vm_memattr_t ma) { uint64_t pte_lo; int i; if (ma != VM_MEMATTR_DEFAULT) { switch (ma) { case VM_MEMATTR_UNCACHEABLE: return (LPTE_I | LPTE_G); case VM_MEMATTR_CACHEABLE: return (LPTE_M); case VM_MEMATTR_WRITE_COMBINING: case VM_MEMATTR_WRITE_BACK: case VM_MEMATTR_PREFETCHABLE: return (LPTE_I); case VM_MEMATTR_WRITE_THROUGH: return (LPTE_W | LPTE_M); } } /* * Assume the page is cache inhibited and access is guarded unless * it's in our available memory array. */ pte_lo = LPTE_I | LPTE_G; for (i = 0; i < pregions_sz; i++) { if ((pa >= pregions[i].mr_start) && (pa < (pregions[i].mr_start + pregions[i].mr_size))) { pte_lo &= ~(LPTE_I | LPTE_G); pte_lo |= LPTE_M; break; } } return pte_lo; } /* * Quick sort callout for comparing memory regions. */ static int om_cmp(const void *a, const void *b); static int om_cmp(const void *a, const void *b) { const struct ofw_map *mapa; const struct ofw_map *mapb; mapa = a; mapb = b; if (mapa->om_pa < mapb->om_pa) return (-1); else if (mapa->om_pa > mapb->om_pa) return (1); else return (0); } static void moea64_add_ofw_mappings(mmu_t mmup, phandle_t mmu, size_t sz) { struct ofw_map translations[sz/(4*sizeof(cell_t))]; /*>= 4 cells per */ pcell_t acells, trans_cells[sz/sizeof(cell_t)]; struct pvo_entry *pvo; register_t msr; vm_offset_t off; vm_paddr_t pa_base; int i, j; bzero(translations, sz); OF_getencprop(OF_finddevice("/"), "#address-cells", &acells, sizeof(acells)); if (OF_getencprop(mmu, "translations", trans_cells, sz) == -1) panic("moea64_bootstrap: can't get ofw translations"); CTR0(KTR_PMAP, "moea64_add_ofw_mappings: translations"); sz /= sizeof(cell_t); for (i = 0, j = 0; i < sz; j++) { translations[j].om_va = trans_cells[i++]; translations[j].om_len = trans_cells[i++]; translations[j].om_pa = trans_cells[i++]; if (acells == 2) { translations[j].om_pa <<= 32; translations[j].om_pa |= trans_cells[i++]; } translations[j].om_mode = trans_cells[i++]; } KASSERT(i == sz, ("Translations map has incorrect cell count (%d/%zd)", i, sz)); sz = j; qsort(translations, sz, sizeof (*translations), om_cmp); for (i = 0; i < sz; i++) { pa_base = translations[i].om_pa; #ifndef __powerpc64__ if ((translations[i].om_pa >> 32) != 0) panic("OFW translations above 32-bit boundary!"); #endif if (pa_base % PAGE_SIZE) panic("OFW translation not page-aligned (phys)!"); if (translations[i].om_va % PAGE_SIZE) panic("OFW translation not page-aligned (virt)!"); CTR3(KTR_PMAP, "translation: pa=%#zx va=%#x len=%#x", pa_base, translations[i].om_va, translations[i].om_len); /* Now enter the pages for this mapping */ DISABLE_TRANS(msr); for (off = 0; off < translations[i].om_len; off += PAGE_SIZE) { /* If this address is direct-mapped, skip remapping */ if (hw_direct_map && translations[i].om_va == PHYS_TO_DMAP(pa_base) && moea64_calc_wimg(pa_base + off, VM_MEMATTR_DEFAULT) == LPTE_M) continue; PMAP_LOCK(kernel_pmap); pvo = moea64_pvo_find_va(kernel_pmap, translations[i].om_va + off); PMAP_UNLOCK(kernel_pmap); if (pvo != NULL) continue; moea64_kenter(mmup, translations[i].om_va + off, pa_base + off); } ENABLE_TRANS(msr); } } #ifdef __powerpc64__ static void moea64_probe_large_page(void) { uint16_t pvr = mfpvr() >> 16; switch (pvr) { case IBM970: case IBM970FX: case IBM970MP: powerpc_sync(); isync(); mtspr(SPR_HID4, mfspr(SPR_HID4) & ~HID4_970_DISABLE_LG_PG); powerpc_sync(); isync(); /* FALLTHROUGH */ default: if (moea64_large_page_size == 0) { moea64_large_page_size = 0x1000000; /* 16 MB */ moea64_large_page_shift = 24; } } moea64_large_page_mask = moea64_large_page_size - 1; } static void moea64_bootstrap_slb_prefault(vm_offset_t va, int large) { struct slb *cache; struct slb entry; uint64_t esid, slbe; uint64_t i; cache = PCPU_GET(aim.slb); esid = va >> ADDR_SR_SHFT; slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID; for (i = 0; i < 64; i++) { if (cache[i].slbe == (slbe | i)) return; } entry.slbe = slbe; entry.slbv = KERNEL_VSID(esid) << SLBV_VSID_SHIFT; if (large) entry.slbv |= SLBV_L; slb_insert_kernel(entry.slbe, entry.slbv); } #endif static int moea64_kenter_large(mmu_t mmup, vm_offset_t va, vm_paddr_t pa, uint64_t attr, int bootstrap) { struct pvo_entry *pvo; uint64_t pte_lo; int error; pte_lo = LPTE_M; pte_lo |= attr; pvo = alloc_pvo_entry(bootstrap); pvo->pvo_vaddr |= PVO_WIRED | PVO_LARGE; init_pvo_entry(pvo, kernel_pmap, va); pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; pvo->pvo_pte.pa = pa | pte_lo; error = moea64_pvo_enter(mmup, pvo, NULL, NULL); if (error != 0) panic("Error %d inserting large page\n", error); return (0); } static void moea64_setup_direct_map(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { register_t msr; vm_paddr_t pa, pkernelstart, pkernelend; vm_offset_t size, off; uint64_t pte_lo; int i; if (moea64_large_page_size == 0) hw_direct_map = 0; DISABLE_TRANS(msr); if (hw_direct_map) { PMAP_LOCK(kernel_pmap); for (i = 0; i < pregions_sz; i++) { for (pa = pregions[i].mr_start; pa < pregions[i].mr_start + pregions[i].mr_size; pa += moea64_large_page_size) { pte_lo = LPTE_M; if (pa & moea64_large_page_mask) { pa &= moea64_large_page_mask; pte_lo |= LPTE_G; } if (pa + moea64_large_page_size > pregions[i].mr_start + pregions[i].mr_size) pte_lo |= LPTE_G; moea64_kenter_large(mmup, PHYS_TO_DMAP(pa), pa, pte_lo, 1); } } PMAP_UNLOCK(kernel_pmap); } /* * Make sure the kernel and BPVO pool stay mapped on systems either * without a direct map or on which the kernel is not already executing * out of the direct-mapped region. */ if (kernelstart < DMAP_BASE_ADDRESS) { /* * For pre-dmap execution, we need to use identity mapping * because we will be operating with the mmu on but in the * wrong address configuration until we __restartkernel(). */ for (pa = kernelstart & ~PAGE_MASK; pa < kernelend; pa += PAGE_SIZE) moea64_kenter(mmup, pa, pa); } else if (!hw_direct_map) { pkernelstart = kernelstart & ~DMAP_BASE_ADDRESS; pkernelend = kernelend & ~DMAP_BASE_ADDRESS; for (pa = pkernelstart & ~PAGE_MASK; pa < pkernelend; pa += PAGE_SIZE) moea64_kenter(mmup, pa | DMAP_BASE_ADDRESS, pa); } if (!hw_direct_map) { size = moea64_bpvo_pool_size*sizeof(struct pvo_entry); off = (vm_offset_t)(moea64_bpvo_pool); for (pa = off; pa < off + size; pa += PAGE_SIZE) moea64_kenter(mmup, pa, pa); /* Map exception vectors */ for (pa = EXC_RSVD; pa < EXC_LAST; pa += PAGE_SIZE) moea64_kenter(mmup, pa | DMAP_BASE_ADDRESS, pa); } ENABLE_TRANS(msr); /* * Allow user to override unmapped_buf_allowed for testing. * XXXKIB Only direct map implementation was tested. */ if (!TUNABLE_INT_FETCH("vfs.unmapped_buf_allowed", &unmapped_buf_allowed)) unmapped_buf_allowed = hw_direct_map; } /* Quick sort callout for comparing physical addresses. */ static int pa_cmp(const void *a, const void *b) { const vm_paddr_t *pa = a, *pb = b; if (*pa < *pb) return (-1); else if (*pa > *pb) return (1); else return (0); } void moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { int i, j; vm_size_t physsz, hwphyssz; vm_paddr_t kernelphysstart, kernelphysend; int rm_pavail; #ifndef __powerpc64__ /* We don't have a direct map since there is no BAT */ hw_direct_map = 0; /* Make sure battable is zero, since we have no BAT */ for (i = 0; i < 16; i++) { battable[i].batu = 0; battable[i].batl = 0; } #else moea64_probe_large_page(); /* Use a direct map if we have large page support */ if (moea64_large_page_size > 0) hw_direct_map = 1; else hw_direct_map = 0; /* Install trap handlers for SLBs */ bcopy(&slbtrap, (void *)EXC_DSE,(size_t)&slbtrapend - (size_t)&slbtrap); bcopy(&slbtrap, (void *)EXC_ISE,(size_t)&slbtrapend - (size_t)&slbtrap); __syncicache((void *)EXC_DSE, 0x80); __syncicache((void *)EXC_ISE, 0x80); #endif kernelphysstart = kernelstart & ~DMAP_BASE_ADDRESS; kernelphysend = kernelend & ~DMAP_BASE_ADDRESS; /* Get physical memory regions from firmware */ mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz); CTR0(KTR_PMAP, "moea64_bootstrap: physical memory"); if (PHYS_AVAIL_ENTRIES < regions_sz) panic("moea64_bootstrap: phys_avail too small"); phys_avail_count = 0; physsz = 0; hwphyssz = 0; TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); for (i = 0, j = 0; i < regions_sz; i++, j += 2) { CTR3(KTR_PMAP, "region: %#zx - %#zx (%#zx)", regions[i].mr_start, regions[i].mr_start + regions[i].mr_size, regions[i].mr_size); if (hwphyssz != 0 && (physsz + regions[i].mr_size) >= hwphyssz) { if (physsz < hwphyssz) { phys_avail[j] = regions[i].mr_start; phys_avail[j + 1] = regions[i].mr_start + hwphyssz - physsz; physsz = hwphyssz; phys_avail_count++; dump_avail[j] = phys_avail[j]; dump_avail[j + 1] = phys_avail[j + 1]; } break; } phys_avail[j] = regions[i].mr_start; phys_avail[j + 1] = regions[i].mr_start + regions[i].mr_size; phys_avail_count++; physsz += regions[i].mr_size; dump_avail[j] = phys_avail[j]; dump_avail[j + 1] = phys_avail[j + 1]; } /* Check for overlap with the kernel and exception vectors */ rm_pavail = 0; for (j = 0; j < 2*phys_avail_count; j+=2) { if (phys_avail[j] < EXC_LAST) phys_avail[j] += EXC_LAST; if (phys_avail[j] >= kernelphysstart && phys_avail[j+1] <= kernelphysend) { phys_avail[j] = phys_avail[j+1] = ~0; rm_pavail++; continue; } if (kernelphysstart >= phys_avail[j] && kernelphysstart < phys_avail[j+1]) { if (kernelphysend < phys_avail[j+1]) { phys_avail[2*phys_avail_count] = (kernelphysend & ~PAGE_MASK) + PAGE_SIZE; phys_avail[2*phys_avail_count + 1] = phys_avail[j+1]; phys_avail_count++; } phys_avail[j+1] = kernelphysstart & ~PAGE_MASK; } if (kernelphysend >= phys_avail[j] && kernelphysend < phys_avail[j+1]) { if (kernelphysstart > phys_avail[j]) { phys_avail[2*phys_avail_count] = phys_avail[j]; phys_avail[2*phys_avail_count + 1] = kernelphysstart & ~PAGE_MASK; phys_avail_count++; } phys_avail[j] = (kernelphysend & ~PAGE_MASK) + PAGE_SIZE; } } /* Remove physical available regions marked for removal (~0) */ if (rm_pavail) { qsort(phys_avail, 2*phys_avail_count, sizeof(phys_avail[0]), pa_cmp); phys_avail_count -= rm_pavail; for (i = 2*phys_avail_count; i < 2*(phys_avail_count + rm_pavail); i+=2) phys_avail[i] = phys_avail[i+1] = 0; } physmem = btoc(physsz); #ifdef PTEGCOUNT moea64_pteg_count = PTEGCOUNT; #else moea64_pteg_count = 0x1000; while (moea64_pteg_count < physmem) moea64_pteg_count <<= 1; moea64_pteg_count >>= 1; #endif /* PTEGCOUNT */ } void moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { int i; /* * Set PTEG mask */ moea64_pteg_mask = moea64_pteg_count - 1; /* * Initialize SLB table lock and page locks */ mtx_init(&moea64_slb_mutex, "SLB table", NULL, MTX_DEF); for (i = 0; i < PV_LOCK_COUNT; i++) mtx_init(&pv_lock[i], "page pv", NULL, MTX_DEF); /* * Initialise the bootstrap pvo pool. */ + TUNABLE_INT_FETCH("machdep.moea64_bpvo_pool_size", &moea64_bpvo_pool_size); moea64_bpvo_pool = (struct pvo_entry *)moea64_bootstrap_alloc( moea64_bpvo_pool_size*sizeof(struct pvo_entry), PAGE_SIZE); moea64_bpvo_pool_index = 0; /* Place at address usable through the direct map */ if (hw_direct_map) moea64_bpvo_pool = (struct pvo_entry *) PHYS_TO_DMAP((uintptr_t)moea64_bpvo_pool); /* * Make sure kernel vsid is allocated as well as VSID 0. */ #ifndef __powerpc64__ moea64_vsid_bitmap[(KERNEL_VSIDBITS & (NVSIDS - 1)) / VSID_NBPW] |= 1 << (KERNEL_VSIDBITS % VSID_NBPW); moea64_vsid_bitmap[0] |= 1; #endif /* * Initialize the kernel pmap (which is statically allocated). */ #ifdef __powerpc64__ for (i = 0; i < 64; i++) { pcpup->pc_aim.slb[i].slbv = 0; pcpup->pc_aim.slb[i].slbe = 0; } #else for (i = 0; i < 16; i++) kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i; #endif kernel_pmap->pmap_phys = kernel_pmap; CPU_FILL(&kernel_pmap->pm_active); RB_INIT(&kernel_pmap->pmap_pvo); PMAP_LOCK_INIT(kernel_pmap); /* * Now map in all the other buffers we allocated earlier */ moea64_setup_direct_map(mmup, kernelstart, kernelend); } void moea64_late_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { ihandle_t mmui; phandle_t chosen; phandle_t mmu; ssize_t sz; int i; vm_offset_t pa, va; void *dpcpu; /* * Set up the Open Firmware pmap and add its mappings if not in real * mode. */ chosen = OF_finddevice("/chosen"); if (chosen != -1 && OF_getencprop(chosen, "mmu", &mmui, 4) != -1) { mmu = OF_instance_to_package(mmui); if (mmu == -1 || (sz = OF_getproplen(mmu, "translations")) == -1) sz = 0; if (sz > 6144 /* tmpstksz - 2 KB headroom */) panic("moea64_bootstrap: too many ofw translations"); if (sz > 0) moea64_add_ofw_mappings(mmup, mmu, sz); } /* * Calculate the last available physical address. */ Maxmem = 0; for (i = 0; phys_avail[i + 2] != 0; i += 2) Maxmem = MAX(Maxmem, powerpc_btop(phys_avail[i + 1])); /* * Initialize MMU. */ MMU_CPU_BOOTSTRAP(mmup,0); mtmsr(mfmsr() | PSL_DR | PSL_IR); pmap_bootstrapped++; /* * Set the start and end of kva. */ virtual_avail = VM_MIN_KERNEL_ADDRESS; virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS; /* * Map the entire KVA range into the SLB. We must not fault there. */ #ifdef __powerpc64__ for (va = virtual_avail; va < virtual_end; va += SEGMENT_LENGTH) moea64_bootstrap_slb_prefault(va, 0); #endif /* * Remap any early IO mappings (console framebuffer, etc.) */ bs_remap_earlyboot(); /* * Figure out how far we can extend virtual_end into segment 16 * without running into existing mappings. Segment 16 is guaranteed * to contain neither RAM nor devices (at least on Apple hardware), * but will generally contain some OFW mappings we should not * step on. */ #ifndef __powerpc64__ /* KVA is in high memory on PPC64 */ PMAP_LOCK(kernel_pmap); while (virtual_end < VM_MAX_KERNEL_ADDRESS && moea64_pvo_find_va(kernel_pmap, virtual_end+1) == NULL) virtual_end += PAGE_SIZE; PMAP_UNLOCK(kernel_pmap); #endif /* * Allocate a kernel stack with a guard page for thread0 and map it * into the kernel page map. */ pa = moea64_bootstrap_alloc(kstack_pages * PAGE_SIZE, PAGE_SIZE); va = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; virtual_avail = va + kstack_pages * PAGE_SIZE; CTR2(KTR_PMAP, "moea64_bootstrap: kstack0 at %#x (%#x)", pa, va); thread0.td_kstack = va; thread0.td_kstack_pages = kstack_pages; for (i = 0; i < kstack_pages; i++) { moea64_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } /* * Allocate virtual address space for the message buffer. */ pa = msgbuf_phys = moea64_bootstrap_alloc(msgbufsize, PAGE_SIZE); msgbufp = (struct msgbuf *)virtual_avail; va = virtual_avail; virtual_avail += round_page(msgbufsize); while (va < virtual_avail) { moea64_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } /* * Allocate virtual address space for the dynamic percpu area. */ pa = moea64_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE); dpcpu = (void *)virtual_avail; va = virtual_avail; virtual_avail += DPCPU_SIZE; while (va < virtual_avail) { moea64_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } dpcpu_init(dpcpu, curcpu); crashdumpmap = (caddr_t)virtual_avail; virtual_avail += MAXDUMPPGS * PAGE_SIZE; /* * Allocate some things for page zeroing. We put this directly * in the page table and use MOEA64_PTE_REPLACE to avoid any * of the PVO book-keeping or other parts of the VM system * from even knowing that this hack exists. */ if (!hw_direct_map) { mtx_init(&moea64_scratchpage_mtx, "pvo zero page", NULL, MTX_DEF); for (i = 0; i < 2; i++) { moea64_scratchpage_va[i] = (virtual_end+1) - PAGE_SIZE; virtual_end -= PAGE_SIZE; moea64_kenter(mmup, moea64_scratchpage_va[i], 0); PMAP_LOCK(kernel_pmap); moea64_scratchpage_pvo[i] = moea64_pvo_find_va( kernel_pmap, (vm_offset_t)moea64_scratchpage_va[i]); PMAP_UNLOCK(kernel_pmap); } } numa_mem_regions(&numa_pregions, &numapregions_sz); } static void moea64_pmap_init_qpages(void) { struct pcpu *pc; int i; if (hw_direct_map) return; CPU_FOREACH(i) { pc = pcpu_find(i); pc->pc_qmap_addr = kva_alloc(PAGE_SIZE); if (pc->pc_qmap_addr == 0) panic("pmap_init_qpages: unable to allocate KVA"); PMAP_LOCK(kernel_pmap); pc->pc_aim.qmap_pvo = moea64_pvo_find_va(kernel_pmap, pc->pc_qmap_addr); PMAP_UNLOCK(kernel_pmap); mtx_init(&pc->pc_aim.qmap_lock, "qmap lock", NULL, MTX_DEF); } } SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, moea64_pmap_init_qpages, NULL); /* * Activate a user pmap. This mostly involves setting some non-CPU * state. */ void moea64_activate(mmu_t mmu, struct thread *td) { pmap_t pm; pm = &td->td_proc->p_vmspace->vm_pmap; CPU_SET(PCPU_GET(cpuid), &pm->pm_active); #ifdef __powerpc64__ PCPU_SET(aim.userslb, pm->pm_slb); __asm __volatile("slbmte %0, %1; isync" :: "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE)); #else PCPU_SET(curpmap, pm->pmap_phys); mtsrin(USER_SR << ADDR_SR_SHFT, td->td_pcb->pcb_cpu.aim.usr_vsid); #endif } void moea64_deactivate(mmu_t mmu, struct thread *td) { pmap_t pm; __asm __volatile("isync; slbie %0" :: "r"(USER_ADDR)); pm = &td->td_proc->p_vmspace->vm_pmap; CPU_CLR(PCPU_GET(cpuid), &pm->pm_active); #ifdef __powerpc64__ PCPU_SET(aim.userslb, NULL); #else PCPU_SET(curpmap, NULL); #endif } void moea64_unwire(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva) { struct pvo_entry key, *pvo; vm_page_t m; int64_t refchg; key.pvo_vaddr = sva; PMAP_LOCK(pm); for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); pvo != NULL && PVO_VADDR(pvo) < eva; pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) { if ((pvo->pvo_vaddr & PVO_WIRED) == 0) panic("moea64_unwire: pvo %p is missing PVO_WIRED", pvo); pvo->pvo_vaddr &= ~PVO_WIRED; refchg = MOEA64_PTE_REPLACE(mmu, pvo, 0 /* No invalidation */); if ((pvo->pvo_vaddr & PVO_MANAGED) && (pvo->pvo_pte.prot & VM_PROT_WRITE)) { if (refchg < 0) refchg = LPTE_CHG; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); refchg |= atomic_readandclear_32(&m->md.mdpg_attrs); if (refchg & LPTE_CHG) vm_page_dirty(m); if (refchg & LPTE_REF) vm_page_aflag_set(m, PGA_REFERENCED); } pm->pm_stats.wired_count--; } PMAP_UNLOCK(pm); } /* * This goes through and sets the physical address of our * special scratch PTE to the PA we want to zero or copy. Because * of locking issues (this can get called in pvo_enter() by * the UMA allocator), we can't use most other utility functions here */ static __inline void moea64_set_scratchpage_pa(mmu_t mmup, int which, vm_paddr_t pa) { struct pvo_entry *pvo; KASSERT(!hw_direct_map, ("Using OEA64 scratchpage with a direct map!")); mtx_assert(&moea64_scratchpage_mtx, MA_OWNED); pvo = moea64_scratchpage_pvo[which]; PMAP_LOCK(pvo->pvo_pmap); pvo->pvo_pte.pa = moea64_calc_wimg(pa, VM_MEMATTR_DEFAULT) | (uint64_t)pa; MOEA64_PTE_REPLACE(mmup, pvo, MOEA64_PTE_INVALIDATE); PMAP_UNLOCK(pvo->pvo_pmap); isync(); } void moea64_copy_page(mmu_t mmu, vm_page_t msrc, vm_page_t mdst) { vm_offset_t dst; vm_offset_t src; dst = VM_PAGE_TO_PHYS(mdst); src = VM_PAGE_TO_PHYS(msrc); if (hw_direct_map) { bcopy((void *)PHYS_TO_DMAP(src), (void *)PHYS_TO_DMAP(dst), PAGE_SIZE); } else { mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 0, src); moea64_set_scratchpage_pa(mmu, 1, dst); bcopy((void *)moea64_scratchpage_va[0], (void *)moea64_scratchpage_va[1], PAGE_SIZE); mtx_unlock(&moea64_scratchpage_mtx); } } static inline void moea64_copy_pages_dmap(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, vm_page_t *mb, vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_offset_t a_pg_offset, b_pg_offset; int cnt; while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); a_cp = (char *)(uintptr_t)PHYS_TO_DMAP( VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])) + a_pg_offset; b_pg_offset = b_offset & PAGE_MASK; cnt = min(cnt, PAGE_SIZE - b_pg_offset); b_cp = (char *)(uintptr_t)PHYS_TO_DMAP( VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])) + b_pg_offset; bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } } static inline void moea64_copy_pages_nodmap(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, vm_page_t *mb, vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_offset_t a_pg_offset, b_pg_offset; int cnt; mtx_lock(&moea64_scratchpage_mtx); while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); moea64_set_scratchpage_pa(mmu, 0, VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])); a_cp = (char *)moea64_scratchpage_va[0] + a_pg_offset; b_pg_offset = b_offset & PAGE_MASK; cnt = min(cnt, PAGE_SIZE - b_pg_offset); moea64_set_scratchpage_pa(mmu, 1, VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])); b_cp = (char *)moea64_scratchpage_va[1] + b_pg_offset; bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } mtx_unlock(&moea64_scratchpage_mtx); } void moea64_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, vm_page_t *mb, vm_offset_t b_offset, int xfersize) { if (hw_direct_map) { moea64_copy_pages_dmap(mmu, ma, a_offset, mb, b_offset, xfersize); } else { moea64_copy_pages_nodmap(mmu, ma, a_offset, mb, b_offset, xfersize); } } void moea64_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) { vm_paddr_t pa = VM_PAGE_TO_PHYS(m); if (size + off > PAGE_SIZE) panic("moea64_zero_page: size + off > PAGE_SIZE"); if (hw_direct_map) { bzero((caddr_t)(uintptr_t)PHYS_TO_DMAP(pa) + off, size); } else { mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 0, pa); bzero((caddr_t)moea64_scratchpage_va[0] + off, size); mtx_unlock(&moea64_scratchpage_mtx); } } /* * Zero a page of physical memory by temporarily mapping it */ void moea64_zero_page(mmu_t mmu, vm_page_t m) { vm_paddr_t pa = VM_PAGE_TO_PHYS(m); vm_offset_t va, off; if (!hw_direct_map) { mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 0, pa); va = moea64_scratchpage_va[0]; } else { va = PHYS_TO_DMAP(pa); } for (off = 0; off < PAGE_SIZE; off += cacheline_size) __asm __volatile("dcbz 0,%0" :: "r"(va + off)); if (!hw_direct_map) mtx_unlock(&moea64_scratchpage_mtx); } vm_offset_t moea64_quick_enter_page(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; vm_paddr_t pa = VM_PAGE_TO_PHYS(m); if (hw_direct_map) return (PHYS_TO_DMAP(pa)); /* * MOEA64_PTE_REPLACE does some locking, so we can't just grab * a critical section and access the PCPU data like on i386. * Instead, pin the thread and grab the PCPU lock to prevent * a preempting thread from using the same PCPU data. */ sched_pin(); mtx_assert(PCPU_PTR(aim.qmap_lock), MA_NOTOWNED); pvo = PCPU_GET(aim.qmap_pvo); mtx_lock(PCPU_PTR(aim.qmap_lock)); pvo->pvo_pte.pa = moea64_calc_wimg(pa, pmap_page_get_memattr(m)) | (uint64_t)pa; MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_INVALIDATE); isync(); return (PCPU_GET(qmap_addr)); } void moea64_quick_remove_page(mmu_t mmu, vm_offset_t addr) { if (hw_direct_map) return; mtx_assert(PCPU_PTR(aim.qmap_lock), MA_OWNED); KASSERT(PCPU_GET(qmap_addr) == addr, ("moea64_quick_remove_page: invalid address")); mtx_unlock(PCPU_PTR(aim.qmap_lock)); sched_unpin(); } /* * Map the given physical page at the specified virtual address in the * target pmap with the protection requested. If specified the page * will be wired down. */ int moea64_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind) { struct pvo_entry *pvo, *oldpvo; struct pvo_head *pvo_head; uint64_t pte_lo; int error; if ((m->oflags & VPO_UNMANAGED) == 0) { if ((flags & PMAP_ENTER_QUICK_LOCKED) == 0) VM_PAGE_OBJECT_BUSY_ASSERT(m); else VM_OBJECT_ASSERT_LOCKED(m->object); } pvo = alloc_pvo_entry(0); if (pvo == NULL) return (KERN_RESOURCE_SHORTAGE); pvo->pvo_pmap = NULL; /* to be filled in later */ pvo->pvo_pte.prot = prot; pte_lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), pmap_page_get_memattr(m)); pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | pte_lo; if ((flags & PMAP_ENTER_WIRED) != 0) pvo->pvo_vaddr |= PVO_WIRED; if ((m->oflags & VPO_UNMANAGED) != 0 || !moea64_initialized) { pvo_head = NULL; } else { pvo_head = &m->md.mdpg_pvoh; pvo->pvo_vaddr |= PVO_MANAGED; } PV_PAGE_LOCK(m); PMAP_LOCK(pmap); if (pvo->pvo_pmap == NULL) init_pvo_entry(pvo, pmap, va); if (prot & VM_PROT_WRITE) if (pmap_bootstrapped && (m->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(m, PGA_WRITEABLE); error = moea64_pvo_enter(mmu, pvo, pvo_head, &oldpvo); if (error == EEXIST) { if (oldpvo->pvo_vaddr == pvo->pvo_vaddr && oldpvo->pvo_pte.pa == pvo->pvo_pte.pa && oldpvo->pvo_pte.prot == prot) { /* Identical mapping already exists */ error = 0; /* If not in page table, reinsert it */ if (MOEA64_PTE_SYNCH(mmu, oldpvo) < 0) { STAT_MOEA64(moea64_pte_overflow--); MOEA64_PTE_INSERT(mmu, oldpvo); } /* Then just clean up and go home */ PV_PAGE_UNLOCK(m); PMAP_UNLOCK(pmap); free_pvo_entry(pvo); goto out; } else { /* Otherwise, need to kill it first */ KASSERT(oldpvo->pvo_pmap == pmap, ("pmap of old " "mapping does not match new mapping")); moea64_pvo_remove_from_pmap(mmu, oldpvo); moea64_pvo_enter(mmu, pvo, pvo_head, NULL); } } PMAP_UNLOCK(pmap); PV_PAGE_UNLOCK(m); /* Free any dead pages */ if (error == EEXIST) { moea64_pvo_remove_from_page(mmu, oldpvo); free_pvo_entry(oldpvo); } out: /* * Flush the page from the instruction cache if this page is * mapped executable and cacheable. */ if (pmap != kernel_pmap && (m->a.flags & PGA_EXECUTABLE) == 0 && (pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { vm_page_aflag_set(m, PGA_EXECUTABLE); moea64_syncicache(mmu, pmap, va, VM_PAGE_TO_PHYS(m), PAGE_SIZE); } return (KERN_SUCCESS); } static void moea64_syncicache(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_paddr_t pa, vm_size_t sz) { /* * This is much trickier than on older systems because * we can't sync the icache on physical addresses directly * without a direct map. Instead we check a couple of cases * where the memory is already mapped in and, failing that, * use the same trick we use for page zeroing to create * a temporary mapping for this physical address. */ if (!pmap_bootstrapped) { /* * If PMAP is not bootstrapped, we are likely to be * in real mode. */ __syncicache((void *)(uintptr_t)pa, sz); } else if (pmap == kernel_pmap) { __syncicache((void *)va, sz); } else if (hw_direct_map) { __syncicache((void *)(uintptr_t)PHYS_TO_DMAP(pa), sz); } else { /* Use the scratch page to set up a temp mapping */ mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 1, pa & ~ADDR_POFF); __syncicache((void *)(moea64_scratchpage_va[1] + (va & ADDR_POFF)), sz); mtx_unlock(&moea64_scratchpage_mtx); } } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void moea64_enter_object(mmu_t mmu, pmap_t pm, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_page_t m; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); m = m_start; while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { moea64_enter(mmu, pm, start + ptoa(diff), m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, 0); m = TAILQ_NEXT(m, listq); } } void moea64_enter_quick(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot) { moea64_enter(mmu, pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, 0); } vm_paddr_t moea64_extract(mmu_t mmu, pmap_t pm, vm_offset_t va) { struct pvo_entry *pvo; vm_paddr_t pa; PMAP_LOCK(pm); pvo = moea64_pvo_find_va(pm, va); if (pvo == NULL) pa = 0; else pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va - PVO_VADDR(pvo)); PMAP_UNLOCK(pm); return (pa); } /* * Atomically extract and hold the physical page with the given * pmap and virtual address pair if that mapping permits the given * protection. */ vm_page_t moea64_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) { struct pvo_entry *pvo; vm_page_t m; m = NULL; PMAP_LOCK(pmap); pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); if (pvo != NULL && (pvo->pvo_pte.prot & prot) == prot) { m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); if (!vm_page_wire_mapped(m)) m = NULL; } PMAP_UNLOCK(pmap); return (m); } static mmu_t installed_mmu; static void * moea64_uma_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags, int wait) { struct pvo_entry *pvo; vm_offset_t va; vm_page_t m; int needed_lock; /* * This entire routine is a horrible hack to avoid bothering kmem * for new KVA addresses. Because this can get called from inside * kmem allocation routines, calling kmem for a new address here * can lead to multiply locking non-recursive mutexes. */ *flags = UMA_SLAB_PRIV; needed_lock = !PMAP_LOCKED(kernel_pmap); m = vm_page_alloc_domain(NULL, 0, domain, malloc2vm_flags(wait) | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ); if (m == NULL) return (NULL); va = VM_PAGE_TO_PHYS(m); pvo = alloc_pvo_entry(1 /* bootstrap */); pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE; pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | LPTE_M; if (needed_lock) PMAP_LOCK(kernel_pmap); init_pvo_entry(pvo, kernel_pmap, va); pvo->pvo_vaddr |= PVO_WIRED; moea64_pvo_enter(installed_mmu, pvo, NULL, NULL); if (needed_lock) PMAP_UNLOCK(kernel_pmap); if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0) bzero((void *)va, PAGE_SIZE); return (void *)va; } extern int elf32_nxstack; void moea64_init(mmu_t mmu) { CTR0(KTR_PMAP, "moea64_init"); moea64_pvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); if (!hw_direct_map) { installed_mmu = mmu; uma_zone_set_allocf(moea64_pvo_zone, moea64_uma_page_alloc); } #ifdef COMPAT_FREEBSD32 elf32_nxstack = 1; #endif moea64_initialized = TRUE; } boolean_t moea64_is_referenced(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_is_referenced: page %p is not managed", m)); return (moea64_query_bit(mmu, m, LPTE_REF)); } boolean_t moea64_is_modified(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_is_modified: page %p is not managed", m)); /* * If the page is not busied then this check is racy. */ if (!pmap_page_is_write_mapped(m)) return (FALSE); return (moea64_query_bit(mmu, m, LPTE_CHG)); } boolean_t moea64_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t va) { struct pvo_entry *pvo; boolean_t rv = TRUE; PMAP_LOCK(pmap); pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); if (pvo != NULL) rv = FALSE; PMAP_UNLOCK(pmap); return (rv); } void moea64_clear_modify(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_clear_modify: page %p is not managed", m)); vm_page_assert_busied(m); if (!pmap_page_is_write_mapped(m)) return; moea64_clear_bit(mmu, m, LPTE_CHG); } /* * Clear the write and modified bits in each of the given page's mappings. */ void moea64_remove_write(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; int64_t refchg, ret; pmap_t pmap; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_remove_write: page %p is not managed", m)); vm_page_assert_busied(m); if (!pmap_page_is_write_mapped(m)) return powerpc_sync(); PV_PAGE_LOCK(m); refchg = 0; LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); if (!(pvo->pvo_vaddr & PVO_DEAD) && (pvo->pvo_pte.prot & VM_PROT_WRITE)) { pvo->pvo_pte.prot &= ~VM_PROT_WRITE; ret = MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_PROT_UPDATE); if (ret < 0) ret = LPTE_CHG; refchg |= ret; if (pvo->pvo_pmap == kernel_pmap) isync(); } PMAP_UNLOCK(pmap); } if ((refchg | atomic_readandclear_32(&m->md.mdpg_attrs)) & LPTE_CHG) vm_page_dirty(m); vm_page_aflag_clear(m, PGA_WRITEABLE); PV_PAGE_UNLOCK(m); } /* * moea64_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int moea64_ts_referenced(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_ts_referenced: page %p is not managed", m)); return (moea64_clear_bit(mmu, m, LPTE_REF)); } /* * Modify the WIMG settings of all mappings for a page. */ void moea64_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma) { struct pvo_entry *pvo; int64_t refchg; pmap_t pmap; uint64_t lo; if ((m->oflags & VPO_UNMANAGED) != 0) { m->md.mdpg_cache_attrs = ma; return; } lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), ma); PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); if (!(pvo->pvo_vaddr & PVO_DEAD)) { pvo->pvo_pte.pa &= ~LPTE_WIMG; pvo->pvo_pte.pa |= lo; refchg = MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_INVALIDATE); if (refchg < 0) refchg = (pvo->pvo_pte.prot & VM_PROT_WRITE) ? LPTE_CHG : 0; if ((pvo->pvo_vaddr & PVO_MANAGED) && (pvo->pvo_pte.prot & VM_PROT_WRITE)) { refchg |= atomic_readandclear_32(&m->md.mdpg_attrs); if (refchg & LPTE_CHG) vm_page_dirty(m); if (refchg & LPTE_REF) vm_page_aflag_set(m, PGA_REFERENCED); } if (pvo->pvo_pmap == kernel_pmap) isync(); } PMAP_UNLOCK(pmap); } m->md.mdpg_cache_attrs = ma; PV_PAGE_UNLOCK(m); } /* * Map a wired page into kernel virtual address space. */ void moea64_kenter_attr(mmu_t mmu, vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma) { int error; struct pvo_entry *pvo, *oldpvo; do { pvo = alloc_pvo_entry(0); if (pvo == NULL) vm_wait(NULL); } while (pvo == NULL); pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; pvo->pvo_pte.pa = (pa & ~ADDR_POFF) | moea64_calc_wimg(pa, ma); pvo->pvo_vaddr |= PVO_WIRED; PMAP_LOCK(kernel_pmap); oldpvo = moea64_pvo_find_va(kernel_pmap, va); if (oldpvo != NULL) moea64_pvo_remove_from_pmap(mmu, oldpvo); init_pvo_entry(pvo, kernel_pmap, va); error = moea64_pvo_enter(mmu, pvo, NULL, NULL); PMAP_UNLOCK(kernel_pmap); /* Free any dead pages */ if (oldpvo != NULL) { moea64_pvo_remove_from_page(mmu, oldpvo); free_pvo_entry(oldpvo); } if (error != 0) panic("moea64_kenter: failed to enter va %#zx pa %#jx: %d", va, (uintmax_t)pa, error); } void moea64_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa) { moea64_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); } /* * Extract the physical page address associated with the given kernel virtual * address. */ vm_paddr_t moea64_kextract(mmu_t mmu, vm_offset_t va) { struct pvo_entry *pvo; vm_paddr_t pa; /* * Shortcut the direct-mapped case when applicable. We never put * anything but 1:1 (or 62-bit aliased) mappings below * VM_MIN_KERNEL_ADDRESS. */ if (va < VM_MIN_KERNEL_ADDRESS) return (va & ~DMAP_BASE_ADDRESS); PMAP_LOCK(kernel_pmap); pvo = moea64_pvo_find_va(kernel_pmap, va); KASSERT(pvo != NULL, ("moea64_kextract: no addr found for %#" PRIxPTR, va)); pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va - PVO_VADDR(pvo)); PMAP_UNLOCK(kernel_pmap); return (pa); } /* * Remove a wired page from kernel virtual address space. */ void moea64_kremove(mmu_t mmu, vm_offset_t va) { moea64_remove(mmu, kernel_pmap, va, va + PAGE_SIZE); } /* * Provide a kernel pointer corresponding to a given userland pointer. * The returned pointer is valid until the next time this function is * called in this thread. This is used internally in copyin/copyout. */ static int moea64_map_user_ptr(mmu_t mmu, pmap_t pm, volatile const void *uaddr, void **kaddr, size_t ulen, size_t *klen) { size_t l; #ifdef __powerpc64__ struct slb *slb; #endif register_t slbv; *kaddr = (char *)USER_ADDR + ((uintptr_t)uaddr & ~SEGMENT_MASK); l = ((char *)USER_ADDR + SEGMENT_LENGTH) - (char *)(*kaddr); if (l > ulen) l = ulen; if (klen) *klen = l; else if (l != ulen) return (EFAULT); #ifdef __powerpc64__ /* Try lockless look-up first */ slb = user_va_to_slb_entry(pm, (vm_offset_t)uaddr); if (slb == NULL) { /* If it isn't there, we need to pre-fault the VSID */ PMAP_LOCK(pm); slbv = va_to_vsid(pm, (vm_offset_t)uaddr) << SLBV_VSID_SHIFT; PMAP_UNLOCK(pm); } else { slbv = slb->slbv; } /* Mark segment no-execute */ slbv |= SLBV_N; #else slbv = va_to_vsid(pm, (vm_offset_t)uaddr); /* Mark segment no-execute */ slbv |= SR_N; #endif /* If we have already set this VSID, we can just return */ if (curthread->td_pcb->pcb_cpu.aim.usr_vsid == slbv) return (0); __asm __volatile("isync"); curthread->td_pcb->pcb_cpu.aim.usr_segm = (uintptr_t)uaddr >> ADDR_SR_SHFT; curthread->td_pcb->pcb_cpu.aim.usr_vsid = slbv; #ifdef __powerpc64__ __asm __volatile ("slbie %0; slbmte %1, %2; isync" :: "r"(USER_ADDR), "r"(slbv), "r"(USER_SLB_SLBE)); #else __asm __volatile("mtsr %0,%1; isync" :: "n"(USER_SR), "r"(slbv)); #endif return (0); } /* * Figure out where a given kernel pointer (usually in a fault) points * to from the VM's perspective, potentially remapping into userland's * address space. */ static int moea64_decode_kernel_ptr(mmu_t mmu, vm_offset_t addr, int *is_user, vm_offset_t *decoded_addr) { vm_offset_t user_sr; if ((addr >> ADDR_SR_SHFT) == (USER_ADDR >> ADDR_SR_SHFT)) { user_sr = curthread->td_pcb->pcb_cpu.aim.usr_segm; addr &= ADDR_PIDX | ADDR_POFF; addr |= user_sr << ADDR_SR_SHFT; *decoded_addr = addr; *is_user = 1; } else { *decoded_addr = addr; *is_user = 0; } return (0); } /* * Map a range of physical addresses into kernel virtual address space. * * The value passed in *virt is a suggested virtual address for the mapping. * Architectures which can support a direct-mapped physical to virtual region * can return the appropriate address within that region, leaving '*virt' * unchanged. Other architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped region. */ vm_offset_t moea64_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start, vm_paddr_t pa_end, int prot) { vm_offset_t sva, va; if (hw_direct_map) { /* * Check if every page in the region is covered by the direct * map. The direct map covers all of physical memory. Use * moea64_calc_wimg() as a shortcut to see if the page is in * physical memory as a way to see if the direct map covers it. */ for (va = pa_start; va < pa_end; va += PAGE_SIZE) if (moea64_calc_wimg(va, VM_MEMATTR_DEFAULT) != LPTE_M) break; if (va == pa_end) return (PHYS_TO_DMAP(pa_start)); } sva = *virt; va = sva; /* XXX respect prot argument */ for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE) moea64_kenter(mmu, va, pa_start); *virt = va; return (sva); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t moea64_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m) { int loops; struct pvo_entry *pvo; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_page_exists_quick: page %p is not managed", m)); loops = 0; rv = FALSE; PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { if (!(pvo->pvo_vaddr & PVO_DEAD) && pvo->pvo_pmap == pmap) { rv = TRUE; break; } if (++loops >= 16) break; } PV_PAGE_UNLOCK(m); return (rv); } void moea64_page_init(mmu_t mmu __unused, vm_page_t m) { m->md.mdpg_attrs = 0; m->md.mdpg_cache_attrs = VM_MEMATTR_DEFAULT; LIST_INIT(&m->md.mdpg_pvoh); } /* * Return the number of managed mappings to the given physical page * that are wired. */ int moea64_page_wired_mappings(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; int count; count = 0; if ((m->oflags & VPO_UNMANAGED) != 0) return (count); PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) if ((pvo->pvo_vaddr & (PVO_DEAD | PVO_WIRED)) == PVO_WIRED) count++; PV_PAGE_UNLOCK(m); return (count); } static uintptr_t moea64_vsidcontext; uintptr_t moea64_get_unique_vsid(void) { u_int entropy; register_t hash; uint32_t mask; int i; entropy = 0; __asm __volatile("mftb %0" : "=r"(entropy)); mtx_lock(&moea64_slb_mutex); for (i = 0; i < NVSIDS; i += VSID_NBPW) { u_int n; /* * Create a new value by mutiplying by a prime and adding in * entropy from the timebase register. This is to make the * VSID more random so that the PT hash function collides * less often. (Note that the prime casues gcc to do shifts * instead of a multiply.) */ moea64_vsidcontext = (moea64_vsidcontext * 0x1105) + entropy; hash = moea64_vsidcontext & (NVSIDS - 1); if (hash == 0) /* 0 is special, avoid it */ continue; n = hash >> 5; mask = 1 << (hash & (VSID_NBPW - 1)); hash = (moea64_vsidcontext & VSID_HASHMASK); if (moea64_vsid_bitmap[n] & mask) { /* collision? */ /* anything free in this bucket? */ if (moea64_vsid_bitmap[n] == 0xffffffff) { entropy = (moea64_vsidcontext >> 20); continue; } i = ffs(~moea64_vsid_bitmap[n]) - 1; mask = 1 << i; hash &= rounddown2(VSID_HASHMASK, VSID_NBPW); hash |= i; } if (hash == VSID_VRMA) /* also special, avoid this too */ continue; KASSERT(!(moea64_vsid_bitmap[n] & mask), ("Allocating in-use VSID %#zx\n", hash)); moea64_vsid_bitmap[n] |= mask; mtx_unlock(&moea64_slb_mutex); return (hash); } mtx_unlock(&moea64_slb_mutex); panic("%s: out of segments",__func__); } #ifdef __powerpc64__ void moea64_pinit(mmu_t mmu, pmap_t pmap) { RB_INIT(&pmap->pmap_pvo); pmap->pm_slb_tree_root = slb_alloc_tree(); pmap->pm_slb = slb_alloc_user_cache(); pmap->pm_slb_len = 0; } #else void moea64_pinit(mmu_t mmu, pmap_t pmap) { int i; uint32_t hash; RB_INIT(&pmap->pmap_pvo); if (pmap_bootstrapped) pmap->pmap_phys = (pmap_t)moea64_kextract(mmu, (vm_offset_t)pmap); else pmap->pmap_phys = pmap; /* * Allocate some segment registers for this pmap. */ hash = moea64_get_unique_vsid(); for (i = 0; i < 16; i++) pmap->pm_sr[i] = VSID_MAKE(i, hash); KASSERT(pmap->pm_sr[0] != 0, ("moea64_pinit: pm_sr[0] = 0")); } #endif /* * Initialize the pmap associated with process 0. */ void moea64_pinit0(mmu_t mmu, pmap_t pm) { PMAP_LOCK_INIT(pm); moea64_pinit(mmu, pm); bzero(&pm->pm_stats, sizeof(pm->pm_stats)); } /* * Set the physical protection on the specified range of this map as requested. */ static void moea64_pvo_protect(mmu_t mmu, pmap_t pm, struct pvo_entry *pvo, vm_prot_t prot) { struct vm_page *pg; vm_prot_t oldprot; int32_t refchg; PMAP_LOCK_ASSERT(pm, MA_OWNED); /* * Change the protection of the page. */ oldprot = pvo->pvo_pte.prot; pvo->pvo_pte.prot = prot; pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); /* * If the PVO is in the page table, update mapping */ refchg = MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_PROT_UPDATE); if (refchg < 0) refchg = (oldprot & VM_PROT_WRITE) ? LPTE_CHG : 0; if (pm != kernel_pmap && pg != NULL && (pg->a.flags & PGA_EXECUTABLE) == 0 && (pvo->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { if ((pg->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(pg, PGA_EXECUTABLE); moea64_syncicache(mmu, pm, PVO_VADDR(pvo), pvo->pvo_pte.pa & LPTE_RPGN, PAGE_SIZE); } /* * Update vm about the REF/CHG bits if the page is managed and we have * removed write access. */ if (pg != NULL && (pvo->pvo_vaddr & PVO_MANAGED) && (oldprot & VM_PROT_WRITE)) { refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs); if (refchg & LPTE_CHG) vm_page_dirty(pg); if (refchg & LPTE_REF) vm_page_aflag_set(pg, PGA_REFERENCED); } } void moea64_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { struct pvo_entry *pvo, *tpvo, key; CTR4(KTR_PMAP, "moea64_protect: pm=%p sva=%#x eva=%#x prot=%#x", pm, sva, eva, prot); KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap, ("moea64_protect: non current pmap")); if ((prot & VM_PROT_READ) == VM_PROT_NONE) { moea64_remove(mmu, pm, sva, eva); return; } PMAP_LOCK(pm); key.pvo_vaddr = sva; for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) { tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo); moea64_pvo_protect(mmu, pm, pvo, prot); } PMAP_UNLOCK(pm); } /* * Map a list of wired pages into kernel virtual address space. This is * intended for temporary mappings which do not need page modification or * references recorded. Existing mappings in the region are overwritten. */ void moea64_qenter(mmu_t mmu, vm_offset_t va, vm_page_t *m, int count) { while (count-- > 0) { moea64_kenter(mmu, va, VM_PAGE_TO_PHYS(*m)); va += PAGE_SIZE; m++; } } /* * Remove page mappings from kernel virtual address space. Intended for * temporary mappings entered by moea64_qenter. */ void moea64_qremove(mmu_t mmu, vm_offset_t va, int count) { while (count-- > 0) { moea64_kremove(mmu, va); va += PAGE_SIZE; } } void moea64_release_vsid(uint64_t vsid) { int idx, mask; mtx_lock(&moea64_slb_mutex); idx = vsid & (NVSIDS-1); mask = 1 << (idx % VSID_NBPW); idx /= VSID_NBPW; KASSERT(moea64_vsid_bitmap[idx] & mask, ("Freeing unallocated VSID %#jx", vsid)); moea64_vsid_bitmap[idx] &= ~mask; mtx_unlock(&moea64_slb_mutex); } void moea64_release(mmu_t mmu, pmap_t pmap) { /* * Free segment registers' VSIDs */ #ifdef __powerpc64__ slb_free_tree(pmap); slb_free_user_cache(pmap->pm_slb); #else KASSERT(pmap->pm_sr[0] != 0, ("moea64_release: pm_sr[0] = 0")); moea64_release_vsid(VSID_TO_HASH(pmap->pm_sr[0])); #endif } /* * Remove all pages mapped by the specified pmap */ void moea64_remove_pages(mmu_t mmu, pmap_t pm) { struct pvo_entry *pvo, *tpvo; struct pvo_dlist tofree; SLIST_INIT(&tofree); PMAP_LOCK(pm); RB_FOREACH_SAFE(pvo, pvo_tree, &pm->pmap_pvo, tpvo) { if (pvo->pvo_vaddr & PVO_WIRED) continue; /* * For locking reasons, remove this from the page table and * pmap, but save delinking from the vm_page for a second * pass */ moea64_pvo_remove_from_pmap(mmu, pvo); SLIST_INSERT_HEAD(&tofree, pvo, pvo_dlink); } PMAP_UNLOCK(pm); while (!SLIST_EMPTY(&tofree)) { pvo = SLIST_FIRST(&tofree); SLIST_REMOVE_HEAD(&tofree, pvo_dlink); moea64_pvo_remove_from_page(mmu, pvo); free_pvo_entry(pvo); } } /* * Remove the given range of addresses from the specified map. */ void moea64_remove(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva) { struct pvo_entry *pvo, *tpvo, key; struct pvo_dlist tofree; /* * Perform an unsynchronized read. This is, however, safe. */ if (pm->pm_stats.resident_count == 0) return; key.pvo_vaddr = sva; SLIST_INIT(&tofree); PMAP_LOCK(pm); for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) { tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo); /* * For locking reasons, remove this from the page table and * pmap, but save delinking from the vm_page for a second * pass */ moea64_pvo_remove_from_pmap(mmu, pvo); SLIST_INSERT_HEAD(&tofree, pvo, pvo_dlink); } PMAP_UNLOCK(pm); while (!SLIST_EMPTY(&tofree)) { pvo = SLIST_FIRST(&tofree); SLIST_REMOVE_HEAD(&tofree, pvo_dlink); moea64_pvo_remove_from_page(mmu, pvo); free_pvo_entry(pvo); } } /* * Remove physical page from all pmaps in which it resides. moea64_pvo_remove() * will reflect changes in pte's back to the vm_page. */ void moea64_remove_all(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo, *next_pvo; struct pvo_head freequeue; int wasdead; pmap_t pmap; LIST_INIT(&freequeue); PV_PAGE_LOCK(m); LIST_FOREACH_SAFE(pvo, vm_page_to_pvoh(m), pvo_vlink, next_pvo) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); wasdead = (pvo->pvo_vaddr & PVO_DEAD); if (!wasdead) moea64_pvo_remove_from_pmap(mmu, pvo); moea64_pvo_remove_from_page_locked(mmu, pvo, m); if (!wasdead) LIST_INSERT_HEAD(&freequeue, pvo, pvo_vlink); PMAP_UNLOCK(pmap); } KASSERT(!pmap_page_is_mapped(m), ("Page still has mappings")); KASSERT((m->a.flags & PGA_WRITEABLE) == 0, ("Page still writable")); PV_PAGE_UNLOCK(m); /* Clean up UMA allocations */ LIST_FOREACH_SAFE(pvo, &freequeue, pvo_vlink, next_pvo) free_pvo_entry(pvo); } /* * Allocate a physical page of memory directly from the phys_avail map. * Can only be called from moea64_bootstrap before avail start and end are * calculated. */ vm_offset_t moea64_bootstrap_alloc(vm_size_t size, vm_size_t align) { vm_offset_t s, e; int i, j; size = round_page(size); for (i = 0; phys_avail[i + 1] != 0; i += 2) { if (align != 0) s = roundup2(phys_avail[i], align); else s = phys_avail[i]; e = s + size; if (s < phys_avail[i] || e > phys_avail[i + 1]) continue; if (s + size > platform_real_maxaddr()) continue; if (s == phys_avail[i]) { phys_avail[i] += size; } else if (e == phys_avail[i + 1]) { phys_avail[i + 1] -= size; } else { for (j = phys_avail_count * 2; j > i; j -= 2) { phys_avail[j] = phys_avail[j - 2]; phys_avail[j + 1] = phys_avail[j - 1]; } phys_avail[i + 3] = phys_avail[i + 1]; phys_avail[i + 1] = s; phys_avail[i + 2] = e; phys_avail_count++; } return (s); } panic("moea64_bootstrap_alloc: could not allocate memory"); } static int moea64_pvo_enter(mmu_t mmu, struct pvo_entry *pvo, struct pvo_head *pvo_head, struct pvo_entry **oldpvop) { struct pvo_entry *old_pvo; int err; PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); STAT_MOEA64(moea64_pvo_enter_calls++); /* * Add to pmap list */ old_pvo = RB_INSERT(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo); if (old_pvo != NULL) { if (oldpvop != NULL) *oldpvop = old_pvo; return (EEXIST); } if (pvo_head != NULL) { LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink); } if (pvo->pvo_vaddr & PVO_WIRED) pvo->pvo_pmap->pm_stats.wired_count++; pvo->pvo_pmap->pm_stats.resident_count++; /* * Insert it into the hardware page table */ err = MOEA64_PTE_INSERT(mmu, pvo); if (err != 0) { panic("moea64_pvo_enter: overflow"); } STAT_MOEA64(moea64_pvo_entries++); if (pvo->pvo_pmap == kernel_pmap) isync(); #ifdef __powerpc64__ /* * Make sure all our bootstrap mappings are in the SLB as soon * as virtual memory is switched on. */ if (!pmap_bootstrapped) moea64_bootstrap_slb_prefault(PVO_VADDR(pvo), pvo->pvo_vaddr & PVO_LARGE); #endif return (0); } static void moea64_pvo_remove_from_pmap(mmu_t mmu, struct pvo_entry *pvo) { struct vm_page *pg; int32_t refchg; KASSERT(pvo->pvo_pmap != NULL, ("Trying to remove PVO with no pmap")); PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); KASSERT(!(pvo->pvo_vaddr & PVO_DEAD), ("Trying to remove dead PVO")); /* * If there is an active pte entry, we need to deactivate it */ refchg = MOEA64_PTE_UNSET(mmu, pvo); if (refchg < 0) { /* * If it was evicted from the page table, be pessimistic and * dirty the page. */ if (pvo->pvo_pte.prot & VM_PROT_WRITE) refchg = LPTE_CHG; else refchg = 0; } /* * Update our statistics. */ pvo->pvo_pmap->pm_stats.resident_count--; if (pvo->pvo_vaddr & PVO_WIRED) pvo->pvo_pmap->pm_stats.wired_count--; /* * Remove this PVO from the pmap list. */ RB_REMOVE(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo); /* * Mark this for the next sweep */ pvo->pvo_vaddr |= PVO_DEAD; /* Send RC bits to VM */ if ((pvo->pvo_vaddr & PVO_MANAGED) && (pvo->pvo_pte.prot & VM_PROT_WRITE)) { pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); if (pg != NULL) { refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs); if (refchg & LPTE_CHG) vm_page_dirty(pg); if (refchg & LPTE_REF) vm_page_aflag_set(pg, PGA_REFERENCED); } } } static inline void moea64_pvo_remove_from_page_locked(mmu_t mmu, struct pvo_entry *pvo, vm_page_t m) { KASSERT(pvo->pvo_vaddr & PVO_DEAD, ("Trying to delink live page")); /* Use NULL pmaps as a sentinel for races in page deletion */ if (pvo->pvo_pmap == NULL) return; pvo->pvo_pmap = NULL; /* * Update vm about page writeability/executability if managed */ PV_LOCKASSERT(pvo->pvo_pte.pa & LPTE_RPGN); if (pvo->pvo_vaddr & PVO_MANAGED) { if (m != NULL) { LIST_REMOVE(pvo, pvo_vlink); if (LIST_EMPTY(vm_page_to_pvoh(m))) vm_page_aflag_clear(m, PGA_WRITEABLE | PGA_EXECUTABLE); } } STAT_MOEA64(moea64_pvo_entries--); STAT_MOEA64(moea64_pvo_remove_calls++); } static void moea64_pvo_remove_from_page(mmu_t mmu, struct pvo_entry *pvo) { vm_page_t pg = NULL; if (pvo->pvo_vaddr & PVO_MANAGED) pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); PV_LOCK(pvo->pvo_pte.pa & LPTE_RPGN); moea64_pvo_remove_from_page_locked(mmu, pvo, pg); PV_UNLOCK(pvo->pvo_pte.pa & LPTE_RPGN); } static struct pvo_entry * moea64_pvo_find_va(pmap_t pm, vm_offset_t va) { struct pvo_entry key; PMAP_LOCK_ASSERT(pm, MA_OWNED); key.pvo_vaddr = va & ~ADDR_POFF; return (RB_FIND(pvo_tree, &pm->pmap_pvo, &key)); } static boolean_t moea64_query_bit(mmu_t mmu, vm_page_t m, uint64_t ptebit) { struct pvo_entry *pvo; int64_t ret; boolean_t rv; /* * See if this bit is stored in the page already. */ if (m->md.mdpg_attrs & ptebit) return (TRUE); /* * Examine each PTE. Sync so that any pending REF/CHG bits are * flushed to the PTEs. */ rv = FALSE; powerpc_sync(); PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { ret = 0; /* * See if this pvo has a valid PTE. if so, fetch the * REF/CHG bits from the valid PTE. If the appropriate * ptebit is set, return success. */ PMAP_LOCK(pvo->pvo_pmap); if (!(pvo->pvo_vaddr & PVO_DEAD)) ret = MOEA64_PTE_SYNCH(mmu, pvo); PMAP_UNLOCK(pvo->pvo_pmap); if (ret > 0) { atomic_set_32(&m->md.mdpg_attrs, ret & (LPTE_CHG | LPTE_REF)); if (ret & ptebit) { rv = TRUE; break; } } } PV_PAGE_UNLOCK(m); return (rv); } static u_int moea64_clear_bit(mmu_t mmu, vm_page_t m, u_int64_t ptebit) { u_int count; struct pvo_entry *pvo; int64_t ret; /* * Sync so that any pending REF/CHG bits are flushed to the PTEs (so * we can reset the right ones). */ powerpc_sync(); /* * For each pvo entry, clear the pte's ptebit. */ count = 0; PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { ret = 0; PMAP_LOCK(pvo->pvo_pmap); if (!(pvo->pvo_vaddr & PVO_DEAD)) ret = MOEA64_PTE_CLEAR(mmu, pvo, ptebit); PMAP_UNLOCK(pvo->pvo_pmap); if (ret > 0 && (ret & ptebit)) count++; } atomic_clear_32(&m->md.mdpg_attrs, ptebit); PV_PAGE_UNLOCK(m); return (count); } boolean_t moea64_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size) { struct pvo_entry *pvo, key; vm_offset_t ppa; int error = 0; if (hw_direct_map && mem_valid(pa, size) == 0) return (0); PMAP_LOCK(kernel_pmap); ppa = pa & ~ADDR_POFF; key.pvo_vaddr = DMAP_BASE_ADDRESS + ppa; for (pvo = RB_FIND(pvo_tree, &kernel_pmap->pmap_pvo, &key); ppa < pa + size; ppa += PAGE_SIZE, pvo = RB_NEXT(pvo_tree, &kernel_pmap->pmap_pvo, pvo)) { if (pvo == NULL || (pvo->pvo_pte.pa & LPTE_RPGN) != ppa) { error = EFAULT; break; } } PMAP_UNLOCK(kernel_pmap); return (error); } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. */ void * moea64_mapdev_attr(mmu_t mmu, vm_paddr_t pa, vm_size_t size, vm_memattr_t ma) { vm_offset_t va, tmpva, ppa, offset; ppa = trunc_page(pa); offset = pa & PAGE_MASK; size = roundup2(offset + size, PAGE_SIZE); va = kva_alloc(size); if (!va) panic("moea64_mapdev: Couldn't alloc kernel virtual memory"); for (tmpva = va; size > 0;) { moea64_kenter_attr(mmu, tmpva, ppa, ma); size -= PAGE_SIZE; tmpva += PAGE_SIZE; ppa += PAGE_SIZE; } return ((void *)(va + offset)); } void * moea64_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size) { return moea64_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT); } void moea64_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) { vm_offset_t base, offset; base = trunc_page(va); offset = va & PAGE_MASK; size = roundup2(offset + size, PAGE_SIZE); kva_free(base, size); } void moea64_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) { struct pvo_entry *pvo; vm_offset_t lim; vm_paddr_t pa; vm_size_t len; if (__predict_false(pm == NULL)) pm = &curthread->td_proc->p_vmspace->vm_pmap; PMAP_LOCK(pm); while (sz > 0) { lim = round_page(va+1); len = MIN(lim - va, sz); pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF); if (pvo != NULL && !(pvo->pvo_pte.pa & LPTE_I)) { pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va & ADDR_POFF); moea64_syncicache(mmu, pm, va, pa, len); } va += len; sz -= len; } PMAP_UNLOCK(pm); } void moea64_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, void **va) { *va = (void *)(uintptr_t)pa; } extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1]; void moea64_scan_init(mmu_t mmu) { struct pvo_entry *pvo; vm_offset_t va; int i; if (!do_minidump) { /* Initialize phys. segments for dumpsys(). */ memset(&dump_map, 0, sizeof(dump_map)); mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz); for (i = 0; i < pregions_sz; i++) { dump_map[i].pa_start = pregions[i].mr_start; dump_map[i].pa_size = pregions[i].mr_size; } return; } /* Virtual segments for minidumps: */ memset(&dump_map, 0, sizeof(dump_map)); /* 1st: kernel .data and .bss. */ dump_map[0].pa_start = trunc_page((uintptr_t)_etext); dump_map[0].pa_size = round_page((uintptr_t)_end) - dump_map[0].pa_start; /* 2nd: msgbuf and tables (see pmap_bootstrap()). */ dump_map[1].pa_start = (vm_paddr_t)(uintptr_t)msgbufp->msg_ptr; dump_map[1].pa_size = round_page(msgbufp->msg_size); /* 3rd: kernel VM. */ va = dump_map[1].pa_start + dump_map[1].pa_size; /* Find start of next chunk (from va). */ while (va < virtual_end) { /* Don't dump the buffer cache. */ if (va >= kmi.buffer_sva && va < kmi.buffer_eva) { va = kmi.buffer_eva; continue; } pvo = moea64_pvo_find_va(kernel_pmap, va & ~ADDR_POFF); if (pvo != NULL && !(pvo->pvo_vaddr & PVO_DEAD)) break; va += PAGE_SIZE; } if (va < virtual_end) { dump_map[2].pa_start = va; va += PAGE_SIZE; /* Find last page in chunk. */ while (va < virtual_end) { /* Don't run into the buffer cache. */ if (va == kmi.buffer_sva) break; pvo = moea64_pvo_find_va(kernel_pmap, va & ~ADDR_POFF); if (pvo == NULL || (pvo->pvo_vaddr & PVO_DEAD)) break; va += PAGE_SIZE; } dump_map[2].pa_size = va - dump_map[2].pa_start; } } #ifdef __powerpc64__ static size_t moea64_scan_pmap(mmu_t mmu) { struct pvo_entry *pvo; vm_paddr_t pa, pa_end; vm_offset_t va, pgva, kstart, kend, kstart_lp, kend_lp; uint64_t lpsize; lpsize = moea64_large_page_size; kstart = trunc_page((vm_offset_t)_etext); kend = round_page((vm_offset_t)_end); kstart_lp = kstart & ~moea64_large_page_mask; kend_lp = (kend + moea64_large_page_mask) & ~moea64_large_page_mask; CTR4(KTR_PMAP, "moea64_scan_pmap: kstart=0x%016lx, kend=0x%016lx, " "kstart_lp=0x%016lx, kend_lp=0x%016lx", kstart, kend, kstart_lp, kend_lp); PMAP_LOCK(kernel_pmap); RB_FOREACH(pvo, pvo_tree, &kernel_pmap->pmap_pvo) { va = pvo->pvo_vaddr; if (va & PVO_DEAD) continue; /* Skip DMAP (except kernel area) */ if (va >= DMAP_BASE_ADDRESS && va <= DMAP_MAX_ADDRESS) { if (va & PVO_LARGE) { pgva = va & ~moea64_large_page_mask; if (pgva < kstart_lp || pgva >= kend_lp) continue; } else { pgva = trunc_page(va); if (pgva < kstart || pgva >= kend) continue; } } pa = pvo->pvo_pte.pa & LPTE_RPGN; if (va & PVO_LARGE) { pa_end = pa + lpsize; for (; pa < pa_end; pa += PAGE_SIZE) { if (is_dumpable(pa)) dump_add_page(pa); } } else { if (is_dumpable(pa)) dump_add_page(pa); } } PMAP_UNLOCK(kernel_pmap); return (sizeof(struct lpte) * moea64_pteg_count * 8); } static struct dump_context dump_ctx; static void * moea64_dump_pmap_init(mmu_t mmu, unsigned blkpgs) { dump_ctx.ptex = 0; dump_ctx.ptex_end = moea64_pteg_count * 8; dump_ctx.blksz = blkpgs * PAGE_SIZE; return (&dump_ctx); } #else static size_t moea64_scan_pmap(mmu_t mmu) { return (0); } static void * moea64_dump_pmap_init(mmu_t mmu, unsigned blkpgs) { return (NULL); } #endif #ifdef __powerpc64__ static void moea64_map_range(mmu_t mmu, vm_offset_t va, vm_paddr_t pa, vm_size_t npages) { for (; npages > 0; --npages) { if (moea64_large_page_size != 0 && (pa & moea64_large_page_mask) == 0 && (va & moea64_large_page_mask) == 0 && npages >= (moea64_large_page_size >> PAGE_SHIFT)) { PMAP_LOCK(kernel_pmap); moea64_kenter_large(mmu, va, pa, 0, 0); PMAP_UNLOCK(kernel_pmap); pa += moea64_large_page_size; va += moea64_large_page_size; npages -= (moea64_large_page_size >> PAGE_SHIFT) - 1; } else { moea64_kenter(mmu, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } } } static void moea64_page_array_startup(mmu_t mmu, long pages) { long dom_pages[MAXMEMDOM]; vm_paddr_t pa; vm_offset_t va, vm_page_base; vm_size_t needed, size; long page; int domain; int i; vm_page_base = 0xd000000000000000ULL; /* Short-circuit single-domain systems. */ if (vm_ndomains == 1) { size = round_page(pages * sizeof(struct vm_page)); pa = vm_phys_early_alloc(0, size); vm_page_base = moea64_map(mmu, &vm_page_base, pa, pa + size, VM_PROT_READ | VM_PROT_WRITE); vm_page_array_size = pages; vm_page_array = (vm_page_t)vm_page_base; return; } page = 0; for (i = 0; i < MAXMEMDOM; i++) dom_pages[i] = 0; /* Now get the number of pages required per domain. */ for (i = 0; i < vm_phys_nsegs; i++) { domain = vm_phys_segs[i].domain; KASSERT(domain < MAXMEMDOM, ("Invalid vm_phys_segs NUMA domain %d!\n", domain)); /* Get size of vm_page_array needed for this segment. */ size = btoc(vm_phys_segs[i].end - vm_phys_segs[i].start); dom_pages[domain] += size; } for (i = 0; phys_avail[i + 1] != 0; i+= 2) { domain = _vm_phys_domain(phys_avail[i]); KASSERT(domain < MAXMEMDOM, ("Invalid phys_avail NUMA domain %d!\n", domain)); size = btoc(phys_avail[i + 1] - phys_avail[i]); dom_pages[domain] += size; } /* * Map in chunks that can get us all 16MB pages. There will be some * overlap between domains, but that's acceptable for now. */ vm_page_array_size = 0; va = vm_page_base; for (i = 0; i < MAXMEMDOM && vm_page_array_size < pages; i++) { if (dom_pages[i] == 0) continue; size = ulmin(pages - vm_page_array_size, dom_pages[i]); size = round_page(size * sizeof(struct vm_page)); needed = size; size = roundup2(size, moea64_large_page_size); pa = vm_phys_early_alloc(i, size); vm_page_array_size += size / sizeof(struct vm_page); moea64_map_range(mmu, va, pa, size >> PAGE_SHIFT); /* Scoot up domain 0, to reduce the domain page overlap. */ if (i == 0) vm_page_base += size - needed; va += size; } vm_page_array = (vm_page_t)vm_page_base; vm_page_array_size = pages; } #endif Index: projects/clang1000-import/sys/powerpc/aim/moea64_if.m =================================================================== --- projects/clang1000-import/sys/powerpc/aim/moea64_if.m (revision 356919) +++ projects/clang1000-import/sys/powerpc/aim/moea64_if.m (revision 356920) @@ -1,121 +1,122 @@ #- # Copyright (c) 2010,2015 Nathan Whitehorn # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # #include #include #include #include #include #include #include /** * MOEA64 kobj methods for 64-bit Book-S page table * manipulation routines used, for example, by hypervisors. */ INTERFACE moea64; +SINGLETON; CODE { static moea64_pte_replace_t moea64_pte_replace_default; static int64_t moea64_pte_replace_default(mmu_t mmu, struct pvo_entry *pvo, int flags) { int64_t refchg; refchg = MOEA64_PTE_UNSET(mmu, pvo); MOEA64_PTE_INSERT(mmu, pvo); return (refchg); } } /** * Return ref/changed bits from PTE referenced by _pvo if _pvo is currently in * the page table. Returns -1 if _pvo not currently present in the page table. */ METHOD int64_t pte_synch { mmu_t _mmu; struct pvo_entry *_pvo; }; /** * Clear bits ptebit (a mask) from the low word of the PTE referenced by * _pvo. Return previous values of ref/changed bits or -1 if _pvo is not * currently in the page table. */ METHOD int64_t pte_clear { mmu_t _mmu; struct pvo_entry *_pvo; uint64_t _ptebit; }; /** * Invalidate the PTE referenced by _pvo, returning its ref/changed bits. * Returns -1 if PTE not currently present in page table. */ METHOD int64_t pte_unset { mmu_t _mmu; struct pvo_entry *_pvo; }; /** * Update the reference PTE to correspond to the contents of _pvo. Has the * same ref/changed semantics as pte_unset() (and should clear R/C bits). May * change the PVO's location in the page table or return with it unmapped if * PVO_WIRED is not set. By default, does unset() followed by insert(). * * _flags is a bitmask describing what level of page invalidation should occur: * 0 means no invalidation is required * MOEA64_PTE_PROT_UPDATE signifies that the page protection bits are changing * MOEA64_PTE_INVALIDATE requires an invalidation of the same strength as * pte_unset() followed by pte_insert() */ METHOD int64_t pte_replace { mmu_t _mmu; struct pvo_entry *_pvo; int _flags; } DEFAULT moea64_pte_replace_default; /** * Insert a PTE corresponding to _pvo into the page table, returning any errors * encountered and (optionally) setting the PVO slot value to some * representation of where the entry was placed. * * Must not replace PTEs marked LPTE_WIRED. If an existing valid PTE is spilled, * must synchronize ref/changed bits as in pte_unset(). */ METHOD int pte_insert { mmu_t _mmu; struct pvo_entry *_pvo; }; Index: projects/clang1000-import/sys/powerpc/aim/trap_subr32.S =================================================================== --- projects/clang1000-import/sys/powerpc/aim/trap_subr32.S (revision 356919) +++ projects/clang1000-import/sys/powerpc/aim/trap_subr32.S (revision 356920) @@ -1,931 +1,931 @@ /* $FreeBSD$ */ /* $NetBSD: trap_subr.S,v 1.20 2002/04/22 23:20:08 kleink Exp $ */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * NOTICE: This is not a standalone file. to use it, #include it in * your port's locore.S, like so: * * #include */ /* * Save/restore segment registers */ #define RESTORE_SRS(pmap,sr) mtsr 0,sr; \ lwz sr,1*4(pmap); mtsr 1,sr; \ lwz sr,2*4(pmap); mtsr 2,sr; \ lwz sr,3*4(pmap); mtsr 3,sr; \ lwz sr,4*4(pmap); mtsr 4,sr; \ lwz sr,5*4(pmap); mtsr 5,sr; \ lwz sr,6*4(pmap); mtsr 6,sr; \ lwz sr,7*4(pmap); mtsr 7,sr; \ lwz sr,8*4(pmap); mtsr 8,sr; \ lwz sr,9*4(pmap); mtsr 9,sr; \ lwz sr,10*4(pmap); mtsr 10,sr; \ lwz sr,11*4(pmap); mtsr 11,sr; \ /* Skip segment 12 (USER_SR), which is restored differently */ \ lwz sr,13*4(pmap); mtsr 13,sr; \ lwz sr,14*4(pmap); mtsr 14,sr; \ lwz sr,15*4(pmap); mtsr 15,sr; isync; /* * User SRs are loaded through a pointer to the current pmap. */ #define RESTORE_USER_SRS(pmap,sr) \ GET_CPUINFO(pmap); \ lwz pmap,PC_CURPMAP(pmap); \ lwzu sr,PM_SR(pmap); \ RESTORE_SRS(pmap,sr) \ /* Restore SR 12 */ \ lwz sr,12*4(pmap); mtsr 12,sr; isync /* * Kernel SRs are loaded directly from kernel_pmap_ */ #define RESTORE_KERN_SRS(pmap,sr) \ lwz pmap,TRAP_TOCBASE(0); \ lwz pmap,CNAME(kernel_pmap_store)@got(pmap); \ lwzu sr,PM_SR(pmap); \ RESTORE_SRS(pmap,sr) /* * FRAME_SETUP assumes: * SPRG1 SP (1) * SPRG3 trap type * savearea r28-r31,DAR,DSISR (DAR & DSISR only for DSI traps) * r28 LR * r29 CR * r30 scratch * r31 scratch * r1 kernel stack * SRR0/1 as at start of trap */ #define FRAME_SETUP(savearea) \ /* Have to enable translation to allow access of kernel stack: */ \ GET_CPUINFO(%r31); \ mfsrr0 %r30; \ stw %r30,(savearea+CPUSAVE_SRR0)(%r31); /* save SRR0 */ \ mfsrr1 %r30; \ stw %r30,(savearea+CPUSAVE_SRR1)(%r31); /* save SRR1 */ \ mfmsr %r30; \ ori %r30,%r30,(PSL_DR|PSL_IR|PSL_RI)@l; /* relocation on */ \ mtmsr %r30; /* stack can now be accessed */ \ isync; \ mfsprg1 %r31; /* get saved SP */ \ stwu %r31,-FRAMELEN(%r1); /* save it in the callframe */ \ stw %r0, FRAME_0+8(%r1); /* save r0 in the trapframe */ \ stw %r31,FRAME_1+8(%r1); /* save SP " " */ \ stw %r2, FRAME_2+8(%r1); /* save r2 " " */ \ stw %r28,FRAME_LR+8(%r1); /* save LR " " */ \ stw %r29,FRAME_CR+8(%r1); /* save CR " " */ \ GET_CPUINFO(%r2); \ lwz %r28,(savearea+CPUSAVE_R28)(%r2); /* get saved r28 */ \ lwz %r29,(savearea+CPUSAVE_R29)(%r2); /* get saved r29 */ \ lwz %r30,(savearea+CPUSAVE_R30)(%r2); /* get saved r30 */ \ lwz %r31,(savearea+CPUSAVE_R31)(%r2); /* get saved r31 */ \ stw %r3, FRAME_3+8(%r1); /* save r3-r31 */ \ stw %r4, FRAME_4+8(%r1); \ stw %r5, FRAME_5+8(%r1); \ stw %r6, FRAME_6+8(%r1); \ stw %r7, FRAME_7+8(%r1); \ stw %r8, FRAME_8+8(%r1); \ stw %r9, FRAME_9+8(%r1); \ stw %r10, FRAME_10+8(%r1); \ stw %r11, FRAME_11+8(%r1); \ stw %r12, FRAME_12+8(%r1); \ stw %r13, FRAME_13+8(%r1); \ stw %r14, FRAME_14+8(%r1); \ stw %r15, FRAME_15+8(%r1); \ stw %r16, FRAME_16+8(%r1); \ stw %r17, FRAME_17+8(%r1); \ stw %r18, FRAME_18+8(%r1); \ stw %r19, FRAME_19+8(%r1); \ stw %r20, FRAME_20+8(%r1); \ stw %r21, FRAME_21+8(%r1); \ stw %r22, FRAME_22+8(%r1); \ stw %r23, FRAME_23+8(%r1); \ stw %r24, FRAME_24+8(%r1); \ stw %r25, FRAME_25+8(%r1); \ stw %r26, FRAME_26+8(%r1); \ stw %r27, FRAME_27+8(%r1); \ stw %r28, FRAME_28+8(%r1); \ stw %r29, FRAME_29+8(%r1); \ stw %r30, FRAME_30+8(%r1); \ stw %r31, FRAME_31+8(%r1); \ lwz %r28,(savearea+CPUSAVE_AIM_DAR)(%r2); /* saved DAR */ \ lwz %r29,(savearea+CPUSAVE_AIM_DSISR)(%r2);/* saved DSISR */\ lwz %r30,(savearea+CPUSAVE_SRR0)(%r2); /* saved SRR0 */ \ lwz %r31,(savearea+CPUSAVE_SRR1)(%r2); /* saved SRR1 */ \ mfxer %r3; \ mfctr %r4; \ mfsprg3 %r5; \ stw %r3, FRAME_XER+8(1); /* save xer/ctr/exc */ \ stw %r4, FRAME_CTR+8(1); \ stw %r5, FRAME_EXC+8(1); \ stw %r28,FRAME_AIM_DAR+8(1); \ stw %r29,FRAME_AIM_DSISR+8(1); /* save dsisr/srr0/srr1 */ \ stw %r30,FRAME_SRR0+8(1); \ stw %r31,FRAME_SRR1+8(1); \ lwz %r2,PC_CURTHREAD(%r2) /* set curthread pointer */ #define FRAME_LEAVE(savearea) \ /* Disable exceptions: */ \ mfmsr %r2; \ andi. %r2,%r2,~PSL_EE@l; \ mtmsr %r2; \ isync; \ /* Now restore regs: */ \ lwz %r2,FRAME_SRR0+8(%r1); \ lwz %r3,FRAME_SRR1+8(%r1); \ lwz %r4,FRAME_CTR+8(%r1); \ lwz %r5,FRAME_XER+8(%r1); \ lwz %r6,FRAME_LR+8(%r1); \ GET_CPUINFO(%r7); \ stw %r2,(savearea+CPUSAVE_SRR0)(%r7); /* save SRR0 */ \ stw %r3,(savearea+CPUSAVE_SRR1)(%r7); /* save SRR1 */ \ lwz %r7,FRAME_CR+8(%r1); \ mtctr %r4; \ mtxer %r5; \ mtlr %r6; \ mtsprg1 %r7; /* save cr */ \ lwz %r31,FRAME_31+8(%r1); /* restore r0-31 */ \ lwz %r30,FRAME_30+8(%r1); \ lwz %r29,FRAME_29+8(%r1); \ lwz %r28,FRAME_28+8(%r1); \ lwz %r27,FRAME_27+8(%r1); \ lwz %r26,FRAME_26+8(%r1); \ lwz %r25,FRAME_25+8(%r1); \ lwz %r24,FRAME_24+8(%r1); \ lwz %r23,FRAME_23+8(%r1); \ lwz %r22,FRAME_22+8(%r1); \ lwz %r21,FRAME_21+8(%r1); \ lwz %r20,FRAME_20+8(%r1); \ lwz %r19,FRAME_19+8(%r1); \ lwz %r18,FRAME_18+8(%r1); \ lwz %r17,FRAME_17+8(%r1); \ lwz %r16,FRAME_16+8(%r1); \ lwz %r15,FRAME_15+8(%r1); \ lwz %r14,FRAME_14+8(%r1); \ lwz %r13,FRAME_13+8(%r1); \ lwz %r12,FRAME_12+8(%r1); \ lwz %r11,FRAME_11+8(%r1); \ lwz %r10,FRAME_10+8(%r1); \ lwz %r9, FRAME_9+8(%r1); \ lwz %r8, FRAME_8+8(%r1); \ lwz %r7, FRAME_7+8(%r1); \ lwz %r6, FRAME_6+8(%r1); \ lwz %r5, FRAME_5+8(%r1); \ lwz %r4, FRAME_4+8(%r1); \ lwz %r3, FRAME_3+8(%r1); \ lwz %r2, FRAME_2+8(%r1); \ lwz %r0, FRAME_0+8(%r1); \ lwz %r1, FRAME_1+8(%r1); \ /* Can't touch %r1 from here on */ \ mtsprg2 %r2; /* save r2 & r3 */ \ mtsprg3 %r3; \ /* Disable translation, machine check and recoverability: */ \ mfmsr %r2; \ andi. %r2,%r2,~(PSL_DR|PSL_IR|PSL_ME|PSL_RI)@l; \ mtmsr %r2; \ isync; \ /* Decide whether we return to user mode: */ \ GET_CPUINFO(%r2); \ lwz %r3,(savearea+CPUSAVE_SRR1)(%r2); \ mtcr %r3; \ bf 17,1f; /* branch if PSL_PR is false */ \ /* Restore user SRs */ \ RESTORE_USER_SRS(%r2,%r3); \ 1: mfsprg1 %r2; /* restore cr */ \ mtcr %r2; \ GET_CPUINFO(%r2); \ lwz %r3,(savearea+CPUSAVE_SRR0)(%r2); /* restore srr0 */ \ mtsrr0 %r3; \ lwz %r3,(savearea+CPUSAVE_SRR1)(%r2); /* restore srr1 */ \ \ /* Make sure HV bit of MSR propagated to SRR1 */ \ mfmsr %r2; \ or %r3,%r2,%r3; \ \ mtsrr1 %r3; \ mfsprg2 %r2; /* restore r2 & r3 */ \ mfsprg3 %r3 #ifdef KDTRACE_HOOKS .data .globl dtrace_invop_calltrap_addr .align 4 .type dtrace_invop_calltrap_addr, @object .size dtrace_invop_calltrap_addr, 4 dtrace_invop_calltrap_addr: .word 0 .word 0 .text #endif /* * The next two routines are 64-bit glue code. The first is used to test if * we are on a 64-bit system. By copying it to the illegal instruction * handler, we can test for 64-bit mode by trying to execute a 64-bit * instruction and seeing what happens. The second gets copied in front * of all the other handlers to restore 32-bit bridge mode when traps * are taken. */ /* 64-bit test code. Sets SPRG2 to 0 if an illegal instruction is executed */ .globl CNAME(testppc64),CNAME(testppc64size) CNAME(testppc64): mtsprg1 %r31 mfsrr0 %r31 addi %r31, %r31, 4 mtsrr0 %r31 li %r31, 0 mtsprg2 %r31 mfsprg1 %r31 rfi CNAME(testppc64size) = .-CNAME(testppc64) /* 64-bit bridge mode restore snippet. Gets copied in front of everything else * on 64-bit systems. */ .globl CNAME(restorebridge),CNAME(restorebridgesize) CNAME(restorebridge): mtsprg1 %r31 mfmsr %r31 clrldi %r31,%r31,1 mtmsrd %r31 mfsprg1 %r31 isync CNAME(restorebridgesize) = .-CNAME(restorebridge) /* * Processor reset exception handler. These are typically * the first instructions the processor executes after a * software reset. We do this in two bits so that we are * not still hanging around in the trap handling region * once the MMU is turned on. */ .globl CNAME(rstcode), CNAME(rstcodeend) CNAME(rstcode): lwz %r31, TRAP_GENTRAP(0) addi %r31, %r31, (cpu_reset - generictrap) mtlr %r31 blrl CNAME(rstcodeend): cpu_reset: bl 1f .space 124 1: mflr %r1 addi %r1,%r1,(124-16)@l bl CNAME(cpudep_ap_early_bootstrap) lis %r3,1@l bl CNAME(pmap_cpu_bootstrap) bl CNAME(cpudep_ap_bootstrap) mr %r1,%r3 bl CNAME(cpudep_ap_setup) GET_CPUINFO(%r5) lwz %r3,(PC_RESTORE)(%r5) cmplwi %cr0,%r3,0 beq %cr0,2f li %r4, 1 b CNAME(longjmp) 2: #ifdef SMP bl CNAME(machdep_ap_bootstrap) #endif /* Should not be reached */ 9: b 9b /* * This code gets copied to all the trap vectors * (except ISI/DSI, ALI, and the interrupts) */ .globl CNAME(trapcode),CNAME(trapcodeend) CNAME(trapcode): mtsprg1 %r1 /* save SP */ mflr %r1 /* Save the old LR in r1 */ mtsprg2 %r1 /* And then in SPRG2 */ - lwz %r1, TRAP_GENTRAP(0) /* Get branch address */ + lwz %r1, TRAP_ENTRY(0) /* Get branch address */ mtlr %r1 li %r1, 0xe0 /* How to get the vector from LR */ blrl /* LR & (0xff00 | r1) is exception # */ CNAME(trapcodeend): /* * For ALI: has to save DSISR and DAR */ .globl CNAME(alitrap),CNAME(aliend) CNAME(alitrap): mtsprg1 %r1 /* save SP */ GET_CPUINFO(%r1) stw %r28,(PC_TEMPSAVE+CPUSAVE_R28)(%r1) /* free r28-r31 */ stw %r29,(PC_TEMPSAVE+CPUSAVE_R29)(%r1) stw %r30,(PC_TEMPSAVE+CPUSAVE_R30)(%r1) stw %r31,(PC_TEMPSAVE+CPUSAVE_R31)(%r1) mfdar %r30 mfdsisr %r31 stw %r30,(PC_TEMPSAVE+CPUSAVE_AIM_DAR)(%r1) stw %r31,(PC_TEMPSAVE+CPUSAVE_AIM_DSISR)(%r1) mfsprg1 %r1 /* restore SP, in case of branch */ mflr %r28 /* save LR */ mfcr %r29 /* save CR */ /* Put our exception vector in SPRG3 */ li %r31, EXC_ALI mtsprg3 %r31 /* Test whether we already had PR set */ mfsrr1 %r31 mtcr %r31 /* Jump to s_trap */ lwz %r31, TRAP_GENTRAP(0) addi %r31, %r31, (s_trap - generictrap) mtlr %r31 blrl CNAME(aliend): /* * G2 specific: instuction TLB miss. */ .globl CNAME(imisstrap),CNAME(imisssize) CNAME(imisstrap): mfspr %r2, SPR_HASH1 /* get first pointer */ addi %r1, 0, 8 /* load 8 for counter */ mfctr %r0 /* save counter */ mfspr %r3, SPR_ICMP /* get first compare value */ addi %r2, %r2, -8 /* pre dec the pointer */ im0: mtctr %r1 /* load counter */ im1: lwzu %r1, 8(%r2) /* get next pte */ cmp 0, 0, %r1, %r3 /* see if found pte */ bdnzf 2, im1 /* dec count br if cmp ne and if * count not zero */ bne instr_sec_hash /* if not found set up second hash * or exit */ lwz %r1, +4(%r2) /* load tlb entry lower-word */ andi. %r3, %r1, 8 /* check G bit */ bne do_isi_prot /* if guarded, take an ISI */ mtctr %r0 /* restore counter */ mfspr %r0, SPR_IMISS /* get the miss address for the tlbli */ mfspr %r3, SPR_SRR1 /* get the saved cr0 bits */ mtcrf 0x80, %r3 /* restore CR0 */ mtspr SPR_RPA, %r1 /* set the pte */ ori %r1, %r1, 0x100 /* set reference bit */ srwi %r1, %r1, 8 /* get byte 7 of pte */ tlbli %r0 /* load the itlb */ stb %r1, +6(%r2) /* update page table */ rfi /* return to executing program */ instr_sec_hash: andi. %r1, %r3, 0x0040 /* see if we have done second hash */ bne do_isi /* if so, go to ISI interrupt */ mfspr %r2, SPR_HASH2 /* get the second pointer */ ori %r3, %r3, 0x0040 /* change the compare value */ addi %r1, %r0, 8 /* load 8 for counter */ addi %r2, %r2, -8 /* pre dec for update on load */ b im0 /* try second hash */ /* Create a faked ISI interrupt as the address was not found */ do_isi_prot: mfspr %r3, SPR_SRR1 /* get srr1 */ andi. %r2, %r3, 0xffff /* clean upper srr1 */ addis %r2, %r2, 0x0800 /* or in srr<4> = 1 to flag prot * violation */ b isi1 do_isi: mfspr %r3, SPR_SRR1 /* get srr1 */ andi. %r2, %r3, 0xffff /* clean srr1 */ addis %r2, %r2, 0x4000 /* or in srr1<1> = 1 to flag pte * not found */ isi1: mtctr %r0 /* restore counter */ mtspr SPR_SRR1, %r2 /* set srr1 */ mfmsr %r0 /* get msr */ xoris %r0, %r0, 0x2 /* flip the msr bit */ mtcrf 0x80, %r3 /* restore CR0 */ mtmsr %r0 /* flip back to the native gprs */ ba EXC_ISI /* go to instr. access interrupt */ CNAME(imisssize) = .-CNAME(imisstrap) /* * G2 specific: data load TLB miss. */ .globl CNAME(dlmisstrap),CNAME(dlmisssize) CNAME(dlmisstrap): mfspr %r2, SPR_HASH1 /* get first pointer */ addi %r1, 0, 8 /* load 8 for counter */ mfctr %r0 /* save counter */ mfspr %r3, SPR_DCMP /* get first compare value */ addi %r2, %r2, -8 /* pre dec the pointer */ dm0: mtctr %r1 /* load counter */ dm1: lwzu %r1, 8(%r2) /* get next pte */ cmp 0, 0, %r1, %r3 /* see if found pte */ bdnzf 2, dm1 /* dec count br if cmp ne and if * count not zero */ bne data_sec_hash /* if not found set up second hash * or exit */ lwz %r1, +4(%r2) /* load tlb entry lower-word */ mtctr %r0 /* restore counter */ mfspr %r0, SPR_DMISS /* get the miss address for the tlbld */ mfspr %r3, SPR_SRR1 /* get the saved cr0 bits */ mtcrf 0x80, %r3 /* restore CR0 */ mtspr SPR_RPA, %r1 /* set the pte */ ori %r1, %r1, 0x100 /* set reference bit */ srwi %r1, %r1, 8 /* get byte 7 of pte */ tlbld %r0 /* load the dtlb */ stb %r1, +6(%r2) /* update page table */ rfi /* return to executing program */ data_sec_hash: andi. %r1, %r3, 0x0040 /* see if we have done second hash */ bne do_dsi /* if so, go to DSI interrupt */ mfspr %r2, SPR_HASH2 /* get the second pointer */ ori %r3, %r3, 0x0040 /* change the compare value */ addi %r1, 0, 8 /* load 8 for counter */ addi %r2, %r2, -8 /* pre dec for update on load */ b dm0 /* try second hash */ CNAME(dlmisssize) = .-CNAME(dlmisstrap) /* * G2 specific: data store TLB miss. */ .globl CNAME(dsmisstrap),CNAME(dsmisssize) CNAME(dsmisstrap): mfspr %r2, SPR_HASH1 /* get first pointer */ addi %r1, 0, 8 /* load 8 for counter */ mfctr %r0 /* save counter */ mfspr %r3, SPR_DCMP /* get first compare value */ addi %r2, %r2, -8 /* pre dec the pointer */ ds0: mtctr %r1 /* load counter */ ds1: lwzu %r1, 8(%r2) /* get next pte */ cmp 0, 0, %r1, %r3 /* see if found pte */ bdnzf 2, ds1 /* dec count br if cmp ne and if * count not zero */ bne data_store_sec_hash /* if not found set up second hash * or exit */ lwz %r1, +4(%r2) /* load tlb entry lower-word */ andi. %r3, %r1, 0x80 /* check the C-bit */ beq data_store_chk_prot /* if (C==0) * go check protection modes */ ds2: mtctr %r0 /* restore counter */ mfspr %r0, SPR_DMISS /* get the miss address for the tlbld */ mfspr %r3, SPR_SRR1 /* get the saved cr0 bits */ mtcrf 0x80, %r3 /* restore CR0 */ mtspr SPR_RPA, %r1 /* set the pte */ tlbld %r0 /* load the dtlb */ rfi /* return to executing program */ data_store_sec_hash: andi. %r1, %r3, 0x0040 /* see if we have done second hash */ bne do_dsi /* if so, go to DSI interrupt */ mfspr %r2, SPR_HASH2 /* get the second pointer */ ori %r3, %r3, 0x0040 /* change the compare value */ addi %r1, 0, 8 /* load 8 for counter */ addi %r2, %r2, -8 /* pre dec for update on load */ b ds0 /* try second hash */ /* Check the protection before setting PTE(c-bit) */ data_store_chk_prot: rlwinm. %r3,%r1,30,0,1 /* test PP */ bge- chk0 /* if (PP == 00 or PP == 01) * goto chk0: */ andi. %r3, %r1, 1 /* test PP[0] */ beq+ chk2 /* return if PP[0] == 0 */ b do_dsi_prot /* else DSIp */ chk0: mfspr %r3,SPR_SRR1 /* get old msr */ andis. %r3,%r3,0x0008 /* test the KEY bit (SRR1-bit 12) */ beq chk2 /* if (KEY==0) goto chk2: */ b do_dsi_prot /* else do_dsi_prot */ chk2: ori %r1, %r1, 0x180 /* set reference and change bit */ sth %r1, 6(%r2) /* update page table */ b ds2 /* and back we go */ /* Create a faked DSI interrupt as the address was not found */ do_dsi: mfspr %r3, SPR_SRR1 /* get srr1 */ rlwinm %r1,%r3,9,6,6 /* get srr1 to bit 6 for * load/store, zero rest */ addis %r1, %r1, 0x4000 /* or in dsisr<1> = 1 to flag pte * not found */ b dsi1 do_dsi_prot: mfspr %r3, SPR_SRR1 /* get srr1 */ rlwinm %r1,%r3,9,6,6 /* get srr1 to bit 6 for *load/store, zero rest */ addis %r1, %r1, 0x0800 /* or in dsisr<4> = 1 to flag prot * violation */ dsi1: mtctr %r0 /* restore counter */ andi. %r2, %r3, 0xffff /* clear upper bits of srr1 */ mtspr SPR_SRR1, %r2 /* set srr1 */ mtspr SPR_DSISR, %r1 /* load the dsisr */ mfspr %r1, SPR_DMISS /* get miss address */ rlwinm. %r2,%r2,0,31,31 /* test LE bit */ beq dsi2 /* if little endian then: */ xor %r1, %r1, 0x07 /* de-mung the data address */ dsi2: mtspr SPR_DAR, %r1 /* put in dar */ mfmsr %r0 /* get msr */ xoris %r0, %r0, 0x2 /* flip the msr bit */ mtcrf 0x80, %r3 /* restore CR0 */ mtmsr %r0 /* flip back to the native gprs */ ba EXC_DSI /* branch to DSI interrupt */ CNAME(dsmisssize) = .-CNAME(dsmisstrap) /* * Similar to the above for DSI * Has to handle BAT spills * and standard pagetable spills */ .globl CNAME(dsitrap),CNAME(dsiend) CNAME(dsitrap): mtsprg1 %r1 /* save SP */ GET_CPUINFO(%r1) stw %r28,(PC_DISISAVE+CPUSAVE_R28)(%r1) /* free r28-r31 */ stw %r29,(PC_DISISAVE+CPUSAVE_R29)(%r1) stw %r30,(PC_DISISAVE+CPUSAVE_R30)(%r1) stw %r31,(PC_DISISAVE+CPUSAVE_R31)(%r1) mfsprg1 %r1 /* restore SP */ mfcr %r29 /* save CR */ mfxer %r30 /* save XER */ mtsprg2 %r30 /* in SPRG2 */ mfsrr1 %r31 /* test kernel mode */ mtcr %r31 bt 17,1f /* branch if PSL_PR is set */ mfdar %r31 /* get fault address */ rlwinm %r31,%r31,7,25,28 /* get segment * 8 */ /* get batu */ lwz %r30,TRAP_TOCBASE(0) lwz %r30,CNAME(battable)@got(%r30) add %r31,%r30,%r31 lwz %r30,0(%r31) mtcr %r30 bf 30,1f /* branch if supervisor valid is false */ /* get batl */ lwz %r31,4(%r31) /* We randomly use the highest two bat registers here */ mftb %r28 andi. %r28,%r28,1 bne 2f mtdbatu 2,%r30 mtdbatl 2,%r31 b 3f 2: mtdbatu 3,%r30 mtdbatl 3,%r31 3: mfsprg2 %r30 /* restore XER */ mtxer %r30 mtcr %r29 /* restore CR */ mtsprg1 %r1 GET_CPUINFO(%r1) lwz %r28,(PC_DISISAVE+CPUSAVE_R28)(%r1) /* restore r28-r31 */ lwz %r29,(PC_DISISAVE+CPUSAVE_R29)(%r1) lwz %r30,(PC_DISISAVE+CPUSAVE_R30)(%r1) lwz %r31,(PC_DISISAVE+CPUSAVE_R31)(%r1) mfsprg1 %r1 rfi /* return to trapped code */ 1: mflr %r28 /* save LR (SP already saved) */ /* Jump to disitrap */ lwz %r1, TRAP_GENTRAP(0) addi %r1, %r1, (disitrap - generictrap) mtlr %r1 blrl CNAME(dsiend): /* * Preamble code for DSI/ISI traps */ disitrap: /* Write the trap vector to SPRG3 by computing LR & 0xff00 */ mflr %r1 andi. %r1,%r1,0xff00 mtsprg3 %r1 GET_CPUINFO(%r1) lwz %r30,(PC_DISISAVE+CPUSAVE_R28)(%r1) stw %r30,(PC_TEMPSAVE+CPUSAVE_R28)(%r1) lwz %r31,(PC_DISISAVE+CPUSAVE_R29)(%r1) stw %r31,(PC_TEMPSAVE+CPUSAVE_R29)(%r1) lwz %r30,(PC_DISISAVE+CPUSAVE_R30)(%r1) stw %r30,(PC_TEMPSAVE+CPUSAVE_R30)(%r1) lwz %r31,(PC_DISISAVE+CPUSAVE_R31)(%r1) stw %r31,(PC_TEMPSAVE+CPUSAVE_R31)(%r1) mfdar %r30 mfdsisr %r31 stw %r30,(PC_TEMPSAVE+CPUSAVE_AIM_DAR)(%r1) stw %r31,(PC_TEMPSAVE+CPUSAVE_AIM_DSISR)(%r1) #ifdef KDB /* Try to detect a kernel stack overflow */ mfsrr1 %r31 mtcr %r31 bt 17,realtrap /* branch is user mode */ mfsprg1 %r31 /* get old SP */ clrrwi %r31,%r31,12 /* Round SP down to nearest page */ sub. %r30,%r31,%r30 /* SP - DAR */ bge 1f neg %r30,%r30 /* modulo value */ 1: cmplwi %cr0,%r30,4096 /* is DAR within a page of SP? */ bge %cr0,realtrap /* no, too far away. */ /* Now convert this DSI into a DDB trap. */ GET_CPUINFO(%r1) lwz %r30,(PC_TEMPSAVE+CPUSAVE_AIM_DAR)(%r1) /* get DAR */ stw %r30,(PC_DBSAVE +CPUSAVE_AIM_DAR)(%r1) /* save DAR */ lwz %r31,(PC_TEMPSAVE+CPUSAVE_AIM_DSISR)(%r1) /* get DSISR */ stw %r31,(PC_DBSAVE +CPUSAVE_AIM_DSISR)(%r1) /* save DSISR */ lwz %r30,(PC_DISISAVE+CPUSAVE_R28)(%r1) /* get r28 */ stw %r30,(PC_DBSAVE +CPUSAVE_R28)(%r1) /* save r28 */ lwz %r31,(PC_DISISAVE+CPUSAVE_R29)(%r1) /* get r29 */ stw %r31,(PC_DBSAVE +CPUSAVE_R29)(%r1) /* save r29 */ lwz %r30,(PC_DISISAVE+CPUSAVE_R30)(%r1) /* get r30 */ stw %r30,(PC_DBSAVE +CPUSAVE_R30)(%r1) /* save r30 */ lwz %r31,(PC_DISISAVE+CPUSAVE_R31)(%r1) /* get r31 */ stw %r31,(PC_DBSAVE +CPUSAVE_R31)(%r1) /* save r31 */ b dbtrap #endif /* XXX need stack probe here */ realtrap: /* Test whether we already had PR set */ mfsrr1 %r1 mtcr %r1 mfsprg1 %r1 /* restore SP (might have been overwritten) */ bf 17,k_trap /* branch if PSL_PR is false */ GET_CPUINFO(%r1) lwz %r1,PC_CURPCB(%r1) RESTORE_KERN_SRS(%r30,%r31) /* enable kernel mapping */ b s_trap /* * generictrap does some standard setup for trap handling to minimize * the code that need be installed in the actual vectors. It expects * the following conditions. * * R1 - Trap vector = LR & (0xff00 | R1) * SPRG1 - Original R1 contents * SPRG2 - Original LR */ .globl CNAME(generictrap64) generictrap64: mtsprg3 %r31 mfmsr %r31 clrldi %r31,%r31,1 mtmsrd %r31 mfsprg3 %r31 isync .globl CNAME(generictrap) generictrap: /* Save R1 for computing the exception vector */ mtsprg3 %r1 /* Save interesting registers */ GET_CPUINFO(%r1) stw %r28,(PC_TEMPSAVE+CPUSAVE_R28)(%r1) /* free r28-r31 */ stw %r29,(PC_TEMPSAVE+CPUSAVE_R29)(%r1) stw %r30,(PC_TEMPSAVE+CPUSAVE_R30)(%r1) stw %r31,(PC_TEMPSAVE+CPUSAVE_R31)(%r1) mfsprg1 %r1 /* restore SP, in case of branch */ mfsprg2 %r28 /* save LR */ mfcr %r29 /* save CR */ /* Compute the exception vector from the link register */ mfsprg3 %r31 ori %r31,%r31,0xff00 mflr %r30 and %r30,%r30,%r31 mtsprg3 %r30 /* Test whether we already had PR set */ mfsrr1 %r31 mtcr %r31 s_trap: bf 17,k_trap /* branch if PSL_PR is false */ GET_CPUINFO(%r1) u_trap: lwz %r1,PC_CURPCB(%r1) RESTORE_KERN_SRS(%r30,%r31) /* enable kernel mapping */ /* * Now the common trap catching code. */ k_trap: FRAME_SETUP(PC_TEMPSAVE) /* Restore USER_SR */ GET_CPUINFO(%r30) lwz %r30,PC_CURPCB(%r30) lwz %r30,PCB_AIM_USR_VSID(%r30) mtsr USER_SR,%r30; sync; isync /* Call C interrupt dispatcher: */ trapagain: addi %r3,%r1,8 bl CNAME(powerpc_interrupt) .globl CNAME(trapexit) /* backtrace code sentinel */ CNAME(trapexit): /* Disable interrupts: */ mfmsr %r3 andi. %r3,%r3,~PSL_EE@l mtmsr %r3 isync /* Test AST pending: */ lwz %r5,FRAME_SRR1+8(%r1) mtcr %r5 bf 17,1f /* branch if PSL_PR is false */ GET_CPUINFO(%r3) /* get per-CPU pointer */ lwz %r4, TD_FLAGS(%r2) /* get thread flags value * (r2 is curthread) */ lis %r5, (TDF_ASTPENDING|TDF_NEEDRESCHED)@h ori %r5,%r5, (TDF_ASTPENDING|TDF_NEEDRESCHED)@l and. %r4,%r4,%r5 beq 1f mfmsr %r3 /* re-enable interrupts */ ori %r3,%r3,PSL_EE@l mtmsr %r3 isync addi %r3,%r1,8 bl CNAME(ast) .globl CNAME(asttrapexit) /* backtrace code sentinel #2 */ CNAME(asttrapexit): b trapexit /* test ast ret value ? */ 1: FRAME_LEAVE(PC_TEMPSAVE) .globl CNAME(rfi_patch1) /* replace rfi with rfid on ppc64 */ CNAME(rfi_patch1): rfi .globl CNAME(rfid_patch) CNAME(rfid_patch): rfid #if defined(KDB) /* * Deliberate entry to dbtrap */ .globl CNAME(breakpoint) CNAME(breakpoint): mtsprg1 %r1 mfmsr %r3 mtsrr1 %r3 andi. %r3,%r3,~(PSL_EE|PSL_ME)@l mtmsr %r3 /* disable interrupts */ isync GET_CPUINFO(%r3) stw %r28,(PC_DBSAVE+CPUSAVE_R28)(%r3) stw %r29,(PC_DBSAVE+CPUSAVE_R29)(%r3) stw %r30,(PC_DBSAVE+CPUSAVE_R30)(%r3) stw %r31,(PC_DBSAVE+CPUSAVE_R31)(%r3) mflr %r28 li %r29,EXC_BPT mtlr %r29 mfcr %r29 mtsrr0 %r28 /* * Now the kdb trap catching code. */ dbtrap: /* Write the trap vector to SPRG3 by computing LR & 0xff00 */ mflr %r1 andi. %r1,%r1,0xff00 mtsprg3 %r1 lwz %r1,TRAP_TOCBASE(0) /* get new SP */ lwz %r1,trapstk@got(%r1) addi %r1,%r1,TRAPSTKSZ-16 FRAME_SETUP(PC_DBSAVE) /* Call C trap code: */ addi %r3,%r1,8 bl CNAME(db_trap_glue) or. %r3,%r3,%r3 bne dbleave /* This wasn't for KDB, so switch to real trap: */ lwz %r3,FRAME_EXC+8(%r1) /* save exception */ GET_CPUINFO(%r4) stw %r3,(PC_DBSAVE+CPUSAVE_R31)(%r4) FRAME_LEAVE(PC_DBSAVE) mtsprg1 %r1 /* prepare for entrance to realtrap */ GET_CPUINFO(%r1) stw %r28,(PC_TEMPSAVE+CPUSAVE_R28)(%r1) stw %r29,(PC_TEMPSAVE+CPUSAVE_R29)(%r1) stw %r30,(PC_TEMPSAVE+CPUSAVE_R30)(%r1) stw %r31,(PC_TEMPSAVE+CPUSAVE_R31)(%r1) mflr %r28 mfcr %r29 lwz %r31,(PC_DBSAVE+CPUSAVE_R31)(%r1) mtsprg3 %r31 /* SPRG3 was clobbered by FRAME_LEAVE */ mfsprg1 %r1 b realtrap dbleave: FRAME_LEAVE(PC_DBSAVE) .globl CNAME(rfi_patch2) /* replace rfi with rfid on ppc64 */ CNAME(rfi_patch2): rfi /* * In case of KDB we want a separate trap catcher for it */ .globl CNAME(dblow),CNAME(dbend) CNAME(dblow): mtsprg1 %r1 /* save SP */ mtsprg2 %r29 /* save r29 */ mfcr %r29 /* save CR in r29 */ mfsrr1 %r1 mtcr %r1 bf 17,1f /* branch if privileged */ /* Unprivileged case */ mtcr %r29 /* put the condition register back */ mfsprg2 %r29 /* ... and r29 */ mflr %r1 /* save LR */ mtsprg2 %r1 /* And then in SPRG2 */ - lwz %r1, TRAP_GENTRAP(0) /* Get branch address */ + lwz %r1, TRAP_ENTRY(0) /* Get branch address */ mtlr %r1 li %r1, 0 /* How to get the vector from LR */ blrl /* LR & (0xff00 | r1) is exception # */ 1: /* Privileged, so drop to KDB */ GET_CPUINFO(%r1) stw %r28,(PC_DBSAVE+CPUSAVE_R28)(%r1) /* free r28 */ mfsprg2 %r28 /* r29 holds cr... */ stw %r28,(PC_DBSAVE+CPUSAVE_R29)(%r1) /* free r29 */ stw %r30,(PC_DBSAVE+CPUSAVE_R30)(%r1) /* free r30 */ stw %r31,(PC_DBSAVE+CPUSAVE_R31)(%r1) /* free r31 */ mflr %r28 /* save LR */ /* Jump to dbtrap */ lwz %r1, TRAP_GENTRAP(0) addi %r1, %r1, (dbtrap - generictrap) mtlr %r1 blrl CNAME(dbend): #endif /* KDB */ Index: projects/clang1000-import/sys/powerpc/aim/trap_subr64.S =================================================================== --- projects/clang1000-import/sys/powerpc/aim/trap_subr64.S (revision 356919) +++ projects/clang1000-import/sys/powerpc/aim/trap_subr64.S (revision 356920) @@ -1,977 +1,977 @@ /* $FreeBSD$ */ /* $NetBSD: trap_subr.S,v 1.20 2002/04/22 23:20:08 kleink Exp $ */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * NOTICE: This is not a standalone file. to use it, #include it in * your port's locore.S, like so: * * #include */ /* Locate the per-CPU data structure */ #define GET_CPUINFO(r) \ mfsprg0 r #define GET_TOCBASE(r) \ lis r,DMAP_BASE_ADDRESS@highesta; /* To real-mode alias/dmap */ \ sldi r,r,32; \ ori r,r,TRAP_TOCBASE; /* Magic address for TOC */ \ ld r,0(r) /* * Restore SRs for a pmap * * Requires that r28-r31 be scratch, with r28 initialized to the SLB cache */ /* * User SRs are loaded through a pointer to the current pmap. */ restore_usersrs: GET_CPUINFO(%r28) ld %r28,PC_USERSLB(%r28) cmpdi %r28, 0 /* If user SLB pointer NULL, exit */ beqlr li %r29, 0 /* Set the counter to zero */ slbia slbmfee %r31,%r29 clrrdi %r31,%r31,28 slbie %r31 1: ld %r31, 0(%r28) /* Load SLB entry pointer */ cmpdi %r31, 0 /* If NULL, stop */ beqlr ld %r30, 0(%r31) /* Load SLBV */ ld %r31, 8(%r31) /* Load SLBE */ or %r31, %r31, %r29 /* Set SLBE slot */ slbmte %r30, %r31 /* Install SLB entry */ addi %r28, %r28, 8 /* Advance pointer */ addi %r29, %r29, 1 b 1b /* Repeat */ /* * Kernel SRs are loaded directly from the PCPU fields */ restore_kernsrs: GET_CPUINFO(%r28) addi %r28,%r28,PC_KERNSLB ld %r29,16(%r28) /* One past USER_SLB_SLOT */ cmpdi %r29,0 beqlr /* If first kernel entry is invalid, * SLBs not in use, so exit early */ /* Otherwise, set up SLBs */ li %r29, 0 /* Set the counter to zero */ slbia slbmfee %r31,%r29 clrrdi %r31,%r31,28 slbie %r31 1: cmpdi %r29, USER_SLB_SLOT /* Skip the user slot */ beq- 2f ld %r31, 8(%r28) /* Load SLBE */ cmpdi %r31, 0 /* If SLBE is not valid, stop */ beqlr ld %r30, 0(%r28) /* Load SLBV */ slbmte %r30, %r31 /* Install SLB entry */ 2: addi %r28, %r28, 16 /* Advance pointer */ addi %r29, %r29, 1 cmpdi %r29, 64 /* Repeat if we are not at the end */ blt 1b blr /* * FRAME_SETUP assumes: * SPRG1 SP (1) * SPRG3 trap type * savearea r27-r31,DAR,DSISR (DAR & DSISR only for DSI traps) * r28 LR * r29 CR * r30 scratch * r31 scratch * r1 kernel stack * SRR0/1 as at start of trap * * NOTE: SPRG1 is never used while the MMU is on, making it safe to reuse * in any real-mode fault handler, including those handling double faults. */ #define FRAME_SETUP(savearea) \ /* Have to enable translation to allow access of kernel stack: */ \ GET_CPUINFO(%r31); \ mfsrr0 %r30; \ std %r30,(savearea+CPUSAVE_SRR0)(%r31); /* save SRR0 */ \ mfsrr1 %r30; \ std %r30,(savearea+CPUSAVE_SRR1)(%r31); /* save SRR1 */ \ mfsprg1 %r31; /* get saved SP (clears SPRG1) */ \ mfmsr %r30; \ ori %r30,%r30,(PSL_DR|PSL_IR|PSL_RI)@l; /* relocation on */ \ mtmsr %r30; /* stack can now be accessed */ \ isync; \ stdu %r31,-(FRAMELEN+288)(%r1); /* save it in the callframe */ \ std %r0, FRAME_0+48(%r1); /* save r0 in the trapframe */ \ std %r31,FRAME_1+48(%r1); /* save SP " " */ \ std %r2, FRAME_2+48(%r1); /* save r2 " " */ \ std %r28,FRAME_LR+48(%r1); /* save LR " " */ \ std %r29,FRAME_CR+48(%r1); /* save CR " " */ \ GET_CPUINFO(%r2); \ ld %r27,(savearea+CPUSAVE_R27)(%r2); /* get saved r27 */ \ ld %r28,(savearea+CPUSAVE_R28)(%r2); /* get saved r28 */ \ ld %r29,(savearea+CPUSAVE_R29)(%r2); /* get saved r29 */ \ ld %r30,(savearea+CPUSAVE_R30)(%r2); /* get saved r30 */ \ ld %r31,(savearea+CPUSAVE_R31)(%r2); /* get saved r31 */ \ std %r3, FRAME_3+48(%r1); /* save r3-r31 */ \ std %r4, FRAME_4+48(%r1); \ std %r5, FRAME_5+48(%r1); \ std %r6, FRAME_6+48(%r1); \ std %r7, FRAME_7+48(%r1); \ std %r8, FRAME_8+48(%r1); \ std %r9, FRAME_9+48(%r1); \ std %r10, FRAME_10+48(%r1); \ std %r11, FRAME_11+48(%r1); \ std %r12, FRAME_12+48(%r1); \ std %r13, FRAME_13+48(%r1); \ std %r14, FRAME_14+48(%r1); \ std %r15, FRAME_15+48(%r1); \ std %r16, FRAME_16+48(%r1); \ std %r17, FRAME_17+48(%r1); \ std %r18, FRAME_18+48(%r1); \ std %r19, FRAME_19+48(%r1); \ std %r20, FRAME_20+48(%r1); \ std %r21, FRAME_21+48(%r1); \ std %r22, FRAME_22+48(%r1); \ std %r23, FRAME_23+48(%r1); \ std %r24, FRAME_24+48(%r1); \ std %r25, FRAME_25+48(%r1); \ std %r26, FRAME_26+48(%r1); \ std %r27, FRAME_27+48(%r1); \ std %r28, FRAME_28+48(%r1); \ std %r29, FRAME_29+48(%r1); \ std %r30, FRAME_30+48(%r1); \ std %r31, FRAME_31+48(%r1); \ ld %r28,(savearea+CPUSAVE_AIM_DAR)(%r2); /* saved DAR */ \ ld %r29,(savearea+CPUSAVE_AIM_DSISR)(%r2);/* saved DSISR */\ ld %r30,(savearea+CPUSAVE_SRR0)(%r2); /* saved SRR0 */ \ ld %r31,(savearea+CPUSAVE_SRR1)(%r2); /* saved SRR1 */ \ mfxer %r3; \ mfctr %r4; \ mfsprg3 %r5; \ std %r3, FRAME_XER+48(1); /* save xer/ctr/exc */ \ std %r4, FRAME_CTR+48(1); \ std %r5, FRAME_EXC+48(1); \ std %r28,FRAME_AIM_DAR+48(1); \ std %r29,FRAME_AIM_DSISR+48(1); /* save dsisr/srr0/srr1 */ \ std %r30,FRAME_SRR0+48(1); \ std %r31,FRAME_SRR1+48(1); \ ld %r13,PC_CURTHREAD(%r2) /* set kernel curthread */ #define FRAME_LEAVE(savearea) \ /* Disable exceptions: */ \ mfmsr %r2; \ andi. %r2,%r2,~PSL_EE@l; \ mtmsr %r2; \ isync; \ /* Now restore regs: */ \ ld %r2,FRAME_SRR0+48(%r1); \ ld %r3,FRAME_SRR1+48(%r1); \ ld %r4,FRAME_CTR+48(%r1); \ ld %r5,FRAME_XER+48(%r1); \ ld %r6,FRAME_LR+48(%r1); \ GET_CPUINFO(%r7); \ std %r2,(savearea+CPUSAVE_SRR0)(%r7); /* save SRR0 */ \ std %r3,(savearea+CPUSAVE_SRR1)(%r7); /* save SRR1 */ \ ld %r7,FRAME_CR+48(%r1); \ mtctr %r4; \ mtxer %r5; \ mtlr %r6; \ mtsprg2 %r7; /* save cr */ \ ld %r31,FRAME_31+48(%r1); /* restore r0-31 */ \ ld %r30,FRAME_30+48(%r1); \ ld %r29,FRAME_29+48(%r1); \ ld %r28,FRAME_28+48(%r1); \ ld %r27,FRAME_27+48(%r1); \ ld %r26,FRAME_26+48(%r1); \ ld %r25,FRAME_25+48(%r1); \ ld %r24,FRAME_24+48(%r1); \ ld %r23,FRAME_23+48(%r1); \ ld %r22,FRAME_22+48(%r1); \ ld %r21,FRAME_21+48(%r1); \ ld %r20,FRAME_20+48(%r1); \ ld %r19,FRAME_19+48(%r1); \ ld %r18,FRAME_18+48(%r1); \ ld %r17,FRAME_17+48(%r1); \ ld %r16,FRAME_16+48(%r1); \ ld %r15,FRAME_15+48(%r1); \ ld %r14,FRAME_14+48(%r1); \ ld %r13,FRAME_13+48(%r1); \ ld %r12,FRAME_12+48(%r1); \ ld %r11,FRAME_11+48(%r1); \ ld %r10,FRAME_10+48(%r1); \ ld %r9, FRAME_9+48(%r1); \ ld %r8, FRAME_8+48(%r1); \ ld %r7, FRAME_7+48(%r1); \ ld %r6, FRAME_6+48(%r1); \ ld %r5, FRAME_5+48(%r1); \ ld %r4, FRAME_4+48(%r1); \ ld %r3, FRAME_3+48(%r1); \ ld %r2, FRAME_2+48(%r1); \ ld %r0, FRAME_0+48(%r1); \ ld %r1, FRAME_1+48(%r1); \ /* Can't touch %r1 from here on */ \ mtsprg3 %r3; /* save r3 */ \ /* Disable translation, machine check and recoverability: */ \ mfmsr %r3; \ andi. %r3,%r3,~(PSL_DR|PSL_IR|PSL_ME|PSL_RI)@l; \ mtmsr %r3; \ isync; \ /* Decide whether we return to user mode: */ \ GET_CPUINFO(%r3); \ ld %r3,(savearea+CPUSAVE_SRR1)(%r3); \ mtcr %r3; \ bf 17,1f; /* branch if PSL_PR is false */ \ /* Restore user SRs */ \ GET_CPUINFO(%r3); \ std %r27,(savearea+CPUSAVE_R27)(%r3); \ std %r28,(savearea+CPUSAVE_R28)(%r3); \ std %r29,(savearea+CPUSAVE_R29)(%r3); \ std %r30,(savearea+CPUSAVE_R30)(%r3); \ std %r31,(savearea+CPUSAVE_R31)(%r3); \ mflr %r27; /* preserve LR */ \ bl restore_usersrs; /* uses r28-r31 */ \ mtlr %r27; \ ld %r31,(savearea+CPUSAVE_R31)(%r3); \ ld %r30,(savearea+CPUSAVE_R30)(%r3); \ ld %r29,(savearea+CPUSAVE_R29)(%r3); \ ld %r28,(savearea+CPUSAVE_R28)(%r3); \ ld %r27,(savearea+CPUSAVE_R27)(%r3); \ 1: mfsprg2 %r3; /* restore cr */ \ mtcr %r3; \ GET_CPUINFO(%r3); \ ld %r3,(savearea+CPUSAVE_SRR0)(%r3); /* restore srr0 */ \ mtsrr0 %r3; \ GET_CPUINFO(%r3); \ ld %r3,(savearea+CPUSAVE_SRR1)(%r3); /* restore srr1 */ \ mtsrr1 %r3; \ mfsprg3 %r3 /* restore r3 */ #ifdef KDTRACE_HOOKS .data .globl dtrace_invop_calltrap_addr .align 8 .type dtrace_invop_calltrap_addr, @object .size dtrace_invop_calltrap_addr, 8 dtrace_invop_calltrap_addr: .word 0 .word 0 .text #endif /* * Processor reset exception handler. These are typically * the first instructions the processor executes after a * software reset. We do this in two bits so that we are * not still hanging around in the trap handling region * once the MMU is turned on. */ .globl CNAME(rstcode), CNAME(rstcodeend), CNAME(cpu_reset_handler) .globl CNAME(cpu_wakeup_handler) .p2align 3 CNAME(rstcode): /* * Check if this is software reset or * processor is waking up from power saving mode * It is software reset when 46:47 = 0b00 */ /* 0x00 */ - ld %r2,TRAP_GENTRAP(0) /* Real-mode &generictrap */ + ld %r2,TRAP_ENTRY(0) /* Real-mode &generictrap */ mfsrr1 %r9 /* Load SRR1 into r9 */ andis. %r9,%r9,0x3 /* Logic AND with 46:47 bits */ beq 2f /* Branch if software reset */ /* 0x10 */ /* Reset was wakeup */ addi %r9,%r2,(cpu_wakeup_handler-generictrap) b 1f /* Was power save, do the wakeup */ /* Reset was software reset */ /* Explicitly set MSR[SF] */ 2: mfmsr %r9 li %r8,1 /* 0x20 */ insrdi %r9,%r8,1,0 mtmsrd %r9 isync addi %r9,%r2,(cpu_reset_handler-generictrap) /* 0x30 */ 1: mtlr %r9 blr /* Branch to either cpu_reset_handler * or cpu_wakeup_handler. */ CNAME(rstcodeend): cpu_reset_handler: GET_TOCBASE(%r2) addis %r1,%r2,TOC_REF(tmpstk)@ha ld %r1,TOC_REF(tmpstk)@l(%r1) /* get new SP */ addi %r1,%r1,(TMPSTKSZ-48) bl CNAME(cpudep_ap_early_bootstrap) /* Set PCPU */ nop lis %r3,1@l bl CNAME(pmap_cpu_bootstrap) /* Turn on virtual memory */ nop bl CNAME(cpudep_ap_bootstrap) /* Set up PCPU and stack */ nop mr %r1,%r3 /* Use new stack */ bl CNAME(cpudep_ap_setup) nop GET_CPUINFO(%r5) ld %r3,(PC_RESTORE)(%r5) cmpldi %cr0,%r3,0 beq %cr0,2f nop li %r4,1 bl CNAME(longjmp) nop 2: #ifdef SMP bl CNAME(machdep_ap_bootstrap) /* And away! */ nop #endif /* Should not be reached */ 9: b 9b cpu_wakeup_handler: GET_TOCBASE(%r2) /* Check for false wake up due to badly SRR1 set (eg. by OPAL) */ addis %r3,%r2,TOC_REF(can_wakeup)@ha ld %r3,TOC_REF(can_wakeup)@l(%r3) ld %r3,0(%r3) cmpdi %r3,0 beq cpu_reset_handler /* Turn on MMU after return from interrupt */ mfsrr1 %r3 ori %r3,%r3,(PSL_IR | PSL_DR) mtsrr1 %r3 /* Turn on MMU (needed to access PCB) */ mfmsr %r3 ori %r3,%r3,(PSL_IR | PSL_DR) mtmsr %r3 isync mfsprg0 %r3 ld %r3,PC_CURTHREAD(%r3) /* Get current thread */ ld %r3,TD_PCB(%r3) /* Get PCB of current thread */ ld %r12,PCB_CONTEXT(%r3) /* Load the non-volatile GP regs. */ ld %r13,PCB_CONTEXT+1*8(%r3) ld %r14,PCB_CONTEXT+2*8(%r3) ld %r15,PCB_CONTEXT+3*8(%r3) ld %r16,PCB_CONTEXT+4*8(%r3) ld %r17,PCB_CONTEXT+5*8(%r3) ld %r18,PCB_CONTEXT+6*8(%r3) ld %r19,PCB_CONTEXT+7*8(%r3) ld %r20,PCB_CONTEXT+8*8(%r3) ld %r21,PCB_CONTEXT+9*8(%r3) ld %r22,PCB_CONTEXT+10*8(%r3) ld %r23,PCB_CONTEXT+11*8(%r3) ld %r24,PCB_CONTEXT+12*8(%r3) ld %r25,PCB_CONTEXT+13*8(%r3) ld %r26,PCB_CONTEXT+14*8(%r3) ld %r27,PCB_CONTEXT+15*8(%r3) ld %r28,PCB_CONTEXT+16*8(%r3) ld %r29,PCB_CONTEXT+17*8(%r3) ld %r30,PCB_CONTEXT+18*8(%r3) ld %r31,PCB_CONTEXT+19*8(%r3) ld %r5,PCB_CR(%r3) /* Load the condition register */ mtcr %r5 ld %r5,PCB_LR(%r3) /* Load the link register */ mtsrr0 %r5 ld %r1,PCB_SP(%r3) /* Load the stack pointer */ ld %r2,PCB_TOC(%r3) /* Load the TOC pointer */ rfid /* * This code gets copied to all the trap vectors * (except ISI/DSI, ALI, and the interrupts). Has to fit in 8 instructions! */ .globl CNAME(trapcode),CNAME(trapcodeend) .p2align 3 CNAME(trapcode): mtsprg1 %r1 /* save SP */ mflr %r1 /* Save the old LR in r1 */ mtsprg2 %r1 /* And then in SPRG2 */ - ld %r1,TRAP_GENTRAP(0) + ld %r1,TRAP_ENTRY(0) mtlr %r1 li %r1, 0xe0 /* How to get the vector from LR */ blrl /* Branch to generictrap */ CNAME(trapcodeend): /* Same thing for traps setting HSRR0/HSRR1 */ .globl CNAME(hypertrapcode),CNAME(hypertrapcodeend) .p2align 3 CNAME(hypertrapcode): mtsprg1 %r1 /* save SP */ mflr %r1 /* Save the old LR in r1 */ mtsprg2 %r1 /* And then in SPRG2 */ ld %r1,TRAP_GENTRAP(0) addi %r1,%r1,(generichypertrap-generictrap) mtlr %r1 li %r1, 0xe0 /* How to get the vector from LR */ blrl /* Branch to generichypertrap */ CNAME(hypertrapcodeend): /* * For SLB misses: do special things for the kernel * * Note: SPRG1 is always safe to overwrite any time the MMU was on, which is * the only time this can be called. */ .globl CNAME(slbtrap),CNAME(slbtrapend) .p2align 3 CNAME(slbtrap): /* 0x00 */ mtsprg1 %r1 /* save SP */ GET_CPUINFO(%r1) std %r2,(PC_SLBSAVE+16)(%r1) /* save r2 */ mfcr %r2 /* 0x10 */ std %r2,(PC_SLBSAVE+104)(%r1) /* save CR */ mfsrr1 %r2 /* test kernel mode */ mtcr %r2 bf 17,2f /* branch if PSL_PR is false */ /* 0x20 */ /* User mode */ ld %r2,(PC_SLBSAVE+104)(%r1) mtcr %r2 /* restore CR */ ld %r2,(PC_SLBSAVE+16)(%r1) /* restore r2 */ mflr %r1 /* 0x30 */ mtsprg2 %r1 /* save LR in SPRG2 */ - ld %r1,TRAP_GENTRAP(0) /* real-mode &generictrap */ + ld %r1,TRAP_ENTRY(0) /* real-mode &generictrap */ mtlr %r1 li %r1, 0x80 /* How to get the vector from LR */ /* 0x40 */ blrl /* Branch to generictrap */ 2: mflr %r2 /* Save the old LR in r2 */ /* Kernel mode */ ld %r1,TRAP_GENTRAP(0) /* Real-mode &generictrap */ addi %r1,%r1,(kern_slbtrap-generictrap) /* 0x50 */ mtlr %r1 GET_CPUINFO(%r1) blrl /* Branch to kern_slbtrap */ /* must fit in 128 bytes! */ CNAME(slbtrapend): /* * On entry: * SPRG1: SP * r1: pcpu * r2: LR * LR: branch address in trap region */ kern_slbtrap: std %r2,(PC_SLBSAVE+136)(%r1) /* old LR */ std %r3,(PC_SLBSAVE+24)(%r1) /* save R3 */ /* Check if this needs to be handled as a regular trap (userseg miss) */ mflr %r2 andi. %r2,%r2,0xff80 cmpwi %r2,EXC_DSE bne 1f mfdar %r2 b 2f 1: mfsrr0 %r2 2: /* r2 now contains the fault address */ lis %r3,SEGMENT_MASK@highesta ori %r3,%r3,SEGMENT_MASK@highera sldi %r3,%r3,32 oris %r3,%r3,SEGMENT_MASK@ha ori %r3,%r3,SEGMENT_MASK@l and %r2,%r2,%r3 /* R2 = segment base address */ lis %r3,USER_ADDR@highesta ori %r3,%r3,USER_ADDR@highera sldi %r3,%r3,32 oris %r3,%r3,USER_ADDR@ha ori %r3,%r3,USER_ADDR@l cmpd %r2,%r3 /* Compare fault base to USER_ADDR */ bne 3f /* User seg miss, handle as a regular trap */ ld %r2,(PC_SLBSAVE+104)(%r1) /* Restore CR */ mtcr %r2 ld %r2,(PC_SLBSAVE+16)(%r1) /* Restore R2,R3 */ ld %r3,(PC_SLBSAVE+24)(%r1) ld %r1,(PC_SLBSAVE+136)(%r1) /* Save the old LR in r1 */ mtsprg2 %r1 /* And then in SPRG2 */ li %r1, 0x80 /* How to get the vector from LR */ b generictrap /* Retain old LR using b */ 3: /* Real kernel SLB miss */ std %r0,(PC_SLBSAVE+0)(%r1) /* free all volatile regs */ mfsprg1 %r2 /* Old R1 */ std %r2,(PC_SLBSAVE+8)(%r1) /* R2,R3 already saved */ std %r4,(PC_SLBSAVE+32)(%r1) std %r5,(PC_SLBSAVE+40)(%r1) std %r6,(PC_SLBSAVE+48)(%r1) std %r7,(PC_SLBSAVE+56)(%r1) std %r8,(PC_SLBSAVE+64)(%r1) std %r9,(PC_SLBSAVE+72)(%r1) std %r10,(PC_SLBSAVE+80)(%r1) std %r11,(PC_SLBSAVE+88)(%r1) std %r12,(PC_SLBSAVE+96)(%r1) /* CR already saved */ mfxer %r2 /* save XER */ std %r2,(PC_SLBSAVE+112)(%r1) mflr %r2 /* save LR (SP already saved) */ std %r2,(PC_SLBSAVE+120)(%r1) mfctr %r2 /* save CTR */ std %r2,(PC_SLBSAVE+128)(%r1) /* Call handler */ addi %r1,%r1,PC_SLBSTACK-48+1024 li %r2,~15 and %r1,%r1,%r2 GET_TOCBASE(%r2) mflr %r3 andi. %r3,%r3,0xff80 mfdar %r4 mfsrr0 %r5 bl handle_kernel_slb_spill nop /* Save r28-31, restore r4-r12 */ GET_CPUINFO(%r1) ld %r4,(PC_SLBSAVE+32)(%r1) ld %r5,(PC_SLBSAVE+40)(%r1) ld %r6,(PC_SLBSAVE+48)(%r1) ld %r7,(PC_SLBSAVE+56)(%r1) ld %r8,(PC_SLBSAVE+64)(%r1) ld %r9,(PC_SLBSAVE+72)(%r1) ld %r10,(PC_SLBSAVE+80)(%r1) ld %r11,(PC_SLBSAVE+88)(%r1) ld %r12,(PC_SLBSAVE+96)(%r1) std %r28,(PC_SLBSAVE+64)(%r1) std %r29,(PC_SLBSAVE+72)(%r1) std %r30,(PC_SLBSAVE+80)(%r1) std %r31,(PC_SLBSAVE+88)(%r1) /* Restore kernel mapping */ bl restore_kernsrs /* Restore remaining registers */ ld %r28,(PC_SLBSAVE+64)(%r1) ld %r29,(PC_SLBSAVE+72)(%r1) ld %r30,(PC_SLBSAVE+80)(%r1) ld %r31,(PC_SLBSAVE+88)(%r1) ld %r2,(PC_SLBSAVE+104)(%r1) mtcr %r2 ld %r2,(PC_SLBSAVE+112)(%r1) mtxer %r2 ld %r2,(PC_SLBSAVE+120)(%r1) mtlr %r2 ld %r2,(PC_SLBSAVE+128)(%r1) mtctr %r2 ld %r2,(PC_SLBSAVE+136)(%r1) mtlr %r2 /* Restore r0-r3 */ ld %r0,(PC_SLBSAVE+0)(%r1) ld %r2,(PC_SLBSAVE+16)(%r1) ld %r3,(PC_SLBSAVE+24)(%r1) mfsprg1 %r1 /* Back to whatever we were doing */ rfid /* * For ALI: has to save DSISR and DAR */ .globl CNAME(alitrap),CNAME(aliend) CNAME(alitrap): mtsprg1 %r1 /* save SP */ GET_CPUINFO(%r1) std %r27,(PC_TEMPSAVE+CPUSAVE_R27)(%r1) /* free r27-r31 */ std %r28,(PC_TEMPSAVE+CPUSAVE_R28)(%r1) std %r29,(PC_TEMPSAVE+CPUSAVE_R29)(%r1) std %r30,(PC_TEMPSAVE+CPUSAVE_R30)(%r1) std %r31,(PC_TEMPSAVE+CPUSAVE_R31)(%r1) mfdar %r30 mfdsisr %r31 std %r30,(PC_TEMPSAVE+CPUSAVE_AIM_DAR)(%r1) std %r31,(PC_TEMPSAVE+CPUSAVE_AIM_DSISR)(%r1) mfsprg1 %r1 /* restore SP, in case of branch */ mflr %r28 /* save LR */ mfcr %r29 /* save CR */ ld %r31,TRAP_GENTRAP(0) addi %r31,%r31,(s_trap - generictrap) mtlr %r31 /* Put our exception vector in SPRG3 */ li %r31, EXC_ALI mtsprg3 %r31 /* Test whether we already had PR set */ mfsrr1 %r31 mtcr %r31 blrl /* Branch to s_trap */ CNAME(aliend): /* * Similar to the above for DSI * Has to handle standard pagetable spills */ .globl CNAME(dsitrap),CNAME(dsiend) .p2align 3 CNAME(dsitrap): mtsprg1 %r1 /* save SP */ GET_CPUINFO(%r1) std %r27,(PC_DISISAVE+CPUSAVE_R27)(%r1) /* free r27-r31 */ std %r28,(PC_DISISAVE+CPUSAVE_R28)(%r1) std %r29,(PC_DISISAVE+CPUSAVE_R29)(%r1) std %r30,(PC_DISISAVE+CPUSAVE_R30)(%r1) std %r31,(PC_DISISAVE+CPUSAVE_R31)(%r1) mfcr %r29 /* save CR */ mfxer %r30 /* save XER */ mtsprg2 %r30 /* in SPRG2 */ mfsrr1 %r31 /* test kernel mode */ mtcr %r31 mflr %r28 /* save LR (SP already saved) */ ld %r1,TRAP_GENTRAP(0) addi %r1,%r1,(disitrap-generictrap) mtlr %r1 blrl /* Branch to disitrap */ CNAME(dsiend): /* * Preamble code for DSI/ISI traps */ disitrap: /* Write the trap vector to SPRG3 by computing LR & 0xff00 */ mflr %r1 andi. %r1,%r1,0xff00 mtsprg3 %r1 GET_CPUINFO(%r1) ld %r31,(PC_DISISAVE+CPUSAVE_R27)(%r1) std %r31,(PC_TEMPSAVE+CPUSAVE_R27)(%r1) ld %r30,(PC_DISISAVE+CPUSAVE_R28)(%r1) std %r30,(PC_TEMPSAVE+CPUSAVE_R28)(%r1) ld %r31,(PC_DISISAVE+CPUSAVE_R29)(%r1) std %r31,(PC_TEMPSAVE+CPUSAVE_R29)(%r1) ld %r30,(PC_DISISAVE+CPUSAVE_R30)(%r1) std %r30,(PC_TEMPSAVE+CPUSAVE_R30)(%r1) ld %r31,(PC_DISISAVE+CPUSAVE_R31)(%r1) std %r31,(PC_TEMPSAVE+CPUSAVE_R31)(%r1) mfdar %r30 mfdsisr %r31 std %r30,(PC_TEMPSAVE+CPUSAVE_AIM_DAR)(%r1) std %r31,(PC_TEMPSAVE+CPUSAVE_AIM_DSISR)(%r1) #ifdef KDB /* Try to detect a kernel stack overflow */ mfsrr1 %r31 mtcr %r31 bt 17,realtrap /* branch is user mode */ mfsprg1 %r31 /* get old SP */ clrrdi %r31,%r31,12 /* Round SP down to nearest page */ sub. %r30,%r31,%r30 /* SP - DAR */ bge 1f neg %r30,%r30 /* modulo value */ 1: cmpldi %cr0,%r30,4096 /* is DAR within a page of SP? */ bge %cr0,realtrap /* no, too far away. */ /* Now convert this DSI into a DDB trap. */ GET_CPUINFO(%r1) ld %r30,(PC_TEMPSAVE+CPUSAVE_AIM_DAR)(%r1) /* get DAR */ std %r30,(PC_DBSAVE +CPUSAVE_AIM_DAR)(%r1) /* save DAR */ ld %r30,(PC_TEMPSAVE+CPUSAVE_AIM_DSISR)(%r1) /* get DSISR */ std %r30,(PC_DBSAVE +CPUSAVE_AIM_DSISR)(%r1) /* save DSISR */ ld %r31,(PC_DISISAVE+CPUSAVE_R27)(%r1) /* get r27 */ std %r31,(PC_DBSAVE +CPUSAVE_R27)(%r1) /* save r27 */ ld %r30,(PC_DISISAVE+CPUSAVE_R28)(%r1) /* get r28 */ std %r30,(PC_DBSAVE +CPUSAVE_R28)(%r1) /* save r28 */ ld %r31,(PC_DISISAVE+CPUSAVE_R29)(%r1) /* get r29 */ std %r31,(PC_DBSAVE +CPUSAVE_R29)(%r1) /* save r29 */ ld %r30,(PC_DISISAVE+CPUSAVE_R30)(%r1) /* get r30 */ std %r30,(PC_DBSAVE +CPUSAVE_R30)(%r1) /* save r30 */ ld %r31,(PC_DISISAVE+CPUSAVE_R31)(%r1) /* get r31 */ std %r31,(PC_DBSAVE +CPUSAVE_R31)(%r1) /* save r31 */ b dbtrap #endif /* XXX need stack probe here */ realtrap: /* Test whether we already had PR set */ mfsrr1 %r1 mtcr %r1 mfsprg1 %r1 /* restore SP (might have been overwritten) */ bf 17,k_trap /* branch if PSL_PR is false */ GET_CPUINFO(%r1) ld %r1,PC_CURPCB(%r1) mr %r27,%r28 /* Save LR, r29 */ mtsprg2 %r29 bl restore_kernsrs /* enable kernel mapping */ mfsprg2 %r29 mr %r28,%r27 b s_trap /* * generictrap does some standard setup for trap handling to minimize * the code that need be installed in the actual vectors. It expects * the following conditions. * * R1 - Trap vector = LR & (0xff00 | R1) * SPRG1 - Original R1 contents * SPRG2 - Original LR */ generichypertrap: mtsprg3 %r1 mfspr %r1, SPR_HSRR0 mtsrr0 %r1 mfspr %r1, SPR_HSRR1 mtsrr1 %r1 mfsprg3 %r1 .globl CNAME(generictrap) generictrap: /* Save R1 for computing the exception vector */ mtsprg3 %r1 /* Save interesting registers */ GET_CPUINFO(%r1) std %r27,(PC_TEMPSAVE+CPUSAVE_R27)(%r1) /* free r27-r31 */ std %r28,(PC_TEMPSAVE+CPUSAVE_R28)(%r1) std %r29,(PC_TEMPSAVE+CPUSAVE_R29)(%r1) std %r30,(PC_TEMPSAVE+CPUSAVE_R30)(%r1) std %r31,(PC_TEMPSAVE+CPUSAVE_R31)(%r1) mfdar %r30 std %r30,(PC_TEMPSAVE+CPUSAVE_AIM_DAR)(%r1) mfsprg1 %r1 /* restore SP, in case of branch */ mfsprg2 %r28 /* save LR */ mfcr %r29 /* save CR */ /* Compute the exception vector from the link register */ mfsprg3 %r31 ori %r31,%r31,0xff00 mflr %r30 addi %r30,%r30,-4 /* The branch instruction, not the next */ and %r30,%r30,%r31 mtsprg3 %r30 /* Test whether we already had PR set */ mfsrr1 %r31 mtcr %r31 s_trap: bf 17,k_trap /* branch if PSL_PR is false */ GET_CPUINFO(%r1) u_trap: ld %r1,PC_CURPCB(%r1) mr %r27,%r28 /* Save LR, r29 */ mtsprg2 %r29 bl restore_kernsrs /* enable kernel mapping */ mfsprg2 %r29 mr %r28,%r27 /* * Now the common trap catching code. */ k_trap: FRAME_SETUP(PC_TEMPSAVE) /* Call C interrupt dispatcher: */ trapagain: GET_TOCBASE(%r2) addi %r3,%r1,48 bl CNAME(powerpc_interrupt) nop .globl CNAME(trapexit) /* backtrace code sentinel */ CNAME(trapexit): /* Disable interrupts: */ mfmsr %r3 andi. %r3,%r3,~PSL_EE@l mtmsr %r3 isync /* Test AST pending: */ ld %r5,FRAME_SRR1+48(%r1) mtcr %r5 bf 17,1f /* branch if PSL_PR is false */ GET_CPUINFO(%r3) /* get per-CPU pointer */ lwz %r4, TD_FLAGS(%r13) /* get thread flags value */ lis %r5, (TDF_ASTPENDING|TDF_NEEDRESCHED)@h ori %r5,%r5, (TDF_ASTPENDING|TDF_NEEDRESCHED)@l and. %r4,%r4,%r5 beq 1f mfmsr %r3 /* re-enable interrupts */ ori %r3,%r3,PSL_EE@l mtmsr %r3 isync GET_TOCBASE(%r2) addi %r3,%r1,48 bl CNAME(ast) nop .globl CNAME(asttrapexit) /* backtrace code sentinel #2 */ CNAME(asttrapexit): b trapexit /* test ast ret value ? */ 1: FRAME_LEAVE(PC_TEMPSAVE) rfid #if defined(KDB) /* * Deliberate entry to dbtrap */ ASENTRY_NOPROF(breakpoint) mtsprg1 %r1 mfmsr %r3 mtsrr1 %r3 andi. %r3,%r3,~(PSL_EE|PSL_ME)@l mtmsr %r3 /* disable interrupts */ isync GET_CPUINFO(%r3) std %r27,(PC_DBSAVE+CPUSAVE_R27)(%r3) std %r28,(PC_DBSAVE+CPUSAVE_R28)(%r3) std %r29,(PC_DBSAVE+CPUSAVE_R29)(%r3) std %r30,(PC_DBSAVE+CPUSAVE_R30)(%r3) std %r31,(PC_DBSAVE+CPUSAVE_R31)(%r3) mflr %r28 li %r29,EXC_BPT mtlr %r29 mfcr %r29 mtsrr0 %r28 /* * Now the kdb trap catching code. */ dbtrap: /* Write the trap vector to SPRG3 by computing LR & 0xff00 */ mflr %r1 andi. %r1,%r1,0xff00 mtsprg3 %r1 GET_TOCBASE(%r1) /* get new SP */ addis %r1,%r1,TOC_REF(trapstk)@ha ld %r1,TOC_REF(trapstk)@l(%r1) addi %r1,%r1,(TRAPSTKSZ-48) FRAME_SETUP(PC_DBSAVE) /* Call C trap code: */ GET_TOCBASE(%r2) addi %r3,%r1,48 bl CNAME(db_trap_glue) nop or. %r3,%r3,%r3 bne dbleave /* This wasn't for KDB, so switch to real trap: */ ld %r3,FRAME_EXC+48(%r1) /* save exception */ GET_CPUINFO(%r4) std %r3,(PC_DBSAVE+CPUSAVE_R31)(%r4) FRAME_LEAVE(PC_DBSAVE) mtsprg1 %r1 /* prepare for entrance to realtrap */ GET_CPUINFO(%r1) std %r27,(PC_TEMPSAVE+CPUSAVE_R27)(%r1) std %r28,(PC_TEMPSAVE+CPUSAVE_R28)(%r1) std %r29,(PC_TEMPSAVE+CPUSAVE_R29)(%r1) std %r30,(PC_TEMPSAVE+CPUSAVE_R30)(%r1) std %r31,(PC_TEMPSAVE+CPUSAVE_R31)(%r1) mflr %r28 mfcr %r29 ld %r31,(PC_DBSAVE+CPUSAVE_R31)(%r1) mtsprg3 %r31 /* SPRG3 was clobbered by FRAME_LEAVE */ mfsprg1 %r1 b realtrap dbleave: FRAME_LEAVE(PC_DBSAVE) rfid /* * In case of KDB we want a separate trap catcher for it */ .globl CNAME(dblow),CNAME(dbend) .p2align 3 CNAME(dblow): mtsprg1 %r1 /* save SP */ mtsprg2 %r29 /* save r29 */ mfcr %r29 /* save CR in r29 */ mfsrr1 %r1 mtcr %r1 bf 17,1f /* branch if privileged */ /* Unprivileged case */ mtcr %r29 /* put the condition register back */ mfsprg2 %r29 /* ... and r29 */ mflr %r1 /* save LR */ mtsprg2 %r1 /* And then in SPRG2 */ - ld %r1, TRAP_GENTRAP(0) /* Get branch address */ + ld %r1, TRAP_ENTRY(0) /* Get branch address */ mtlr %r1 li %r1, 0 /* How to get the vector from LR */ blrl /* Branch to generictrap */ /* No fallthrough */ 1: GET_CPUINFO(%r1) std %r27,(PC_DBSAVE+CPUSAVE_R27)(%r1) /* free r27 */ std %r28,(PC_DBSAVE+CPUSAVE_R28)(%r1) /* free r28 */ mfsprg2 %r28 /* r29 holds cr... */ std %r28,(PC_DBSAVE+CPUSAVE_R29)(%r1) /* free r29 */ std %r30,(PC_DBSAVE+CPUSAVE_R30)(%r1) /* free r30 */ std %r31,(PC_DBSAVE+CPUSAVE_R31)(%r1) /* free r31 */ mflr %r28 /* save LR */ ld %r1,TRAP_GENTRAP(0) addi %r1,%r1,(dbtrap-generictrap) mtlr %r1 blrl /* Branch to dbtrap */ CNAME(dbend): #endif /* KDB */ Index: projects/clang1000-import/sys/powerpc/amigaone/cpld.h =================================================================== --- projects/clang1000-import/sys/powerpc/amigaone/cpld.h (nonexistent) +++ projects/clang1000-import/sys/powerpc/amigaone/cpld.h (revision 356920) @@ -0,0 +1,45 @@ +/*- + * Copyright (c) 2020 Justin Hibbits + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef AMIGAONE_CPLD_H +#define AMIGAONE_CPLD_H + +#include + +/* + * Write 'words' to 'offset' offset in dual-port RAM, then write cmd to mailbox. + */ +struct cpld_cmd_data { + unsigned int cmd; + unsigned int len; + unsigned int offset; + void *words; +}; + +#define IOCCPLDSEND _IOW('c', 2, struct cpld_cmd_data) +#define IOCCPLDRECV _IOW('c', 3, struct cpld_cmd_data) + +#endif /* AMIGAONE_CPLD_H */ Property changes on: projects/clang1000-import/sys/powerpc/amigaone/cpld.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/clang1000-import/sys/powerpc/amigaone/cpld_x5000.c =================================================================== --- projects/clang1000-import/sys/powerpc/amigaone/cpld_x5000.c (nonexistent) +++ projects/clang1000-import/sys/powerpc/amigaone/cpld_x5000.c (revision 356920) @@ -0,0 +1,337 @@ +/*- + * Copyright (c) 2020 Justin Hibbits + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include "cpld.h" + +/* + * A driver for the AmigaOne X5000 "Cyrus+" CPLD. + * + * This is the interface between the CPU and the "Xena" (XMOS) chip. Since the + * XMOS is programmable via a SPI-attached flash memory, there's no direct + * driver written for the Xena attachment. Instead, a userspace process would + * communicate with the Xena by issuing ioctl()s to this CPLD. + */ + +/* Resource access addresses. */ +#define CPLD_MEM_ADDR 0x0000 +#define CPLD_MEM_DATA 0x8000 + +#define CPLD_MAX_DRAM_WORDS 0x800 + +/* CPLD Registers. */ +#define CPLD_REG_SIG1 0x00 +#define CPLD_REG_SIG2 0x01 +#define CPLD_REG_HWREV 0x02 +#define CPLD_REG_MBC2X 0x05 +#define CPLD_REG_MBX2C 0x06 +#define CPLD_REG_XDEBUG 0x0c +#define CPLD_REG_XJTAG 0x0d +#define CPLD_REG_FAN_TACHO 0x10 +#define CPLD_REG_DATE_LW 0x21 +#define CPLD_REG_DATE_UW 0x22 +#define CPLD_REG_TIME_LW 0x23 +#define CPLD_REG_TIME_UW 0x24 +#define CPLD_REG_SCR1 0x30 +#define CPLD_REG_SCR2 0x31 +#define CPLD_REG_RAM 0x8000 + +struct cpld_softc { + device_t sc_dev; + struct resource *sc_mem; + struct cdev *sc_cdev; + struct mtx sc_mutex; + bool sc_isopen; +}; + +static d_open_t cpld_open; +static d_close_t cpld_close; +static d_ioctl_t cpld_ioctl; + +static struct cdevsw cpld_cdevsw = { + .d_version = D_VERSION, + .d_open = cpld_open, + .d_close = cpld_close, + .d_ioctl = cpld_ioctl, + .d_name = "nvram", +}; + +static device_probe_t cpld_probe; +static device_attach_t cpld_attach; +static int cpld_fan_sysctl(SYSCTL_HANDLER_ARGS); + +static device_method_t cpld_methods[] = { + DEVMETHOD(device_probe, cpld_probe), + DEVMETHOD(device_attach, cpld_attach), + + DEVMETHOD_END +}; + +static driver_t cpld_driver = { + "cpld", + cpld_methods, + sizeof(struct cpld_softc) +}; + +static devclass_t cpld_devclass; +DRIVER_MODULE(cpld, lbc, cpld_driver, cpld_devclass, 0, 0); + +static void +cpld_write(struct cpld_softc *sc, int addr, int data) +{ + bus_write_2(sc->sc_mem, CPLD_MEM_ADDR, addr); + bus_write_2(sc->sc_mem, CPLD_MEM_DATA, data); +} + +static int +cpld_read(struct cpld_softc *sc, int addr) +{ + bus_write_2(sc->sc_mem, CPLD_MEM_ADDR, addr); + + return (bus_read_2(sc->sc_mem, CPLD_MEM_DATA)); +} + +static int +cpld_probe(device_t dev) +{ + if (!ofw_bus_is_compatible(dev, "aeon,cyrus-cpld")) + return (ENXIO); + + device_set_desc(dev, "AmigaOne Cyrus CPLD"); + + return (BUS_PROBE_GENERIC); +} + +static int +cpld_attach(device_t dev) +{ + struct make_dev_args mda; + struct cpld_softc *sc; + int rid; + int date, time, tmp; + int err; + struct sysctl_ctx_list *ctx; + struct sysctl_oid *tree; + + sc = device_get_softc(dev); + sc->sc_dev = dev; + + rid = 0; + sc->sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, + RF_ACTIVE|RF_SHAREABLE); + if (sc->sc_mem == NULL) { + device_printf(dev, "Unable to allocate memory resource.\n"); + return (ENXIO); + } + mtx_init(&sc->sc_mutex, "cpld", NULL, MTX_DEF); + if (bootverbose) { + date = (cpld_read(sc, CPLD_REG_DATE_UW) << 16) | + cpld_read(sc, CPLD_REG_DATE_LW); + time = (cpld_read(sc, CPLD_REG_TIME_UW) << 16) | + cpld_read(sc, CPLD_REG_TIME_LW); + + device_printf(dev, "Build date: %04x-%02x-%02x\n", + (date >> 16) & 0xffff, (date >> 8) & 0xff, date & 0xff); + device_printf(dev, "Build time: %02x:%02x:%02x\n", + (time >> 16) & 0xff, (time >> 8) & 0xff, time & 0xff); + } + + tmp = cpld_read(sc, CPLD_REG_HWREV); + device_printf(dev, "Hardware revision: %d\n", tmp); + + ctx = device_get_sysctl_ctx(dev); + tree = device_get_sysctl_tree(dev); + + SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, + "cpu_fan", CTLTYPE_INT | CTLFLAG_RD, sc, 0, + cpld_fan_sysctl, "I", "CPU Fan speed in RPM"); + + make_dev_args_init(&mda); + mda.mda_flags = MAKEDEV_CHECKNAME; + mda.mda_devsw = &cpld_cdevsw; + mda.mda_uid = UID_ROOT; + mda.mda_gid = GID_WHEEL; + mda.mda_mode = 0660; + mda.mda_si_drv1 = sc; + err = make_dev_s(&mda, &sc->sc_cdev, "cpld"); + if (err != 0) { + device_printf(dev, "Error creating character device: %d\n", err); + device_printf(dev, "Only sysctl interfaces will be available.\n"); + } + + return (0); +} + +static int +cpld_fan_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct cpld_softc *sc; + int error, old, rpm; + + sc = arg1; + mtx_lock(&sc->sc_mutex); + /* Read until we get some level of read stability. */ + rpm = cpld_read(sc, CPLD_REG_FAN_TACHO); + do { + old = rpm; + rpm = cpld_read(sc, CPLD_REG_FAN_TACHO); + } while (abs(rpm - old) > 10); + mtx_unlock(&sc->sc_mutex); + + /* Convert RPS->RPM. */ + rpm *= 60; + error = sysctl_handle_int(oidp, &rpm, 0, req); + + return (error); +} + +static int +cpld_open(struct cdev *dev, int flags, int fmt, struct thread *td) +{ + struct cpld_softc *sc = dev->si_drv1; + + if (sc->sc_isopen) + return (EBUSY); + sc->sc_isopen = 1; + return (0); +} + +static int +cpld_close(struct cdev *dev, int fflag, int devtype, struct thread *td) +{ + struct cpld_softc *sc = dev->si_drv1; + + sc->sc_isopen = 0; + return (0); +} + +static int +cpld_send(device_t dev, struct cpld_cmd_data *d) +{ + struct cpld_softc *sc; + uint16_t *word; + int i; + + if (d->cmd > USHRT_MAX) + return (EINVAL); + + sc = device_get_softc(dev); + + mtx_lock(&sc->sc_mutex); + for (i = 0, word = d->words; i < d->len; i++, word++) { + if (i == 0) + cpld_write(sc, CPLD_REG_RAM, *word); + else + bus_write_4(sc->sc_mem, CPLD_MEM_DATA, *word); + } + + cpld_write(sc, CPLD_REG_MBC2X, d->cmd); + mtx_unlock(&sc->sc_mutex); + + return (0); +} + +static int +cpld_recv(device_t dev, struct cpld_cmd_data *d) +{ + struct cpld_softc *sc; + uint16_t *word; + int i; + + sc = device_get_softc(dev); + + mtx_lock(&sc->sc_mutex); + d->cmd = cpld_read(sc, CPLD_REG_MBX2C); + + for (i = 0, word = d->words; i < d->len; i++, word++) { + if (i == 0) + *word = cpld_read(sc, CPLD_REG_RAM); + else + *word = bus_read_4(sc->sc_mem, CPLD_MEM_DATA); + } + mtx_unlock(&sc->sc_mutex); + + return (0); +} + +static int +cpld_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) +{ + struct cpld_softc *sc; + struct cpld_cmd_data *d; + void *xfer_data, *tmp; + int err; + + sc = dev->si_drv1; + + err = 0; + d = (struct cpld_cmd_data *)data; + if (d->len + d->offset > CPLD_MAX_DRAM_WORDS) { + return (EINVAL); + } + xfer_data = malloc(d->len * sizeof(uint16_t), M_TEMP, M_WAITOK); + + switch (cmd) { + case IOCCPLDSEND: + err = copyin(d->words, xfer_data, d->len * sizeof(uint16_t)); + d->words = xfer_data; + if (err == 0) + err = cpld_send(sc->sc_dev, d); + break; + case IOCCPLDRECV: + tmp = d->words; + d->words = xfer_data; + err = cpld_recv(sc->sc_dev, d); + d->words = tmp; + if (err == 0) + err = copyout(xfer_data, d->words, + d->len * sizeof(uint16_t)); + break; + default: + err = ENOTTY; + break; + } + free(xfer_data, M_TEMP); + + return (err); +} Property changes on: projects/clang1000-import/sys/powerpc/amigaone/cpld_x5000.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/clang1000-import/sys/powerpc/booke/pmap.c =================================================================== --- projects/clang1000-import/sys/powerpc/booke/pmap.c (revision 356919) +++ projects/clang1000-import/sys/powerpc/booke/pmap.c (revision 356920) @@ -1,4411 +1,4426 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski * Copyright (C) 2006 Semihalf, Marian Balakowicz * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Some hw specific parts of this pmap were derived or influenced * by NetBSD's ibm4xx pmap module. More generic code is shared with * a few other pmap modules from the FreeBSD tree. */ /* * VM layout notes: * * Kernel and user threads run within one common virtual address space * defined by AS=0. * * 32-bit pmap: * Virtual address space layout: * ----------------------------- * 0x0000_0000 - 0x7fff_ffff : user process * 0x8000_0000 - 0xbfff_ffff : pmap_mapdev()-ed area (PCI/PCIE etc.) * 0xc000_0000 - 0xc0ff_ffff : kernel reserved * 0xc000_0000 - data_end : kernel code+data, env, metadata etc. * 0xc100_0000 - 0xffff_ffff : KVA * 0xc100_0000 - 0xc100_3fff : reserved for page zero/copy * 0xc100_4000 - 0xc200_3fff : reserved for ptbl bufs * 0xc200_4000 - 0xc200_8fff : guard page + kstack0 * 0xc200_9000 - 0xfeef_ffff : actual free KVA space * * 64-bit pmap: * Virtual address space layout: * ----------------------------- * 0x0000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : user process * 0x0000_0000_0000_0000 - 0x8fff_ffff_ffff_ffff : text, data, heap, maps, libraries * 0x9000_0000_0000_0000 - 0xafff_ffff_ffff_ffff : mmio region * 0xb000_0000_0000_0000 - 0xbfff_ffff_ffff_ffff : stack * 0xc000_0000_0000_0000 - 0xcfff_ffff_ffff_ffff : kernel reserved * 0xc000_0000_0000_0000 - endkernel-1 : kernel code & data * endkernel - msgbufp-1 : flat device tree * msgbufp - kernel_pdir-1 : message buffer * kernel_pdir - kernel_pp2d-1 : kernel page directory * kernel_pp2d - . : kernel pointers to page directory * pmap_zero_copy_min - crashdumpmap-1 : reserved for page zero/copy * crashdumpmap - ptbl_buf_pool_vabase-1 : reserved for ptbl bufs * ptbl_buf_pool_vabase - virtual_avail-1 : user page directories and page tables * virtual_avail - 0xcfff_ffff_ffff_ffff : actual free KVA space * 0xd000_0000_0000_0000 - 0xdfff_ffff_ffff_ffff : coprocessor region * 0xe000_0000_0000_0000 - 0xefff_ffff_ffff_ffff : mmio region * 0xf000_0000_0000_0000 - 0xffff_ffff_ffff_ffff : direct map * 0xf000_0000_0000_0000 - +Maxmem : physmem map * - 0xffff_ffff_ffff_ffff : device direct map */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmu_if.h" #define SPARSE_MAPDEV #ifdef DEBUG #define debugf(fmt, args...) printf(fmt, ##args) #else #define debugf(fmt, args...) #endif #ifdef __powerpc64__ #define PRI0ptrX "016lx" #else #define PRI0ptrX "08x" #endif #define TODO panic("%s: not implemented", __func__); extern unsigned char _etext[]; extern unsigned char _end[]; extern uint32_t *bootinfo; vm_paddr_t kernload; vm_offset_t kernstart; vm_size_t kernsize; /* Message buffer and tables. */ static vm_offset_t data_start; static vm_size_t data_end; /* Phys/avail memory regions. */ static struct mem_region *availmem_regions; static int availmem_regions_sz; static struct mem_region *physmem_regions; static int physmem_regions_sz; #ifndef __powerpc64__ /* Reserved KVA space and mutex for mmu_booke_zero_page. */ static vm_offset_t zero_page_va; static struct mtx zero_page_mutex; /* Reserved KVA space and mutex for mmu_booke_copy_page. */ static vm_offset_t copy_page_src_va; static vm_offset_t copy_page_dst_va; static struct mtx copy_page_mutex; #endif static struct mtx tlbivax_mutex; /**************************************************************************/ /* PMAP */ /**************************************************************************/ static int mmu_booke_enter_locked(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t, u_int flags, int8_t psind); unsigned int kptbl_min; /* Index of the first kernel ptbl. */ unsigned int kernel_ptbls; /* Number of KVA ptbls. */ #ifdef __powerpc64__ unsigned int kernel_pdirs; #endif static uma_zone_t ptbl_root_zone; /* * If user pmap is processed with mmu_booke_remove and the resident count * drops to 0, there are no more pages to remove, so we need not continue. */ #define PMAP_REMOVE_DONE(pmap) \ ((pmap) != kernel_pmap && (pmap)->pm_stats.resident_count == 0) #if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__) extern int elf32_nxstack; #endif /**************************************************************************/ /* TLB and TID handling */ /**************************************************************************/ /* Translation ID busy table */ static volatile pmap_t tidbusy[MAXCPU][TID_MAX + 1]; /* * TLB0 capabilities (entry, way numbers etc.). These can vary between e500 * core revisions and should be read from h/w registers during early config. */ uint32_t tlb0_entries; uint32_t tlb0_ways; uint32_t tlb0_entries_per_way; uint32_t tlb1_entries; #define TLB0_ENTRIES (tlb0_entries) #define TLB0_WAYS (tlb0_ways) #define TLB0_ENTRIES_PER_WAY (tlb0_entries_per_way) #define TLB1_ENTRIES (tlb1_entries) static vm_offset_t tlb1_map_base = (vm_offset_t)VM_MAXUSER_ADDRESS + PAGE_SIZE; static tlbtid_t tid_alloc(struct pmap *); static void tid_flush(tlbtid_t tid); #ifdef DDB #ifdef __powerpc64__ static void tlb_print_entry(int, uint32_t, uint64_t, uint32_t, uint32_t); #else static void tlb_print_entry(int, uint32_t, uint32_t, uint32_t, uint32_t); #endif #endif static void tlb1_read_entry(tlb_entry_t *, unsigned int); static void tlb1_write_entry(tlb_entry_t *, unsigned int); static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *); static vm_size_t tlb1_mapin_region(vm_offset_t, vm_paddr_t, vm_size_t, int); static vm_size_t tsize2size(unsigned int); static unsigned int size2tsize(vm_size_t); static unsigned long ilog2(unsigned long); static void set_mas4_defaults(void); static inline void tlb0_flush_entry(vm_offset_t); static inline unsigned int tlb0_tableidx(vm_offset_t, unsigned int); /**************************************************************************/ /* Page table management */ /**************************************************************************/ static struct rwlock_padalign pvh_global_lock; /* Data for the pv entry allocation mechanism */ static uma_zone_t pvzone; static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; #define PV_ENTRY_ZONE_MIN 2048 /* min pv entries in uma zone */ #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 #endif #ifdef __powerpc64__ #define PMAP_ROOT_SIZE (sizeof(pte_t***) * PP2D_NENTRIES) static pte_t *ptbl_alloc(mmu_t, pmap_t, pte_t **, unsigned int, boolean_t); static void ptbl_free(mmu_t, pmap_t, pte_t **, unsigned int, vm_page_t); static void ptbl_hold(mmu_t, pmap_t, pte_t **, unsigned int); static int ptbl_unhold(mmu_t, pmap_t, vm_offset_t); #else #define PMAP_ROOT_SIZE (sizeof(pte_t**) * PDIR_NENTRIES) static void ptbl_init(void); static struct ptbl_buf *ptbl_buf_alloc(void); static void ptbl_buf_free(struct ptbl_buf *); static void ptbl_free_pmap_ptbl(pmap_t, pte_t *); static pte_t *ptbl_alloc(mmu_t, pmap_t, unsigned int, boolean_t); static void ptbl_free(mmu_t, pmap_t, unsigned int); static void ptbl_hold(mmu_t, pmap_t, unsigned int); static int ptbl_unhold(mmu_t, pmap_t, unsigned int); #endif static vm_paddr_t pte_vatopa(mmu_t, pmap_t, vm_offset_t); static int pte_enter(mmu_t, pmap_t, vm_page_t, vm_offset_t, uint32_t, boolean_t); static int pte_remove(mmu_t, pmap_t, vm_offset_t, uint8_t); static pte_t *pte_find(mmu_t, pmap_t, vm_offset_t); static void kernel_pte_alloc(vm_offset_t, vm_offset_t, vm_offset_t); static pv_entry_t pv_alloc(void); static void pv_free(pv_entry_t); static void pv_insert(pmap_t, vm_offset_t, vm_page_t); static void pv_remove(pmap_t, vm_offset_t, vm_page_t); static void booke_pmap_init_qpages(void); struct ptbl_buf { TAILQ_ENTRY(ptbl_buf) link; /* list link */ vm_offset_t kva; /* va of mapping */ }; #ifndef __powerpc64__ /* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */ #define PTBL_BUFS (128 * 16) /* ptbl free list and a lock used for access synchronization. */ static TAILQ_HEAD(, ptbl_buf) ptbl_buf_freelist; static struct mtx ptbl_buf_freelist_lock; /* Base address of kva space allocated fot ptbl bufs. */ static vm_offset_t ptbl_buf_pool_vabase; /* Pointer to ptbl_buf structures. */ static struct ptbl_buf *ptbl_bufs; #endif #ifdef SMP extern tlb_entry_t __boot_tlb1[]; void pmap_bootstrap_ap(volatile uint32_t *); #endif /* * Kernel MMU interface */ static void mmu_booke_clear_modify(mmu_t, vm_page_t); static void mmu_booke_copy(mmu_t, pmap_t, pmap_t, vm_offset_t, vm_size_t, vm_offset_t); static void mmu_booke_copy_page(mmu_t, vm_page_t, vm_page_t); static void mmu_booke_copy_pages(mmu_t, vm_page_t *, vm_offset_t, vm_page_t *, vm_offset_t, int); static int mmu_booke_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t, u_int flags, int8_t psind); static void mmu_booke_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_page_t, vm_prot_t); static void mmu_booke_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t); static vm_paddr_t mmu_booke_extract(mmu_t, pmap_t, vm_offset_t); static vm_page_t mmu_booke_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t); static void mmu_booke_init(mmu_t); static boolean_t mmu_booke_is_modified(mmu_t, vm_page_t); static boolean_t mmu_booke_is_prefaultable(mmu_t, pmap_t, vm_offset_t); static boolean_t mmu_booke_is_referenced(mmu_t, vm_page_t); static int mmu_booke_ts_referenced(mmu_t, vm_page_t); static vm_offset_t mmu_booke_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t, int); static int mmu_booke_mincore(mmu_t, pmap_t, vm_offset_t, vm_paddr_t *); static void mmu_booke_object_init_pt(mmu_t, pmap_t, vm_offset_t, vm_object_t, vm_pindex_t, vm_size_t); static boolean_t mmu_booke_page_exists_quick(mmu_t, pmap_t, vm_page_t); static void mmu_booke_page_init(mmu_t, vm_page_t); static int mmu_booke_page_wired_mappings(mmu_t, vm_page_t); static void mmu_booke_pinit(mmu_t, pmap_t); static void mmu_booke_pinit0(mmu_t, pmap_t); static void mmu_booke_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_prot_t); static void mmu_booke_qenter(mmu_t, vm_offset_t, vm_page_t *, int); static void mmu_booke_qremove(mmu_t, vm_offset_t, int); static void mmu_booke_release(mmu_t, pmap_t); static void mmu_booke_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); static void mmu_booke_remove_all(mmu_t, vm_page_t); static void mmu_booke_remove_write(mmu_t, vm_page_t); static void mmu_booke_unwire(mmu_t, pmap_t, vm_offset_t, vm_offset_t); static void mmu_booke_zero_page(mmu_t, vm_page_t); static void mmu_booke_zero_page_area(mmu_t, vm_page_t, int, int); static void mmu_booke_activate(mmu_t, struct thread *); static void mmu_booke_deactivate(mmu_t, struct thread *); static void mmu_booke_bootstrap(mmu_t, vm_offset_t, vm_offset_t); static void *mmu_booke_mapdev(mmu_t, vm_paddr_t, vm_size_t); static void *mmu_booke_mapdev_attr(mmu_t, vm_paddr_t, vm_size_t, vm_memattr_t); static void mmu_booke_unmapdev(mmu_t, vm_offset_t, vm_size_t); static vm_paddr_t mmu_booke_kextract(mmu_t, vm_offset_t); static void mmu_booke_kenter(mmu_t, vm_offset_t, vm_paddr_t); static void mmu_booke_kenter_attr(mmu_t, vm_offset_t, vm_paddr_t, vm_memattr_t); static void mmu_booke_kremove(mmu_t, vm_offset_t); static boolean_t mmu_booke_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t); static void mmu_booke_sync_icache(mmu_t, pmap_t, vm_offset_t, vm_size_t); static void mmu_booke_dumpsys_map(mmu_t, vm_paddr_t pa, size_t, void **); static void mmu_booke_dumpsys_unmap(mmu_t, vm_paddr_t pa, size_t, void *); static void mmu_booke_scan_init(mmu_t); static vm_offset_t mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m); static void mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr); static int mmu_booke_change_attr(mmu_t mmu, vm_offset_t addr, vm_size_t sz, vm_memattr_t mode); static int mmu_booke_map_user_ptr(mmu_t mmu, pmap_t pm, volatile const void *uaddr, void **kaddr, size_t ulen, size_t *klen); static int mmu_booke_decode_kernel_ptr(mmu_t mmu, vm_offset_t addr, int *is_user, vm_offset_t *decoded_addr); static void mmu_booke_page_array_startup(mmu_t , long); static mmu_method_t mmu_booke_methods[] = { /* pmap dispatcher interface */ MMUMETHOD(mmu_clear_modify, mmu_booke_clear_modify), MMUMETHOD(mmu_copy, mmu_booke_copy), MMUMETHOD(mmu_copy_page, mmu_booke_copy_page), MMUMETHOD(mmu_copy_pages, mmu_booke_copy_pages), MMUMETHOD(mmu_enter, mmu_booke_enter), MMUMETHOD(mmu_enter_object, mmu_booke_enter_object), MMUMETHOD(mmu_enter_quick, mmu_booke_enter_quick), MMUMETHOD(mmu_extract, mmu_booke_extract), MMUMETHOD(mmu_extract_and_hold, mmu_booke_extract_and_hold), MMUMETHOD(mmu_init, mmu_booke_init), MMUMETHOD(mmu_is_modified, mmu_booke_is_modified), MMUMETHOD(mmu_is_prefaultable, mmu_booke_is_prefaultable), MMUMETHOD(mmu_is_referenced, mmu_booke_is_referenced), MMUMETHOD(mmu_ts_referenced, mmu_booke_ts_referenced), MMUMETHOD(mmu_map, mmu_booke_map), MMUMETHOD(mmu_mincore, mmu_booke_mincore), MMUMETHOD(mmu_object_init_pt, mmu_booke_object_init_pt), MMUMETHOD(mmu_page_exists_quick,mmu_booke_page_exists_quick), MMUMETHOD(mmu_page_init, mmu_booke_page_init), MMUMETHOD(mmu_page_wired_mappings, mmu_booke_page_wired_mappings), MMUMETHOD(mmu_pinit, mmu_booke_pinit), MMUMETHOD(mmu_pinit0, mmu_booke_pinit0), MMUMETHOD(mmu_protect, mmu_booke_protect), MMUMETHOD(mmu_qenter, mmu_booke_qenter), MMUMETHOD(mmu_qremove, mmu_booke_qremove), MMUMETHOD(mmu_release, mmu_booke_release), MMUMETHOD(mmu_remove, mmu_booke_remove), MMUMETHOD(mmu_remove_all, mmu_booke_remove_all), MMUMETHOD(mmu_remove_write, mmu_booke_remove_write), MMUMETHOD(mmu_sync_icache, mmu_booke_sync_icache), MMUMETHOD(mmu_unwire, mmu_booke_unwire), MMUMETHOD(mmu_zero_page, mmu_booke_zero_page), MMUMETHOD(mmu_zero_page_area, mmu_booke_zero_page_area), MMUMETHOD(mmu_activate, mmu_booke_activate), MMUMETHOD(mmu_deactivate, mmu_booke_deactivate), MMUMETHOD(mmu_quick_enter_page, mmu_booke_quick_enter_page), MMUMETHOD(mmu_quick_remove_page, mmu_booke_quick_remove_page), MMUMETHOD(mmu_page_array_startup, mmu_booke_page_array_startup), /* Internal interfaces */ MMUMETHOD(mmu_bootstrap, mmu_booke_bootstrap), MMUMETHOD(mmu_dev_direct_mapped,mmu_booke_dev_direct_mapped), MMUMETHOD(mmu_mapdev, mmu_booke_mapdev), MMUMETHOD(mmu_mapdev_attr, mmu_booke_mapdev_attr), MMUMETHOD(mmu_kenter, mmu_booke_kenter), MMUMETHOD(mmu_kenter_attr, mmu_booke_kenter_attr), MMUMETHOD(mmu_kextract, mmu_booke_kextract), MMUMETHOD(mmu_kremove, mmu_booke_kremove), MMUMETHOD(mmu_unmapdev, mmu_booke_unmapdev), MMUMETHOD(mmu_change_attr, mmu_booke_change_attr), MMUMETHOD(mmu_map_user_ptr, mmu_booke_map_user_ptr), MMUMETHOD(mmu_decode_kernel_ptr, mmu_booke_decode_kernel_ptr), /* dumpsys() support */ MMUMETHOD(mmu_dumpsys_map, mmu_booke_dumpsys_map), MMUMETHOD(mmu_dumpsys_unmap, mmu_booke_dumpsys_unmap), MMUMETHOD(mmu_scan_init, mmu_booke_scan_init), { 0, 0 } }; MMU_DEF(booke_mmu, MMU_TYPE_BOOKE, mmu_booke_methods, 0); static __inline uint32_t tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma) { uint32_t attrib; int i; if (ma != VM_MEMATTR_DEFAULT) { switch (ma) { case VM_MEMATTR_UNCACHEABLE: return (MAS2_I | MAS2_G); case VM_MEMATTR_WRITE_COMBINING: case VM_MEMATTR_WRITE_BACK: case VM_MEMATTR_PREFETCHABLE: return (MAS2_I); case VM_MEMATTR_WRITE_THROUGH: return (MAS2_W | MAS2_M); case VM_MEMATTR_CACHEABLE: return (MAS2_M); } } /* * Assume the page is cache inhibited and access is guarded unless * it's in our available memory array. */ attrib = _TLB_ENTRY_IO; for (i = 0; i < physmem_regions_sz; i++) { if ((pa >= physmem_regions[i].mr_start) && (pa < (physmem_regions[i].mr_start + physmem_regions[i].mr_size))) { attrib = _TLB_ENTRY_MEM; break; } } return (attrib); } static inline void tlb_miss_lock(void) { #ifdef SMP struct pcpu *pc; if (!smp_started) return; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { if (pc != pcpup) { CTR3(KTR_PMAP, "%s: tlb miss LOCK of CPU=%d, " "tlb_lock=%p", __func__, pc->pc_cpuid, pc->pc_booke.tlb_lock); KASSERT((pc->pc_cpuid != PCPU_GET(cpuid)), ("tlb_miss_lock: tried to lock self")); tlb_lock(pc->pc_booke.tlb_lock); CTR1(KTR_PMAP, "%s: locked", __func__); } } #endif } static inline void tlb_miss_unlock(void) { #ifdef SMP struct pcpu *pc; if (!smp_started) return; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { if (pc != pcpup) { CTR2(KTR_PMAP, "%s: tlb miss UNLOCK of CPU=%d", __func__, pc->pc_cpuid); tlb_unlock(pc->pc_booke.tlb_lock); CTR1(KTR_PMAP, "%s: unlocked", __func__); } } #endif } /* Return number of entries in TLB0. */ static __inline void tlb0_get_tlbconf(void) { uint32_t tlb0_cfg; tlb0_cfg = mfspr(SPR_TLB0CFG); tlb0_entries = tlb0_cfg & TLBCFG_NENTRY_MASK; tlb0_ways = (tlb0_cfg & TLBCFG_ASSOC_MASK) >> TLBCFG_ASSOC_SHIFT; tlb0_entries_per_way = tlb0_entries / tlb0_ways; } /* Return number of entries in TLB1. */ static __inline void tlb1_get_tlbconf(void) { uint32_t tlb1_cfg; tlb1_cfg = mfspr(SPR_TLB1CFG); tlb1_entries = tlb1_cfg & TLBCFG_NENTRY_MASK; } /**************************************************************************/ /* Page table related */ /**************************************************************************/ #ifdef __powerpc64__ /* Initialize pool of kva ptbl buffers. */ static void ptbl_init(void) { } /* Get a pointer to a PTE in a page table. */ static __inline pte_t * pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va) { pte_t **pdir; pte_t *ptbl; KASSERT((pmap != NULL), ("pte_find: invalid pmap")); pdir = pmap->pm_pp2d[PP2D_IDX(va)]; if (!pdir) return NULL; ptbl = pdir[PDIR_IDX(va)]; return ((ptbl != NULL) ? &ptbl[PTBL_IDX(va)] : NULL); } /* * allocate a page of pointers to page directories, do not preallocate the * page tables */ static pte_t ** pdir_alloc(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx, bool nosleep) { vm_page_t m; pte_t **pdir; int req; req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; while ((m = vm_page_alloc(NULL, pp2d_idx, req)) == NULL) { PMAP_UNLOCK(pmap); if (nosleep) { return (NULL); } vm_wait(NULL); PMAP_LOCK(pmap); } /* Zero whole ptbl. */ pdir = (pte_t **)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); mmu_booke_zero_page(mmu, m); return (pdir); } /* Free pdir pages and invalidate pdir entry. */ static void pdir_free(mmu_t mmu, pmap_t pmap, unsigned int pp2d_idx, vm_page_t m) { pte_t **pdir; pdir = pmap->pm_pp2d[pp2d_idx]; KASSERT((pdir != NULL), ("pdir_free: null pdir")); pmap->pm_pp2d[pp2d_idx] = NULL; vm_wire_sub(1); vm_page_free_zero(m); } /* * Decrement pdir pages hold count and attempt to free pdir pages. Called * when removing directory entry from pdir. * * Return 1 if pdir pages were freed. */ static int pdir_unhold(mmu_t mmu, pmap_t pmap, u_int pp2d_idx) { pte_t **pdir; vm_paddr_t pa; vm_page_t m; KASSERT((pmap != kernel_pmap), ("pdir_unhold: unholding kernel pdir!")); pdir = pmap->pm_pp2d[pp2d_idx]; /* decrement hold count */ pa = DMAP_TO_PHYS((vm_offset_t) pdir); m = PHYS_TO_VM_PAGE(pa); /* * Free pdir page if there are no dir entries in this pdir. */ m->ref_count--; if (m->ref_count == 0) { pdir_free(mmu, pmap, pp2d_idx, m); return (1); } return (0); } /* * Increment hold count for pdir pages. This routine is used when new ptlb * entry is being inserted into pdir. */ static void pdir_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir) { vm_page_t m; KASSERT((pmap != kernel_pmap), ("pdir_hold: holding kernel pdir!")); KASSERT((pdir != NULL), ("pdir_hold: null pdir")); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pdir)); m->ref_count++; } /* Allocate page table. */ static pte_t * ptbl_alloc(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx, boolean_t nosleep) { vm_page_t m; pte_t *ptbl; int req; KASSERT((pdir[pdir_idx] == NULL), ("%s: valid ptbl entry exists!", __func__)); req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; while ((m = vm_page_alloc(NULL, pdir_idx, req)) == NULL) { PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); if (nosleep) { return (NULL); } vm_wait(NULL); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); } /* Zero whole ptbl. */ ptbl = (pte_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); mmu_booke_zero_page(mmu, m); return (ptbl); } /* Free ptbl pages and invalidate pdir entry. */ static void ptbl_free(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx, vm_page_t m) { pte_t *ptbl; ptbl = pdir[pdir_idx]; KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); pdir[pdir_idx] = NULL; vm_wire_sub(1); vm_page_free_zero(m); } /* * Decrement ptbl pages hold count and attempt to free ptbl pages. Called * when removing pte entry from ptbl. * * Return 1 if ptbl pages were freed. */ static int ptbl_unhold(mmu_t mmu, pmap_t pmap, vm_offset_t va) { pte_t *ptbl; vm_page_t m; u_int pp2d_idx; pte_t **pdir; u_int pdir_idx; pp2d_idx = PP2D_IDX(va); pdir_idx = PDIR_IDX(va); KASSERT((pmap != kernel_pmap), ("ptbl_unhold: unholding kernel ptbl!")); pdir = pmap->pm_pp2d[pp2d_idx]; ptbl = pdir[pdir_idx]; /* decrement hold count */ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl)); /* * Free ptbl pages if there are no pte entries in this ptbl. * ref_count has the same value for all ptbl pages, so check the * last page. */ m->ref_count--; if (m->ref_count == 0) { ptbl_free(mmu, pmap, pdir, pdir_idx, m); pdir_unhold(mmu, pmap, pp2d_idx); return (1); } return (0); } /* * Increment hold count for ptbl pages. This routine is used when new pte * entry is being inserted into ptbl. */ static void ptbl_hold(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx) { pte_t *ptbl; vm_page_t m; KASSERT((pmap != kernel_pmap), ("ptbl_hold: holding kernel ptbl!")); ptbl = pdir[pdir_idx]; KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t) ptbl)); m->ref_count++; } #else /* Initialize pool of kva ptbl buffers. */ static void ptbl_init(void) { int i; CTR3(KTR_PMAP, "%s: s (ptbl_bufs = 0x%08x size 0x%08x)", __func__, (uint32_t)ptbl_bufs, sizeof(struct ptbl_buf) * PTBL_BUFS); CTR3(KTR_PMAP, "%s: s (ptbl_buf_pool_vabase = 0x%08x size = 0x%08x)", __func__, ptbl_buf_pool_vabase, PTBL_BUFS * PTBL_PAGES * PAGE_SIZE); mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF); TAILQ_INIT(&ptbl_buf_freelist); for (i = 0; i < PTBL_BUFS; i++) { ptbl_bufs[i].kva = ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE; TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link); } } /* Get a ptbl_buf from the freelist. */ static struct ptbl_buf * ptbl_buf_alloc(void) { struct ptbl_buf *buf; mtx_lock(&ptbl_buf_freelist_lock); buf = TAILQ_FIRST(&ptbl_buf_freelist); if (buf != NULL) TAILQ_REMOVE(&ptbl_buf_freelist, buf, link); mtx_unlock(&ptbl_buf_freelist_lock); CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); return (buf); } /* Return ptbl buff to free pool. */ static void ptbl_buf_free(struct ptbl_buf *buf) { CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf); mtx_lock(&ptbl_buf_freelist_lock); TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link); mtx_unlock(&ptbl_buf_freelist_lock); } /* * Search the list of allocated ptbl bufs and find on list of allocated ptbls */ static void ptbl_free_pmap_ptbl(pmap_t pmap, pte_t *ptbl) { struct ptbl_buf *pbuf; CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); PMAP_LOCK_ASSERT(pmap, MA_OWNED); TAILQ_FOREACH(pbuf, &pmap->pm_ptbl_list, link) if (pbuf->kva == (vm_offset_t)ptbl) { /* Remove from pmap ptbl buf list. */ TAILQ_REMOVE(&pmap->pm_ptbl_list, pbuf, link); /* Free corresponding ptbl buf. */ ptbl_buf_free(pbuf); break; } } /* Allocate page table. */ static pte_t * ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx, boolean_t nosleep) { vm_page_t mtbl[PTBL_PAGES]; vm_page_t m; struct ptbl_buf *pbuf; unsigned int pidx; pte_t *ptbl; int i, j; CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, (pmap == kernel_pmap), pdir_idx); KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), ("ptbl_alloc: invalid pdir_idx")); KASSERT((pmap->pm_pdir[pdir_idx] == NULL), ("pte_alloc: valid ptbl entry exists!")); pbuf = ptbl_buf_alloc(); if (pbuf == NULL) panic("pte_alloc: couldn't alloc kernel virtual memory"); ptbl = (pte_t *)pbuf->kva; CTR2(KTR_PMAP, "%s: ptbl kva = %p", __func__, ptbl); for (i = 0; i < PTBL_PAGES; i++) { pidx = (PTBL_PAGES * pdir_idx) + i; while ((m = vm_page_alloc(NULL, pidx, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); if (nosleep) { ptbl_free_pmap_ptbl(pmap, ptbl); for (j = 0; j < i; j++) vm_page_free(mtbl[j]); vm_wire_sub(i); return (NULL); } vm_wait(NULL); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); } mtbl[i] = m; } /* Map allocated pages into kernel_pmap. */ mmu_booke_qenter(mmu, (vm_offset_t)ptbl, mtbl, PTBL_PAGES); /* Zero whole ptbl. */ bzero((caddr_t)ptbl, PTBL_PAGES * PAGE_SIZE); /* Add pbuf to the pmap ptbl bufs list. */ TAILQ_INSERT_TAIL(&pmap->pm_ptbl_list, pbuf, link); return (ptbl); } /* Free ptbl pages and invalidate pdir entry. */ static void ptbl_free(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) { pte_t *ptbl; vm_paddr_t pa; vm_offset_t va; vm_page_t m; int i; CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, (pmap == kernel_pmap), pdir_idx); KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), ("ptbl_free: invalid pdir_idx")); ptbl = pmap->pm_pdir[pdir_idx]; CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl); KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); /* * Invalidate the pdir entry as soon as possible, so that other CPUs * don't attempt to look up the page tables we are releasing. */ mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); pmap->pm_pdir[pdir_idx] = NULL; tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); for (i = 0; i < PTBL_PAGES; i++) { va = ((vm_offset_t)ptbl + (i * PAGE_SIZE)); pa = pte_vatopa(mmu, kernel_pmap, va); m = PHYS_TO_VM_PAGE(pa); vm_page_free_zero(m); vm_wire_sub(1); mmu_booke_kremove(mmu, va); } ptbl_free_pmap_ptbl(pmap, ptbl); } /* * Decrement ptbl pages hold count and attempt to free ptbl pages. * Called when removing pte entry from ptbl. * * Return 1 if ptbl pages were freed. */ static int ptbl_unhold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) { pte_t *ptbl; vm_paddr_t pa; vm_page_t m; int i; CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap, (pmap == kernel_pmap), pdir_idx); KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), ("ptbl_unhold: invalid pdir_idx")); KASSERT((pmap != kernel_pmap), ("ptbl_unhold: unholding kernel ptbl!")); ptbl = pmap->pm_pdir[pdir_idx]; //debugf("ptbl_unhold: ptbl = 0x%08x\n", (u_int32_t)ptbl); KASSERT(((vm_offset_t)ptbl >= VM_MIN_KERNEL_ADDRESS), ("ptbl_unhold: non kva ptbl")); /* decrement hold count */ for (i = 0; i < PTBL_PAGES; i++) { pa = pte_vatopa(mmu, kernel_pmap, (vm_offset_t)ptbl + (i * PAGE_SIZE)); m = PHYS_TO_VM_PAGE(pa); m->ref_count--; } /* * Free ptbl pages if there are no pte etries in this ptbl. * ref_count has the same value for all ptbl pages, so check the last * page. */ if (m->ref_count == 0) { ptbl_free(mmu, pmap, pdir_idx); //debugf("ptbl_unhold: e (freed ptbl)\n"); return (1); } return (0); } /* * Increment hold count for ptbl pages. This routine is used when a new pte * entry is being inserted into the ptbl. */ static void ptbl_hold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) { vm_paddr_t pa; pte_t *ptbl; vm_page_t m; int i; CTR3(KTR_PMAP, "%s: pmap = %p pdir_idx = %d", __func__, pmap, pdir_idx); KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), ("ptbl_hold: invalid pdir_idx")); KASSERT((pmap != kernel_pmap), ("ptbl_hold: holding kernel ptbl!")); ptbl = pmap->pm_pdir[pdir_idx]; KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); for (i = 0; i < PTBL_PAGES; i++) { pa = pte_vatopa(mmu, kernel_pmap, (vm_offset_t)ptbl + (i * PAGE_SIZE)); m = PHYS_TO_VM_PAGE(pa); m->ref_count++; } } #endif /* Allocate pv_entry structure. */ pv_entry_t pv_alloc(void) { pv_entry_t pv; pv_entry_count++; if (pv_entry_count > pv_entry_high_water) pagedaemon_wakeup(0); /* XXX powerpc NUMA */ pv = uma_zalloc(pvzone, M_NOWAIT); return (pv); } /* Free pv_entry structure. */ static __inline void pv_free(pv_entry_t pve) { pv_entry_count--; uma_zfree(pvzone, pve); } /* Allocate and initialize pv_entry structure. */ static void pv_insert(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pve; //int su = (pmap == kernel_pmap); //debugf("pv_insert: s (su = %d pmap = 0x%08x va = 0x%08x m = 0x%08x)\n", su, // (u_int32_t)pmap, va, (u_int32_t)m); pve = pv_alloc(); if (pve == NULL) panic("pv_insert: no pv entries!"); pve->pv_pmap = pmap; pve->pv_va = va; /* add to pv_list */ PMAP_LOCK_ASSERT(pmap, MA_OWNED); rw_assert(&pvh_global_lock, RA_WLOCKED); TAILQ_INSERT_TAIL(&m->md.pv_list, pve, pv_link); //debugf("pv_insert: e\n"); } /* Destroy pv entry. */ static void pv_remove(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pve; //int su = (pmap == kernel_pmap); //debugf("pv_remove: s (su = %d pmap = 0x%08x va = 0x%08x)\n", su, (u_int32_t)pmap, va); PMAP_LOCK_ASSERT(pmap, MA_OWNED); rw_assert(&pvh_global_lock, RA_WLOCKED); /* find pv entry */ TAILQ_FOREACH(pve, &m->md.pv_list, pv_link) { if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) { /* remove from pv_list */ TAILQ_REMOVE(&m->md.pv_list, pve, pv_link); if (TAILQ_EMPTY(&m->md.pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); /* free pv entry struct */ pv_free(pve); break; } } //debugf("pv_remove: e\n"); } #ifdef __powerpc64__ /* * Clean pte entry, try to free page table page if requested. * * Return 1 if ptbl pages were freed, otherwise return 0. */ static int pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, u_int8_t flags) { vm_page_t m; pte_t *pte; pte = pte_find(mmu, pmap, va); KASSERT(pte != NULL, ("%s: NULL pte", __func__)); if (!PTE_ISVALID(pte)) return (0); /* Get vm_page_t for mapped pte. */ m = PHYS_TO_VM_PAGE(PTE_PA(pte)); if (PTE_ISWIRED(pte)) pmap->pm_stats.wired_count--; /* Handle managed entry. */ if (PTE_ISMANAGED(pte)) { /* Handle modified pages. */ if (PTE_ISMODIFIED(pte)) vm_page_dirty(m); /* Referenced pages. */ if (PTE_ISREFERENCED(pte)) vm_page_aflag_set(m, PGA_REFERENCED); /* Remove pv_entry from pv_list. */ pv_remove(pmap, va, m); } else if (pmap == kernel_pmap && m && m->md.pv_tracked) { pv_remove(pmap, va, m); if (TAILQ_EMPTY(&m->md.pv_list)) m->md.pv_tracked = false; } mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); tlb0_flush_entry(va); *pte = 0; tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); pmap->pm_stats.resident_count--; if (flags & PTBL_UNHOLD) { return (ptbl_unhold(mmu, pmap, va)); } return (0); } /* * Insert PTE for a given page and virtual address. */ static int pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, boolean_t nosleep) { unsigned int pp2d_idx = PP2D_IDX(va); unsigned int pdir_idx = PDIR_IDX(va); unsigned int ptbl_idx = PTBL_IDX(va); pte_t *ptbl, *pte, pte_tmp; pte_t **pdir; /* Get the page directory pointer. */ pdir = pmap->pm_pp2d[pp2d_idx]; if (pdir == NULL) pdir = pdir_alloc(mmu, pmap, pp2d_idx, nosleep); /* Get the page table pointer. */ ptbl = pdir[pdir_idx]; if (ptbl == NULL) { /* Allocate page table pages. */ ptbl = ptbl_alloc(mmu, pmap, pdir, pdir_idx, nosleep); if (ptbl == NULL) { KASSERT(nosleep, ("nosleep and NULL ptbl")); return (ENOMEM); } pte = &ptbl[ptbl_idx]; } else { /* * Check if there is valid mapping for requested va, if there * is, remove it. */ pte = &ptbl[ptbl_idx]; if (PTE_ISVALID(pte)) { pte_remove(mmu, pmap, va, PTBL_HOLD); } else { /* * pte is not used, increment hold count for ptbl * pages. */ if (pmap != kernel_pmap) ptbl_hold(mmu, pmap, pdir, pdir_idx); } } if (pdir[pdir_idx] == NULL) { if (pmap != kernel_pmap && pmap->pm_pp2d[pp2d_idx] != NULL) pdir_hold(mmu, pmap, pdir); pdir[pdir_idx] = ptbl; } if (pmap->pm_pp2d[pp2d_idx] == NULL) pmap->pm_pp2d[pp2d_idx] = pdir; /* * Insert pv_entry into pv_list for mapped page if part of managed * memory. */ if ((m->oflags & VPO_UNMANAGED) == 0) { flags |= PTE_MANAGED; /* Create and insert pv entry. */ pv_insert(pmap, va, m); } pmap->pm_stats.resident_count++; pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); pte_tmp |= (PTE_VALID | flags); mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); tlb0_flush_entry(va); *pte = pte_tmp; tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); return (0); } /* Return the pa for the given pmap/va. */ static vm_paddr_t pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va) { vm_paddr_t pa = 0; pte_t *pte; pte = pte_find(mmu, pmap, va); if ((pte != NULL) && PTE_ISVALID(pte)) pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); return (pa); } /* allocate pte entries to manage (addr & mask) to (addr & mask) + size */ static void kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir) { int i, j; vm_offset_t va; pte_t *pte; va = addr; /* Initialize kernel pdir */ for (i = 0; i < kernel_pdirs; i++) { kernel_pmap->pm_pp2d[i + PP2D_IDX(va)] = (pte_t **)(pdir + (i * PAGE_SIZE * PDIR_PAGES)); for (j = PDIR_IDX(va + (i * PAGE_SIZE * PDIR_NENTRIES * PTBL_NENTRIES)); j < PDIR_NENTRIES; j++) { kernel_pmap->pm_pp2d[i + PP2D_IDX(va)][j] = (pte_t *)(pdir + (kernel_pdirs * PAGE_SIZE) + (((i * PDIR_NENTRIES) + j) * PAGE_SIZE)); } } /* * Fill in PTEs covering kernel code and data. They are not required * for address translation, as this area is covered by static TLB1 * entries, but for pte_vatopa() to work correctly with kernel area * addresses. */ for (va = addr; va < data_end; va += PAGE_SIZE) { pte = &(kernel_pmap->pm_pp2d[PP2D_IDX(va)][PDIR_IDX(va)][PTBL_IDX(va)]); *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID | PTE_PS_4KB; } } #else /* * Clean pte entry, try to free page table page if requested. * * Return 1 if ptbl pages were freed, otherwise return 0. */ static int pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, uint8_t flags) { unsigned int pdir_idx = PDIR_IDX(va); unsigned int ptbl_idx = PTBL_IDX(va); vm_page_t m; pte_t *ptbl; pte_t *pte; //int su = (pmap == kernel_pmap); //debugf("pte_remove: s (su = %d pmap = 0x%08x va = 0x%08x flags = %d)\n", // su, (u_int32_t)pmap, va, flags); ptbl = pmap->pm_pdir[pdir_idx]; KASSERT(ptbl, ("pte_remove: null ptbl")); pte = &ptbl[ptbl_idx]; if (pte == NULL || !PTE_ISVALID(pte)) return (0); if (PTE_ISWIRED(pte)) pmap->pm_stats.wired_count--; /* Get vm_page_t for mapped pte. */ m = PHYS_TO_VM_PAGE(PTE_PA(pte)); /* Handle managed entry. */ if (PTE_ISMANAGED(pte)) { if (PTE_ISMODIFIED(pte)) vm_page_dirty(m); if (PTE_ISREFERENCED(pte)) vm_page_aflag_set(m, PGA_REFERENCED); pv_remove(pmap, va, m); } else if (pmap == kernel_pmap && m && m->md.pv_tracked) { /* * Always pv_insert()/pv_remove() on MPC85XX, in case DPAA is * used. This is needed by the NCSW support code for fast * VA<->PA translation. */ pv_remove(pmap, va, m); if (TAILQ_EMPTY(&m->md.pv_list)) m->md.pv_tracked = false; } mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); tlb0_flush_entry(va); *pte = 0; tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); pmap->pm_stats.resident_count--; if (flags & PTBL_UNHOLD) { //debugf("pte_remove: e (unhold)\n"); return (ptbl_unhold(mmu, pmap, pdir_idx)); } //debugf("pte_remove: e\n"); return (0); } /* * Insert PTE for a given page and virtual address. */ static int pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags, boolean_t nosleep) { unsigned int pdir_idx = PDIR_IDX(va); unsigned int ptbl_idx = PTBL_IDX(va); pte_t *ptbl, *pte, pte_tmp; CTR4(KTR_PMAP, "%s: su = %d pmap = %p va = %p", __func__, pmap == kernel_pmap, pmap, va); /* Get the page table pointer. */ ptbl = pmap->pm_pdir[pdir_idx]; if (ptbl == NULL) { /* Allocate page table pages. */ ptbl = ptbl_alloc(mmu, pmap, pdir_idx, nosleep); if (ptbl == NULL) { KASSERT(nosleep, ("nosleep and NULL ptbl")); return (ENOMEM); } pmap->pm_pdir[pdir_idx] = ptbl; pte = &ptbl[ptbl_idx]; } else { /* * Check if there is valid mapping for requested * va, if there is, remove it. */ pte = &pmap->pm_pdir[pdir_idx][ptbl_idx]; if (PTE_ISVALID(pte)) { pte_remove(mmu, pmap, va, PTBL_HOLD); } else { /* * pte is not used, increment hold count * for ptbl pages. */ if (pmap != kernel_pmap) ptbl_hold(mmu, pmap, pdir_idx); } } /* * Insert pv_entry into pv_list for mapped page if part of managed * memory. */ if ((m->oflags & VPO_UNMANAGED) == 0) { flags |= PTE_MANAGED; /* Create and insert pv entry. */ pv_insert(pmap, va, m); } pmap->pm_stats.resident_count++; pte_tmp = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m)); pte_tmp |= (PTE_VALID | flags | PTE_PS_4KB); /* 4KB pages only */ mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); tlb0_flush_entry(va); *pte = pte_tmp; tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); return (0); } /* Return the pa for the given pmap/va. */ static vm_paddr_t pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va) { vm_paddr_t pa = 0; pte_t *pte; pte = pte_find(mmu, pmap, va); if ((pte != NULL) && PTE_ISVALID(pte)) pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); return (pa); } /* Get a pointer to a PTE in a page table. */ static pte_t * pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va) { unsigned int pdir_idx = PDIR_IDX(va); unsigned int ptbl_idx = PTBL_IDX(va); KASSERT((pmap != NULL), ("pte_find: invalid pmap")); if (pmap->pm_pdir[pdir_idx]) return (&(pmap->pm_pdir[pdir_idx][ptbl_idx])); return (NULL); } /* Set up kernel page tables. */ static void kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir) { int i; vm_offset_t va; pte_t *pte; /* Initialize kernel pdir */ for (i = 0; i < kernel_ptbls; i++) kernel_pmap->pm_pdir[kptbl_min + i] = (pte_t *)(pdir + (i * PAGE_SIZE * PTBL_PAGES)); /* * Fill in PTEs covering kernel code and data. They are not required * for address translation, as this area is covered by static TLB1 * entries, but for pte_vatopa() to work correctly with kernel area * addresses. */ for (va = addr; va < data_end; va += PAGE_SIZE) { pte = &(kernel_pmap->pm_pdir[PDIR_IDX(va)][PTBL_IDX(va)]); *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart)); *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID | PTE_PS_4KB; } } #endif /**************************************************************************/ /* PMAP related */ /**************************************************************************/ /* * This is called during booke_init, before the system is really initialized. */ static void mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend) { vm_paddr_t phys_kernelend; struct mem_region *mp, *mp1; int cnt, i, j; vm_paddr_t s, e, sz; vm_paddr_t physsz, hwphyssz; u_int phys_avail_count; vm_size_t kstack0_sz; vm_offset_t kernel_pdir, kstack0; vm_paddr_t kstack0_phys; void *dpcpu; vm_offset_t kernel_ptbl_root; debugf("mmu_booke_bootstrap: entered\n"); /* Set interesting system properties */ #ifdef __powerpc64__ hw_direct_map = 1; #else hw_direct_map = 0; #endif #if defined(COMPAT_FREEBSD32) || !defined(__powerpc64__) elf32_nxstack = 1; #endif /* Initialize invalidation mutex */ mtx_init(&tlbivax_mutex, "tlbivax", NULL, MTX_SPIN); /* Read TLB0 size and associativity. */ tlb0_get_tlbconf(); /* * Align kernel start and end address (kernel image). * Note that kernel end does not necessarily relate to kernsize. * kernsize is the size of the kernel that is actually mapped. */ data_start = round_page(kernelend); data_end = data_start; /* Allocate the dynamic per-cpu area. */ dpcpu = (void *)data_end; data_end += DPCPU_SIZE; /* Allocate space for the message buffer. */ msgbufp = (struct msgbuf *)data_end; data_end += msgbufsize; debugf(" msgbufp at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", (uintptr_t)msgbufp, data_end); data_end = round_page(data_end); #ifdef __powerpc64__ kernel_ptbl_root = data_end; data_end += PP2D_NENTRIES * sizeof(pte_t**); #else /* Allocate space for ptbl_bufs. */ ptbl_bufs = (struct ptbl_buf *)data_end; data_end += sizeof(struct ptbl_buf) * PTBL_BUFS; debugf(" ptbl_bufs at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", (uintptr_t)ptbl_bufs, data_end); data_end = round_page(data_end); kernel_ptbl_root = data_end; data_end += PDIR_NENTRIES * sizeof(pte_t*); #endif /* Allocate PTE tables for kernel KVA. */ kernel_pdir = data_end; kernel_ptbls = howmany(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, PDIR_SIZE); #ifdef __powerpc64__ kernel_pdirs = howmany(kernel_ptbls, PDIR_NENTRIES); data_end += kernel_pdirs * PDIR_PAGES * PAGE_SIZE; #endif data_end += kernel_ptbls * PTBL_PAGES * PAGE_SIZE; debugf(" kernel ptbls: %d\n", kernel_ptbls); debugf(" kernel pdir at 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", kernel_pdir, data_end); /* Retrieve phys/avail mem regions */ mem_regions(&physmem_regions, &physmem_regions_sz, &availmem_regions, &availmem_regions_sz); if (PHYS_AVAIL_ENTRIES < availmem_regions_sz) panic("mmu_booke_bootstrap: phys_avail too small"); data_end = round_page(data_end); vm_page_array = (vm_page_t)data_end; /* * Get a rough idea (upper bound) on the size of the page array. The * vm_page_array will not handle any more pages than we have in the * avail_regions array, and most likely much less. */ sz = 0; for (mp = availmem_regions; mp->mr_size; mp++) { sz += mp->mr_size; } sz = (round_page(sz) / (PAGE_SIZE + sizeof(struct vm_page))); data_end += round_page(sz * sizeof(struct vm_page)); /* Pre-round up to 1MB. This wastes some space, but saves TLB entries */ data_end = roundup2(data_end, 1 << 20); debugf(" data_end: 0x%"PRI0ptrX"\n", data_end); debugf(" kernstart: %#zx\n", kernstart); debugf(" kernsize: %#zx\n", kernsize); if (data_end - kernstart > kernsize) { kernsize += tlb1_mapin_region(kernstart + kernsize, kernload + kernsize, (data_end - kernstart) - kernsize, _TLB_ENTRY_MEM); } data_end = kernstart + kernsize; debugf(" updated data_end: 0x%"PRI0ptrX"\n", data_end); /* * Clear the structures - note we can only do it safely after the * possible additional TLB1 translations are in place (above) so that * all range up to the currently calculated 'data_end' is covered. */ dpcpu_init(dpcpu, 0); #ifdef __powerpc64__ memset((void *)kernel_pdir, 0, kernel_pdirs * PDIR_PAGES * PAGE_SIZE + kernel_ptbls * PTBL_PAGES * PAGE_SIZE); #else memset((void *)ptbl_bufs, 0, sizeof(struct ptbl_buf) * PTBL_SIZE); memset((void *)kernel_pdir, 0, kernel_ptbls * PTBL_PAGES * PAGE_SIZE); #endif /*******************************************************/ /* Set the start and end of kva. */ /*******************************************************/ virtual_avail = round_page(data_end); virtual_end = VM_MAX_KERNEL_ADDRESS; #ifndef __powerpc64__ /* Allocate KVA space for page zero/copy operations. */ zero_page_va = virtual_avail; virtual_avail += PAGE_SIZE; copy_page_src_va = virtual_avail; virtual_avail += PAGE_SIZE; copy_page_dst_va = virtual_avail; virtual_avail += PAGE_SIZE; debugf("zero_page_va = 0x%"PRI0ptrX"\n", zero_page_va); debugf("copy_page_src_va = 0x%"PRI0ptrX"\n", copy_page_src_va); debugf("copy_page_dst_va = 0x%"PRI0ptrX"\n", copy_page_dst_va); /* Initialize page zero/copy mutexes. */ mtx_init(&zero_page_mutex, "mmu_booke_zero_page", NULL, MTX_DEF); mtx_init(©_page_mutex, "mmu_booke_copy_page", NULL, MTX_DEF); /* Allocate KVA space for ptbl bufs. */ ptbl_buf_pool_vabase = virtual_avail; virtual_avail += PTBL_BUFS * PTBL_PAGES * PAGE_SIZE; debugf("ptbl_buf_pool_vabase = 0x%"PRI0ptrX" end = 0x%"PRI0ptrX"\n", ptbl_buf_pool_vabase, virtual_avail); #endif /* Calculate corresponding physical addresses for the kernel region. */ phys_kernelend = kernload + kernsize; debugf("kernel image and allocated data:\n"); debugf(" kernload = 0x%09jx\n", (uintmax_t)kernload); debugf(" kernstart = 0x%"PRI0ptrX"\n", kernstart); debugf(" kernsize = 0x%"PRI0ptrX"\n", kernsize); /* * Remove kernel physical address range from avail regions list. Page * align all regions. Non-page aligned memory isn't very interesting * to us. Also, sort the entries for ascending addresses. */ sz = 0; cnt = availmem_regions_sz; debugf("processing avail regions:\n"); for (mp = availmem_regions; mp->mr_size; mp++) { s = mp->mr_start; e = mp->mr_start + mp->mr_size; debugf(" %09jx-%09jx -> ", (uintmax_t)s, (uintmax_t)e); /* Check whether this region holds all of the kernel. */ if (s < kernload && e > phys_kernelend) { availmem_regions[cnt].mr_start = phys_kernelend; availmem_regions[cnt++].mr_size = e - phys_kernelend; e = kernload; } /* Look whether this regions starts within the kernel. */ if (s >= kernload && s < phys_kernelend) { if (e <= phys_kernelend) goto empty; s = phys_kernelend; } /* Now look whether this region ends within the kernel. */ if (e > kernload && e <= phys_kernelend) { if (s >= kernload) goto empty; e = kernload; } /* Now page align the start and size of the region. */ s = round_page(s); e = trunc_page(e); if (e < s) e = s; sz = e - s; debugf("%09jx-%09jx = %jx\n", (uintmax_t)s, (uintmax_t)e, (uintmax_t)sz); /* Check whether some memory is left here. */ if (sz == 0) { empty: memmove(mp, mp + 1, (cnt - (mp - availmem_regions)) * sizeof(*mp)); cnt--; mp--; continue; } /* Do an insertion sort. */ for (mp1 = availmem_regions; mp1 < mp; mp1++) if (s < mp1->mr_start) break; if (mp1 < mp) { memmove(mp1 + 1, mp1, (char *)mp - (char *)mp1); mp1->mr_start = s; mp1->mr_size = sz; } else { mp->mr_start = s; mp->mr_size = sz; } } availmem_regions_sz = cnt; /*******************************************************/ /* Steal physical memory for kernel stack from the end */ /* of the first avail region */ /*******************************************************/ kstack0_sz = kstack_pages * PAGE_SIZE; kstack0_phys = availmem_regions[0].mr_start + availmem_regions[0].mr_size; kstack0_phys -= kstack0_sz; availmem_regions[0].mr_size -= kstack0_sz; /*******************************************************/ /* Fill in phys_avail table, based on availmem_regions */ /*******************************************************/ phys_avail_count = 0; physsz = 0; hwphyssz = 0; TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); debugf("fill in phys_avail:\n"); for (i = 0, j = 0; i < availmem_regions_sz; i++, j += 2) { debugf(" region: 0x%jx - 0x%jx (0x%jx)\n", (uintmax_t)availmem_regions[i].mr_start, (uintmax_t)availmem_regions[i].mr_start + availmem_regions[i].mr_size, (uintmax_t)availmem_regions[i].mr_size); if (hwphyssz != 0 && (physsz + availmem_regions[i].mr_size) >= hwphyssz) { debugf(" hw.physmem adjust\n"); if (physsz < hwphyssz) { phys_avail[j] = availmem_regions[i].mr_start; phys_avail[j + 1] = availmem_regions[i].mr_start + hwphyssz - physsz; physsz = hwphyssz; phys_avail_count++; dump_avail[j] = phys_avail[j]; dump_avail[j + 1] = phys_avail[j + 1]; } break; } phys_avail[j] = availmem_regions[i].mr_start; phys_avail[j + 1] = availmem_regions[i].mr_start + availmem_regions[i].mr_size; phys_avail_count++; physsz += availmem_regions[i].mr_size; dump_avail[j] = phys_avail[j]; dump_avail[j + 1] = phys_avail[j + 1]; } physmem = btoc(physsz); /* Calculate the last available physical address. */ for (i = 0; phys_avail[i + 2] != 0; i += 2) ; Maxmem = powerpc_btop(phys_avail[i + 1]); debugf("Maxmem = 0x%08lx\n", Maxmem); debugf("phys_avail_count = %d\n", phys_avail_count); debugf("physsz = 0x%09jx physmem = %jd (0x%09jx)\n", (uintmax_t)physsz, (uintmax_t)physmem, (uintmax_t)physmem); #ifdef __powerpc64__ /* * Map the physical memory contiguously in TLB1. * Round so it fits into a single mapping. */ tlb1_mapin_region(DMAP_BASE_ADDRESS, 0, phys_avail[i + 1], _TLB_ENTRY_MEM); #endif /*******************************************************/ /* Initialize (statically allocated) kernel pmap. */ /*******************************************************/ PMAP_LOCK_INIT(kernel_pmap); #ifdef __powerpc64__ kernel_pmap->pm_pp2d = (pte_t ***)kernel_ptbl_root; #else kptbl_min = VM_MIN_KERNEL_ADDRESS / PDIR_SIZE; kernel_pmap->pm_pdir = (pte_t **)kernel_ptbl_root; #endif debugf("kernel_pmap = 0x%"PRI0ptrX"\n", (uintptr_t)kernel_pmap); kernel_pte_alloc(virtual_avail, kernstart, kernel_pdir); for (i = 0; i < MAXCPU; i++) { kernel_pmap->pm_tid[i] = TID_KERNEL; /* Initialize each CPU's tidbusy entry 0 with kernel_pmap */ tidbusy[i][TID_KERNEL] = kernel_pmap; } /* Mark kernel_pmap active on all CPUs */ CPU_FILL(&kernel_pmap->pm_active); /* * Initialize the global pv list lock. */ rw_init(&pvh_global_lock, "pmap pv global"); /*******************************************************/ /* Final setup */ /*******************************************************/ /* Enter kstack0 into kernel map, provide guard page */ kstack0 = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; thread0.td_kstack = kstack0; thread0.td_kstack_pages = kstack_pages; debugf("kstack_sz = 0x%08jx\n", (uintmax_t)kstack0_sz); debugf("kstack0_phys at 0x%09jx - 0x%09jx\n", (uintmax_t)kstack0_phys, (uintmax_t)kstack0_phys + kstack0_sz); debugf("kstack0 at 0x%"PRI0ptrX" - 0x%"PRI0ptrX"\n", kstack0, kstack0 + kstack0_sz); virtual_avail += KSTACK_GUARD_PAGES * PAGE_SIZE + kstack0_sz; for (i = 0; i < kstack_pages; i++) { mmu_booke_kenter(mmu, kstack0, kstack0_phys); kstack0 += PAGE_SIZE; kstack0_phys += PAGE_SIZE; } pmap_bootstrapped = 1; debugf("virtual_avail = %"PRI0ptrX"\n", virtual_avail); debugf("virtual_end = %"PRI0ptrX"\n", virtual_end); debugf("mmu_booke_bootstrap: exit\n"); } #ifdef SMP void tlb1_ap_prep(void) { tlb_entry_t *e, tmp; unsigned int i; /* Prepare TLB1 image for AP processors */ e = __boot_tlb1; for (i = 0; i < TLB1_ENTRIES; i++) { tlb1_read_entry(&tmp, i); if ((tmp.mas1 & MAS1_VALID) && (tmp.mas2 & _TLB_ENTRY_SHARED)) memcpy(e++, &tmp, sizeof(tmp)); } } void pmap_bootstrap_ap(volatile uint32_t *trcp __unused) { int i; /* * Finish TLB1 configuration: the BSP already set up its TLB1 and we * have the snapshot of its contents in the s/w __boot_tlb1[] table * created by tlb1_ap_prep(), so use these values directly to * (re)program AP's TLB1 hardware. * * Start at index 1 because index 0 has the kernel map. */ for (i = 1; i < TLB1_ENTRIES; i++) { if (__boot_tlb1[i].mas1 & MAS1_VALID) tlb1_write_entry(&__boot_tlb1[i], i); } set_mas4_defaults(); } #endif static void booke_pmap_init_qpages(void) { struct pcpu *pc; int i; CPU_FOREACH(i) { pc = pcpu_find(i); pc->pc_qmap_addr = kva_alloc(PAGE_SIZE); if (pc->pc_qmap_addr == 0) panic("pmap_init_qpages: unable to allocate KVA"); } } SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, booke_pmap_init_qpages, NULL); /* * Get the physical page address for the given pmap/virtual address. */ static vm_paddr_t mmu_booke_extract(mmu_t mmu, pmap_t pmap, vm_offset_t va) { vm_paddr_t pa; PMAP_LOCK(pmap); pa = pte_vatopa(mmu, pmap, va); PMAP_UNLOCK(pmap); return (pa); } /* * Extract the physical page address associated with the given * kernel virtual address. */ static vm_paddr_t mmu_booke_kextract(mmu_t mmu, vm_offset_t va) { tlb_entry_t e; vm_paddr_t p = 0; int i; #ifdef __powerpc64__ if (va >= DMAP_BASE_ADDRESS && va <= DMAP_MAX_ADDRESS) return (DMAP_TO_PHYS(va)); #endif if (va >= VM_MIN_KERNEL_ADDRESS && va <= VM_MAX_KERNEL_ADDRESS) p = pte_vatopa(mmu, kernel_pmap, va); if (p == 0) { /* Check TLB1 mappings */ for (i = 0; i < TLB1_ENTRIES; i++) { tlb1_read_entry(&e, i); if (!(e.mas1 & MAS1_VALID)) continue; if (va >= e.virt && va < e.virt + e.size) return (e.phys + (va - e.virt)); } } return (p); } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ static void mmu_booke_init(mmu_t mmu) { int shpgperproc = PMAP_SHPGPERPROC; /* * Initialize the address space (zone) for the pv entries. Set a * high water mark so that the system can recover from excessive * numbers of pv entries. */ pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count; TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); pv_entry_high_water = 9 * (pv_entry_max / 10); uma_zone_reserve_kva(pvzone, pv_entry_max); /* Pre-fill pvzone with initial number of pv entries. */ uma_prealloc(pvzone, PV_ENTRY_ZONE_MIN); /* Create a UMA zone for page table roots. */ ptbl_root_zone = uma_zcreate("pmap root", PMAP_ROOT_SIZE, NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, UMA_ZONE_VM); /* Initialize ptbl allocation. */ ptbl_init(); } /* * Map a list of wired pages into kernel virtual address space. This is * intended for temporary mappings which do not need page modification or * references recorded. Existing mappings in the region are overwritten. */ static void mmu_booke_qenter(mmu_t mmu, vm_offset_t sva, vm_page_t *m, int count) { vm_offset_t va; va = sva; while (count-- > 0) { mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(*m)); va += PAGE_SIZE; m++; } } /* * Remove page mappings from kernel virtual address space. Intended for * temporary mappings entered by mmu_booke_qenter. */ static void mmu_booke_qremove(mmu_t mmu, vm_offset_t sva, int count) { vm_offset_t va; va = sva; while (count-- > 0) { mmu_booke_kremove(mmu, va); va += PAGE_SIZE; } } /* * Map a wired page into kernel virtual address space. */ static void mmu_booke_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa) { mmu_booke_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); } static void mmu_booke_kenter_attr(mmu_t mmu, vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma) { uint32_t flags; pte_t *pte; KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_kenter: invalid va")); flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID; flags |= tlb_calc_wimg(pa, ma) << PTE_MAS2_SHIFT; flags |= PTE_PS_4KB; pte = pte_find(mmu, kernel_pmap, va); KASSERT((pte != NULL), ("mmu_booke_kenter: invalid va. NULL PTE")); mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); if (PTE_ISVALID(pte)) { CTR1(KTR_PMAP, "%s: replacing entry!", __func__); /* Flush entry from TLB0 */ tlb0_flush_entry(va); } *pte = PTE_RPN_FROM_PA(pa) | flags; //debugf("mmu_booke_kenter: pdir_idx = %d ptbl_idx = %d va=0x%08x " // "pa=0x%08x rpn=0x%08x flags=0x%08x\n", // pdir_idx, ptbl_idx, va, pa, pte->rpn, pte->flags); /* Flush the real memory from the instruction cache. */ if ((flags & (PTE_I | PTE_G)) == 0) __syncicache((void *)va, PAGE_SIZE); tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); } /* * Remove a page from kernel page table. */ static void mmu_booke_kremove(mmu_t mmu, vm_offset_t va) { pte_t *pte; CTR2(KTR_PMAP,"%s: s (va = 0x%"PRI0ptrX")\n", __func__, va); KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_kremove: invalid va")); pte = pte_find(mmu, kernel_pmap, va); if (!PTE_ISVALID(pte)) { CTR1(KTR_PMAP, "%s: invalid pte", __func__); return; } mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); /* Invalidate entry in TLB0, update PTE. */ tlb0_flush_entry(va); *pte = 0; tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); } /* * Provide a kernel pointer corresponding to a given userland pointer. * The returned pointer is valid until the next time this function is * called in this thread. This is used internally in copyin/copyout. */ int mmu_booke_map_user_ptr(mmu_t mmu, pmap_t pm, volatile const void *uaddr, void **kaddr, size_t ulen, size_t *klen) { if (trunc_page((uintptr_t)uaddr + ulen) > VM_MAXUSER_ADDRESS) return (EFAULT); *kaddr = (void *)(uintptr_t)uaddr; if (klen) *klen = ulen; return (0); } /* * Figure out where a given kernel pointer (usually in a fault) points * to from the VM's perspective, potentially remapping into userland's * address space. */ static int mmu_booke_decode_kernel_ptr(mmu_t mmu, vm_offset_t addr, int *is_user, vm_offset_t *decoded_addr) { if (trunc_page(addr) <= VM_MAXUSER_ADDRESS) *is_user = 1; else *is_user = 0; *decoded_addr = addr; return (0); } /* * Initialize pmap associated with process 0. */ static void mmu_booke_pinit0(mmu_t mmu, pmap_t pmap) { PMAP_LOCK_INIT(pmap); mmu_booke_pinit(mmu, pmap); PCPU_SET(curpmap, pmap); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ static void mmu_booke_pinit(mmu_t mmu, pmap_t pmap) { int i; CTR4(KTR_PMAP, "%s: pmap = %p, proc %d '%s'", __func__, pmap, curthread->td_proc->p_pid, curthread->td_proc->p_comm); KASSERT((pmap != kernel_pmap), ("pmap_pinit: initializing kernel_pmap")); for (i = 0; i < MAXCPU; i++) pmap->pm_tid[i] = TID_NONE; CPU_ZERO(&kernel_pmap->pm_active); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); #ifdef __powerpc64__ pmap->pm_pp2d = uma_zalloc(ptbl_root_zone, M_WAITOK); bzero(pmap->pm_pp2d, sizeof(pte_t **) * PP2D_NENTRIES); #else pmap->pm_pdir = uma_zalloc(ptbl_root_zone, M_WAITOK); bzero(pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES); TAILQ_INIT(&pmap->pm_ptbl_list); #endif } /* * Release any resources held by the given physical map. * Called when a pmap initialized by mmu_booke_pinit is being released. * Should only be called if the map contains no valid mappings. */ static void mmu_booke_release(mmu_t mmu, pmap_t pmap) { KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); #ifdef __powerpc64__ uma_zfree(ptbl_root_zone, pmap->pm_pp2d); #else uma_zfree(ptbl_root_zone, pmap->pm_pdir); #endif } /* * Insert the given physical page at the specified virtual address in the * target physical map with the protection requested. If specified the page * will be wired down. */ static int mmu_booke_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind) { int error; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); error = mmu_booke_enter_locked(mmu, pmap, va, m, prot, flags, psind); PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); return (error); } static int mmu_booke_enter_locked(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int pmap_flags, int8_t psind __unused) { pte_t *pte; vm_paddr_t pa; uint32_t flags; int error, su, sync; pa = VM_PAGE_TO_PHYS(m); su = (pmap == kernel_pmap); sync = 0; //debugf("mmu_booke_enter_locked: s (pmap=0x%08x su=%d tid=%d m=0x%08x va=0x%08x " // "pa=0x%08x prot=0x%08x flags=%#x)\n", // (u_int32_t)pmap, su, pmap->pm_tid, // (u_int32_t)m, va, pa, prot, flags); if (su) { KASSERT(((va >= virtual_avail) && (va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_enter_locked: kernel pmap, non kernel va")); } else { KASSERT((va <= VM_MAXUSER_ADDRESS), ("mmu_booke_enter_locked: user pmap, non user va")); } if ((m->oflags & VPO_UNMANAGED) == 0) { if ((pmap_flags & PMAP_ENTER_QUICK_LOCKED) == 0) VM_PAGE_OBJECT_BUSY_ASSERT(m); else VM_OBJECT_ASSERT_LOCKED(m->object); } PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * If there is an existing mapping, and the physical address has not * changed, must be protection or wiring change. */ if (((pte = pte_find(mmu, pmap, va)) != NULL) && (PTE_ISVALID(pte)) && (PTE_PA(pte) == pa)) { /* * Before actually updating pte->flags we calculate and * prepare its new value in a helper var. */ flags = *pte; flags &= ~(PTE_UW | PTE_UX | PTE_SW | PTE_SX | PTE_MODIFIED); /* Wiring change, just update stats. */ if ((pmap_flags & PMAP_ENTER_WIRED) != 0) { if (!PTE_ISWIRED(pte)) { flags |= PTE_WIRED; pmap->pm_stats.wired_count++; } } else { if (PTE_ISWIRED(pte)) { flags &= ~PTE_WIRED; pmap->pm_stats.wired_count--; } } if (prot & VM_PROT_WRITE) { /* Add write permissions. */ flags |= PTE_SW; if (!su) flags |= PTE_UW; if ((flags & PTE_MANAGED) != 0) vm_page_aflag_set(m, PGA_WRITEABLE); } else { /* Handle modified pages, sense modify status. */ /* * The PTE_MODIFIED flag could be set by underlying * TLB misses since we last read it (above), possibly * other CPUs could update it so we check in the PTE * directly rather than rely on that saved local flags * copy. */ if (PTE_ISMODIFIED(pte)) vm_page_dirty(m); } if (prot & VM_PROT_EXECUTE) { flags |= PTE_SX; if (!su) flags |= PTE_UX; /* * Check existing flags for execute permissions: if we * are turning execute permissions on, icache should * be flushed. */ if ((*pte & (PTE_UX | PTE_SX)) == 0) sync++; } flags &= ~PTE_REFERENCED; /* * The new flags value is all calculated -- only now actually * update the PTE. */ mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); tlb0_flush_entry(va); *pte &= ~PTE_FLAGS_MASK; *pte |= flags; tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); } else { /* * If there is an existing mapping, but it's for a different * physical address, pte_enter() will delete the old mapping. */ //if ((pte != NULL) && PTE_ISVALID(pte)) // debugf("mmu_booke_enter_locked: replace\n"); //else // debugf("mmu_booke_enter_locked: new\n"); /* Now set up the flags and install the new mapping. */ flags = (PTE_SR | PTE_VALID); flags |= PTE_M; if (!su) flags |= PTE_UR; if (prot & VM_PROT_WRITE) { flags |= PTE_SW; if (!su) flags |= PTE_UW; if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(m, PGA_WRITEABLE); } if (prot & VM_PROT_EXECUTE) { flags |= PTE_SX; if (!su) flags |= PTE_UX; } /* If its wired update stats. */ if ((pmap_flags & PMAP_ENTER_WIRED) != 0) flags |= PTE_WIRED; error = pte_enter(mmu, pmap, m, va, flags, (pmap_flags & PMAP_ENTER_NOSLEEP) != 0); if (error != 0) return (KERN_RESOURCE_SHORTAGE); if ((flags & PMAP_ENTER_WIRED) != 0) pmap->pm_stats.wired_count++; /* Flush the real memory from the instruction cache. */ if (prot & VM_PROT_EXECUTE) sync++; } if (sync && (su || pmap == PCPU_GET(curpmap))) { __syncicache((void *)va, PAGE_SIZE); sync = 0; } return (KERN_SUCCESS); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ static void mmu_booke_enter_object(mmu_t mmu, pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_page_t m; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); m = m_start; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { mmu_booke_enter_locked(mmu, pmap, start + ptoa(diff), m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, 0); m = TAILQ_NEXT(m, listq); } rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } static void mmu_booke_enter_quick(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); mmu_booke_enter_locked(mmu, pmap, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, 0); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly rounded to the page size. */ static void mmu_booke_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_offset_t endva) { pte_t *pte; uint8_t hold_flag; int su = (pmap == kernel_pmap); //debugf("mmu_booke_remove: s (su = %d pmap=0x%08x tid=%d va=0x%08x endva=0x%08x)\n", // su, (u_int32_t)pmap, pmap->pm_tid, va, endva); if (su) { KASSERT(((va >= virtual_avail) && (va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_remove: kernel pmap, non kernel va")); } else { KASSERT((va <= VM_MAXUSER_ADDRESS), ("mmu_booke_remove: user pmap, non user va")); } if (PMAP_REMOVE_DONE(pmap)) { //debugf("mmu_booke_remove: e (empty)\n"); return; } hold_flag = PTBL_HOLD_FLAG(pmap); //debugf("mmu_booke_remove: hold_flag = %d\n", hold_flag); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); for (; va < endva; va += PAGE_SIZE) { pte = pte_find(mmu, pmap, va); if ((pte != NULL) && PTE_ISVALID(pte)) pte_remove(mmu, pmap, va, hold_flag); } PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); //debugf("mmu_booke_remove: e\n"); } /* * Remove physical page from all pmaps in which it resides. */ static void mmu_booke_remove_all(mmu_t mmu, vm_page_t m) { pv_entry_t pv, pvn; uint8_t hold_flag; rw_wlock(&pvh_global_lock); for (pv = TAILQ_FIRST(&m->md.pv_list); pv != NULL; pv = pvn) { pvn = TAILQ_NEXT(pv, pv_link); PMAP_LOCK(pv->pv_pmap); hold_flag = PTBL_HOLD_FLAG(pv->pv_pmap); pte_remove(mmu, pv->pv_pmap, pv->pv_va, hold_flag); PMAP_UNLOCK(pv->pv_pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); } /* * Map a range of physical addresses into kernel virtual address space. */ static vm_offset_t mmu_booke_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start, vm_paddr_t pa_end, int prot) { vm_offset_t sva = *virt; vm_offset_t va = sva; #ifdef __powerpc64__ /* XXX: Handle memory not starting at 0x0. */ if (pa_end < ctob(Maxmem)) return (PHYS_TO_DMAP(pa_start)); #endif while (pa_start < pa_end) { mmu_booke_kenter(mmu, va, pa_start); va += PAGE_SIZE; pa_start += PAGE_SIZE; } *virt = va; return (sva); } /* * The pmap must be activated before it's address space can be accessed in any * way. */ static void mmu_booke_activate(mmu_t mmu, struct thread *td) { pmap_t pmap; u_int cpuid; pmap = &td->td_proc->p_vmspace->vm_pmap; CTR5(KTR_PMAP, "%s: s (td = %p, proc = '%s', id = %d, pmap = 0x%"PRI0ptrX")", __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); KASSERT((pmap != kernel_pmap), ("mmu_booke_activate: kernel_pmap!")); sched_pin(); cpuid = PCPU_GET(cpuid); CPU_SET_ATOMIC(cpuid, &pmap->pm_active); PCPU_SET(curpmap, pmap); if (pmap->pm_tid[cpuid] == TID_NONE) tid_alloc(pmap); /* Load PID0 register with pmap tid value. */ mtspr(SPR_PID0, pmap->pm_tid[cpuid]); __asm __volatile("isync"); mtspr(SPR_DBCR0, td->td_pcb->pcb_cpu.booke.dbcr0); sched_unpin(); CTR3(KTR_PMAP, "%s: e (tid = %d for '%s')", __func__, pmap->pm_tid[PCPU_GET(cpuid)], td->td_proc->p_comm); } /* * Deactivate the specified process's address space. */ static void mmu_booke_deactivate(mmu_t mmu, struct thread *td) { pmap_t pmap; pmap = &td->td_proc->p_vmspace->vm_pmap; CTR5(KTR_PMAP, "%s: td=%p, proc = '%s', id = %d, pmap = 0x%"PRI0ptrX, __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); td->td_pcb->pcb_cpu.booke.dbcr0 = mfspr(SPR_DBCR0); CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmap->pm_active); PCPU_SET(curpmap, NULL); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ static void mmu_booke_copy(mmu_t mmu, pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { } /* * Set the physical protection on the specified range of this map as requested. */ static void mmu_booke_protect(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t va; vm_page_t m; pte_t *pte; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { mmu_booke_remove(mmu, pmap, sva, eva); return; } if (prot & VM_PROT_WRITE) return; PMAP_LOCK(pmap); for (va = sva; va < eva; va += PAGE_SIZE) { if ((pte = pte_find(mmu, pmap, va)) != NULL) { if (PTE_ISVALID(pte)) { m = PHYS_TO_VM_PAGE(PTE_PA(pte)); mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); /* Handle modified pages. */ if (PTE_ISMODIFIED(pte) && PTE_ISMANAGED(pte)) vm_page_dirty(m); tlb0_flush_entry(va); *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); } } } PMAP_UNLOCK(pmap); } /* * Clear the write and modified bits in each of the given page's mappings. */ static void mmu_booke_remove_write(mmu_t mmu, vm_page_t m) { pv_entry_t pv; pte_t *pte; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("mmu_booke_remove_write: page %p is not managed", m)); vm_page_assert_busied(m); if (!pmap_page_is_write_mapped(m)) return; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { PMAP_LOCK(pv->pv_pmap); if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { if (PTE_ISVALID(pte)) { m = PHYS_TO_VM_PAGE(PTE_PA(pte)); mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); /* Handle modified pages. */ if (PTE_ISMODIFIED(pte)) vm_page_dirty(m); /* Flush mapping from TLB0. */ *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED); tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); } } PMAP_UNLOCK(pv->pv_pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); } static void mmu_booke_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) { pte_t *pte; vm_paddr_t pa = 0; int sync_sz, valid; #ifndef __powerpc64__ pmap_t pmap; vm_page_t m; vm_offset_t addr; int active; #endif #ifndef __powerpc64__ rw_wlock(&pvh_global_lock); pmap = PCPU_GET(curpmap); active = (pm == kernel_pmap || pm == pmap) ? 1 : 0; #endif while (sz > 0) { PMAP_LOCK(pm); pte = pte_find(mmu, pm, va); valid = (pte != NULL && PTE_ISVALID(pte)) ? 1 : 0; if (valid) pa = PTE_PA(pte); PMAP_UNLOCK(pm); sync_sz = PAGE_SIZE - (va & PAGE_MASK); sync_sz = min(sync_sz, sz); if (valid) { #ifdef __powerpc64__ pa += (va & PAGE_MASK); __syncicache((void *)PHYS_TO_DMAP(pa), sync_sz); #else if (!active) { /* Create a mapping in the active pmap. */ addr = 0; m = PHYS_TO_VM_PAGE(pa); PMAP_LOCK(pmap); pte_enter(mmu, pmap, m, addr, PTE_SR | PTE_VALID, FALSE); addr += (va & PAGE_MASK); __syncicache((void *)addr, sync_sz); pte_remove(mmu, pmap, addr, PTBL_UNHOLD); PMAP_UNLOCK(pmap); } else __syncicache((void *)va, sync_sz); #endif } va += sync_sz; sz -= sync_sz; } #ifndef __powerpc64__ rw_wunlock(&pvh_global_lock); #endif } /* * Atomically extract and hold the physical page with the given * pmap and virtual address pair if that mapping permits the given * protection. */ static vm_page_t mmu_booke_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pte_t *pte; vm_page_t m; uint32_t pte_wbit; m = NULL; PMAP_LOCK(pmap); pte = pte_find(mmu, pmap, va); if ((pte != NULL) && PTE_ISVALID(pte)) { if (pmap == kernel_pmap) pte_wbit = PTE_SW; else pte_wbit = PTE_UW; if ((*pte & pte_wbit) != 0 || (prot & VM_PROT_WRITE) == 0) { m = PHYS_TO_VM_PAGE(PTE_PA(pte)); if (!vm_page_wire_mapped(m)) m = NULL; } } PMAP_UNLOCK(pmap); return (m); } /* * Initialize a vm_page's machine-dependent fields. */ static void mmu_booke_page_init(mmu_t mmu, vm_page_t m) { m->md.pv_tracked = 0; TAILQ_INIT(&m->md.pv_list); } /* * mmu_booke_zero_page_area zeros the specified hardware page by * mapping it into virtual memory and using bzero to clear * its contents. * * off and size must reside within a single page. */ static void mmu_booke_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) { vm_offset_t va; /* XXX KASSERT off and size are within a single page? */ #ifdef __powerpc64__ va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); bzero((caddr_t)va + off, size); #else mtx_lock(&zero_page_mutex); va = zero_page_va; mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); bzero((caddr_t)va + off, size); mmu_booke_kremove(mmu, va); mtx_unlock(&zero_page_mutex); #endif } /* * mmu_booke_zero_page zeros the specified hardware page. */ static void mmu_booke_zero_page(mmu_t mmu, vm_page_t m) { vm_offset_t off, va; #ifdef __powerpc64__ va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); for (off = 0; off < PAGE_SIZE; off += cacheline_size) __asm __volatile("dcbz 0,%0" :: "r"(va + off)); #else va = zero_page_va; mtx_lock(&zero_page_mutex); mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); for (off = 0; off < PAGE_SIZE; off += cacheline_size) __asm __volatile("dcbz 0,%0" :: "r"(va + off)); mmu_booke_kremove(mmu, va); mtx_unlock(&zero_page_mutex); #endif } /* * mmu_booke_copy_page copies the specified (machine independent) page by * mapping the page into virtual memory and using memcopy to copy the page, * one machine dependent page at a time. */ static void mmu_booke_copy_page(mmu_t mmu, vm_page_t sm, vm_page_t dm) { vm_offset_t sva, dva; #ifdef __powerpc64__ sva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(sm)); dva = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dm)); memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE); #else sva = copy_page_src_va; dva = copy_page_dst_va; mtx_lock(©_page_mutex); mmu_booke_kenter(mmu, sva, VM_PAGE_TO_PHYS(sm)); mmu_booke_kenter(mmu, dva, VM_PAGE_TO_PHYS(dm)); memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE); mmu_booke_kremove(mmu, dva); mmu_booke_kremove(mmu, sva); mtx_unlock(©_page_mutex); #endif } static inline void mmu_booke_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, vm_page_t *mb, vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_offset_t a_pg_offset, b_pg_offset; int cnt; #ifdef __powerpc64__ vm_page_t pa, pb; while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; pa = ma[a_offset >> PAGE_SHIFT]; b_pg_offset = b_offset & PAGE_MASK; pb = mb[b_offset >> PAGE_SHIFT]; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); cnt = min(cnt, PAGE_SIZE - b_pg_offset); a_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pa)) + a_pg_offset); b_cp = (caddr_t)((uintptr_t)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pb)) + b_pg_offset); bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } #else mtx_lock(©_page_mutex); while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); mmu_booke_kenter(mmu, copy_page_src_va, VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])); a_cp = (char *)copy_page_src_va + a_pg_offset; b_pg_offset = b_offset & PAGE_MASK; cnt = min(cnt, PAGE_SIZE - b_pg_offset); mmu_booke_kenter(mmu, copy_page_dst_va, VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])); b_cp = (char *)copy_page_dst_va + b_pg_offset; bcopy(a_cp, b_cp, cnt); mmu_booke_kremove(mmu, copy_page_dst_va); mmu_booke_kremove(mmu, copy_page_src_va); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } mtx_unlock(©_page_mutex); #endif } static vm_offset_t mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m) { #ifdef __powerpc64__ return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); #else vm_paddr_t paddr; vm_offset_t qaddr; uint32_t flags; pte_t *pte; paddr = VM_PAGE_TO_PHYS(m); flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID; flags |= tlb_calc_wimg(paddr, pmap_page_get_memattr(m)) << PTE_MAS2_SHIFT; flags |= PTE_PS_4KB; critical_enter(); qaddr = PCPU_GET(qmap_addr); pte = pte_find(mmu, kernel_pmap, qaddr); KASSERT(*pte == 0, ("mmu_booke_quick_enter_page: PTE busy")); /* * XXX: tlbivax is broadcast to other cores, but qaddr should * not be present in other TLBs. Is there a better instruction * sequence to use? Or just forget it & use mmu_booke_kenter()... */ __asm __volatile("tlbivax 0, %0" :: "r"(qaddr & MAS2_EPN_MASK)); __asm __volatile("isync; msync"); *pte = PTE_RPN_FROM_PA(paddr) | flags; /* Flush the real memory from the instruction cache. */ if ((flags & (PTE_I | PTE_G)) == 0) __syncicache((void *)qaddr, PAGE_SIZE); return (qaddr); #endif } static void mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr) { #ifndef __powerpc64__ pte_t *pte; pte = pte_find(mmu, kernel_pmap, addr); KASSERT(PCPU_GET(qmap_addr) == addr, ("mmu_booke_quick_remove_page: invalid address")); KASSERT(*pte != 0, ("mmu_booke_quick_remove_page: PTE not in use")); *pte = 0; critical_exit(); #endif } /* * Return whether or not the specified physical page was modified * in any of physical maps. */ static boolean_t mmu_booke_is_modified(mmu_t mmu, vm_page_t m) { pte_t *pte; pv_entry_t pv; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("mmu_booke_is_modified: page %p is not managed", m)); rv = FALSE; /* * If the page is not busied then this check is racy. */ if (!pmap_page_is_write_mapped(m)) return (FALSE); rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { PMAP_LOCK(pv->pv_pmap); if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && PTE_ISVALID(pte)) { if (PTE_ISMODIFIED(pte)) rv = TRUE; } PMAP_UNLOCK(pv->pv_pmap); if (rv) break; } rw_wunlock(&pvh_global_lock); return (rv); } /* * Return whether or not the specified virtual address is eligible * for prefault. */ static boolean_t mmu_booke_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t addr) { return (FALSE); } /* * Return whether or not the specified physical page was referenced * in any physical maps. */ static boolean_t mmu_booke_is_referenced(mmu_t mmu, vm_page_t m) { pte_t *pte; pv_entry_t pv; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("mmu_booke_is_referenced: page %p is not managed", m)); rv = FALSE; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { PMAP_LOCK(pv->pv_pmap); if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && PTE_ISVALID(pte)) { if (PTE_ISREFERENCED(pte)) rv = TRUE; } PMAP_UNLOCK(pv->pv_pmap); if (rv) break; } rw_wunlock(&pvh_global_lock); return (rv); } /* * Clear the modify bits on the specified physical page. */ static void mmu_booke_clear_modify(mmu_t mmu, vm_page_t m) { pte_t *pte; pv_entry_t pv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("mmu_booke_clear_modify: page %p is not managed", m)); vm_page_assert_busied(m); if (!pmap_page_is_write_mapped(m)) return; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { PMAP_LOCK(pv->pv_pmap); if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && PTE_ISVALID(pte)) { mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); if (*pte & (PTE_SW | PTE_UW | PTE_MODIFIED)) { tlb0_flush_entry(pv->pv_va); *pte &= ~(PTE_SW | PTE_UW | PTE_MODIFIED | PTE_REFERENCED); } tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); } PMAP_UNLOCK(pv->pv_pmap); } rw_wunlock(&pvh_global_lock); } /* * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * As an optimization, update the page's dirty field if a modified bit is * found while counting reference bits. This opportunistic update can be * performed at low cost and can eliminate the need for some future calls * to pmap_is_modified(). However, since this function stops after * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some * dirty pages. Those dirty pages will only be detected by a future call * to pmap_is_modified(). */ static int mmu_booke_ts_referenced(mmu_t mmu, vm_page_t m) { pte_t *pte; pv_entry_t pv; int count; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("mmu_booke_ts_referenced: page %p is not managed", m)); count = 0; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { PMAP_LOCK(pv->pv_pmap); if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL && PTE_ISVALID(pte)) { if (PTE_ISMODIFIED(pte)) vm_page_dirty(m); if (PTE_ISREFERENCED(pte)) { mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); tlb0_flush_entry(pv->pv_va); *pte &= ~PTE_REFERENCED; tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); if (++count >= PMAP_TS_REFERENCED_MAX) { PMAP_UNLOCK(pv->pv_pmap); break; } } } PMAP_UNLOCK(pv->pv_pmap); } rw_wunlock(&pvh_global_lock); return (count); } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range must * have the wired attribute set. In contrast, invalid mappings cannot have * the wired attribute set, so they are ignored. * * The wired attribute of the page table entry is not a hardware feature, so * there is no need to invalidate any TLB entries. */ static void mmu_booke_unwire(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t va; pte_t *pte; PMAP_LOCK(pmap); for (va = sva; va < eva; va += PAGE_SIZE) { if ((pte = pte_find(mmu, pmap, va)) != NULL && PTE_ISVALID(pte)) { if (!PTE_ISWIRED(pte)) panic("mmu_booke_unwire: pte %p isn't wired", pte); *pte &= ~PTE_WIRED; pmap->pm_stats.wired_count--; } } PMAP_UNLOCK(pmap); } /* * Return true if the pmap's pv is one of the first 16 pvs linked to from this * page. This count may be changed upwards or downwards in the future; it is * only necessary that true be returned for a small subset of pmaps for proper * page aging. */ static boolean_t mmu_booke_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m) { pv_entry_t pv; int loops; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("mmu_booke_page_exists_quick: page %p is not managed", m)); loops = 0; rv = FALSE; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { if (pv->pv_pmap == pmap) { rv = TRUE; break; } if (++loops >= 16) break; } rw_wunlock(&pvh_global_lock); return (rv); } /* * Return the number of managed mappings to the given physical page that are * wired. */ static int mmu_booke_page_wired_mappings(mmu_t mmu, vm_page_t m) { pv_entry_t pv; pte_t *pte; int count = 0; if ((m->oflags & VPO_UNMANAGED) != 0) return (count); rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { PMAP_LOCK(pv->pv_pmap); if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) if (PTE_ISVALID(pte) && PTE_ISWIRED(pte)) count++; PMAP_UNLOCK(pv->pv_pmap); } rw_wunlock(&pvh_global_lock); return (count); } static int mmu_booke_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size) { int i; vm_offset_t va; /* * This currently does not work for entries that * overlap TLB1 entries. */ for (i = 0; i < TLB1_ENTRIES; i ++) { if (tlb1_iomapped(i, pa, size, &va) == 0) return (0); } return (EFAULT); } void mmu_booke_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, void **va) { vm_paddr_t ppa; vm_offset_t ofs; vm_size_t gran; /* Minidumps are based on virtual memory addresses. */ if (do_minidump) { *va = (void *)(vm_offset_t)pa; return; } /* Raw physical memory dumps don't have a virtual address. */ /* We always map a 256MB page at 256M. */ gran = 256 * 1024 * 1024; ppa = rounddown2(pa, gran); ofs = pa - ppa; *va = (void *)gran; tlb1_set_entry((vm_offset_t)va, ppa, gran, _TLB_ENTRY_IO); if (sz > (gran - ofs)) tlb1_set_entry((vm_offset_t)(va + gran), ppa + gran, gran, _TLB_ENTRY_IO); } void mmu_booke_dumpsys_unmap(mmu_t mmu, vm_paddr_t pa, size_t sz, void *va) { vm_paddr_t ppa; vm_offset_t ofs; vm_size_t gran; tlb_entry_t e; int i; /* Minidumps are based on virtual memory addresses. */ /* Nothing to do... */ if (do_minidump) return; for (i = 0; i < TLB1_ENTRIES; i++) { tlb1_read_entry(&e, i); if (!(e.mas1 & MAS1_VALID)) break; } /* Raw physical memory dumps don't have a virtual address. */ i--; e.mas1 = 0; e.mas2 = 0; e.mas3 = 0; tlb1_write_entry(&e, i); gran = 256 * 1024 * 1024; ppa = rounddown2(pa, gran); ofs = pa - ppa; if (sz > (gran - ofs)) { i--; e.mas1 = 0; e.mas2 = 0; e.mas3 = 0; tlb1_write_entry(&e, i); } } extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1]; void mmu_booke_scan_init(mmu_t mmu) { vm_offset_t va; pte_t *pte; int i; if (!do_minidump) { /* Initialize phys. segments for dumpsys(). */ memset(&dump_map, 0, sizeof(dump_map)); mem_regions(&physmem_regions, &physmem_regions_sz, &availmem_regions, &availmem_regions_sz); for (i = 0; i < physmem_regions_sz; i++) { dump_map[i].pa_start = physmem_regions[i].mr_start; dump_map[i].pa_size = physmem_regions[i].mr_size; } return; } /* Virtual segments for minidumps: */ memset(&dump_map, 0, sizeof(dump_map)); /* 1st: kernel .data and .bss. */ dump_map[0].pa_start = trunc_page((uintptr_t)_etext); dump_map[0].pa_size = round_page((uintptr_t)_end) - dump_map[0].pa_start; /* 2nd: msgbuf and tables (see pmap_bootstrap()). */ dump_map[1].pa_start = data_start; dump_map[1].pa_size = data_end - data_start; /* 3rd: kernel VM. */ va = dump_map[1].pa_start + dump_map[1].pa_size; /* Find start of next chunk (from va). */ while (va < virtual_end) { /* Don't dump the buffer cache. */ if (va >= kmi.buffer_sva && va < kmi.buffer_eva) { va = kmi.buffer_eva; continue; } pte = pte_find(mmu, kernel_pmap, va); if (pte != NULL && PTE_ISVALID(pte)) break; va += PAGE_SIZE; } if (va < virtual_end) { dump_map[2].pa_start = va; va += PAGE_SIZE; /* Find last page in chunk. */ while (va < virtual_end) { /* Don't run into the buffer cache. */ if (va == kmi.buffer_sva) break; pte = pte_find(mmu, kernel_pmap, va); if (pte == NULL || !PTE_ISVALID(pte)) break; va += PAGE_SIZE; } dump_map[2].pa_size = va - dump_map[2].pa_start; } } /* * Map a set of physical memory pages into the kernel virtual address space. * Return a pointer to where it is mapped. This routine is intended to be used * for mapping device memory, NOT real memory. */ static void * mmu_booke_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size) { return (mmu_booke_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT)); } static int tlb1_find_pa(vm_paddr_t pa, tlb_entry_t *e) { int i; for (i = 0; i < TLB1_ENTRIES; i++) { tlb1_read_entry(e, i); if ((e->mas1 & MAS1_VALID) == 0) return (i); } return (-1); } static void * mmu_booke_mapdev_attr(mmu_t mmu, vm_paddr_t pa, vm_size_t size, vm_memattr_t ma) { tlb_entry_t e; vm_paddr_t tmppa; void *res; uintptr_t va, tmpva; vm_size_t sz; int i; int wimge; /* * Check if this is premapped in TLB1. */ sz = size; tmppa = pa; va = ~0; wimge = tlb_calc_wimg(pa, ma); for (i = 0; i < TLB1_ENTRIES; i++) { tlb1_read_entry(&e, i); if (!(e.mas1 & MAS1_VALID)) continue; if (wimge != (e.mas2 & (MAS2_WIMGE_MASK & ~_TLB_ENTRY_SHARED))) continue; if (tmppa >= e.phys && tmppa < e.phys + e.size) { va = e.virt + (pa - e.phys); tmppa = e.phys + e.size; sz -= MIN(sz, e.size); while (sz > 0 && (i = tlb1_find_pa(tmppa, &e)) != -1) { if (wimge != (e.mas2 & (MAS2_WIMGE_MASK & ~_TLB_ENTRY_SHARED))) break; sz -= MIN(sz, e.size); tmppa = e.phys + e.size; } if (sz != 0) break; return ((void *)va); } } size = roundup(size, PAGE_SIZE); /* * The device mapping area is between VM_MAXUSER_ADDRESS and * VM_MIN_KERNEL_ADDRESS. This gives 1GB of device addressing. */ #ifdef SPARSE_MAPDEV /* * With a sparse mapdev, align to the largest starting region. This * could feasibly be optimized for a 'best-fit' alignment, but that * calculation could be very costly. * Align to the smaller of: * - first set bit in overlap of (pa & size mask) * - largest size envelope * * It's possible the device mapping may start at a PA that's not larger * than the size mask, so we need to offset in to maximize the TLB entry * range and minimize the number of used TLB entries. */ do { tmpva = tlb1_map_base; sz = ffsl((~((1 << flsl(size-1)) - 1)) & pa); sz = sz ? min(roundup(sz + 3, 4), flsl(size) - 1) : flsl(size) - 1; va = roundup(tlb1_map_base, 1 << sz) | (((1 << sz) - 1) & pa); #ifdef __powerpc64__ } while (!atomic_cmpset_long(&tlb1_map_base, tmpva, va + size)); #else } while (!atomic_cmpset_int(&tlb1_map_base, tmpva, va + size)); #endif #else #ifdef __powerpc64__ va = atomic_fetchadd_long(&tlb1_map_base, size); #else va = atomic_fetchadd_int(&tlb1_map_base, size); #endif #endif res = (void *)va; if (tlb1_mapin_region(va, pa, size, tlb_calc_wimg(pa, ma)) != size) return (NULL); return (res); } /* * 'Unmap' a range mapped by mmu_booke_mapdev(). */ static void mmu_booke_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) { #ifdef SUPPORTS_SHRINKING_TLB1 vm_offset_t base, offset; /* * Unmap only if this is inside kernel virtual space. */ if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)) { base = trunc_page(va); offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); kva_free(base, size); } #endif } /* * mmu_booke_object_init_pt preloads the ptes for a given object into the * specified pmap. This eliminates the blast of soft faults on process startup * and immediately after an mmap. */ static void mmu_booke_object_init_pt(mmu_t mmu, pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("mmu_booke_object_init_pt: non-device object")); } /* * Perform the pmap work for mincore. */ static int mmu_booke_mincore(mmu_t mmu, pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap) { /* XXX: this should be implemented at some point */ return (0); } static int mmu_booke_change_attr(mmu_t mmu, vm_offset_t addr, vm_size_t sz, vm_memattr_t mode) { vm_offset_t va; pte_t *pte; int i, j; tlb_entry_t e; addr = trunc_page(addr); /* Only allow changes to mapped kernel addresses. This includes: * - KVA * - DMAP (powerpc64) * - Device mappings */ if (addr <= VM_MAXUSER_ADDRESS || #ifdef __powerpc64__ (addr >= tlb1_map_base && addr < DMAP_BASE_ADDRESS) || (addr > DMAP_MAX_ADDRESS && addr < VM_MIN_KERNEL_ADDRESS) || #else (addr >= tlb1_map_base && addr < VM_MIN_KERNEL_ADDRESS) || #endif (addr > VM_MAX_KERNEL_ADDRESS)) return (EINVAL); /* Check TLB1 mappings */ for (i = 0; i < TLB1_ENTRIES; i++) { tlb1_read_entry(&e, i); if (!(e.mas1 & MAS1_VALID)) continue; if (addr >= e.virt && addr < e.virt + e.size) break; } if (i < TLB1_ENTRIES) { /* Only allow full mappings to be modified for now. */ /* Validate the range. */ for (j = i, va = addr; va < addr + sz; va += e.size, j++) { tlb1_read_entry(&e, j); if (va != e.virt || (sz - (va - addr) < e.size)) return (EINVAL); } for (va = addr; va < addr + sz; va += e.size, i++) { tlb1_read_entry(&e, i); e.mas2 &= ~MAS2_WIMGE_MASK; e.mas2 |= tlb_calc_wimg(e.phys, mode); /* * Write it out to the TLB. Should really re-sync with other * cores. */ tlb1_write_entry(&e, i); } return (0); } /* Not in TLB1, try through pmap */ /* First validate the range. */ for (va = addr; va < addr + sz; va += PAGE_SIZE) { pte = pte_find(mmu, kernel_pmap, va); if (pte == NULL || !PTE_ISVALID(pte)) return (EINVAL); } mtx_lock_spin(&tlbivax_mutex); tlb_miss_lock(); for (va = addr; va < addr + sz; va += PAGE_SIZE) { pte = pte_find(mmu, kernel_pmap, va); *pte &= ~(PTE_MAS2_MASK << PTE_MAS2_SHIFT); *pte |= tlb_calc_wimg(PTE_PA(pte), mode) << PTE_MAS2_SHIFT; tlb0_flush_entry(va); } tlb_miss_unlock(); mtx_unlock_spin(&tlbivax_mutex); return (0); } static void mmu_booke_page_array_startup(mmu_t mmu, long pages) { vm_page_array_size = pages; } /**************************************************************************/ /* TID handling */ /**************************************************************************/ /* * Allocate a TID. If necessary, steal one from someone else. * The new TID is flushed from the TLB before returning. */ static tlbtid_t tid_alloc(pmap_t pmap) { tlbtid_t tid; int thiscpu; KASSERT((pmap != kernel_pmap), ("tid_alloc: kernel pmap")); CTR2(KTR_PMAP, "%s: s (pmap = %p)", __func__, pmap); thiscpu = PCPU_GET(cpuid); tid = PCPU_GET(booke.tid_next); if (tid > TID_MAX) tid = TID_MIN; PCPU_SET(booke.tid_next, tid + 1); /* If we are stealing TID then clear the relevant pmap's field */ if (tidbusy[thiscpu][tid] != NULL) { CTR2(KTR_PMAP, "%s: warning: stealing tid %d", __func__, tid); tidbusy[thiscpu][tid]->pm_tid[thiscpu] = TID_NONE; /* Flush all entries from TLB0 matching this TID. */ tid_flush(tid); } tidbusy[thiscpu][tid] = pmap; pmap->pm_tid[thiscpu] = tid; __asm __volatile("msync; isync"); CTR3(KTR_PMAP, "%s: e (%02d next = %02d)", __func__, tid, PCPU_GET(booke.tid_next)); return (tid); } /**************************************************************************/ /* TLB0 handling */ /**************************************************************************/ /* Convert TLB0 va and way number to tlb0[] table index. */ static inline unsigned int tlb0_tableidx(vm_offset_t va, unsigned int way) { unsigned int idx; idx = (way * TLB0_ENTRIES_PER_WAY); idx += (va & MAS2_TLB0_ENTRY_IDX_MASK) >> MAS2_TLB0_ENTRY_IDX_SHIFT; return (idx); } /* * Invalidate TLB0 entry. */ static inline void tlb0_flush_entry(vm_offset_t va) { CTR2(KTR_PMAP, "%s: s va=0x%08x", __func__, va); mtx_assert(&tlbivax_mutex, MA_OWNED); __asm __volatile("tlbivax 0, %0" :: "r"(va & MAS2_EPN_MASK)); __asm __volatile("isync; msync"); __asm __volatile("tlbsync; msync"); CTR1(KTR_PMAP, "%s: e", __func__); } /**************************************************************************/ /* TLB1 handling */ /**************************************************************************/ /* * TLB1 mapping notes: * * TLB1[0] Kernel text and data. * TLB1[1-15] Additional kernel text and data mappings (if required), PCI * windows, other devices mappings. */ /* * Read an entry from given TLB1 slot. */ void tlb1_read_entry(tlb_entry_t *entry, unsigned int slot) { register_t msr; uint32_t mas0; KASSERT((entry != NULL), ("%s(): Entry is NULL!", __func__)); msr = mfmsr(); __asm __volatile("wrteei 0"); mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(slot); mtspr(SPR_MAS0, mas0); __asm __volatile("isync; tlbre"); entry->mas1 = mfspr(SPR_MAS1); entry->mas2 = mfspr(SPR_MAS2); entry->mas3 = mfspr(SPR_MAS3); switch ((mfpvr() >> 16) & 0xFFFF) { case FSL_E500v2: case FSL_E500mc: case FSL_E5500: case FSL_E6500: entry->mas7 = mfspr(SPR_MAS7); break; default: entry->mas7 = 0; break; } __asm __volatile("wrtee %0" :: "r"(msr)); entry->virt = entry->mas2 & MAS2_EPN_MASK; entry->phys = ((vm_paddr_t)(entry->mas7 & MAS7_RPN) << 32) | (entry->mas3 & MAS3_RPN); entry->size = tsize2size((entry->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT); } struct tlbwrite_args { tlb_entry_t *e; unsigned int idx; }; static uint32_t tlb1_find_free(void) { tlb_entry_t e; int i; for (i = 0; i < TLB1_ENTRIES; i++) { tlb1_read_entry(&e, i); if ((e.mas1 & MAS1_VALID) == 0) return (i); } return (-1); } static void tlb1_write_entry_int(void *arg) { struct tlbwrite_args *args = arg; uint32_t idx, mas0; idx = args->idx; if (idx == -1) { idx = tlb1_find_free(); if (idx == -1) panic("No free TLB1 entries!\n"); } /* Select entry */ mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(idx); mtspr(SPR_MAS0, mas0); mtspr(SPR_MAS1, args->e->mas1); mtspr(SPR_MAS2, args->e->mas2); mtspr(SPR_MAS3, args->e->mas3); switch ((mfpvr() >> 16) & 0xFFFF) { case FSL_E500mc: case FSL_E5500: case FSL_E6500: mtspr(SPR_MAS8, 0); /* FALLTHROUGH */ case FSL_E500v2: mtspr(SPR_MAS7, args->e->mas7); break; default: break; } __asm __volatile("isync; tlbwe; isync; msync"); } static void tlb1_write_entry_sync(void *arg) { /* Empty synchronization point for smp_rendezvous(). */ } /* * Write given entry to TLB1 hardware. */ static void tlb1_write_entry(tlb_entry_t *e, unsigned int idx) { struct tlbwrite_args args; args.e = e; args.idx = idx; #ifdef SMP if ((e->mas2 & _TLB_ENTRY_SHARED) && smp_started) { mb(); smp_rendezvous(tlb1_write_entry_sync, tlb1_write_entry_int, tlb1_write_entry_sync, &args); } else #endif { register_t msr; msr = mfmsr(); __asm __volatile("wrteei 0"); tlb1_write_entry_int(&args); __asm __volatile("wrtee %0" :: "r"(msr)); } } /* * Return the largest uint value log such that 2^log <= num. */ static unsigned long ilog2(unsigned long num) { long lz; #ifdef __powerpc64__ __asm ("cntlzd %0, %1" : "=r" (lz) : "r" (num)); return (63 - lz); #else __asm ("cntlzw %0, %1" : "=r" (lz) : "r" (num)); return (31 - lz); #endif } /* * Convert TLB TSIZE value to mapped region size. */ static vm_size_t tsize2size(unsigned int tsize) { /* * size = 4^tsize KB * size = 4^tsize * 2^10 = 2^(2 * tsize - 10) */ return ((1 << (2 * tsize)) * 1024); } /* * Convert region size (must be power of 4) to TLB TSIZE value. */ static unsigned int size2tsize(vm_size_t size) { return (ilog2(size) / 2 - 5); } /* * Register permanent kernel mapping in TLB1. * * Entries are created starting from index 0 (current free entry is * kept in tlb1_idx) and are not supposed to be invalidated. */ int tlb1_set_entry(vm_offset_t va, vm_paddr_t pa, vm_size_t size, uint32_t flags) { tlb_entry_t e; uint32_t ts, tid; int tsize, index; /* First try to update an existing entry. */ for (index = 0; index < TLB1_ENTRIES; index++) { tlb1_read_entry(&e, index); /* Check if we're just updating the flags, and update them. */ if (e.phys == pa && e.virt == va && e.size == size) { e.mas2 = (va & MAS2_EPN_MASK) | flags; tlb1_write_entry(&e, index); return (0); } } /* Convert size to TSIZE */ tsize = size2tsize(size); tid = (TID_KERNEL << MAS1_TID_SHIFT) & MAS1_TID_MASK; /* XXX TS is hard coded to 0 for now as we only use single address space */ ts = (0 << MAS1_TS_SHIFT) & MAS1_TS_MASK; e.phys = pa; e.virt = va; e.size = size; e.mas1 = MAS1_VALID | MAS1_IPROT | ts | tid; e.mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK); e.mas2 = (va & MAS2_EPN_MASK) | flags; /* Set supervisor RWX permission bits */ e.mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX; e.mas7 = (pa >> 32) & MAS7_RPN; tlb1_write_entry(&e, -1); return (0); } /* * Map in contiguous RAM region into the TLB1. */ static vm_size_t tlb1_mapin_region(vm_offset_t va, vm_paddr_t pa, vm_size_t size, int wimge) { vm_offset_t base; vm_size_t mapped, sz, ssize; mapped = 0; base = va; ssize = size; while (size > 0) { sz = 1UL << (ilog2(size) & ~1); /* Align size to PA */ if (pa % sz != 0) { do { sz >>= 2; } while (pa % sz != 0); } /* Now align from there to VA */ if (va % sz != 0) { do { sz >>= 2; } while (va % sz != 0); } - /* Now align from there to VA */ +#ifdef __powerpc64__ + /* + * Clamp TLB1 entries to 4G. + * + * While the e6500 supports up to 1TB mappings, the e5500 + * only supports up to 4G mappings. (0b1011) + * + * If any e6500 machines capable of supporting a very + * large amount of memory appear in the future, we can + * revisit this. + * + * For now, though, since we have plenty of space in TLB1, + * always avoid creating entries larger than 4GB. + */ + sz = MIN(sz, 1UL << 32); +#endif if (bootverbose) printf("Wiring VA=%p to PA=%jx (size=%lx)\n", (void *)va, (uintmax_t)pa, (long)sz); if (tlb1_set_entry(va, pa, sz, _TLB_ENTRY_SHARED | wimge) < 0) return (mapped); size -= sz; pa += sz; va += sz; } mapped = (va - base); if (bootverbose) printf("mapped size 0x%"PRIxPTR" (wasted space 0x%"PRIxPTR")\n", mapped, mapped - ssize); return (mapped); } /* * TLB1 initialization routine, to be called after the very first * assembler level setup done in locore.S. */ void tlb1_init() { vm_offset_t mas2; uint32_t mas0, mas1, mas3, mas7; uint32_t tsz; tlb1_get_tlbconf(); mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(0); mtspr(SPR_MAS0, mas0); __asm __volatile("isync; tlbre"); mas1 = mfspr(SPR_MAS1); mas2 = mfspr(SPR_MAS2); mas3 = mfspr(SPR_MAS3); mas7 = mfspr(SPR_MAS7); kernload = ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) | (mas3 & MAS3_RPN); tsz = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; kernsize += (tsz > 0) ? tsize2size(tsz) : 0; kernstart = trunc_page(mas2); /* Setup TLB miss defaults */ set_mas4_defaults(); } /* * pmap_early_io_unmap() should be used in short conjunction with * pmap_early_io_map(), as in the following snippet: * * x = pmap_early_io_map(...); * * pmap_early_io_unmap(x, size); * * And avoiding more allocations between. */ void pmap_early_io_unmap(vm_offset_t va, vm_size_t size) { int i; tlb_entry_t e; vm_size_t isize; size = roundup(size, PAGE_SIZE); isize = size; for (i = 0; i < TLB1_ENTRIES && size > 0; i++) { tlb1_read_entry(&e, i); if (!(e.mas1 & MAS1_VALID)) continue; if (va <= e.virt && (va + isize) >= (e.virt + e.size)) { size -= e.size; e.mas1 &= ~MAS1_VALID; tlb1_write_entry(&e, i); } } if (tlb1_map_base == va + isize) tlb1_map_base -= isize; } vm_offset_t pmap_early_io_map(vm_paddr_t pa, vm_size_t size) { vm_paddr_t pa_base; vm_offset_t va, sz; int i; tlb_entry_t e; KASSERT(!pmap_bootstrapped, ("Do not use after PMAP is up!")); for (i = 0; i < TLB1_ENTRIES; i++) { tlb1_read_entry(&e, i); if (!(e.mas1 & MAS1_VALID)) continue; if (pa >= e.phys && (pa + size) <= (e.phys + e.size)) return (e.virt + (pa - e.phys)); } pa_base = rounddown(pa, PAGE_SIZE); size = roundup(size + (pa - pa_base), PAGE_SIZE); tlb1_map_base = roundup2(tlb1_map_base, 1 << (ilog2(size) & ~1)); va = tlb1_map_base + (pa - pa_base); do { sz = 1 << (ilog2(size) & ~1); tlb1_set_entry(tlb1_map_base, pa_base, sz, _TLB_ENTRY_SHARED | _TLB_ENTRY_IO); size -= sz; pa_base += sz; tlb1_map_base += sz; } while (size > 0); return (va); } void pmap_track_page(pmap_t pmap, vm_offset_t va) { vm_paddr_t pa; vm_page_t page; struct pv_entry *pve; va = trunc_page(va); pa = pmap_kextract(va); page = PHYS_TO_VM_PAGE(pa); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); TAILQ_FOREACH(pve, &page->md.pv_list, pv_link) { if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) { goto out; } } page->md.pv_tracked = true; pv_insert(pmap, va, page); out: PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); } /* * Setup MAS4 defaults. * These values are loaded to MAS0-2 on a TLB miss. */ static void set_mas4_defaults(void) { uint32_t mas4; /* Defaults: TLB0, PID0, TSIZED=4K */ mas4 = MAS4_TLBSELD0; mas4 |= (TLB_SIZE_4K << MAS4_TSIZED_SHIFT) & MAS4_TSIZED_MASK; #ifdef SMP mas4 |= MAS4_MD; #endif mtspr(SPR_MAS4, mas4); __asm __volatile("isync"); } /* * Return 0 if the physical IO range is encompassed by one of the * the TLB1 entries, otherwise return related error code. */ static int tlb1_iomapped(int i, vm_paddr_t pa, vm_size_t size, vm_offset_t *va) { uint32_t prot; vm_paddr_t pa_start; vm_paddr_t pa_end; unsigned int entry_tsize; vm_size_t entry_size; tlb_entry_t e; *va = (vm_offset_t)NULL; tlb1_read_entry(&e, i); /* Skip invalid entries */ if (!(e.mas1 & MAS1_VALID)) return (EINVAL); /* * The entry must be cache-inhibited, guarded, and r/w * so it can function as an i/o page */ prot = e.mas2 & (MAS2_I | MAS2_G); if (prot != (MAS2_I | MAS2_G)) return (EPERM); prot = e.mas3 & (MAS3_SR | MAS3_SW); if (prot != (MAS3_SR | MAS3_SW)) return (EPERM); /* The address should be within the entry range. */ entry_tsize = (e.mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; KASSERT((entry_tsize), ("tlb1_iomapped: invalid entry tsize")); entry_size = tsize2size(entry_tsize); pa_start = (((vm_paddr_t)e.mas7 & MAS7_RPN) << 32) | (e.mas3 & MAS3_RPN); pa_end = pa_start + entry_size; if ((pa < pa_start) || ((pa + size) > pa_end)) return (ERANGE); /* Return virtual address of this mapping. */ *va = (e.mas2 & MAS2_EPN_MASK) + (pa - pa_start); return (0); } /* * Invalidate all TLB0 entries which match the given TID. Note this is * dedicated for cases when invalidations should NOT be propagated to other * CPUs. */ static void tid_flush(tlbtid_t tid) { register_t msr; uint32_t mas0, mas1, mas2; int entry, way; /* Don't evict kernel translations */ if (tid == TID_KERNEL) return; msr = mfmsr(); __asm __volatile("wrteei 0"); /* * Newer (e500mc and later) have tlbilx, which doesn't broadcast, so use * it for PID invalidation. */ switch ((mfpvr() >> 16) & 0xffff) { case FSL_E500mc: case FSL_E5500: case FSL_E6500: mtspr(SPR_MAS6, tid << MAS6_SPID0_SHIFT); /* tlbilxpid */ __asm __volatile("isync; .long 0x7c200024; isync; msync"); __asm __volatile("wrtee %0" :: "r"(msr)); return; } for (way = 0; way < TLB0_WAYS; way++) for (entry = 0; entry < TLB0_ENTRIES_PER_WAY; entry++) { mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way); mtspr(SPR_MAS0, mas0); mas2 = entry << MAS2_TLB0_ENTRY_IDX_SHIFT; mtspr(SPR_MAS2, mas2); __asm __volatile("isync; tlbre"); mas1 = mfspr(SPR_MAS1); if (!(mas1 & MAS1_VALID)) continue; if (((mas1 & MAS1_TID_MASK) >> MAS1_TID_SHIFT) != tid) continue; mas1 &= ~MAS1_VALID; mtspr(SPR_MAS1, mas1); __asm __volatile("isync; tlbwe; isync; msync"); } __asm __volatile("wrtee %0" :: "r"(msr)); } #ifdef DDB /* Print out contents of the MAS registers for each TLB0 entry */ static void #ifdef __powerpc64__ tlb_print_entry(int i, uint32_t mas1, uint64_t mas2, uint32_t mas3, #else tlb_print_entry(int i, uint32_t mas1, uint32_t mas2, uint32_t mas3, #endif uint32_t mas7) { int as; char desc[3]; tlbtid_t tid; vm_size_t size; unsigned int tsize; desc[2] = '\0'; if (mas1 & MAS1_VALID) desc[0] = 'V'; else desc[0] = ' '; if (mas1 & MAS1_IPROT) desc[1] = 'P'; else desc[1] = ' '; as = (mas1 & MAS1_TS_MASK) ? 1 : 0; tid = MAS1_GETTID(mas1); tsize = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; size = 0; if (tsize) size = tsize2size(tsize); printf("%3d: (%s) [AS=%d] " "sz = 0x%jx tsz = %d tid = %d mas1 = 0x%08x " "mas2(va) = 0x%"PRI0ptrX" mas3(pa) = 0x%08x mas7 = 0x%08x\n", i, desc, as, (uintmax_t)size, tsize, tid, mas1, mas2, mas3, mas7); } DB_SHOW_COMMAND(tlb0, tlb0_print_tlbentries) { uint32_t mas0, mas1, mas3, mas7; #ifdef __powerpc64__ uint64_t mas2; #else uint32_t mas2; #endif int entryidx, way, idx; printf("TLB0 entries:\n"); for (way = 0; way < TLB0_WAYS; way ++) for (entryidx = 0; entryidx < TLB0_ENTRIES_PER_WAY; entryidx++) { mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way); mtspr(SPR_MAS0, mas0); mas2 = entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT; mtspr(SPR_MAS2, mas2); __asm __volatile("isync; tlbre"); mas1 = mfspr(SPR_MAS1); mas2 = mfspr(SPR_MAS2); mas3 = mfspr(SPR_MAS3); mas7 = mfspr(SPR_MAS7); idx = tlb0_tableidx(mas2, way); tlb_print_entry(idx, mas1, mas2, mas3, mas7); } } /* * Print out contents of the MAS registers for each TLB1 entry */ DB_SHOW_COMMAND(tlb1, tlb1_print_tlbentries) { uint32_t mas0, mas1, mas3, mas7; #ifdef __powerpc64__ uint64_t mas2; #else uint32_t mas2; #endif int i; printf("TLB1 entries:\n"); for (i = 0; i < TLB1_ENTRIES; i++) { mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(i); mtspr(SPR_MAS0, mas0); __asm __volatile("isync; tlbre"); mas1 = mfspr(SPR_MAS1); mas2 = mfspr(SPR_MAS2); mas3 = mfspr(SPR_MAS3); mas7 = mfspr(SPR_MAS7); tlb_print_entry(i, mas1, mas2, mas3, mas7); } } #endif Index: projects/clang1000-import/sys/powerpc/include/ofw_machdep.h =================================================================== --- projects/clang1000-import/sys/powerpc/include/ofw_machdep.h (revision 356919) +++ projects/clang1000-import/sys/powerpc/include/ofw_machdep.h (revision 356920) @@ -1,57 +1,60 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2001 by Thomas Moestl . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_OFW_MACHDEP_H_ #define _MACHINE_OFW_MACHDEP_H_ #include #include #include #include #include #include +struct mem_region; +struct numa_mem_region; + typedef uint32_t cell_t; void OF_getetheraddr(device_t dev, u_char *addr); void OF_initial_setup(void *fdt_ptr, void *junk, int (*openfirm)(void *)); boolean_t OF_bootstrap(void); void OF_reboot(void); void ofw_mem_regions(struct mem_region *, int *, struct mem_region *, int *); void ofw_numa_mem_regions(struct numa_mem_region *, int *); void ofw_quiesce(void); /* Must be called before VM is up! */ void ofw_save_trap_vec(char *); int ofw_pcibus_get_domain(device_t dev, device_t child, int *domain); int ofw_pcibus_get_cpus(device_t dev, device_t child, enum cpu_sets op, size_t setsize, cpuset_t *cpuset); #endif /* _MACHINE_OFW_MACHDEP_H_ */ Index: projects/clang1000-import/sys/powerpc/include/platform.h =================================================================== --- projects/clang1000-import/sys/powerpc/include/platform.h (revision 356919) +++ projects/clang1000-import/sys/powerpc/include/platform.h (revision 356920) @@ -1,75 +1,77 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1996 Wolfgang Solfrank. * Copyright (C) 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: powerpc.h,v 1.3 2000/06/01 00:49:59 matt Exp $ * $FreeBSD$ */ #ifndef _MACHINE_PLATFORM_H_ #define _MACHINE_PLATFORM_H_ +#include #include #include struct mem_region { uint64_t mr_start; uint64_t mr_size; }; struct numa_mem_region { uint64_t mr_start; uint64_t mr_size; uint64_t mr_domain; }; /* Documentation for these functions is in platform_if.m */ void mem_regions(struct mem_region **, int *, struct mem_region **, int *); void numa_mem_regions(struct numa_mem_region **, int *); vm_offset_t platform_real_maxaddr(void); u_long platform_timebase_freq(struct cpuref *); int platform_smp_first_cpu(struct cpuref *); int platform_smp_next_cpu(struct cpuref *); int platform_smp_get_bsp(struct cpuref *); int platform_smp_start_cpu(struct pcpu *); void platform_smp_timebase_sync(u_long tb, int ap); void platform_smp_ap_init(void); void platform_smp_probe_threads(void); +int platform_node_numa_domain(phandle_t); const char *installed_platform(void); void platform_probe_and_attach(void); void platform_sleep(void); #endif /* _MACHINE_PLATFORM_H_ */ Index: projects/clang1000-import/sys/powerpc/include/trap.h =================================================================== --- projects/clang1000-import/sys/powerpc/include/trap.h (revision 356919) +++ projects/clang1000-import/sys/powerpc/include/trap.h (revision 356920) @@ -1,161 +1,162 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: trap.h,v 1.7 2002/02/22 13:51:40 kleink Exp $ * $FreeBSD$ */ #ifndef _POWERPC_TRAP_H_ #define _POWERPC_TRAP_H_ #define EXC_RSVD 0x0000 /* Reserved */ #define EXC_RST 0x0100 /* Reset; all but IBM4xx */ #define EXC_MCHK 0x0200 /* Machine Check */ #define EXC_DSI 0x0300 /* Data Storage Interrupt */ #define EXC_DSE 0x0380 /* Data Segment Interrupt */ #define EXC_ISI 0x0400 /* Instruction Storage Interrupt */ #define EXC_ISE 0x0480 /* Instruction Segment Interrupt */ #define EXC_EXI 0x0500 /* External Interrupt */ #define EXC_ALI 0x0600 /* Alignment Interrupt */ #define EXC_PGM 0x0700 /* Program Interrupt */ #define EXC_FPU 0x0800 /* Floating-point Unavailable */ #define EXC_DECR 0x0900 /* Decrementer Interrupt */ #define EXC_SC 0x0c00 /* System Call */ #define EXC_TRC 0x0d00 /* Trace */ #define EXC_FPA 0x0e00 /* Floating-point Assist */ /* The following is only available on the 601: */ #define EXC_RUNMODETRC 0x2000 /* Run Mode/Trace Exception */ /* The following are only available on 970(G5): */ #define EXC_VECAST_G5 0x1700 /* AltiVec Assist */ /* The following are only available on 7400(G4): */ #define EXC_VEC 0x0f20 /* AltiVec Unavailable */ #define EXC_VECAST_G4 0x1600 /* AltiVec Assist */ /* The following are only available on 604/750/7400: */ #define EXC_PERF 0x0f00 /* Performance Monitoring */ #define EXC_BPT 0x1300 /* Instruction Breakpoint */ #define EXC_SMI 0x1400 /* System Managment Interrupt */ /* The following are only available on 750/7400: */ #define EXC_THRM 0x1700 /* Thermal Management Interrupt */ /* And these are only on the 603: */ #define EXC_IMISS 0x1000 /* Instruction translation miss */ #define EXC_DLMISS 0x1100 /* Data load translation miss */ #define EXC_DSMISS 0x1200 /* Data store translation miss */ /* Power ISA 2.06+: */ #define EXC_HDSI 0x0e00 /* Hypervisor Data Storage */ #define EXC_HISI 0x0e20 /* Hypervisor Instruction Storage */ #define EXC_HEA 0x0e40 /* Hypervisor Emulation Assistance */ #define EXC_HMI 0x0e60 /* Hypervisor Maintenance */ #define EXC_VSX 0x0f40 /* VSX Unavailable */ /* Power ISA 2.07+: */ #define EXC_FAC 0x0f60 /* Facility Unavailable */ #define EXC_HFAC 0x0f80 /* Hypervisor Facility Unavailable */ /* Power ISA 3.0+: */ #define EXC_HVI 0x0ea0 /* Hypervisor Virtualization */ /* The following are available on 4xx and 85xx */ #define EXC_CRIT 0x0100 /* Critical Input Interrupt */ #define EXC_PIT 0x1000 /* Programmable Interval Timer */ #define EXC_FIT 0x1010 /* Fixed Interval Timer */ #define EXC_WDOG 0x1020 /* Watchdog Timer */ #define EXC_DTMISS 0x1100 /* Data TLB Miss */ #define EXC_ITMISS 0x1200 /* Instruction TLB Miss */ #define EXC_APU 0x1300 /* Auxiliary Processing Unit */ #define EXC_DEBUG 0x2f10 /* Debug trap */ #define EXC_VECAST_E 0x2f20 /* Altivec Assist (Book-E) */ #define EXC_SPFPD 0x2f30 /* SPE Floating-point Data */ #define EXC_SPFPR 0x2f40 /* SPE Floating-point Round */ /* POWER8 */ #define EXC_SOFT_PATCH 0x1500 /* POWER8 Soft Patch Exception */ #define EXC_LAST 0x2f00 /* Last possible exception vector */ #define EXC_AST 0x3000 /* Fake AST vector */ /* Trap was in user mode */ #define EXC_USER 0x10000 /* * EXC_ALI sets bits in the DSISR and DAR to provide enough * information to recover from the unaligned access without needing to * parse the offending instruction. This includes certain bits of the * opcode, and information about what registers are used. The opcode * indicator values below come from Appendix F of Book III of "The * PowerPC Architecture". */ #define EXC_ALI_OPCODE_INDICATOR(dsisr) ((dsisr >> 10) & 0x7f) #define EXC_ALI_LFD 0x09 #define EXC_ALI_STFD 0x0b /* Macros to extract register information */ #define EXC_ALI_RST(dsisr) ((dsisr >> 5) & 0x1f) /* source or target */ #define EXC_ALI_RA(dsisr) (dsisr & 0x1f) #define EXC_ALI_INST_RST(instr) ((instr >> 21) & 0x1f) /* * SRR1 bits for program exception traps. These identify what caused * the program exception. See section 6.5.9 of the Power ISA Version * 2.05. */ #define EXC_PGM_FPENABLED (1UL << 20) #define EXC_PGM_ILLEGAL (1UL << 19) #define EXC_PGM_PRIV (1UL << 18) #define EXC_PGM_TRAP (1UL << 17) /* DTrace trap opcode. */ #define EXC_DTRACE 0x7ffff808 /* Magic pointer to store TOC base and other info for trap handlers on ppc64 */ -#define TRAP_GENTRAP 0x1f0 -#define TRAP_TOCBASE 0x1f8 +#define TRAP_ENTRY 0x1e8 +#define TRAP_GENTRAP 0x1f0 +#define TRAP_TOCBASE 0x1f8 #ifndef LOCORE struct trapframe; struct thread; extern int (*hmi_handler)(struct trapframe *); void trap(struct trapframe *); int ppc_instr_emulate(struct trapframe *, struct thread *); #endif #endif /* _POWERPC_TRAP_H_ */ Index: projects/clang1000-import/sys/powerpc/ofw/ofw_machdep.c =================================================================== --- projects/clang1000-import/sys/powerpc/ofw/ofw_machdep.c (revision 356919) +++ projects/clang1000-import/sys/powerpc/ofw/ofw_machdep.c (revision 356920) @@ -1,878 +1,872 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1996 Wolfgang Solfrank. * Copyright (C) 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: ofw_machdep.c,v 1.5 2000/05/23 13:25:43 tsubai Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef POWERNV #include #endif static void *fdt; int ofw_real_mode; #ifdef AIM extern register_t ofmsr[5]; extern void *openfirmware_entry; char save_trap_init[0x2f00]; /* EXC_LAST */ char save_trap_of[0x2f00]; /* EXC_LAST */ int ofwcall(void *); static int openfirmware(void *args); #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wfortify-source" __inline void ofw_save_trap_vec(char *save_trap_vec) { if (!ofw_real_mode || !hw_direct_map) return; bcopy((void *)PHYS_TO_DMAP(EXC_RST), save_trap_vec, EXC_LAST - EXC_RST); } static __inline void ofw_restore_trap_vec(char *restore_trap_vec) { if (!ofw_real_mode || !hw_direct_map) return; bcopy(restore_trap_vec, (void *)PHYS_TO_DMAP(EXC_RST), EXC_LAST - EXC_RST); __syncicache((void *)PHYS_TO_DMAP(EXC_RSVD), EXC_LAST - EXC_RSVD); } #pragma clang diagnostic pop /* * Saved SPRG0-3 from OpenFirmware. Will be restored prior to the callback. */ register_t ofw_sprg0_save; static __inline void ofw_sprg_prepare(void) { if (ofw_real_mode) return; /* * Assume that interrupt are disabled at this point, or * SPRG1-3 could be trashed */ #ifdef __powerpc64__ __asm __volatile("mtsprg1 %0\n\t" "mtsprg2 %1\n\t" "mtsprg3 %2\n\t" : : "r"(ofmsr[2]), "r"(ofmsr[3]), "r"(ofmsr[4])); #else __asm __volatile("mfsprg0 %0\n\t" "mtsprg0 %1\n\t" "mtsprg1 %2\n\t" "mtsprg2 %3\n\t" "mtsprg3 %4\n\t" : "=&r"(ofw_sprg0_save) : "r"(ofmsr[1]), "r"(ofmsr[2]), "r"(ofmsr[3]), "r"(ofmsr[4])); #endif } static __inline void ofw_sprg_restore(void) { if (ofw_real_mode) return; /* * Note that SPRG1-3 contents are irrelevant. They are scratch * registers used in the early portion of trap handling when * interrupts are disabled. * * PCPU data cannot be used until this routine is called ! */ #ifndef __powerpc64__ __asm __volatile("mtsprg0 %0" :: "r"(ofw_sprg0_save)); #endif } #endif static int parse_ofw_memory(phandle_t node, const char *prop, struct mem_region *output) { cell_t address_cells, size_cells; cell_t OFmem[4 * PHYS_AVAIL_SZ]; int sz, i, j; phandle_t phandle; sz = 0; /* * Get #address-cells from root node, defaulting to 1 if it cannot * be found. */ phandle = OF_finddevice("/"); if (OF_getencprop(phandle, "#address-cells", &address_cells, sizeof(address_cells)) < (ssize_t)sizeof(address_cells)) address_cells = 1; if (OF_getencprop(phandle, "#size-cells", &size_cells, sizeof(size_cells)) < (ssize_t)sizeof(size_cells)) size_cells = 1; /* * Get memory. */ if (node == -1 || (sz = OF_getencprop(node, prop, OFmem, sizeof(OFmem))) <= 0) panic("Physical memory map not found"); i = 0; j = 0; while (i < sz/sizeof(cell_t)) { output[j].mr_start = OFmem[i++]; if (address_cells == 2) { output[j].mr_start <<= 32; output[j].mr_start += OFmem[i++]; } output[j].mr_size = OFmem[i++]; if (size_cells == 2) { output[j].mr_size <<= 32; output[j].mr_size += OFmem[i++]; } if (output[j].mr_start > BUS_SPACE_MAXADDR) continue; /* * Constrain memory to that which we can access. * 32-bit AIM can only reference 32 bits of address currently, * but Book-E can access 36 bits. */ if (((uint64_t)output[j].mr_start + (uint64_t)output[j].mr_size - 1) > BUS_SPACE_MAXADDR) { output[j].mr_size = BUS_SPACE_MAXADDR - output[j].mr_start + 1; } j++; } return (j); } static int parse_numa_ofw_memory(phandle_t node, const char *prop, struct numa_mem_region *output) { cell_t address_cells, size_cells; cell_t OFmem[4 * PHYS_AVAIL_SZ]; int sz, i, j; phandle_t phandle; sz = 0; /* * Get #address-cells from root node, defaulting to 1 if it cannot * be found. */ phandle = OF_finddevice("/"); if (OF_getencprop(phandle, "#address-cells", &address_cells, sizeof(address_cells)) < (ssize_t)sizeof(address_cells)) address_cells = 1; if (OF_getencprop(phandle, "#size-cells", &size_cells, sizeof(size_cells)) < (ssize_t)sizeof(size_cells)) size_cells = 1; /* * Get memory. */ if (node == -1 || (sz = OF_getencprop(node, prop, OFmem, sizeof(OFmem))) <= 0) panic("Physical memory map not found"); i = 0; j = 0; while (i < sz/sizeof(cell_t)) { output[j].mr_start = OFmem[i++]; if (address_cells == 2) { output[j].mr_start <<= 32; output[j].mr_start += OFmem[i++]; } output[j].mr_size = OFmem[i++]; if (size_cells == 2) { output[j].mr_size <<= 32; output[j].mr_size += OFmem[i++]; } j++; } return (j); } #ifdef FDT static int excise_reserved_regions(struct mem_region *avail, int asz, struct mem_region *exclude, int esz) { int i, j, k; for (i = 0; i < asz; i++) { for (j = 0; j < esz; j++) { /* * Case 1: Exclusion region encloses complete * available entry. Drop it and move on. */ if (exclude[j].mr_start <= avail[i].mr_start && exclude[j].mr_start + exclude[j].mr_size >= avail[i].mr_start + avail[i].mr_size) { for (k = i+1; k < asz; k++) avail[k-1] = avail[k]; asz--; i--; /* Repeat some entries */ continue; } /* * Case 2: Exclusion region starts in available entry. * Trim it to where the entry begins and append * a new available entry with the region after * the excluded region, if any. */ if (exclude[j].mr_start >= avail[i].mr_start && exclude[j].mr_start < avail[i].mr_start + avail[i].mr_size) { if (exclude[j].mr_start + exclude[j].mr_size < avail[i].mr_start + avail[i].mr_size) { avail[asz].mr_start = exclude[j].mr_start + exclude[j].mr_size; avail[asz].mr_size = avail[i].mr_start + avail[i].mr_size - avail[asz].mr_start; asz++; } avail[i].mr_size = exclude[j].mr_start - avail[i].mr_start; } /* * Case 3: Exclusion region ends in available entry. * Move start point to where the exclusion zone ends. * The case of a contained exclusion zone has already * been caught in case 2. */ if (exclude[j].mr_start + exclude[j].mr_size >= avail[i].mr_start && exclude[j].mr_start + exclude[j].mr_size < avail[i].mr_start + avail[i].mr_size) { avail[i].mr_size += avail[i].mr_start; avail[i].mr_start = exclude[j].mr_start + exclude[j].mr_size; avail[i].mr_size -= avail[i].mr_start; } } } return (asz); } static int excise_initrd_region(struct mem_region *avail, int asz) { phandle_t chosen; uint64_t start, end; ssize_t size; struct mem_region initrdmap[1]; pcell_t cell[2]; chosen = OF_finddevice("/chosen"); size = OF_getencprop(chosen, "linux,initrd-start", cell, sizeof(cell)); if (size < 0) return (asz); else if (size == 4) start = cell[0]; else if (size == 8) start = (uint64_t)cell[0] << 32 | cell[1]; else { /* Invalid value length */ printf("WARNING: linux,initrd-start must be either 4 or 8 bytes long\n"); return (asz); } size = OF_getencprop(chosen, "linux,initrd-end", cell, sizeof(cell)); if (size < 0) return (asz); else if (size == 4) end = cell[0]; else if (size == 8) end = (uint64_t)cell[0] << 32 | cell[1]; else { /* Invalid value length */ printf("WARNING: linux,initrd-end must be either 4 or 8 bytes long\n"); return (asz); } if (end <= start) return (asz); initrdmap[0].mr_start = start; initrdmap[0].mr_size = end - start; asz = excise_reserved_regions(avail, asz, initrdmap, 1); return (asz); } #ifdef POWERNV static int excise_msi_region(struct mem_region *avail, int asz) { uint64_t start, end; struct mem_region initrdmap[1]; /* * This range of physical addresses is used to implement optimized * 32 bit MSI interrupts on POWER9. Exclude it to avoid accidentally * using it for DMA, as this will cause an immediate PHB fence. * While we could theoretically turn off this behavior in the ETU, * doing so would break 32-bit MSI, so just reserve the range in * the physical map instead. * See section 4.4.2.8 of the PHB4 specification. */ start = 0x00000000ffff0000ul; end = 0x00000000fffffffful; initrdmap[0].mr_start = start; initrdmap[0].mr_size = end - start; asz = excise_reserved_regions(avail, asz, initrdmap, 1); return (asz); } #endif static int excise_fdt_reserved(struct mem_region *avail, int asz) { struct mem_region fdtmap[32]; ssize_t fdtmapsize; phandle_t chosen; int j, fdtentries; chosen = OF_finddevice("/chosen"); fdtmapsize = OF_getprop(chosen, "fdtmemreserv", fdtmap, sizeof(fdtmap)); for (j = 0; j < fdtmapsize/sizeof(fdtmap[0]); j++) { fdtmap[j].mr_start = be64toh(fdtmap[j].mr_start) & ~PAGE_MASK; fdtmap[j].mr_size = round_page(be64toh(fdtmap[j].mr_size)); } KASSERT(j*sizeof(fdtmap[0]) < sizeof(fdtmap), ("Exceeded number of FDT reservations")); /* Add a virtual entry for the FDT itself */ if (fdt != NULL) { fdtmap[j].mr_start = (vm_offset_t)fdt & ~PAGE_MASK; fdtmap[j].mr_size = round_page(fdt_totalsize(fdt)); fdtmapsize += sizeof(fdtmap[0]); } fdtentries = fdtmapsize/sizeof(fdtmap[0]); asz = excise_reserved_regions(avail, asz, fdtmap, fdtentries); return (asz); } #endif /* * This is called during powerpc_init, before the system is really initialized. * It shall provide the total and the available regions of RAM. * The available regions need not take the kernel into account. */ void ofw_numa_mem_regions(struct numa_mem_region *memp, int *memsz) { phandle_t phandle; - int res, count, msz; + int count, msz; char name[31]; - cell_t associativity[5]; struct numa_mem_region *curmemp; msz = 0; /* * Get memory from all the /memory nodes. */ for (phandle = OF_child(OF_peer(0)); phandle != 0; phandle = OF_peer(phandle)) { if (OF_getprop(phandle, "name", name, sizeof(name)) <= 0) continue; if (strncmp(name, "memory@", strlen("memory@")) != 0) continue; count = parse_numa_ofw_memory(phandle, "reg", &memp[msz]); if (count == 0) continue; curmemp = &memp[msz]; - res = OF_getproplen(phandle, "ibm,associativity"); - if (res <= 0) - continue; MPASS(count == 1); - OF_getencprop(phandle, "ibm,associativity", - associativity, res); - curmemp->mr_domain = associativity[3]; + curmemp->mr_domain = platform_node_numa_domain(phandle); if (bootverbose) printf("%s %#jx-%#jx domain(%ju)\n", name, (uintmax_t)curmemp->mr_start, (uintmax_t)curmemp->mr_start + curmemp->mr_size, (uintmax_t)curmemp->mr_domain); msz += count; } *memsz = msz; } /* * This is called during powerpc_init, before the system is really initialized. * It shall provide the total and the available regions of RAM. * The available regions need not take the kernel into account. */ void ofw_mem_regions(struct mem_region *memp, int *memsz, struct mem_region *availp, int *availsz) { phandle_t phandle; int asz, msz; int res; char name[31]; asz = msz = 0; /* * Get memory from all the /memory nodes. */ for (phandle = OF_child(OF_peer(0)); phandle != 0; phandle = OF_peer(phandle)) { if (OF_getprop(phandle, "name", name, sizeof(name)) <= 0) continue; if (strncmp(name, "memory", sizeof(name)) != 0 && strncmp(name, "memory@", strlen("memory@")) != 0) continue; res = parse_ofw_memory(phandle, "reg", &memp[msz]); msz += res; /* * On POWER9 Systems we might have both linux,usable-memory and * reg properties. 'reg' denotes all available memory, but we * must use 'linux,usable-memory', a subset, as some memory * regions are reserved for NVLink. */ if (OF_getproplen(phandle, "linux,usable-memory") >= 0) res = parse_ofw_memory(phandle, "linux,usable-memory", &availp[asz]); else if (OF_getproplen(phandle, "available") >= 0) res = parse_ofw_memory(phandle, "available", &availp[asz]); else res = parse_ofw_memory(phandle, "reg", &availp[asz]); asz += res; } #ifdef FDT phandle = OF_finddevice("/chosen"); if (OF_hasprop(phandle, "fdtmemreserv")) asz = excise_fdt_reserved(availp, asz); /* If the kernel is being loaded through kexec, initrd region is listed * in /chosen but the region is not marked as reserved, so, we might exclude * it here. */ if (OF_hasprop(phandle, "linux,initrd-start")) asz = excise_initrd_region(availp, asz); #endif #ifdef POWERNV if (opal_check() == 0) asz = excise_msi_region(availp, asz); #endif *memsz = msz; *availsz = asz; } void OF_initial_setup(void *fdt_ptr, void *junk, int (*openfirm)(void *)) { #ifdef AIM ofmsr[0] = mfmsr(); #ifdef __powerpc64__ ofmsr[0] &= ~PSL_SF; #else __asm __volatile("mfsprg0 %0" : "=&r"(ofmsr[1])); #endif __asm __volatile("mfsprg1 %0" : "=&r"(ofmsr[2])); __asm __volatile("mfsprg2 %0" : "=&r"(ofmsr[3])); __asm __volatile("mfsprg3 %0" : "=&r"(ofmsr[4])); openfirmware_entry = openfirm; if (ofmsr[0] & PSL_DR) ofw_real_mode = 0; else ofw_real_mode = 1; ofw_save_trap_vec(save_trap_init); #else ofw_real_mode = 1; #endif fdt = fdt_ptr; } boolean_t OF_bootstrap() { boolean_t status = FALSE; int err = 0; #ifdef AIM if (openfirmware_entry != NULL) { if (ofw_real_mode) { status = OF_install(OFW_STD_REAL, 0); } else { #ifdef __powerpc64__ status = OF_install(OFW_STD_32BIT, 0); #else status = OF_install(OFW_STD_DIRECT, 0); #endif } if (status != TRUE) return status; err = OF_init(openfirmware); } else #endif if (fdt != NULL) { #ifdef FDT #ifdef AIM bus_space_tag_t fdt_bt; vm_offset_t tmp_fdt_ptr; vm_size_t fdt_size; uintptr_t fdt_va; #endif status = OF_install(OFW_FDT, 0); if (status != TRUE) return status; #ifdef AIM /* AIM-only for now -- Book-E does this remapping in early init */ /* Get the FDT size for mapping if we can */ tmp_fdt_ptr = pmap_early_io_map((vm_paddr_t)fdt, PAGE_SIZE); if (fdt_check_header((void *)tmp_fdt_ptr) != 0) { pmap_early_io_unmap(tmp_fdt_ptr, PAGE_SIZE); return FALSE; } fdt_size = fdt_totalsize((void *)tmp_fdt_ptr); pmap_early_io_unmap(tmp_fdt_ptr, PAGE_SIZE); /* * Map this for real. Use bus_space_map() to take advantage * of its auto-remapping function once the kernel is loaded. * This is a dirty hack, but what we have. */ #ifdef _LITTLE_ENDIAN fdt_bt = &bs_le_tag; #else fdt_bt = &bs_be_tag; #endif bus_space_map(fdt_bt, (vm_paddr_t)fdt, fdt_size, 0, &fdt_va); err = OF_init((void *)fdt_va); #else err = OF_init(fdt); #endif #endif } #ifdef FDT_DTB_STATIC /* * Check for a statically included blob already in the kernel and * needing no mapping. */ else { status = OF_install(OFW_FDT, 0); if (status != TRUE) return status; err = OF_init(&fdt_static_dtb); } #endif if (err != 0) { OF_install(NULL, 0); status = FALSE; } return (status); } #ifdef AIM void ofw_quiesce(void) { struct { cell_t name; cell_t nargs; cell_t nreturns; } args; KASSERT(!pmap_bootstrapped, ("Cannot call ofw_quiesce after VM is up")); args.name = (cell_t)(uintptr_t)"quiesce"; args.nargs = 0; args.nreturns = 0; openfirmware(&args); } static int openfirmware_core(void *args) { int result; register_t oldmsr; if (openfirmware_entry == NULL) return (-1); /* * Turn off exceptions - we really don't want to end up * anywhere unexpected with PCPU set to something strange * or the stack pointer wrong. */ oldmsr = intr_disable(); ofw_sprg_prepare(); /* Save trap vectors */ ofw_save_trap_vec(save_trap_of); /* Restore initially saved trap vectors */ ofw_restore_trap_vec(save_trap_init); #ifndef __powerpc64__ /* * Clear battable[] translations */ if (!(cpu_features & PPC_FEATURE_64)) __asm __volatile("mtdbatu 2, %0\n" "mtdbatu 3, %0" : : "r" (0)); isync(); #endif result = ofwcall(args); /* Restore trap vecotrs */ ofw_restore_trap_vec(save_trap_of); ofw_sprg_restore(); intr_restore(oldmsr); return (result); } #ifdef SMP struct ofw_rv_args { void *args; int retval; volatile int in_progress; }; static void ofw_rendezvous_dispatch(void *xargs) { struct ofw_rv_args *rv_args = xargs; /* NOTE: Interrupts are disabled here */ if (PCPU_GET(cpuid) == 0) { /* * Execute all OF calls on CPU 0 */ rv_args->retval = openfirmware_core(rv_args->args); rv_args->in_progress = 0; } else { /* * Spin with interrupts off on other CPUs while OF has * control of the machine. */ while (rv_args->in_progress) cpu_spinwait(); } } #endif static int openfirmware(void *args) { int result; #ifdef SMP struct ofw_rv_args rv_args; #endif if (openfirmware_entry == NULL) return (-1); #ifdef SMP if (cold) { result = openfirmware_core(args); } else { rv_args.args = args; rv_args.in_progress = 1; smp_rendezvous(smp_no_rendezvous_barrier, ofw_rendezvous_dispatch, smp_no_rendezvous_barrier, &rv_args); result = rv_args.retval; } #else result = openfirmware_core(args); #endif return (result); } void OF_reboot() { struct { cell_t name; cell_t nargs; cell_t nreturns; cell_t arg; } args; args.name = (cell_t)(uintptr_t)"interpret"; args.nargs = 1; args.nreturns = 0; args.arg = (cell_t)(uintptr_t)"reset-all"; openfirmware_core(&args); /* Don't do rendezvous! */ for (;;); /* just in case */ } #endif /* AIM */ void OF_getetheraddr(device_t dev, u_char *addr) { phandle_t node; node = ofw_bus_get_node(dev); OF_getprop(node, "local-mac-address", addr, ETHER_ADDR_LEN); } /* * Return a bus handle and bus tag that corresponds to the register * numbered regno for the device referenced by the package handle * dev. This function is intended to be used by console drivers in * early boot only. It works by mapping the address of the device's * register in the address space of its parent and recursively walk * the device tree upward this way. */ int OF_decode_addr(phandle_t dev, int regno, bus_space_tag_t *tag, bus_space_handle_t *handle, bus_size_t *sz) { bus_addr_t addr; bus_size_t size; pcell_t pci_hi; int flags, res; res = ofw_reg_to_paddr(dev, regno, &addr, &size, &pci_hi); if (res < 0) return (res); if (pci_hi == OFW_PADDR_NOT_PCI) { *tag = &bs_be_tag; flags = 0; } else { *tag = &bs_le_tag; flags = (pci_hi & OFW_PCI_PHYS_HI_PREFETCHABLE) ? BUS_SPACE_MAP_PREFETCHABLE: 0; } if (sz != NULL) *sz = size; return (bus_space_map(*tag, addr, size, flags, handle)); } Index: projects/clang1000-import/sys/powerpc/ofw/ofw_pcibus.c =================================================================== --- projects/clang1000-import/sys/powerpc/ofw/ofw_pcibus.c (revision 356919) +++ projects/clang1000-import/sys/powerpc/ofw/ofw_pcibus.c (revision 356920) @@ -1,461 +1,430 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1997, Stefan Esser * Copyright (c) 2000, Michael Smith * Copyright (c) 2000, BSDi * Copyright (c) 2003, Thomas Moestl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ofw_pcibus.h" #include "pcib_if.h" #include "pci_if.h" typedef uint32_t ofw_pci_intr_t; /* Methods */ static device_probe_t ofw_pcibus_probe; static device_attach_t ofw_pcibus_attach; static pci_alloc_devinfo_t ofw_pcibus_alloc_devinfo; static pci_assign_interrupt_t ofw_pcibus_assign_interrupt; static ofw_bus_get_devinfo_t ofw_pcibus_get_devinfo; static bus_child_deleted_t ofw_pcibus_child_deleted; static int ofw_pcibus_child_pnpinfo_str_method(device_t cbdev, device_t child, char *buf, size_t buflen); static void ofw_pcibus_enum_devtree(device_t dev, u_int domain, u_int busno); static void ofw_pcibus_enum_bus(device_t dev, u_int domain, u_int busno); static device_method_t ofw_pcibus_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ofw_pcibus_probe), DEVMETHOD(device_attach, ofw_pcibus_attach), /* Bus interface */ DEVMETHOD(bus_child_deleted, ofw_pcibus_child_deleted), DEVMETHOD(bus_child_pnpinfo_str, ofw_pcibus_child_pnpinfo_str_method), DEVMETHOD(bus_rescan, bus_null_rescan), DEVMETHOD(bus_get_cpus, ofw_pcibus_get_cpus), DEVMETHOD(bus_get_domain, ofw_pcibus_get_domain), /* PCI interface */ DEVMETHOD(pci_alloc_devinfo, ofw_pcibus_alloc_devinfo), DEVMETHOD(pci_assign_interrupt, ofw_pcibus_assign_interrupt), /* ofw_bus interface */ DEVMETHOD(ofw_bus_get_devinfo, ofw_pcibus_get_devinfo), DEVMETHOD(ofw_bus_get_compat, ofw_bus_gen_get_compat), DEVMETHOD(ofw_bus_get_model, ofw_bus_gen_get_model), DEVMETHOD(ofw_bus_get_name, ofw_bus_gen_get_name), DEVMETHOD(ofw_bus_get_node, ofw_bus_gen_get_node), DEVMETHOD(ofw_bus_get_type, ofw_bus_gen_get_type), DEVMETHOD_END }; static devclass_t pci_devclass; DEFINE_CLASS_1(pci, ofw_pcibus_driver, ofw_pcibus_methods, sizeof(struct pci_softc), pci_driver); EARLY_DRIVER_MODULE(ofw_pcibus, pcib, ofw_pcibus_driver, pci_devclass, 0, 0, BUS_PASS_BUS); MODULE_VERSION(ofw_pcibus, 1); MODULE_DEPEND(ofw_pcibus, pci, 1, 1, 1); static int ofw_devices_only = 0; TUNABLE_INT("hw.pci.ofw_devices_only", &ofw_devices_only); static int ofw_pcibus_probe(device_t dev) { if (ofw_bus_get_node(dev) == -1) return (ENXIO); device_set_desc(dev, "OFW PCI bus"); return (BUS_PROBE_DEFAULT); } static int ofw_pcibus_attach(device_t dev) { u_int busno, domain; int error; error = pci_attach_common(dev); if (error) return (error); domain = pcib_get_domain(dev); busno = pcib_get_bus(dev); /* * Attach those children represented in the device tree. */ ofw_pcibus_enum_devtree(dev, domain, busno); /* * We now attach any laggard devices. FDT, for instance, allows * the device tree to enumerate only some PCI devices. Apple's * OF device tree on some Grackle-based hardware can also miss * functions on multi-function cards. */ if (!ofw_devices_only) ofw_pcibus_enum_bus(dev, domain, busno); return (bus_generic_attach(dev)); } struct pci_devinfo * ofw_pcibus_alloc_devinfo(device_t dev) { struct ofw_pcibus_devinfo *dinfo; dinfo = malloc(sizeof(*dinfo), M_DEVBUF, M_WAITOK | M_ZERO); return (&dinfo->opd_dinfo); } static void ofw_pcibus_enum_devtree(device_t dev, u_int domain, u_int busno) { device_t pcib; struct ofw_pci_register pcir; struct ofw_pcibus_devinfo *dinfo; phandle_t node, child; u_int func, slot; int intline; pcib = device_get_parent(dev); node = ofw_bus_get_node(dev); for (child = OF_child(node); child != 0; child = OF_peer(child)) { if (OF_getencprop(child, "reg", (pcell_t *)&pcir, sizeof(pcir)) == -1) continue; slot = OFW_PCI_PHYS_HI_DEVICE(pcir.phys_hi); func = OFW_PCI_PHYS_HI_FUNCTION(pcir.phys_hi); /* Some OFW device trees contain dupes. */ if (pci_find_dbsf(domain, busno, slot, func) != NULL) continue; /* * The preset in the intline register is usually bogus. Reset * it such that the PCI code will reroute the interrupt if * needed. */ intline = PCI_INVALID_IRQ; if (OF_getproplen(child, "interrupts") > 0) intline = 0; PCIB_WRITE_CONFIG(pcib, busno, slot, func, PCIR_INTLINE, intline, 1); /* * Now set up the PCI and OFW bus layer devinfo and add it * to the PCI bus. */ dinfo = (struct ofw_pcibus_devinfo *)pci_read_device(pcib, dev, domain, busno, slot, func); if (dinfo == NULL) continue; if (ofw_bus_gen_setup_devinfo(&dinfo->opd_obdinfo, child) != 0) { pci_freecfg((struct pci_devinfo *)dinfo); continue; } dinfo->opd_dma_tag = NULL; pci_add_child(dev, (struct pci_devinfo *)dinfo); /* * Some devices don't have an intpin set, but do have * interrupts. These are fully specified, and set in the * interrupts property, so add that value to the device's * resource list. */ if (dinfo->opd_dinfo.cfg.intpin == 0) ofw_bus_intr_to_rl(dev, child, &dinfo->opd_dinfo.resources, NULL); } } /* * The following is an almost exact clone of pci_add_children(), with the * addition that it (a) will not add children that have already been added, * and (b) will set up the OFW devinfo to point to invalid values. This is * to handle non-enumerated PCI children as exist in FDT and on the second * function of the Rage 128 in my Blue & White G3. */ static void ofw_pcibus_enum_bus(device_t dev, u_int domain, u_int busno) { device_t pcib; struct ofw_pcibus_devinfo *dinfo; int maxslots; int s, f, pcifunchigh; uint8_t hdrtype; pcib = device_get_parent(dev); maxslots = PCIB_MAXSLOTS(pcib); for (s = 0; s <= maxslots; s++) { pcifunchigh = 0; f = 0; DELAY(1); hdrtype = PCIB_READ_CONFIG(pcib, busno, s, f, PCIR_HDRTYPE, 1); if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE) continue; if (hdrtype & PCIM_MFDEV) pcifunchigh = PCI_FUNCMAX; for (f = 0; f <= pcifunchigh; f++) { /* Filter devices we have already added */ if (pci_find_dbsf(domain, busno, s, f) != NULL) continue; dinfo = (struct ofw_pcibus_devinfo *)pci_read_device( pcib, dev, domain, busno, s, f); if (dinfo == NULL) continue; dinfo->opd_dma_tag = NULL; dinfo->opd_obdinfo.obd_node = -1; dinfo->opd_obdinfo.obd_name = NULL; dinfo->opd_obdinfo.obd_compat = NULL; dinfo->opd_obdinfo.obd_type = NULL; dinfo->opd_obdinfo.obd_model = NULL; /* * For non OFW-devices, don't believe 0 * for an interrupt. */ if (dinfo->opd_dinfo.cfg.intline == 0) { dinfo->opd_dinfo.cfg.intline = PCI_INVALID_IRQ; PCIB_WRITE_CONFIG(pcib, busno, s, f, PCIR_INTLINE, PCI_INVALID_IRQ, 1); } pci_add_child(dev, (struct pci_devinfo *)dinfo); } } } static void ofw_pcibus_child_deleted(device_t dev, device_t child) { struct ofw_pcibus_devinfo *dinfo; dinfo = device_get_ivars(child); ofw_bus_gen_destroy_devinfo(&dinfo->opd_obdinfo); pci_child_deleted(dev, child); } static int ofw_pcibus_child_pnpinfo_str_method(device_t cbdev, device_t child, char *buf, size_t buflen) { pci_child_pnpinfo_str_method(cbdev, child, buf, buflen); if (ofw_bus_get_node(child) != -1) { strlcat(buf, " ", buflen); /* Separate info */ ofw_bus_gen_child_pnpinfo_str(cbdev, child, buf, buflen); } return (0); } static int ofw_pcibus_assign_interrupt(device_t dev, device_t child) { ofw_pci_intr_t intr[2]; phandle_t node, iparent; int isz, icells; node = ofw_bus_get_node(child); if (node == -1) { /* Non-firmware enumerated child, use standard routing */ intr[0] = pci_get_intpin(child); return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child, intr[0])); } /* * Try to determine the node's interrupt parent so we know which * PIC to use. */ iparent = -1; if (OF_getencprop(node, "interrupt-parent", &iparent, sizeof(iparent)) < 0) iparent = -1; icells = 1; if (iparent != -1) OF_getencprop(OF_node_from_xref(iparent), "#interrupt-cells", &icells, sizeof(icells)); /* * Any AAPL,interrupts property gets priority and is * fully specified (i.e. does not need routing) */ isz = OF_getencprop(node, "AAPL,interrupts", intr, sizeof(intr)); if (isz == sizeof(intr[0])*icells) return ((iparent == -1) ? intr[0] : ofw_bus_map_intr(dev, iparent, icells, intr)); isz = OF_getencprop(node, "interrupts", intr, sizeof(intr)); if (isz == sizeof(intr[0])*icells) { if (iparent != -1) intr[0] = ofw_bus_map_intr(dev, iparent, icells, intr); } else { /* No property: our best guess is the intpin. */ intr[0] = pci_get_intpin(child); } /* * If we got intr from a property, it may or may not be an intpin. * For on-board devices, it frequently is not, and is completely out * of the valid intpin range. For PCI slots, it hopefully is, * otherwise we will have trouble interfacing with non-OFW buses * such as cardbus. * Since we cannot tell which it is without violating layering, we * will always use the route_interrupt method, and treat exceptions * on the level they become apparent. */ return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child, intr[0])); } static const struct ofw_bus_devinfo * ofw_pcibus_get_devinfo(device_t bus, device_t dev) { struct ofw_pcibus_devinfo *dinfo; dinfo = device_get_ivars(dev); return (&dinfo->opd_obdinfo); } -static int -ofw_pcibus_parse_associativity(device_t dev, int *domain) -{ - phandle_t node; - cell_t associativity[5]; - int res; - - if ((node = ofw_bus_get_node(dev)) == -1) { - if (bootverbose) - device_printf(dev, "no ofw node found\n"); - return (ENXIO); - } - res = OF_getproplen(node, "ibm,associativity"); - if (res <= 0) - return (ENXIO); - OF_getencprop(node, "ibm,associativity", - associativity, res); - - *domain = associativity[3]; - if (bootverbose) - device_printf(dev, "domain(%d)\n", *domain); - return (0); -} - int ofw_pcibus_get_cpus(device_t dev, device_t child, enum cpu_sets op, size_t setsize, cpuset_t *cpuset) { int d, error; - error = ofw_pcibus_parse_associativity(child, &d); - if (error) - return (bus_generic_get_cpus(dev, child, op, setsize, cpuset)); + d = platform_node_numa_domain(ofw_bus_get_node(dev)); switch (op) { case LOCAL_CPUS: if (setsize != sizeof(cpuset_t)) return (EINVAL); *cpuset = cpuset_domain[d]; return (0); case INTR_CPUS: error = bus_generic_get_cpus(dev, child, op, setsize, cpuset); if (error != 0) return (error); if (setsize != sizeof(cpuset_t)) return (EINVAL); CPU_AND(cpuset, &cpuset_domain[d]); return (0); default: return (bus_generic_get_cpus(dev, child, op, setsize, cpuset)); } return (0); } /* * Fetch the NUMA domain for the given device 'dev'. * * If a device has a _PXM method, map that to a NUMA domain. * Otherwise, pass the request up to the parent. * If there's no matching domain or the domain cannot be * determined, return ENOENT. */ int ofw_pcibus_get_domain(device_t dev, device_t child, int *domain) { - int d, error; + *domain = platform_node_numa_domain(ofw_bus_get_node(child)); - error = ofw_pcibus_parse_associativity(child, &d); - /* No ofw node; go up a level */ - if (error) - return (bus_generic_get_domain(dev, child, domain)); - *domain = d; return (0); } Index: projects/clang1000-import/sys/powerpc/powernv/platform_powernv.c =================================================================== --- projects/clang1000-import/sys/powerpc/powernv/platform_powernv.c (revision 356919) +++ projects/clang1000-import/sys/powerpc/powernv/platform_powernv.c (revision 356920) @@ -1,507 +1,554 @@ /*- * Copyright (c) 2015 Nathan Whitehorn * Copyright (c) 2017-2018 Semihalf * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "platform_if.h" #include "opal.h" #ifdef SMP extern void *ap_pcpu; #endif void (*powernv_smp_ap_extra_init)(void); static int powernv_probe(platform_t); static int powernv_attach(platform_t); void powernv_mem_regions(platform_t, struct mem_region *phys, int *physsz, struct mem_region *avail, int *availsz); static void powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys, int *physsz); static u_long powernv_timebase_freq(platform_t, struct cpuref *cpuref); static int powernv_smp_first_cpu(platform_t, struct cpuref *cpuref); static int powernv_smp_next_cpu(platform_t, struct cpuref *cpuref); static int powernv_smp_get_bsp(platform_t, struct cpuref *cpuref); static void powernv_smp_ap_init(platform_t); #ifdef SMP static int powernv_smp_start_cpu(platform_t, struct pcpu *cpu); static void powernv_smp_probe_threads(platform_t); static struct cpu_group *powernv_smp_topo(platform_t plat); #endif static void powernv_reset(platform_t); static void powernv_cpu_idle(sbintime_t sbt); static int powernv_cpuref_init(void); +static int powernv_node_numa_domain(platform_t platform, phandle_t node); static platform_method_t powernv_methods[] = { PLATFORMMETHOD(platform_probe, powernv_probe), PLATFORMMETHOD(platform_attach, powernv_attach), PLATFORMMETHOD(platform_mem_regions, powernv_mem_regions), PLATFORMMETHOD(platform_numa_mem_regions, powernv_numa_mem_regions), PLATFORMMETHOD(platform_timebase_freq, powernv_timebase_freq), PLATFORMMETHOD(platform_smp_ap_init, powernv_smp_ap_init), PLATFORMMETHOD(platform_smp_first_cpu, powernv_smp_first_cpu), PLATFORMMETHOD(platform_smp_next_cpu, powernv_smp_next_cpu), PLATFORMMETHOD(platform_smp_get_bsp, powernv_smp_get_bsp), #ifdef SMP PLATFORMMETHOD(platform_smp_start_cpu, powernv_smp_start_cpu), PLATFORMMETHOD(platform_smp_probe_threads, powernv_smp_probe_threads), PLATFORMMETHOD(platform_smp_topo, powernv_smp_topo), #endif + PLATFORMMETHOD(platform_node_numa_domain, powernv_node_numa_domain), PLATFORMMETHOD(platform_reset, powernv_reset), { 0, 0 } }; static platform_def_t powernv_platform = { "powernv", powernv_methods, 0 }; static struct cpuref platform_cpuref[MAXCPU]; static int platform_cpuref_cnt; static int platform_cpuref_valid; +static int platform_associativity; PLATFORM_DEF(powernv_platform); static uint64_t powernv_boot_pir; static int powernv_probe(platform_t plat) { if (opal_check() == 0) return (BUS_PROBE_SPECIFIC); return (ENXIO); } static int powernv_attach(platform_t plat) { uint32_t nptlp, shift = 0, slb_encoding = 0; int32_t lp_size, lp_encoding; char buf[255]; + pcell_t refpoints[3]; pcell_t prop; phandle_t cpu; + phandle_t opal; int res, len, idx; register_t msr; /* Ping OPAL again just to make sure */ opal_check(); #if BYTE_ORDER == LITTLE_ENDIAN opal_call(OPAL_REINIT_CPUS, 2 /* Little endian */); #else opal_call(OPAL_REINIT_CPUS, 1 /* Big endian */); #endif + opal = OF_finddevice("/ibm,opal"); + platform_associativity = 4; /* Skiboot default. */ + if (OF_getencprop(opal, "ibm,associativity-reference-points", refpoints, + sizeof(refpoints)) > 0) { + platform_associativity = refpoints[0]; + } + if (cpu_idle_hook == NULL) cpu_idle_hook = powernv_cpu_idle; powernv_boot_pir = mfspr(SPR_PIR); /* LPID must not be altered when PSL_DR or PSL_IR is set */ msr = mfmsr(); mtmsr(msr & ~(PSL_DR | PSL_IR)); /* Direct interrupts to SRR instead of HSRR and reset LPCR otherwise */ mtspr(SPR_LPID, 0); isync(); if (cpu_features2 & PPC_FEATURE2_ARCH_3_00) lpcr |= LPCR_HVICE; mtspr(SPR_LPCR, lpcr); isync(); mtmsr(msr); powernv_cpuref_init(); /* Set SLB count from device tree */ cpu = OF_peer(0); cpu = OF_child(cpu); while (cpu != 0) { res = OF_getprop(cpu, "name", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpus") == 0) break; cpu = OF_peer(cpu); } if (cpu == 0) goto out; cpu = OF_child(cpu); while (cpu != 0) { res = OF_getprop(cpu, "device_type", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpu") == 0) break; cpu = OF_peer(cpu); } if (cpu == 0) goto out; res = OF_getencprop(cpu, "ibm,slb-size", &prop, sizeof(prop)); if (res > 0) n_slbs = prop; /* * Scan the large page size property for PAPR compatible machines. * See PAPR D.5 Changes to Section 5.1.4, 'CPU Node Properties' * for the encoding of the property. */ len = OF_getproplen(cpu, "ibm,segment-page-sizes"); if (len > 0) { /* * We have to use a variable length array on the stack * since we have very limited stack space. */ pcell_t arr[len/sizeof(cell_t)]; res = OF_getencprop(cpu, "ibm,segment-page-sizes", arr, sizeof(arr)); len /= 4; idx = 0; while (len > 0) { shift = arr[idx]; slb_encoding = arr[idx + 1]; nptlp = arr[idx + 2]; idx += 3; len -= 3; while (len > 0 && nptlp) { lp_size = arr[idx]; lp_encoding = arr[idx+1]; if (slb_encoding == SLBV_L && lp_encoding == 0) break; idx += 2; len -= 2; nptlp--; } if (nptlp && slb_encoding == SLBV_L && lp_encoding == 0) break; } if (len == 0) panic("Standard large pages (SLB[L] = 1, PTE[LP] = 0) " "not supported by this system."); moea64_large_page_shift = shift; moea64_large_page_size = 1ULL << lp_size; } out: return (0); } void powernv_mem_regions(platform_t plat, struct mem_region *phys, int *physsz, struct mem_region *avail, int *availsz) { ofw_mem_regions(phys, physsz, avail, availsz); } static void powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys, int *physsz) { ofw_numa_mem_regions(phys, physsz); } static u_long powernv_timebase_freq(platform_t plat, struct cpuref *cpuref) { char buf[8]; phandle_t cpu, dev, root; int res; int32_t ticks = -1; root = OF_peer(0); dev = OF_child(root); while (dev != 0) { res = OF_getprop(dev, "name", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpus") == 0) break; dev = OF_peer(dev); } for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) { res = OF_getprop(cpu, "device_type", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpu") == 0) break; } if (cpu == 0) return (512000000); OF_getencprop(cpu, "timebase-frequency", &ticks, sizeof(ticks)); if (ticks <= 0) panic("Unable to determine timebase frequency!"); return (ticks); } static int powernv_cpuref_init(void) { phandle_t cpu, dev; char buf[32]; int a, res, tmp_cpuref_cnt; static struct cpuref tmp_cpuref[MAXCPU]; cell_t interrupt_servers[32]; uint64_t bsp; if (platform_cpuref_valid) return (0); dev = OF_peer(0); dev = OF_child(dev); while (dev != 0) { res = OF_getprop(dev, "name", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpus") == 0) break; dev = OF_peer(dev); } bsp = 0; tmp_cpuref_cnt = 0; for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) { res = OF_getprop(cpu, "device_type", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpu") == 0) { res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s"); if (res > 0) { OF_getencprop(cpu, "ibm,ppc-interrupt-server#s", interrupt_servers, res); for (a = 0; a < res/sizeof(cell_t); a++) { tmp_cpuref[tmp_cpuref_cnt].cr_hwref = interrupt_servers[a]; tmp_cpuref[tmp_cpuref_cnt].cr_cpuid = tmp_cpuref_cnt; - tmp_cpuref[tmp_cpuref_cnt].cr_domain = interrupt_servers[a] >> 11; + tmp_cpuref[tmp_cpuref_cnt].cr_domain = + powernv_node_numa_domain(NULL, cpu); if (interrupt_servers[a] == (uint32_t)powernv_boot_pir) bsp = tmp_cpuref_cnt; tmp_cpuref_cnt++; } } } } /* Map IDs, so BSP has CPUID 0 regardless of hwref */ for (a = bsp; a < tmp_cpuref_cnt; a++) { platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref; platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt; platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain; platform_cpuref_cnt++; } for (a = 0; a < bsp; a++) { platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref; platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt; platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain; platform_cpuref_cnt++; } platform_cpuref_valid = 1; return (0); } static int powernv_smp_first_cpu(platform_t plat, struct cpuref *cpuref) { if (platform_cpuref_valid == 0) return (EINVAL); cpuref->cr_cpuid = 0; cpuref->cr_hwref = platform_cpuref[0].cr_hwref; cpuref->cr_domain = platform_cpuref[0].cr_domain; return (0); } static int powernv_smp_next_cpu(platform_t plat, struct cpuref *cpuref) { int id; if (platform_cpuref_valid == 0) return (EINVAL); id = cpuref->cr_cpuid + 1; if (id >= platform_cpuref_cnt) return (ENOENT); cpuref->cr_cpuid = platform_cpuref[id].cr_cpuid; cpuref->cr_hwref = platform_cpuref[id].cr_hwref; cpuref->cr_domain = platform_cpuref[id].cr_domain; return (0); } static int powernv_smp_get_bsp(platform_t plat, struct cpuref *cpuref) { cpuref->cr_cpuid = platform_cpuref[0].cr_cpuid; cpuref->cr_hwref = platform_cpuref[0].cr_hwref; cpuref->cr_domain = platform_cpuref[0].cr_domain; return (0); } #ifdef SMP static int powernv_smp_start_cpu(platform_t plat, struct pcpu *pc) { int result; ap_pcpu = pc; powerpc_sync(); result = opal_call(OPAL_START_CPU, pc->pc_hwref, EXC_RST); if (result != OPAL_SUCCESS) { printf("OPAL error (%d): unable to start AP %d\n", result, (int)pc->pc_hwref); return (ENXIO); } return (0); } static void powernv_smp_probe_threads(platform_t plat) { char buf[8]; phandle_t cpu, dev, root; int res, nthreads; root = OF_peer(0); dev = OF_child(root); while (dev != 0) { res = OF_getprop(dev, "name", buf, sizeof(buf)); if (res > 0 && strcmp(buf, "cpus") == 0) break; dev = OF_peer(dev); } nthreads = 1; for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) { res = OF_getprop(cpu, "device_type", buf, sizeof(buf)); if (res <= 0 || strcmp(buf, "cpu") != 0) continue; res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s"); if (res >= 0) nthreads = res / sizeof(cell_t); else nthreads = 1; break; } smp_threads_per_core = nthreads; if (mp_ncpus % nthreads == 0) mp_ncores = mp_ncpus / nthreads; } static struct cpu_group * powernv_smp_topo(platform_t plat) { if (mp_ncpus % smp_threads_per_core != 0) { printf("WARNING: Irregular SMP topology. Performance may be " "suboptimal (%d threads, %d on first core)\n", mp_ncpus, smp_threads_per_core); return (smp_topo_none()); } /* Don't do anything fancier for non-threaded SMP */ if (smp_threads_per_core == 1) return (smp_topo_none()); return (smp_topo_1level(CG_SHARE_L1, smp_threads_per_core, CG_FLAG_SMT)); } #endif static void powernv_reset(platform_t platform) { opal_call(OPAL_CEC_REBOOT); } static void powernv_smp_ap_init(platform_t platform) { if (powernv_smp_ap_extra_init != NULL) powernv_smp_ap_extra_init(); } static void powernv_cpu_idle(sbintime_t sbt) { +} + +static int +powernv_node_numa_domain(platform_t platform, phandle_t node) +{ + /* XXX: Is locking necessary in here? */ + static int numa_domains[MAXMEMDOM]; + static int numa_max_domain; + cell_t associativity[5]; + int i, res; + + res = OF_getproplen(node, "ibm,associativity"); + + /* If already at the root, use default domain. */ + if (res == 0) + return (0); + else if (res < 0) + /* If this node doesn't have associativity, check its parent. */ + return (powernv_node_numa_domain(platform, OF_parent(node))); + + OF_getencprop(node, "ibm,associativity", + associativity, res); + + for (i = 0; i < numa_max_domain; i++) { + if (numa_domains[i] == associativity[platform_associativity]) + return (i); + } + if (i < MAXMEMDOM) + numa_domains[numa_max_domain++] = + associativity[platform_associativity]; + else + i = 0; + + return (i); } /* Set up the Nest MMU on POWER9 relatively early, but after pmap is setup. */ static void powernv_setup_nmmu(void *unused) { if (opal_check() != 0) return; opal_call(OPAL_NMMU_SET_PTCR, -1, mfspr(SPR_PTCR)); } SYSINIT(powernv_setup_nmmu, SI_SUB_CPU, SI_ORDER_ANY, powernv_setup_nmmu, NULL); Index: projects/clang1000-import/sys/powerpc/powerpc/mmu_if.m =================================================================== --- projects/clang1000-import/sys/powerpc/powerpc/mmu_if.m (revision 356919) +++ projects/clang1000-import/sys/powerpc/powerpc/mmu_if.m (revision 356920) @@ -1,1086 +1,1087 @@ #- # Copyright (c) 2005 Peter Grehan # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # #include #include #include #include #include #include #include /** * @defgroup MMU mmu - KObj methods for PowerPC MMU implementations * @brief A set of methods required by all MMU implementations. These * are basically direct call-thru's from the pmap machine-dependent * code. * Thanks to Bruce M Simpson's pmap man pages for routine descriptions. *@{ */ INTERFACE mmu; +SINGLETON; # # Default implementations of some methods # CODE { static void mmu_null_copy(mmu_t mmu, pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { return; } static void mmu_null_growkernel(mmu_t mmu, vm_offset_t addr) { return; } static void mmu_null_init(mmu_t mmu) { return; } static boolean_t mmu_null_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t va) { return (FALSE); } static void mmu_null_object_init_pt(mmu_t mmu, pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t index, vm_size_t size) { return; } static void mmu_null_page_init(mmu_t mmu, vm_page_t m) { return; } static void mmu_null_remove_pages(mmu_t mmu, pmap_t pmap) { return; } static int mmu_null_mincore(mmu_t mmu, pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap) { return (0); } static void mmu_null_deactivate(struct thread *td) { return; } static void mmu_null_align_superpage(mmu_t mmu, vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { return; } static void *mmu_null_mapdev_attr(mmu_t mmu, vm_paddr_t pa, vm_size_t size, vm_memattr_t ma) { return MMU_MAPDEV(mmu, pa, size); } static void mmu_null_kenter_attr(mmu_t mmu, vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma) { MMU_KENTER(mmu, va, pa); } static void mmu_null_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma) { return; } static int mmu_null_change_attr(mmu_t mmu, vm_offset_t va, vm_size_t sz, vm_memattr_t mode) { return (0); } static size_t mmu_null_scan_pmap(mmu_t mmu) { return (0); } static void *mmu_null_dump_pmap_init(mmu_t mmu, unsigned blkpgs) { return (NULL); } static void * mmu_null_dump_pmap(mmu_t mmu, void *ctx, void *buf, u_long *nbytes) { return (NULL); } }; /** * @brief Apply the given advice to the specified range of addresses within * the given pmap. Depending on the advice, clear the referenced and/or * modified flags in each mapping and set the mapped page's dirty field. * * @param _pmap physical map * @param _start virtual range start * @param _end virtual range end * @param _advice advice to apply */ METHOD void advise { mmu_t _mmu; pmap_t _pmap; vm_offset_t _start; vm_offset_t _end; int _advice; }; /** * @brief Clear the 'modified' bit on the given physical page * * @param _pg physical page */ METHOD void clear_modify { mmu_t _mmu; vm_page_t _pg; }; /** * @brief Clear the write and modified bits in each of the given * physical page's mappings * * @param _pg physical page */ METHOD void remove_write { mmu_t _mmu; vm_page_t _pg; }; /** * @brief Copy the address range given by the source physical map, virtual * address and length to the destination physical map and virtual address. * This routine is optional (xxx default null implementation ?) * * @param _dst_pmap destination physical map * @param _src_pmap source physical map * @param _dst_addr destination virtual address * @param _len size of range * @param _src_addr source virtual address */ METHOD void copy { mmu_t _mmu; pmap_t _dst_pmap; pmap_t _src_pmap; vm_offset_t _dst_addr; vm_size_t _len; vm_offset_t _src_addr; } DEFAULT mmu_null_copy; /** * @brief Copy the source physical page to the destination physical page * * @param _src source physical page * @param _dst destination physical page */ METHOD void copy_page { mmu_t _mmu; vm_page_t _src; vm_page_t _dst; }; METHOD void copy_pages { mmu_t _mmu; vm_page_t *_ma; vm_offset_t _a_offset; vm_page_t *_mb; vm_offset_t _b_offset; int _xfersize; }; /** * @brief Create a mapping between a virtual/physical address pair in the * passed physical map with the specified protection and wiring * * @param _pmap physical map * @param _va mapping virtual address * @param _p mapping physical page * @param _prot mapping page protection * @param _flags pmap_enter flags * @param _psind superpage size index */ METHOD int enter { mmu_t _mmu; pmap_t _pmap; vm_offset_t _va; vm_page_t _p; vm_prot_t _prot; u_int _flags; int8_t _psind; }; /** * @brief Maps a sequence of resident pages belonging to the same object. * * @param _pmap physical map * @param _start virtual range start * @param _end virtual range end * @param _m_start physical page mapped at start * @param _prot mapping page protection */ METHOD void enter_object { mmu_t _mmu; pmap_t _pmap; vm_offset_t _start; vm_offset_t _end; vm_page_t _m_start; vm_prot_t _prot; }; /** * @brief A faster entry point for page mapping where it is possible * to short-circuit some of the tests in pmap_enter. * * @param _pmap physical map (and also currently active pmap) * @param _va mapping virtual address * @param _pg mapping physical page * @param _prot new page protection - used to see if page is exec. */ METHOD void enter_quick { mmu_t _mmu; pmap_t _pmap; vm_offset_t _va; vm_page_t _pg; vm_prot_t _prot; }; /** * @brief Reverse map the given virtual address, returning the physical * page associated with the address if a mapping exists. * * @param _pmap physical map * @param _va mapping virtual address * * @retval 0 No mapping found * @retval addr The mapping physical address */ METHOD vm_paddr_t extract { mmu_t _mmu; pmap_t _pmap; vm_offset_t _va; }; /** * @brief Reverse map the given virtual address, returning the * physical page if found. The page must be held (by calling * vm_page_hold) if the page protection matches the given protection * * @param _pmap physical map * @param _va mapping virtual address * @param _prot protection used to determine if physical page * should be locked * * @retval NULL No mapping found * @retval page Pointer to physical page. Held if protections match */ METHOD vm_page_t extract_and_hold { mmu_t _mmu; pmap_t _pmap; vm_offset_t _va; vm_prot_t _prot; }; /** * @brief Increase kernel virtual address space to the given virtual address. * Not really required for PowerPC, so optional unless the MMU implementation * can use it. * * @param _va new upper limit for kernel virtual address space */ METHOD void growkernel { mmu_t _mmu; vm_offset_t _va; } DEFAULT mmu_null_growkernel; /** * @brief Called from vm_mem_init. Zone allocation is available at * this stage so a convenient time to create zones. This routine is * for MMU-implementation convenience and is optional. */ METHOD void init { mmu_t _mmu; } DEFAULT mmu_null_init; /** * @brief Return if the page has been marked by MMU hardware to have been * modified * * @param _pg physical page to test * * @retval boolean TRUE if page has been modified */ METHOD boolean_t is_modified { mmu_t _mmu; vm_page_t _pg; }; /** * @brief Return whether the specified virtual address is a candidate to be * prefaulted in. This routine is optional. * * @param _pmap physical map * @param _va virtual address to test * * @retval boolean TRUE if the address is a candidate. */ METHOD boolean_t is_prefaultable { mmu_t _mmu; pmap_t _pmap; vm_offset_t _va; } DEFAULT mmu_null_is_prefaultable; /** * @brief Return whether or not the specified physical page was referenced * in any physical maps. * * @params _pg physical page * * @retval boolean TRUE if page has been referenced */ METHOD boolean_t is_referenced { mmu_t _mmu; vm_page_t _pg; }; /** * @brief Return a count of referenced bits for a page, clearing those bits. * Not all referenced bits need to be cleared, but it is necessary that 0 * only be returned when there are none set. * * @params _m physical page * * @retval int count of referenced bits */ METHOD int ts_referenced { mmu_t _mmu; vm_page_t _pg; }; /** * @brief Map the requested physical address range into kernel virtual * address space. The value in _virt is taken as a hint. The virtual * address of the range is returned, or NULL if the mapping could not * be created. The range can be direct-mapped if that is supported. * * @param *_virt Hint for start virtual address, and also return * value * @param _start physical address range start * @param _end physical address range end * @param _prot protection of range (currently ignored) * * @retval NULL could not map the area * @retval addr, *_virt mapping start virtual address */ METHOD vm_offset_t map { mmu_t _mmu; vm_offset_t *_virt; vm_paddr_t _start; vm_paddr_t _end; int _prot; }; /** * @brief Used to create a contiguous set of read-only mappings for a * given object to try and eliminate a cascade of on-demand faults as * the object is accessed sequentially. This routine is optional. * * @param _pmap physical map * @param _addr mapping start virtual address * @param _object device-backed V.M. object to be mapped * @param _pindex page-index within object of mapping start * @param _size size in bytes of mapping */ METHOD void object_init_pt { mmu_t _mmu; pmap_t _pmap; vm_offset_t _addr; vm_object_t _object; vm_pindex_t _pindex; vm_size_t _size; } DEFAULT mmu_null_object_init_pt; /** * @brief Used to determine if the specified page has a mapping for the * given physical map, by scanning the list of reverse-mappings from the * page. The list is scanned to a maximum of 16 entries. * * @param _pmap physical map * @param _pg physical page * * @retval bool TRUE if the physical map was found in the first 16 * reverse-map list entries off the physical page. */ METHOD boolean_t page_exists_quick { mmu_t _mmu; pmap_t _pmap; vm_page_t _pg; }; /** * @brief Initialise the machine-dependent section of the physical page * data structure. This routine is optional. * * @param _pg physical page */ METHOD void page_init { mmu_t _mmu; vm_page_t _pg; } DEFAULT mmu_null_page_init; /** * @brief Count the number of managed mappings to the given physical * page that are wired. * * @param _pg physical page * * @retval int the number of wired, managed mappings to the * given physical page */ METHOD int page_wired_mappings { mmu_t _mmu; vm_page_t _pg; }; /** * @brief Initialise a physical map data structure * * @param _pmap physical map */ METHOD void pinit { mmu_t _mmu; pmap_t _pmap; }; /** * @brief Initialise the physical map for process 0, the initial process * in the system. * XXX default to pinit ? * * @param _pmap physical map */ METHOD void pinit0 { mmu_t _mmu; pmap_t _pmap; }; /** * @brief Set the protection for physical pages in the given virtual address * range to the given value. * * @param _pmap physical map * @param _start virtual range start * @param _end virtual range end * @param _prot new page protection */ METHOD void protect { mmu_t _mmu; pmap_t _pmap; vm_offset_t _start; vm_offset_t _end; vm_prot_t _prot; }; /** * @brief Create a mapping in kernel virtual address space for the given array * of wired physical pages. * * @param _start mapping virtual address start * @param *_m array of physical page pointers * @param _count array elements */ METHOD void qenter { mmu_t _mmu; vm_offset_t _start; vm_page_t *_pg; int _count; }; /** * @brief Remove the temporary mappings created by qenter. * * @param _start mapping virtual address start * @param _count number of pages in mapping */ METHOD void qremove { mmu_t _mmu; vm_offset_t _start; int _count; }; /** * @brief Release per-pmap resources, e.g. mutexes, allocated memory etc. There * should be no existing mappings for the physical map at this point * * @param _pmap physical map */ METHOD void release { mmu_t _mmu; pmap_t _pmap; }; /** * @brief Remove all mappings in the given physical map for the start/end * virtual address range. The range will be page-aligned. * * @param _pmap physical map * @param _start mapping virtual address start * @param _end mapping virtual address end */ METHOD void remove { mmu_t _mmu; pmap_t _pmap; vm_offset_t _start; vm_offset_t _end; }; /** * @brief Traverse the reverse-map list off the given physical page and * remove all mappings. Clear the PGA_WRITEABLE attribute from the page. * * @param _pg physical page */ METHOD void remove_all { mmu_t _mmu; vm_page_t _pg; }; /** * @brief Remove all mappings in the given start/end virtual address range * for the given physical map. Similar to the remove method, but it used * when tearing down all mappings in an address space. This method is * optional, since pmap_remove will be called for each valid vm_map in * the address space later. * * @param _pmap physical map * @param _start mapping virtual address start * @param _end mapping virtual address end */ METHOD void remove_pages { mmu_t _mmu; pmap_t _pmap; } DEFAULT mmu_null_remove_pages; /** * @brief Clear the wired attribute from the mappings for the specified range * of addresses in the given pmap. * * @param _pmap physical map * @param _start virtual range start * @param _end virtual range end */ METHOD void unwire { mmu_t _mmu; pmap_t _pmap; vm_offset_t _start; vm_offset_t _end; }; /** * @brief Zero a physical page. It is not assumed that the page is mapped, * so a temporary (or direct) mapping may need to be used. * * @param _pg physical page */ METHOD void zero_page { mmu_t _mmu; vm_page_t _pg; }; /** * @brief Zero a portion of a physical page, starting at a given offset and * for a given size (multiples of 512 bytes for 4k pages). * * @param _pg physical page * @param _off byte offset from start of page * @param _size size of area to zero */ METHOD void zero_page_area { mmu_t _mmu; vm_page_t _pg; int _off; int _size; }; /** * @brief Extract mincore(2) information from a mapping. * * @param _pmap physical map * @param _addr page virtual address * @param _pa page physical address * * @retval 0 no result * @retval non-zero mincore(2) flag values */ METHOD int mincore { mmu_t _mmu; pmap_t _pmap; vm_offset_t _addr; vm_paddr_t *_pap; } DEFAULT mmu_null_mincore; /** * @brief Perform any operations required to allow a physical map to be used * before it's address space is accessed. * * @param _td thread associated with physical map */ METHOD void activate { mmu_t _mmu; struct thread *_td; }; /** * @brief Perform any operations required to deactivate a physical map, * for instance as it is context-switched out. * * @param _td thread associated with physical map */ METHOD void deactivate { mmu_t _mmu; struct thread *_td; } DEFAULT mmu_null_deactivate; /** * @brief Return a hint for the best virtual address to map a tentative * virtual address range in a given VM object. The default is to just * return the given tentative start address. * * @param _obj VM backing object * @param _offset starting offset with the VM object * @param _addr initial guess at virtual address * @param _size size of virtual address range */ METHOD void align_superpage { mmu_t _mmu; vm_object_t _obj; vm_ooffset_t _offset; vm_offset_t *_addr; vm_size_t _size; } DEFAULT mmu_null_align_superpage; /** * INTERNAL INTERFACES */ /** * @brief Bootstrap the VM system. At the completion of this routine, the * kernel will be running in its own address space with full control over * paging. * * @param _start start of reserved memory (obsolete ???) * @param _end end of reserved memory (obsolete ???) * XXX I think the intent of these was to allow * the memory used by kernel text+data+bss and * loader variables/load-time kld's to be carved out * of available physical mem. * */ METHOD void bootstrap { mmu_t _mmu; vm_offset_t _start; vm_offset_t _end; }; /** * @brief Set up the MMU on the current CPU. Only called by the PMAP layer * for alternate CPUs on SMP systems. * * @param _ap Set to 1 if the CPU being set up is an AP * */ METHOD void cpu_bootstrap { mmu_t _mmu; int _ap; }; /** * @brief Create a kernel mapping for a given physical address range. * Called by bus code on behalf of device drivers. The mapping does not * have to be a virtual address: it can be a direct-mapped physical address * if that is supported by the MMU. * * @param _pa start physical address * @param _size size in bytes of mapping * * @retval addr address of mapping. */ METHOD void * mapdev { mmu_t _mmu; vm_paddr_t _pa; vm_size_t _size; }; /** * @brief Create a kernel mapping for a given physical address range. * Called by bus code on behalf of device drivers. The mapping does not * have to be a virtual address: it can be a direct-mapped physical address * if that is supported by the MMU. * * @param _pa start physical address * @param _size size in bytes of mapping * @param _attr cache attributes * * @retval addr address of mapping. */ METHOD void * mapdev_attr { mmu_t _mmu; vm_paddr_t _pa; vm_size_t _size; vm_memattr_t _attr; } DEFAULT mmu_null_mapdev_attr; /** * @brief Change cache control attributes for a page. Should modify all * mappings for that page. * * @param _m page to modify * @param _ma new cache control attributes */ METHOD void page_set_memattr { mmu_t _mmu; vm_page_t _pg; vm_memattr_t _ma; } DEFAULT mmu_null_page_set_memattr; /** * @brief Remove the mapping created by mapdev. Called when a driver * is unloaded. * * @param _va Mapping address returned from mapdev * @param _size size in bytes of mapping */ METHOD void unmapdev { mmu_t _mmu; vm_offset_t _va; vm_size_t _size; }; /** * @brief Provide a kernel-space pointer that can be used to access the * given userland address. The kernel accessible length returned in klen * may be less than the requested length of the userland buffer (ulen). If * so, retry with a higher address to get access to the later parts of the * buffer. Returns EFAULT if no mapping can be made, else zero. * * @param _pm PMAP for the user pointer. * @param _uaddr Userland address to map. * @param _kaddr Corresponding kernel address. * @param _ulen Length of user buffer. * @param _klen Available subset of ulen with _kaddr. */ METHOD int map_user_ptr { mmu_t _mmu; pmap_t _pm; volatile const void *_uaddr; void **_kaddr; size_t _ulen; size_t *_klen; }; /** * @brief Decode a kernel pointer, as visible to the current thread, * by setting whether it corresponds to a user or kernel address and * the address in the respective memory maps to which the address as * seen in the kernel corresponds. This is essentially the inverse of * MMU_MAP_USER_PTR() above and is used in kernel-space fault handling. * Returns 0 on success or EFAULT if the address could not be mapped. */ METHOD int decode_kernel_ptr { mmu_t _mmu; vm_offset_t addr; int *is_user; vm_offset_t *decoded_addr; }; /** * @brief Reverse-map a kernel virtual address * * @param _va kernel virtual address to reverse-map * * @retval pa physical address corresponding to mapping */ METHOD vm_paddr_t kextract { mmu_t _mmu; vm_offset_t _va; }; /** * @brief Map a wired page into kernel virtual address space * * @param _va mapping virtual address * @param _pa mapping physical address */ METHOD void kenter { mmu_t _mmu; vm_offset_t _va; vm_paddr_t _pa; }; /** * @brief Map a wired page into kernel virtual address space * * @param _va mapping virtual address * @param _pa mapping physical address * @param _ma mapping cache control attributes */ METHOD void kenter_attr { mmu_t _mmu; vm_offset_t _va; vm_paddr_t _pa; vm_memattr_t _ma; } DEFAULT mmu_null_kenter_attr; /** * @brief Unmap a wired page from kernel virtual address space * * @param _va mapped virtual address */ METHOD void kremove { mmu_t _mmu; vm_offset_t _va; }; /** * @brief Determine if the given physical address range has been direct-mapped. * * @param _pa physical address start * @param _size physical address range size * * @retval bool TRUE if the range is direct-mapped. */ METHOD boolean_t dev_direct_mapped { mmu_t _mmu; vm_paddr_t _pa; vm_size_t _size; }; /** * @brief Enforce instruction cache coherency. Typically called after a * region of memory has been modified and before execution of or within * that region is attempted. Setting breakpoints in a process through * ptrace(2) is one example of when the instruction cache needs to be * made coherent. * * @param _pm the physical map of the virtual address * @param _va the virtual address of the modified region * @param _sz the size of the modified region */ METHOD void sync_icache { mmu_t _mmu; pmap_t _pm; vm_offset_t _va; vm_size_t _sz; }; /** * @brief Create temporary memory mapping for use by dumpsys(). * * @param _pa The physical page to map. * @param _sz The requested size of the mapping. * @param _va The virtual address of the mapping. */ METHOD void dumpsys_map { mmu_t _mmu; vm_paddr_t _pa; size_t _sz; void **_va; }; /** * @brief Remove temporary dumpsys() mapping. * * @param _pa The physical page to map. * @param _sz The requested size of the mapping. * @param _va The virtual address of the mapping. */ METHOD void dumpsys_unmap { mmu_t _mmu; vm_paddr_t _pa; size_t _sz; void *_va; }; /** * @brief Initialize memory chunks for dumpsys. */ METHOD void scan_init { mmu_t _mmu; }; /** * @brief Scan kernel PMAP, adding mapped physical pages to dump. * * @retval pmap_size Number of bytes used by all PTE entries. */ METHOD size_t scan_pmap { mmu_t _mmu; } DEFAULT mmu_null_scan_pmap; /** * @brief Initialize a PMAP dump. * * @param _blkpgs Size of a dump block, in pages. * * @retval ctx Dump context, used by dump_pmap. */ METHOD void * dump_pmap_init { mmu_t _mmu; unsigned _blkpgs; } DEFAULT mmu_null_dump_pmap_init; /** * @brief Dump a block of PTEs. * The size of the dump block is specified in dump_pmap_init and * the 'buf' argument must be big enough to hold a full block. * If the page table resides in regular memory, then the 'buf' * argument is ignored and a pointer to the specified dump block * is returned instead, avoiding memory copy. Else, the buffer is * filled with PTEs and the own buffer pointer is returned. * In the end, the cursor in 'ctx' is adjusted to point to the next block. * * @param _ctx Dump context, retrieved from dump_pmap_init. * @param _buf Buffer to hold the dump block contents. * @param _nbytes Number of bytes dumped. * * @retval NULL No more blocks to dump. * @retval buf Pointer to dumped data (may be different than _buf). */ METHOD void * dump_pmap { mmu_t _mmu; void *_ctx; void *_buf; u_long *_nbytes; } DEFAULT mmu_null_dump_pmap; /** * @brief Create a temporary thread-local KVA mapping of a single page. * * @param _pg The physical page to map * * @retval addr The temporary KVA */ METHOD vm_offset_t quick_enter_page { mmu_t _mmu; vm_page_t _pg; }; /** * @brief Undo a mapping created by quick_enter_page * * @param _va The mapped KVA */ METHOD void quick_remove_page { mmu_t _mmu; vm_offset_t _va; }; /** * @brief Change the specified virtual address range's memory type. * * @param _va The virtual base address to change * * @param _sz Size of the region to change * * @param _mode New mode to set on the VA range * * @retval error 0 on success, EINVAL or ENOMEM on error. */ METHOD int change_attr { mmu_t _mmu; vm_offset_t _va; vm_size_t _sz; vm_memattr_t _mode; } DEFAULT mmu_null_change_attr; /** * @brief Initialize the page array. * * @param _pages The number of pages to be accounted by the array. */ METHOD void page_array_startup { mmu_t _mmu; long _pages; }; Index: projects/clang1000-import/sys/powerpc/powerpc/platform.c =================================================================== --- projects/clang1000-import/sys/powerpc/powerpc/platform.c (revision 356919) +++ projects/clang1000-import/sys/powerpc/powerpc/platform.c (revision 356920) @@ -1,399 +1,406 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 Peter Grehan * Copyright (c) 2009 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Dispatch platform calls to the appropriate platform implementation * through a previously registered kernel object. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include "platform_if.h" static platform_def_t *plat_def_impl; static platform_t plat_obj; static struct kobj_ops plat_kernel_kops; static struct platform_kobj plat_kernel_obj; static char plat_name[64] = ""; SYSCTL_STRING(_hw, OID_AUTO, platform, CTLFLAG_RD | CTLFLAG_TUN, plat_name, 0, "Platform currently in use"); static struct mem_affinity mem_info[VM_PHYSSEG_MAX + 1]; static int vm_locality_table[MAXMEMDOM * MAXMEMDOM]; static struct mem_region pregions[PHYS_AVAIL_SZ]; static struct numa_mem_region numa_pregions[PHYS_AVAIL_SZ]; static struct mem_region aregions[PHYS_AVAIL_SZ]; static int nnumapregions, npregions, naregions; /* * Memory region utilities: determine if two regions overlap, * and merge two overlapping regions into one */ static int memr_overlap(struct mem_region *r1, struct mem_region *r2) { if ((r1->mr_start + r1->mr_size) < r2->mr_start || (r2->mr_start + r2->mr_size) < r1->mr_start) return (FALSE); return (TRUE); } static void memr_merge(struct mem_region *from, struct mem_region *to) { vm_offset_t end; end = uqmax(to->mr_start + to->mr_size, from->mr_start + from->mr_size); to->mr_start = uqmin(from->mr_start, to->mr_start); to->mr_size = end - to->mr_start; } /* * Quick sort callout for comparing memory regions. */ static int mr_cmp(const void *a, const void *b) { const struct mem_region *regiona, *regionb; regiona = a; regionb = b; if (regiona->mr_start < regionb->mr_start) return (-1); else if (regiona->mr_start > regionb->mr_start) return (1); else return (0); } void numa_mem_regions(struct numa_mem_region **phys, int *physsz) { struct mem_affinity *mi; int i, j, maxdom, ndomain, offset; nnumapregions = 0; PLATFORM_NUMA_MEM_REGIONS(plat_obj, numa_pregions, &nnumapregions); if (physsz != NULL) *physsz = nnumapregions; if (phys != NULL) *phys = numa_pregions; if (physsz == NULL || phys == NULL) { printf("unset value\n"); return; } maxdom = 0; for (i = 0; i < nnumapregions; i++) if (numa_pregions[i].mr_domain > maxdom) maxdom = numa_pregions[i].mr_domain; mi = mem_info; for (i = 0; i < nnumapregions; i++, mi++) { mi->start = numa_pregions[i].mr_start; mi->end = numa_pregions[i].mr_start + numa_pregions[i].mr_size; mi->domain = numa_pregions[i].mr_domain; } offset = 0; vm_locality_table[offset] = 10; ndomain = maxdom + 1; if (ndomain > 1) { for (i = 0; i < ndomain; i++) { for (j = 0; j < ndomain; j++) { /* * Not sure what these values should actually be */ if (i == j) vm_locality_table[offset] = 10; else vm_locality_table[offset] = 21; offset++; } } } vm_phys_register_domains(ndomain, mem_info, vm_locality_table); } void mem_regions(struct mem_region **phys, int *physsz, struct mem_region **avail, int *availsz) { int i, j, still_merging; if (npregions == 0) { PLATFORM_MEM_REGIONS(plat_obj, pregions, &npregions, aregions, &naregions); qsort(pregions, npregions, sizeof(*pregions), mr_cmp); qsort(aregions, naregions, sizeof(*aregions), mr_cmp); /* Remove overlapping available regions */ do { still_merging = FALSE; for (i = 0; i < naregions; i++) { if (aregions[i].mr_size == 0) continue; for (j = i+1; j < naregions; j++) { if (aregions[j].mr_size == 0) continue; if (!memr_overlap(&aregions[j], &aregions[i])) continue; memr_merge(&aregions[j], &aregions[i]); /* mark inactive */ aregions[j].mr_size = 0; still_merging = TRUE; } } } while (still_merging == TRUE); /* Collapse zero-length available regions */ for (i = 0; i < naregions; i++) { if (aregions[i].mr_size == 0) { memcpy(&aregions[i], &aregions[i+1], (naregions - i - 1)*sizeof(*aregions)); naregions--; i--; } } } if (phys != NULL) *phys = pregions; if (avail != NULL) *avail = aregions; if (physsz != NULL) *physsz = npregions; if (availsz != NULL) *availsz = naregions; } int mem_valid(vm_offset_t addr, int len) { int i; if (npregions == 0) { struct mem_region *p, *a; int na, np; mem_regions(&p, &np, &a, &na); } for (i = 0; i < npregions; i++) if ((addr >= pregions[i].mr_start) && (addr + len <= pregions[i].mr_start + pregions[i].mr_size)) return (0); return (EFAULT); } vm_offset_t platform_real_maxaddr(void) { return (PLATFORM_REAL_MAXADDR(plat_obj)); } const char * installed_platform() { return (plat_def_impl->name); } u_long platform_timebase_freq(struct cpuref *cpu) { return (PLATFORM_TIMEBASE_FREQ(plat_obj, cpu)); } /* * Put the current CPU, as last step in suspend, to sleep */ void platform_sleep() { PLATFORM_SLEEP(plat_obj); } int platform_smp_first_cpu(struct cpuref *cpu) { return (PLATFORM_SMP_FIRST_CPU(plat_obj, cpu)); } int platform_smp_next_cpu(struct cpuref *cpu) { return (PLATFORM_SMP_NEXT_CPU(plat_obj, cpu)); } int platform_smp_get_bsp(struct cpuref *cpu) { return (PLATFORM_SMP_GET_BSP(plat_obj, cpu)); } int platform_smp_start_cpu(struct pcpu *cpu) { return (PLATFORM_SMP_START_CPU(plat_obj, cpu)); } void platform_smp_ap_init() { PLATFORM_SMP_AP_INIT(plat_obj); } void platform_smp_probe_threads(void) { PLATFORM_SMP_PROBE_THREADS(plat_obj); } #ifdef SMP struct cpu_group * cpu_topo(void) { return (PLATFORM_SMP_TOPO(plat_obj)); } #endif + +int +platform_node_numa_domain(phandle_t node) +{ + return (PLATFORM_NODE_NUMA_DOMAIN(plat_obj, node)); +} /* * Reset back to firmware. */ void cpu_reset() { PLATFORM_RESET(plat_obj); } void platform_smp_timebase_sync(u_long tb, int ap) { PLATFORM_SMP_TIMEBASE_SYNC(plat_obj, tb, ap); } /* * Platform install routines. Highest priority wins, using the same * algorithm as bus attachment. */ SET_DECLARE(platform_set, platform_def_t); void platform_probe_and_attach() { platform_def_t **platpp, *platp; int prio, best_prio; plat_obj = &plat_kernel_obj; best_prio = 0; /* * Try to locate the best platform kobj */ SET_FOREACH(platpp, platform_set) { platp = *platpp; /* * Take care of compiling the selected class, and * then statically initialise the MMU object */ kobj_class_compile_static(platp, &plat_kernel_kops); kobj_init_static((kobj_t)plat_obj, platp); prio = PLATFORM_PROBE(plat_obj); /* Check for errors */ if (prio > 0) continue; /* * Check if this module was specifically requested through * the loader tunable we provide. */ if (strcmp(platp->name,plat_name) == 0) { plat_def_impl = platp; break; } /* Otherwise, see if it is better than our current best */ if (plat_def_impl == NULL || prio > best_prio) { best_prio = prio; plat_def_impl = platp; } /* * We can't free the KOBJ, since it is static. Reset the ops * member of this class so that we can come back later. */ platp->ops = NULL; } if (plat_def_impl == NULL) panic("No platform module found!"); /* * Recompile to make sure we ended with the * correct one, and then attach. */ kobj_class_compile_static(plat_def_impl, &plat_kernel_kops); kobj_init_static((kobj_t)plat_obj, plat_def_impl); strlcpy(plat_name,plat_def_impl->name,sizeof(plat_name)); PLATFORM_ATTACH(plat_obj); } Index: projects/clang1000-import/sys/powerpc/powerpc/platform_if.m =================================================================== --- projects/clang1000-import/sys/powerpc/powerpc/platform_if.m (revision 356919) +++ projects/clang1000-import/sys/powerpc/powerpc/platform_if.m (revision 356920) @@ -1,257 +1,272 @@ #- # Copyright (c) 2009 Nathan Whitehorn # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # #include #include #include #include #include +#include #include #include #include #include /** * @defgroup PLATFORM platform - KObj methods for PowerPC platform * implementations * @brief A set of methods required by all platform implementations. * These are used to bring up secondary CPUs, supply the physical memory * map, etc. *@{ */ INTERFACE platform; # # Default implementations # CODE { static void platform_null_attach(platform_t plat) { return; } static int platform_null_smp_first_cpu(platform_t plat, struct cpuref *cpuref) { cpuref->cr_hwref = -1; cpuref->cr_cpuid = 0; return (0); } static int platform_null_smp_next_cpu(platform_t plat, struct cpuref *_cpuref) { return (ENOENT); } static struct cpu_group *platform_null_smp_topo(platform_t plat) { #ifdef SMP return (smp_topo_none()); #else return (NULL); #endif } static vm_offset_t platform_null_real_maxaddr(platform_t plat) { return (VM_MAX_ADDRESS); } static void platform_null_smp_ap_init(platform_t plat) { return; } static void platform_null_smp_probe_threads(void) { return; } + static int platform_null_node_numa_domain(platform_t plat, + phandle_t node) + { + return (0); + } }; /** * @brief Probe for whether we are on this platform, returning the standard * newbus probe codes. If we have Open Firmware or a flattened device tree, * it is guaranteed to be available at this point. */ METHOD int probe { platform_t _plat; }; /** * @brief Attach this platform module. This happens before the MMU is online, * so the platform module can install its own high-priority MMU module at * this point. */ METHOD int attach { platform_t _plat; } DEFAULT platform_null_attach; /** * @brief Return the system's physical memory map. * * It shall provide the total and the available regions of RAM. * The available regions need not take the kernel into account. * * @param _memp Array of physical memory chunks * @param _memsz Number of physical memory chunks * @param _availp Array of available physical memory chunks * @param _availsz Number of available physical memory chunks */ METHOD void mem_regions { platform_t _plat; struct mem_region *_memp; int *_memsz; struct mem_region *_availp; int *_availsz; }; /** * @brief Return the system's physical memory map. * * It shall provide the total RAM with the corresponding domains. * * @param _memp Array of physical memory chunks * @param _memsz Number of physical memory chunks */ METHOD void numa_mem_regions { platform_t _plat; struct numa_mem_region *_memp; int *_memsz; }; /** * @brief Return the maximum address accessible in real mode * (for use with hypervisors) */ METHOD vm_offset_t real_maxaddr { platform_t _plat; } DEFAULT platform_null_real_maxaddr; /** * @brief Get the CPU's timebase frequency, in ticks per second. * * @param _cpu CPU whose timebase to query */ METHOD u_long timebase_freq { platform_t _plat; struct cpuref *_cpu; }; # SMP bits /** * @brief Fill the first CPU's cpuref * * @param _cpuref CPU */ METHOD int smp_first_cpu { platform_t _plat; struct cpuref *_cpuref; } DEFAULT platform_null_smp_first_cpu; /** * @brief Fill the next CPU's cpuref * * @param _cpuref CPU */ METHOD int smp_next_cpu { platform_t _plat; struct cpuref *_cpuref; } DEFAULT platform_null_smp_next_cpu; /** * @brief Find the boot processor * * @param _cpuref CPU */ METHOD int smp_get_bsp { platform_t _plat; struct cpuref *_cpuref; } DEFAULT platform_null_smp_first_cpu; /** * @brief Start a CPU * * @param _cpuref CPU */ METHOD int smp_start_cpu { platform_t _plat; struct pcpu *_cpu; }; /** * @brief Start a CPU * */ METHOD void smp_ap_init { platform_t _plat; } DEFAULT platform_null_smp_ap_init; /** * @brief Probe mp_ncores and smp_threads_per_core for early MI code */ METHOD void smp_probe_threads { platform_t _plat; } DEFAULT platform_null_smp_probe_threads; /** * @brief Return SMP topology */ METHOD cpu_group_t smp_topo { platform_t _plat; } DEFAULT platform_null_smp_topo; /** * @brief Reset system */ METHOD void reset { platform_t _plat; }; /** * @brief Suspend the CPU */ METHOD void sleep { platform_t _plat; }; /** * @brief Attempt to synchronize timebase of current CPU with others. * Entered (approximately) simultaneously on all CPUs, including the BSP. * Passed the timebase value on the BSP as of shortly before the call. */ METHOD void smp_timebase_sync { platform_t _plat; u_long _tb; int _ap; }; +/** + * @brief Return the NUMA domain for the given device tree node. Always returns + * a valid domain. + * + */ +METHOD int node_numa_domain { + platform_t _plat; + phandle_t _node; +} DEFAULT platform_null_node_numa_domain; Index: projects/clang1000-import/sys/powerpc/powerpc/sigcode64.S =================================================================== --- projects/clang1000-import/sys/powerpc/powerpc/sigcode64.S (revision 356919) +++ projects/clang1000-import/sys/powerpc/powerpc/sigcode64.S (revision 356920) @@ -1,76 +1,80 @@ /* $FreeBSD$ */ /* $NetBSD: sigcode.S,v 1.1 1999/11/17 14:56:11 kleink Exp $ */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include "assym.inc" /* * The following code gets copied to the top of the user stack on process * execution. It does signal trampolining on signal delivery. * * On entry r1 points to a struct sigframe at bottom of current stack. * All other registers are unchanged. * * Entered midway through for v2 ELF binaries that don't need to deal with * function descriptors. * */ .globl CNAME(sigcode64),CNAME(szsigcode64) .globl CNAME(sigcode64_elfv2),CNAME(szsigcode64_elfv2) CNAME(sigcode64): mflr 2 /* resolve function descriptor */ ld 0,0(2) ld 2,8(2) mtlr 0 CNAME(sigcode64_elfv2): addi 1,1,-112 /* reserved space for callee */ mflr 12 /* ELFv2 wants the address in r12 */ blrl addi 3,1,112+SF_UC /* restore sp, and get &frame->sf_uc */ li 0,SYS_sigreturn sc /* sigreturn(scp) */ + /* + * If we get back to here, it means sigreturn failed. + * As such, we are now stuck in the wrong context. + * Exit immediately without touching the stack. + */ li 0,SYS_exit sc /* exit(errno) */ - nop /* align to doubleword */ endsigcode64: .data CNAME(szsigcode64): .long endsigcode64 - CNAME(sigcode64) CNAME(szsigcode64_elfv2): .long endsigcode64 - CNAME(sigcode64_elfv2) Index: projects/clang1000-import/sys/sys/refcount.h =================================================================== --- projects/clang1000-import/sys/sys/refcount.h (revision 356919) +++ projects/clang1000-import/sys/sys/refcount.h (revision 356920) @@ -1,197 +1,212 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2005 John Baldwin * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __SYS_REFCOUNT_H__ #define __SYS_REFCOUNT_H__ #include #ifdef _KERNEL #include #else #include #define KASSERT(exp, msg) /* */ #endif #define REFCOUNT_WAITER (1U << 31) /* Refcount has waiter. */ #define REFCOUNT_SATURATION_VALUE (3U << 29) #define REFCOUNT_SATURATED(val) (((val) & (1U << 30)) != 0) #define REFCOUNT_COUNT(x) ((x) & ~REFCOUNT_WAITER) bool refcount_release_last(volatile u_int *count, u_int n, u_int old); -void refcount_sleep(volatile u_int *count, const char *wmesg, int prio); /* * Attempt to handle reference count overflow and underflow. Force the counter * to stay at the saturation value so that a counter overflow cannot trigger * destruction of the containing object and instead leads to a less harmful * memory leak. */ static __inline void _refcount_update_saturated(volatile u_int *count) { #ifdef INVARIANTS panic("refcount %p wraparound", count); #else atomic_store_int(count, REFCOUNT_SATURATION_VALUE); #endif } static __inline void refcount_init(volatile u_int *count, u_int value) { KASSERT(!REFCOUNT_SATURATED(value), ("invalid initial refcount value %u", value)); *count = value; } static __inline u_int refcount_acquire(volatile u_int *count) { u_int old; old = atomic_fetchadd_int(count, 1); if (__predict_false(REFCOUNT_SATURATED(old))) _refcount_update_saturated(count); return (old); } static __inline u_int refcount_acquiren(volatile u_int *count, u_int n) { u_int old; KASSERT(n < REFCOUNT_SATURATION_VALUE / 2, ("refcount_acquiren: n=%u too large", n)); old = atomic_fetchadd_int(count, n); if (__predict_false(REFCOUNT_SATURATED(old))) _refcount_update_saturated(count); return (old); } static __inline __result_use_check bool refcount_acquire_checked(volatile u_int *count) { u_int lcount; for (lcount = *count;;) { if (__predict_false(REFCOUNT_SATURATED(lcount + 1))) return (false); if (__predict_true(atomic_fcmpset_int(count, &lcount, lcount + 1) == 1)) return (true); } } static __inline bool refcount_releasen(volatile u_int *count, u_int n) { u_int old; KASSERT(n < REFCOUNT_SATURATION_VALUE / 2, ("refcount_releasen: n=%u too large", n)); atomic_thread_fence_rel(); old = atomic_fetchadd_int(count, -n); if (__predict_false(n >= REFCOUNT_COUNT(old) || REFCOUNT_SATURATED(old))) return (refcount_release_last(count, n, old)); return (false); } static __inline bool refcount_release(volatile u_int *count) { return (refcount_releasen(count, 1)); } +#ifdef _KERNEL +struct lock_object; +void _refcount_sleep(volatile u_int *count, struct lock_object *, + const char *wmesg, int prio); + static __inline void +refcount_sleep(volatile u_int *count, const char *wmesg, int prio) +{ + + _refcount_sleep(count, NULL, wmesg, prio); +} + +#define refcount_sleep_interlock(count, lock, wmesg, prio) \ + _refcount_sleep((count), (struct lock_object *)(lock), (wmesg), (prio)) + +static __inline void refcount_wait(volatile u_int *count, const char *wmesg, int prio) { while (*count != 0) refcount_sleep(count, wmesg, prio); } +#endif /* * This functions returns non-zero if the refcount was * incremented. Else zero is returned. */ static __inline __result_use_check bool refcount_acquire_if_gt(volatile u_int *count, u_int n) { u_int old; old = *count; for (;;) { if (REFCOUNT_COUNT(old) <= n) return (false); if (__predict_false(REFCOUNT_SATURATED(old))) return (true); if (atomic_fcmpset_int(count, &old, old + 1)) return (true); } } static __inline __result_use_check bool refcount_acquire_if_not_zero(volatile u_int *count) { return refcount_acquire_if_gt(count, 0); } static __inline __result_use_check bool refcount_release_if_gt(volatile u_int *count, u_int n) { u_int old; KASSERT(n > 0, ("refcount_release_if_gt: Use refcount_release for final ref")); old = *count; for (;;) { if (REFCOUNT_COUNT(old) <= n) return (false); if (__predict_false(REFCOUNT_SATURATED(old))) return (true); if (atomic_fcmpset_int(count, &old, old - 1)) return (true); } } static __inline __result_use_check bool refcount_release_if_not_last(volatile u_int *count) { return refcount_release_if_gt(count, 1); } #endif /* ! __SYS_REFCOUNT_H__ */ Index: projects/clang1000-import/sys/tools/makeobjops.awk =================================================================== --- projects/clang1000-import/sys/tools/makeobjops.awk (revision 356919) +++ projects/clang1000-import/sys/tools/makeobjops.awk (revision 356920) @@ -1,530 +1,538 @@ #!/usr/bin/awk -f #- # SPDX-License-Identifier: BSD-3-Clause # # Copyright (c) 1992, 1993 # The Regents of the University of California. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # 3. Neither the name of the University nor the names of its contributors # may be used to endorse or promote products derived from this software # without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # From @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93 # From @(#)makedevops.sh 1.1 1998/06/14 13:53:12 dfr Exp $ # From @(#)makedevops.sh ?.? 1998/10/05 # From src/sys/kern/makedevops.pl,v 1.12 1999/11/22 14:40:04 n_hibma Exp # From src/sys/kern/makeobjops.pl,v 1.8 2001/11/16 02:02:42 joe Exp # # $FreeBSD$ # # Script to produce kobj front-end sugar. # function usage () { print "usage: makeobjops.awk [-d] [-p] [-l ] [-c|-h]"; print "where -c produce only .c files"; print " -h produce only .h files"; print " -p use the path component in the source file for destination dir"; print " -l set line width for output files [80]"; print " -d switch on debugging"; exit 1; } function warn (msg) { print "makeobjops.awk:", msg > "/dev/stderr"; } function warnsrc (msg) { warn(src ":" lineno ": " msg); } function debug (msg) { if (opt_d) warn(msg); } function die (msg) { warn(msg); exit 1; } # These are just for convenience ... function printc(s) {if (opt_c) print s > ctmpfilename;} function printh(s) {if (opt_h) print s > htmpfilename;} # # If a line exceeds maxlength, split it into multiple # lines at commas. Subsequent lines are indented by # the specified number of spaces. # # In other words: Lines are split by replacing ", " # by ",\n" plus indent spaces. # function format_line (line, maxlength, indent) { rline = ""; while (length(line) > maxlength) { # # Find the rightmost ", " so that the part # to the left of it is just within maxlength. # If there is none, give up and leave it as-is. # if (!match(substr(line, 1, maxlength + 1), /^.*, /)) break; rline = rline substr(line, 1, RLENGTH - 1) "\n"; line = sprintf("%*s", indent, "") substr(line, RLENGTH + 1); } return rline line; } # # Join an array into a string. # function join (separator, array, num) { _result = "" if (num) { while (num > 1) _result = separator array[num--] _result; _result = array[1] _result; } return _result; } # # Execute a system command and report if it failed. # function system_check (cmd) { if ((rc = system(cmd))) warn(cmd " failed (" rc ")"); } # # Handle "INTERFACE" line. # function handle_interface () { intname = $2; sub(/;$/, "", intname); if (intname !~ /^[a-z_][a-z0-9_]*$/) { debug($0); warnsrc("Invalid interface name '" intname "', use [a-z_][a-z0-9_]*"); error = 1; return; } if (!/;[ ]*$/) warnsrc("Semicolon missing at end of line, no problem"); debug("Interface " intname); printh("#ifndef _" intname "_if_h_"); printh("#define _" intname "_if_h_\n"); printc("#include \"" intname "_if.h\"\n"); } # # Pass doc comments through to the C file # function handle_doc () { doc = "" while (!/\*\//) { doc = doc $0 "\n"; getline < src; lineno++; } doc = doc $0 "\n"; return doc; } # # Handle "CODE" and "HEADER" sections. # Returns the code as-is. # function handle_code () { code = "\n"; getline < src; indent = $0; sub(/[^ ].*$/, "", indent); # find the indent used while (!/^}/) { sub("^" indent, ""); # remove the indent code = code $0 "\n"; getline < src; lineno++;; } return code; } # # Handle "METHOD" and "STATICMETHOD" sections. # function handle_method (static, doc) { # # Get the return type and function name and delete that from # the line. What is left is the possibly first function argument # if it is on the same line. # if (!intname) { warnsrc("No interface name defined"); error = 1; return; } sub(/^[^ ]+[ ]+/, ""); ret = $0; sub(/[ ]*\{.*$/, "", ret); name = ret; sub(/^.*[ ]/, "", name); # last element is name of method sub(/[ ]+[^ ]+$/, "", ret); # return type debug("Method: name=" name " return type=" ret); sub(/^[^\{]*\{[ ]*/, ""); if (!name || !ret) { debug($0); warnsrc("Invalid method specification"); error = 1; return; } if (name !~ /^[a-z_][a-z_0-9]*$/) { warnsrc("Invalid method name '" name "', use [a-z_][a-z0-9_]*"); error = 1; return; } if (methods[name]) { warnsrc("Duplicate method name"); error = 1; return; } methods[name] = name; line = $0; while (line !~ /\}/ && (getline < src) > 0) { line = line " " $0; lineno++ } default_function = ""; if (!match(line, /\};?/)) { warnsrc("Premature end of file"); error = 1; return; } extra = substr(line, RSTART + RLENGTH); if (extra ~ /[ ]*DEFAULT[ ]*[a-zA-Z_][a-zA-Z_0-9]*[ ]*;/) { default_function = extra; sub(/.*DEFAULT[ ]*/, "", default_function); sub(/[; ]+.*$/, "", default_function); } else if (extra && opt_d) { # Warn about garbage at end of line. warnsrc("Ignored '" extra "'"); } sub(/\};?.*$/, "", line); # # Create a list of variables without the types prepended. # sub(/^[ ]+/, "", line); # remove leading ... sub(/[ ]+$/, "", line); # ... and trailing whitespace gsub(/[ ]+/, " ", line); # remove double spaces num_arguments = split(line, arguments, / *; */) - 1; delete varnames; # list of varnames num_varnames = 0; for (i = 1; i <= num_arguments; i++) { if (!arguments[i]) continue; # skip argument if argument is empty num_ar = split(arguments[i], ar, /[* ]+/); if (num_ar < 2) { # only 1 word in argument? warnsrc("no type for '" arguments[i] "'"); error = 1; return; } # Last element is name of variable. varnames[++num_varnames] = ar[num_ar]; } argument_list = join(", ", arguments, num_arguments); varname_list = join(", ", varnames, num_varnames); if (opt_d) { warn("Arguments: " argument_list); warn("Varnames: " varname_list); } mname = intname "_" name; # method name umname = toupper(mname); # uppercase method name firstvar = varnames[1]; if (default_function == "") default_function = "kobj_error_method"; # the method description printh("/** @brief Unique descriptor for the " umname "() method */"); printh("extern struct kobjop_desc " mname "_desc;"); # the method typedef printh("/** @brief A function implementing the " umname "() method */"); prototype = "typedef " ret " " mname "_t("; printh(format_line(prototype argument_list ");", line_width, length(prototype))); # Print out the method desc printc("struct kobjop_desc " mname "_desc = {"); printc("\t0, { &" mname "_desc, (kobjop_t)" default_function " }"); printc("};\n"); # Print out the method itself printh(doc); if (0) { # haven't chosen the format yet printh("static __inline " ret " " umname "(" varname_list ")"); printh("\t" join(";\n\t", arguments, num_arguments) ";"); } else { prototype = "static __inline " ret " " umname "("; printh(format_line(prototype argument_list ")", line_width, length(prototype))); } printh("{"); - printh("\tkobjop_t _m;"); + if (singleton) + printh("\tstatic kobjop_t _m;"); + else + printh("\tkobjop_t _m;"); if (ret != "void") printh("\t" ret " rc;"); if (!static) firstvar = "((kobj_t)" firstvar ")"; if (prolog != "") printh(prolog); + if (singleton) + printh("\tif (_m == NULL)"); printh("\tKOBJOPLOOKUP(" firstvar "->ops," mname ");"); rceq = (ret != "void") ? "rc = " : ""; printh("\t" rceq "((" mname "_t *) _m)(" varname_list ");"); if (epilog != "") printh(epilog); if (ret != "void") printh("\treturn (rc);"); printh("}\n"); } # # Begin of the main program. # BEGIN { line_width = 80; gerror = 0; # # Process the command line. # num_files = 0; for (i = 1; i < ARGC; i++) { if (ARGV[i] ~ /^-/) { # # awk doesn't have getopt(), so we have to do it ourselves. # This is a bit clumsy, but it works. # for (j = 2; j <= length(ARGV[i]); j++) { o = substr(ARGV[i], j, 1); if (o == "c") opt_c = 1; else if (o == "h") opt_h = 1; else if (o == "p") opt_p = 1; else if (o == "d") opt_d = 1; else if (o == "l") { if (length(ARGV[i]) > j) { opt_l = substr(ARGV[i], j + 1); break; } else { if (++i < ARGC) opt_l = ARGV[i]; else usage(); } } else usage(); } } else if (ARGV[i] ~ /\.m$/) filenames[num_files++] = ARGV[i]; else usage(); } if (!num_files || !(opt_c || opt_h)) usage(); if (opt_p) debug("Will produce files in original not in current directory"); if (opt_l) { if (opt_l !~ /^[0-9]+$/ || opt_l < 1) die("Invalid line width '" opt_l "'"); line_width = opt_l; debug("Line width set to " line_width); } for (i = 0; i < num_files; i++) debug("Filename: " filenames[i]); for (file_i = 0; file_i < num_files; file_i++) { src = filenames[file_i]; cfilename = hfilename = src; sub(/\.m$/, ".c", cfilename); sub(/\.m$/, ".h", hfilename); if (!opt_p) { sub(/^.*\//, "", cfilename); sub(/^.*\//, "", hfilename); } debug("Processing from " src " to " cfilename " / " hfilename); ctmpfilename = cfilename ".tmp"; htmpfilename = hfilename ".tmp"; # Avoid a literal generated file tag here. generated = "@" "generated"; common_head = \ "/*\n" \ " * This file is " generated " automatically.\n" \ " * Do not modify anything in here by hand.\n" \ " *\n" \ " * Created from source file\n" \ " * " src "\n" \ " * with\n" \ " * makeobjops.awk\n" \ " *\n" \ " * See the source file for legal information\n" \ " */\n"; printc(common_head "\n" \ "#include \n" \ "#include \n" \ "#include \n" \ "#include "); printh(common_head); delete methods; # clear list of methods intname = ""; lineno = 0; error = 0; # to signal clean up and gerror setting lastdoc = ""; prolog = ""; epilog = ""; + singleton = 0; while (!error && (getline < src) > 0) { lineno++; # # Take special notice of include directives. # if (/^#[ ]*include[ ]+["<][^">]+[">]/) { incld = $0; sub(/^#[ ]*include[ ]+/, "", incld); debug("Included file: " incld); printc("#include " incld); } sub(/#.*/, ""); # remove comments sub(/^[ ]+/, ""); # remove leading ... sub(/[ ]+$/, ""); # ... and trailing whitespace if (/^$/) { # skip empty lines } else if (/^\/\*\*/) lastdoc = handle_doc(); else if (/^INTERFACE[ ]+[^ ;]*[ ]*;?[ ]*$/) { printh(lastdoc); lastdoc = ""; handle_interface(); } else if (/^CODE[ ]*{$/) printc(handle_code()); else if (/^HEADER[ ]*{$/) printh(handle_code()); else if (/^METHOD/) { handle_method(0, lastdoc); lastdoc = ""; prolog = ""; epilog = ""; } else if (/^STATICMETHOD/) { handle_method(1, lastdoc); lastdoc = ""; prolog = ""; epilog = ""; } else if (/^PROLOG[ ]*{$/) prolog = handle_code(); else if (/^EPILOG[ ]*{$/) epilog = handle_code(); + else if (/^SINGLETON/) + singleton = 1; else { debug($0); warnsrc("Invalid line encountered"); error = 1; } } # # Print the final '#endif' in the header file. # printh("#endif /* _" intname "_if_h_ */"); close (ctmpfilename); close (htmpfilename); if (error) { warn("Output skipped"); system_check("rm -f " ctmpfilename " " htmpfilename); gerror = 1; } else { if (opt_c) system_check("mv -f " ctmpfilename " " cfilename); if (opt_h) system_check("mv -f " htmpfilename " " hfilename); } } exit gerror; } Index: projects/clang1000-import/sys/vm/device_pager.c =================================================================== --- projects/clang1000-import/sys/vm/device_pager.c (revision 356919) +++ projects/clang1000-import/sys/vm/device_pager.c (revision 356920) @@ -1,470 +1,471 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990 University of Utah. * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)device_pager.c 8.1 (Berkeley) 6/11/93 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void dev_pager_init(void); static vm_object_t dev_pager_alloc(void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *); static void dev_pager_dealloc(vm_object_t); static int dev_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); static void dev_pager_putpages(vm_object_t, vm_page_t *, int, int, int *); static boolean_t dev_pager_haspage(vm_object_t, vm_pindex_t, int *, int *); static void dev_pager_free_page(vm_object_t object, vm_page_t m); static int dev_pager_populate(vm_object_t object, vm_pindex_t pidx, int fault_type, vm_prot_t, vm_pindex_t *first, vm_pindex_t *last); /* list of device pager objects */ static struct pagerlst dev_pager_object_list; /* protect list manipulation */ static struct mtx dev_pager_mtx; struct pagerops devicepagerops = { .pgo_init = dev_pager_init, .pgo_alloc = dev_pager_alloc, .pgo_dealloc = dev_pager_dealloc, .pgo_getpages = dev_pager_getpages, .pgo_putpages = dev_pager_putpages, .pgo_haspage = dev_pager_haspage, }; struct pagerops mgtdevicepagerops = { .pgo_alloc = dev_pager_alloc, .pgo_dealloc = dev_pager_dealloc, .pgo_getpages = dev_pager_getpages, .pgo_putpages = dev_pager_putpages, .pgo_haspage = dev_pager_haspage, .pgo_populate = dev_pager_populate, }; static int old_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color); static void old_dev_pager_dtor(void *handle); static int old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot, vm_page_t *mres); static struct cdev_pager_ops old_dev_pager_ops = { .cdev_pg_ctor = old_dev_pager_ctor, .cdev_pg_dtor = old_dev_pager_dtor, .cdev_pg_fault = old_dev_pager_fault }; static void dev_pager_init(void) { TAILQ_INIT(&dev_pager_object_list); mtx_init(&dev_pager_mtx, "dev_pager list", NULL, MTX_DEF); } vm_object_t cdev_pager_lookup(void *handle) { vm_object_t object; mtx_lock(&dev_pager_mtx); object = vm_pager_object_lookup(&dev_pager_object_list, handle); mtx_unlock(&dev_pager_mtx); return (object); } vm_object_t cdev_pager_allocate(void *handle, enum obj_type tp, struct cdev_pager_ops *ops, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred) { vm_object_t object, object1; vm_pindex_t pindex; u_short color; if (tp != OBJT_DEVICE && tp != OBJT_MGTDEVICE) return (NULL); KASSERT(tp == OBJT_MGTDEVICE || ops->cdev_pg_populate == NULL, ("populate on unmanaged device pager")); /* * Offset should be page aligned. */ if (foff & PAGE_MASK) return (NULL); /* * Treat the mmap(2) file offset as an unsigned value for a * device mapping. This, in effect, allows a user to pass all * possible off_t values as the mapping cookie to the driver. At * this point, we know that both foff and size are a multiple * of the page size. Do a check to avoid wrap. */ size = round_page(size); pindex = OFF_TO_IDX(foff) + OFF_TO_IDX(size); if (pindex > OBJ_MAX_SIZE || pindex < OFF_TO_IDX(foff) || pindex < OFF_TO_IDX(size)) return (NULL); if (ops->cdev_pg_ctor(handle, size, prot, foff, cred, &color) != 0) return (NULL); mtx_lock(&dev_pager_mtx); /* * Look up pager, creating as necessary. */ object1 = NULL; object = vm_pager_object_lookup(&dev_pager_object_list, handle); if (object == NULL) { /* * Allocate object and associate it with the pager. Initialize * the object's pg_color based upon the physical address of the * device's memory. */ mtx_unlock(&dev_pager_mtx); object1 = vm_object_allocate(tp, pindex); object1->flags |= OBJ_COLORED; object1->pg_color = color; object1->handle = handle; object1->un_pager.devp.ops = ops; object1->un_pager.devp.dev = handle; TAILQ_INIT(&object1->un_pager.devp.devp_pglist); mtx_lock(&dev_pager_mtx); object = vm_pager_object_lookup(&dev_pager_object_list, handle); if (object != NULL) { /* * We raced with other thread while allocating object. */ if (pindex > object->size) object->size = pindex; KASSERT(object->type == tp, ("Inconsistent device pager type %p %d", object, tp)); KASSERT(object->un_pager.devp.ops == ops, ("Inconsistent devops %p %p", object, ops)); } else { object = object1; object1 = NULL; object->handle = handle; TAILQ_INSERT_TAIL(&dev_pager_object_list, object, pager_object_list); if (ops->cdev_pg_populate != NULL) vm_object_set_flag(object, OBJ_POPULATE); } } else { if (pindex > object->size) object->size = pindex; KASSERT(object->type == tp, ("Inconsistent device pager type %p %d", object, tp)); } mtx_unlock(&dev_pager_mtx); if (object1 != NULL) { object1->handle = object1; mtx_lock(&dev_pager_mtx); TAILQ_INSERT_TAIL(&dev_pager_object_list, object1, pager_object_list); mtx_unlock(&dev_pager_mtx); vm_object_deallocate(object1); } return (object); } static vm_object_t dev_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred) { return (cdev_pager_allocate(handle, OBJT_DEVICE, &old_dev_pager_ops, size, prot, foff, cred)); } void cdev_pager_free_page(vm_object_t object, vm_page_t m) { VM_OBJECT_ASSERT_WLOCKED(object); if (object->type == OBJT_MGTDEVICE) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("unmanaged %p", m)); pmap_remove_all(m); (void)vm_page_remove(m); } else if (object->type == OBJT_DEVICE) dev_pager_free_page(object, m); } static void dev_pager_free_page(vm_object_t object, vm_page_t m) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->type == OBJT_DEVICE && (m->oflags & VPO_UNMANAGED) != 0), ("Managed device or page obj %p m %p", object, m)); TAILQ_REMOVE(&object->un_pager.devp.devp_pglist, m, plinks.q); vm_page_putfake(m); } static void dev_pager_dealloc(vm_object_t object) { vm_page_t m; VM_OBJECT_WUNLOCK(object); object->un_pager.devp.ops->cdev_pg_dtor(object->un_pager.devp.dev); mtx_lock(&dev_pager_mtx); TAILQ_REMOVE(&dev_pager_object_list, object, pager_object_list); mtx_unlock(&dev_pager_mtx); VM_OBJECT_WLOCK(object); if (object->type == OBJT_DEVICE) { /* * Free up our fake pages. */ while ((m = TAILQ_FIRST(&object->un_pager.devp.devp_pglist)) != NULL) { if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0) continue; dev_pager_free_page(object, m); } } object->handle = NULL; object->type = OBJT_DEAD; } static int dev_pager_getpages(vm_object_t object, vm_page_t *ma, int count, int *rbehind, int *rahead) { int error; /* Since our haspage reports zero after/before, the count is 1. */ KASSERT(count == 1, ("%s: count %d", __func__, count)); - VM_OBJECT_ASSERT_WLOCKED(object); if (object->un_pager.devp.ops->cdev_pg_fault == NULL) return (VM_PAGER_FAIL); + VM_OBJECT_WLOCK(object); error = object->un_pager.devp.ops->cdev_pg_fault(object, IDX_TO_OFF(ma[0]->pindex), PROT_READ, &ma[0]); VM_OBJECT_ASSERT_WLOCKED(object); if (error == VM_PAGER_OK) { KASSERT((object->type == OBJT_DEVICE && (ma[0]->oflags & VPO_UNMANAGED) != 0) || (object->type == OBJT_MGTDEVICE && (ma[0]->oflags & VPO_UNMANAGED) == 0), ("Wrong page type %p %p", ma[0], object)); if (object->type == OBJT_DEVICE) { TAILQ_INSERT_TAIL(&object->un_pager.devp.devp_pglist, ma[0], plinks.q); } if (rbehind) *rbehind = 0; if (rahead) *rahead = 0; } + VM_OBJECT_WUNLOCK(object); return (error); } static int dev_pager_populate(vm_object_t object, vm_pindex_t pidx, int fault_type, vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last) { VM_OBJECT_ASSERT_WLOCKED(object); if (object->un_pager.devp.ops->cdev_pg_populate == NULL) return (VM_PAGER_FAIL); return (object->un_pager.devp.ops->cdev_pg_populate(object, pidx, fault_type, max_prot, first, last)); } static int old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot, vm_page_t *mres) { vm_paddr_t paddr; vm_page_t m_paddr, page; struct cdev *dev; struct cdevsw *csw; struct file *fpop; struct thread *td; vm_memattr_t memattr, memattr1; int ref, ret; memattr = object->memattr; VM_OBJECT_WUNLOCK(object); dev = object->handle; csw = dev_refthread(dev, &ref); if (csw == NULL) { VM_OBJECT_WLOCK(object); return (VM_PAGER_FAIL); } td = curthread; fpop = td->td_fpop; td->td_fpop = NULL; ret = csw->d_mmap(dev, offset, &paddr, prot, &memattr); td->td_fpop = fpop; dev_relthread(dev, ref); if (ret != 0) { printf( "WARNING: dev_pager_getpage: map function returns error %d", ret); VM_OBJECT_WLOCK(object); return (VM_PAGER_FAIL); } /* If "paddr" is a real page, perform a sanity check on "memattr". */ if ((m_paddr = vm_phys_paddr_to_vm_page(paddr)) != NULL && (memattr1 = pmap_page_get_memattr(m_paddr)) != memattr) { /* * For the /dev/mem d_mmap routine to return the * correct memattr, pmap_page_get_memattr() needs to * be called, which we do there. */ if ((csw->d_flags & D_MEM) == 0) { printf("WARNING: Device driver %s has set " "\"memattr\" inconsistently (drv %u pmap %u).\n", csw->d_name, memattr, memattr1); } memattr = memattr1; } if (((*mres)->flags & PG_FICTITIOUS) != 0) { /* * If the passed in result page is a fake page, update it with * the new physical address. */ page = *mres; VM_OBJECT_WLOCK(object); vm_page_updatefake(page, paddr, memattr); } else { /* * Replace the passed in reqpage page with our own fake page and * free up the all of the original pages. */ page = vm_page_getfake(paddr, memattr); VM_OBJECT_WLOCK(object); vm_page_replace(page, object, (*mres)->pindex, *mres); *mres = page; } vm_page_valid(page); return (VM_PAGER_OK); } static void dev_pager_putpages(vm_object_t object, vm_page_t *m, int count, int flags, int *rtvals) { panic("dev_pager_putpage called"); } static boolean_t dev_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after) { if (before != NULL) *before = 0; if (after != NULL) *after = 0; return (TRUE); } static int old_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color) { struct cdev *dev; struct cdevsw *csw; vm_memattr_t dummy; vm_ooffset_t off; vm_paddr_t paddr; unsigned int npages; int ref; /* * Make sure this device can be mapped. */ dev = handle; csw = dev_refthread(dev, &ref); if (csw == NULL) return (ENXIO); /* * Check that the specified range of the device allows the desired * protection. * * XXX assumes VM_PROT_* == PROT_* */ npages = OFF_TO_IDX(size); paddr = 0; /* Make paddr initialized for the case of size == 0. */ for (off = foff; npages--; off += PAGE_SIZE) { if (csw->d_mmap(dev, off, &paddr, (int)prot, &dummy) != 0) { dev_relthread(dev, ref); return (EINVAL); } } dev_ref(dev); dev_relthread(dev, ref); *color = atop(paddr) - OFF_TO_IDX(off - PAGE_SIZE); return (0); } static void old_dev_pager_dtor(void *handle) { dev_rel(handle); } Index: projects/clang1000-import/sys/vm/phys_pager.c =================================================================== --- projects/clang1000-import/sys/vm/phys_pager.c (revision 356919) +++ projects/clang1000-import/sys/vm/phys_pager.c (revision 356920) @@ -1,251 +1,250 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2000 Peter Wemm * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* list of phys pager objects */ static struct pagerlst phys_pager_object_list; /* protect access to phys_pager_object_list */ static struct mtx phys_pager_mtx; static void phys_pager_init(void) { TAILQ_INIT(&phys_pager_object_list); mtx_init(&phys_pager_mtx, "phys_pager list", NULL, MTX_DEF); } static vm_object_t phys_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred) { vm_object_t object, object1; vm_pindex_t pindex; /* * Offset should be page aligned. */ if (foff & PAGE_MASK) return (NULL); pindex = OFF_TO_IDX(foff + PAGE_MASK + size); if (handle != NULL) { mtx_lock(&phys_pager_mtx); /* * Look up pager, creating as necessary. */ object1 = NULL; object = vm_pager_object_lookup(&phys_pager_object_list, handle); if (object == NULL) { /* * Allocate object and associate it with the pager. */ mtx_unlock(&phys_pager_mtx); object1 = vm_object_allocate(OBJT_PHYS, pindex); mtx_lock(&phys_pager_mtx); object = vm_pager_object_lookup(&phys_pager_object_list, handle); if (object != NULL) { /* * We raced with other thread while * allocating object. */ if (pindex > object->size) object->size = pindex; } else { object = object1; object1 = NULL; object->handle = handle; vm_object_set_flag(object, OBJ_POPULATE); TAILQ_INSERT_TAIL(&phys_pager_object_list, object, pager_object_list); } } else { if (pindex > object->size) object->size = pindex; } mtx_unlock(&phys_pager_mtx); vm_object_deallocate(object1); } else { object = vm_object_allocate(OBJT_PHYS, pindex); vm_object_set_flag(object, OBJ_POPULATE); } return (object); } static void phys_pager_dealloc(vm_object_t object) { if (object->handle != NULL) { VM_OBJECT_WUNLOCK(object); mtx_lock(&phys_pager_mtx); TAILQ_REMOVE(&phys_pager_object_list, object, pager_object_list); mtx_unlock(&phys_pager_mtx); VM_OBJECT_WLOCK(object); } object->handle = NULL; object->type = OBJT_DEAD; } /* * Fill as many pages as vm_fault has allocated for us. */ static int phys_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, int *rahead) { int i; - VM_OBJECT_ASSERT_WLOCKED(object); for (i = 0; i < count; i++) { if (vm_page_none_valid(m[i])) { if ((m[i]->flags & PG_ZERO) == 0) pmap_zero_page(m[i]); vm_page_valid(m[i]); } KASSERT(vm_page_all_valid(m[i]), ("phys_pager_getpages: partially valid page %p", m[i])); KASSERT(m[i]->dirty == 0, ("phys_pager_getpages: dirty page %p", m[i])); } if (rbehind) *rbehind = 0; if (rahead) *rahead = 0; return (VM_PAGER_OK); } /* * Implement a pretty aggressive clustered getpages strategy. Hint that * everything in an entire 4MB window should be prefaulted at once. * * 4MB (1024 slots per page table page) is convenient for x86, * but may not be for other arches. */ #ifndef PHYSCLUSTER #define PHYSCLUSTER 1024 #endif static int phys_pager_cluster = PHYSCLUSTER; SYSCTL_INT(_vm, OID_AUTO, phys_pager_cluster, CTLFLAG_RWTUN, &phys_pager_cluster, 0, "prefault window size for phys pager"); /* * Max hint to vm_page_alloc() about the further allocation needs * inside the phys_pager_populate() loop. The number of bits used to * implement VM_ALLOC_COUNT() determines the hard limit on this value. * That limit is currently 65535. */ #define PHYSALLOC 16 static int phys_pager_populate(vm_object_t object, vm_pindex_t pidx, int fault_type __unused, vm_prot_t max_prot __unused, vm_pindex_t *first, vm_pindex_t *last) { vm_page_t m; vm_pindex_t base, end, i; int ahead; base = rounddown(pidx, phys_pager_cluster); end = base + phys_pager_cluster - 1; if (end >= object->size) end = object->size - 1; if (*first > base) base = *first; if (end > *last) end = *last; *first = base; *last = end; for (i = base; i <= end; i++) { ahead = MIN(end - i, PHYSALLOC); m = vm_page_grab(object, i, VM_ALLOC_NORMAL | VM_ALLOC_COUNT(ahead)); if (!vm_page_all_valid(m)) vm_page_zero_invalid(m, TRUE); KASSERT(m->dirty == 0, ("phys_pager_populate: dirty page %p", m)); } return (VM_PAGER_OK); } static void phys_pager_putpages(vm_object_t object, vm_page_t *m, int count, boolean_t sync, int *rtvals) { panic("phys_pager_putpage called"); } static boolean_t phys_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after) { vm_pindex_t base, end; base = rounddown(pindex, phys_pager_cluster); end = base + phys_pager_cluster - 1; if (before != NULL) *before = pindex - base; if (after != NULL) *after = end - pindex; return (TRUE); } struct pagerops physpagerops = { .pgo_init = phys_pager_init, .pgo_alloc = phys_pager_alloc, .pgo_dealloc = phys_pager_dealloc, .pgo_getpages = phys_pager_getpages, .pgo_putpages = phys_pager_putpages, .pgo_haspage = phys_pager_haspage, .pgo_populate = phys_pager_populate, }; Index: projects/clang1000-import/sys/vm/sg_pager.c =================================================================== --- projects/clang1000-import/sys/vm/sg_pager.c (revision 356919) +++ projects/clang1000-import/sys/vm/sg_pager.c (revision 356920) @@ -1,228 +1,228 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009 Hudson River Trading LLC * Written by: John H. Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * This pager manages OBJT_SG objects. These objects are backed by * a scatter/gather list of physical address ranges. */ #include #include #include #include #include #include #include #include #include #include #include #include #include static vm_object_t sg_pager_alloc(void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *); static void sg_pager_dealloc(vm_object_t); static int sg_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); static void sg_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *); static boolean_t sg_pager_haspage(vm_object_t, vm_pindex_t, int *, int *); struct pagerops sgpagerops = { .pgo_alloc = sg_pager_alloc, .pgo_dealloc = sg_pager_dealloc, .pgo_getpages = sg_pager_getpages, .pgo_putpages = sg_pager_putpages, .pgo_haspage = sg_pager_haspage, }; static vm_object_t sg_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred) { struct sglist *sg; vm_object_t object; vm_pindex_t npages, pindex; int i; /* * Offset should be page aligned. */ if (foff & PAGE_MASK) return (NULL); /* * The scatter/gather list must only include page-aligned * ranges. */ npages = 0; sg = handle; for (i = 0; i < sg->sg_nseg; i++) { if ((sg->sg_segs[i].ss_paddr % PAGE_SIZE) != 0 || (sg->sg_segs[i].ss_len % PAGE_SIZE) != 0) return (NULL); npages += sg->sg_segs[i].ss_len / PAGE_SIZE; } /* * The scatter/gather list has a fixed size. Refuse requests * to map beyond that. */ size = round_page(size); pindex = OFF_TO_IDX(foff) + OFF_TO_IDX(size); if (pindex > npages || pindex < OFF_TO_IDX(foff) || pindex < OFF_TO_IDX(size)) return (NULL); /* * Allocate a new object and associate it with the * scatter/gather list. It is ok for our purposes to have * multiple VM objects associated with the same scatter/gather * list because scatter/gather lists are static. This is also * simpler than ensuring a unique object per scatter/gather * list. */ object = vm_object_allocate(OBJT_SG, npages); object->handle = sglist_hold(sg); TAILQ_INIT(&object->un_pager.sgp.sgp_pglist); return (object); } static void sg_pager_dealloc(vm_object_t object) { struct sglist *sg; vm_page_t m; /* * Free up our fake pages. */ while ((m = TAILQ_FIRST(&object->un_pager.sgp.sgp_pglist)) != 0) { if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0) continue; TAILQ_REMOVE(&object->un_pager.sgp.sgp_pglist, m, plinks.q); vm_page_putfake(m); } sg = object->handle; sglist_free(sg); object->handle = NULL; object->type = OBJT_DEAD; } static int sg_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, int *rahead) { struct sglist *sg; vm_page_t m_paddr, page; vm_pindex_t offset; vm_paddr_t paddr; vm_memattr_t memattr; size_t space; int i; /* Since our haspage reports zero after/before, the count is 1. */ KASSERT(count == 1, ("%s: count %d", __func__, count)); - VM_OBJECT_ASSERT_WLOCKED(object); + /* Handle is stable while paging is in progress. */ sg = object->handle; memattr = object->memattr; - VM_OBJECT_WUNLOCK(object); offset = m[0]->pindex; /* * Lookup the physical address of the requested page. An initial * value of '1' instead of '0' is used so we can assert that the * page is found since '0' can be a valid page-aligned physical * address. */ space = 0; paddr = 1; for (i = 0; i < sg->sg_nseg; i++) { if (space + sg->sg_segs[i].ss_len <= (offset * PAGE_SIZE)) { space += sg->sg_segs[i].ss_len; continue; } paddr = sg->sg_segs[i].ss_paddr + offset * PAGE_SIZE - space; break; } KASSERT(paddr != 1, ("invalid SG page index")); /* If "paddr" is a real page, perform a sanity check on "memattr". */ if ((m_paddr = vm_phys_paddr_to_vm_page(paddr)) != NULL && pmap_page_get_memattr(m_paddr) != memattr) { memattr = pmap_page_get_memattr(m_paddr); printf( "WARNING: A device driver has set \"memattr\" inconsistently.\n"); } /* Return a fake page for the requested page. */ KASSERT(!(m[0]->flags & PG_FICTITIOUS), ("backing page for SG is fake")); /* Construct a new fake page. */ page = vm_page_getfake(paddr, memattr); VM_OBJECT_WLOCK(object); TAILQ_INSERT_TAIL(&object->un_pager.sgp.sgp_pglist, page, plinks.q); vm_page_replace(page, object, offset, m[0]); + VM_OBJECT_WUNLOCK(object); m[0] = page; vm_page_valid(page); if (rbehind) *rbehind = 0; if (rahead) *rahead = 0; return (VM_PAGER_OK); } static void sg_pager_putpages(vm_object_t object, vm_page_t *m, int count, boolean_t sync, int *rtvals) { panic("sg_pager_putpage called"); } static boolean_t sg_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after) { if (before != NULL) *before = 0; if (after != NULL) *after = 0; return (TRUE); } Index: projects/clang1000-import/sys/vm/swap_pager.c =================================================================== --- projects/clang1000-import/sys/vm/swap_pager.c (revision 356919) +++ projects/clang1000-import/sys/vm/swap_pager.c (revision 356920) @@ -1,3104 +1,3104 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1998 Matthew Dillon, * Copyright (c) 1994 John S. Dyson * Copyright (c) 1990 University of Utah. * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * New Swap System * Matthew Dillon * * Radix Bitmap 'blists'. * * - The new swapper uses the new radix bitmap code. This should scale * to arbitrarily small or arbitrarily large swap spaces and an almost * arbitrary degree of fragmentation. * * Features: * * - on the fly reallocation of swap during putpages. The new system * does not try to keep previously allocated swap blocks for dirty * pages. * * - on the fly deallocation of swap * * - No more garbage collection required. Unnecessarily allocated swap * blocks only exist for dirty vm_page_t's now and these are already * cycled (in a high-load system) by the pager. We also do on-the-fly * removal of invalidated swap blocks when a page is destroyed * or renamed. * * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ * * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94 * @(#)vm_swap.c 8.5 (Berkeley) 2/17/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * MAX_PAGEOUT_CLUSTER must be a power of 2 between 1 and 64. * The 64-page limit is due to the radix code (kern/subr_blist.c). */ #ifndef MAX_PAGEOUT_CLUSTER #define MAX_PAGEOUT_CLUSTER 32 #endif #if !defined(SWB_NPAGES) #define SWB_NPAGES MAX_PAGEOUT_CLUSTER #endif #define SWAP_META_PAGES PCTRIE_COUNT /* * A swblk structure maps each page index within a * SWAP_META_PAGES-aligned and sized range to the address of an * on-disk swap block (or SWAPBLK_NONE). The collection of these * mappings for an entire vm object is implemented as a pc-trie. */ struct swblk { vm_pindex_t p; daddr_t d[SWAP_META_PAGES]; }; static MALLOC_DEFINE(M_VMPGDATA, "vm_pgdata", "swap pager private data"); static struct mtx sw_dev_mtx; static TAILQ_HEAD(, swdevt) swtailq = TAILQ_HEAD_INITIALIZER(swtailq); static struct swdevt *swdevhd; /* Allocate from here next */ static int nswapdev; /* Number of swap devices */ int swap_pager_avail; static struct sx swdev_syscall_lock; /* serialize swap(on|off) */ static u_long swap_reserved; static u_long swap_total; static int sysctl_page_shift(SYSCTL_HANDLER_ARGS); static SYSCTL_NODE(_vm_stats, OID_AUTO, swap, CTLFLAG_RD, 0, "VM swap stats"); SYSCTL_PROC(_vm, OID_AUTO, swap_reserved, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, &swap_reserved, 0, sysctl_page_shift, "A", "Amount of swap storage needed to back all allocated anonymous memory."); SYSCTL_PROC(_vm, OID_AUTO, swap_total, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, &swap_total, 0, sysctl_page_shift, "A", "Total amount of available swap storage."); static int overcommit = 0; SYSCTL_INT(_vm, VM_OVERCOMMIT, overcommit, CTLFLAG_RW, &overcommit, 0, "Configure virtual memory overcommit behavior. See tuning(7) " "for details."); static unsigned long swzone; SYSCTL_ULONG(_vm, OID_AUTO, swzone, CTLFLAG_RD, &swzone, 0, "Actual size of swap metadata zone"); static unsigned long swap_maxpages; SYSCTL_ULONG(_vm, OID_AUTO, swap_maxpages, CTLFLAG_RD, &swap_maxpages, 0, "Maximum amount of swap supported"); static counter_u64_t swap_free_deferred; SYSCTL_COUNTER_U64(_vm_stats_swap, OID_AUTO, free_deferred, CTLFLAG_RD, &swap_free_deferred, "Number of pages that deferred freeing swap space"); static counter_u64_t swap_free_completed; SYSCTL_COUNTER_U64(_vm_stats_swap, OID_AUTO, free_completed, CTLFLAG_RD, &swap_free_completed, "Number of deferred frees completed"); /* bits from overcommit */ #define SWAP_RESERVE_FORCE_ON (1 << 0) #define SWAP_RESERVE_RLIMIT_ON (1 << 1) #define SWAP_RESERVE_ALLOW_NONWIRED (1 << 2) static int sysctl_page_shift(SYSCTL_HANDLER_ARGS) { uint64_t newval; u_long value = *(u_long *)arg1; newval = ((uint64_t)value) << PAGE_SHIFT; return (sysctl_handle_64(oidp, &newval, 0, req)); } int swap_reserve(vm_ooffset_t incr) { return (swap_reserve_by_cred(incr, curthread->td_ucred)); } int swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred) { u_long r, s, prev, pincr; int res, error; static int curfail; static struct timeval lastfail; struct uidinfo *uip; uip = cred->cr_ruidinfo; KASSERT((incr & PAGE_MASK) == 0, ("%s: incr: %ju & PAGE_MASK", __func__, (uintmax_t)incr)); #ifdef RACCT if (racct_enable) { PROC_LOCK(curproc); error = racct_add(curproc, RACCT_SWAP, incr); PROC_UNLOCK(curproc); if (error != 0) return (0); } #endif pincr = atop(incr); res = 0; prev = atomic_fetchadd_long(&swap_reserved, pincr); r = prev + pincr; if (overcommit & SWAP_RESERVE_ALLOW_NONWIRED) { s = vm_cnt.v_page_count - vm_cnt.v_free_reserved - vm_wire_count(); } else s = 0; s += swap_total; if ((overcommit & SWAP_RESERVE_FORCE_ON) == 0 || r <= s || (error = priv_check(curthread, PRIV_VM_SWAP_NOQUOTA)) == 0) { res = 1; } else { prev = atomic_fetchadd_long(&swap_reserved, -pincr); if (prev < pincr) panic("swap_reserved < incr on overcommit fail"); } if (res) { prev = atomic_fetchadd_long(&uip->ui_vmsize, pincr); if ((overcommit & SWAP_RESERVE_RLIMIT_ON) != 0 && prev + pincr > lim_cur(curthread, RLIMIT_SWAP) && priv_check(curthread, PRIV_VM_SWAP_NORLIMIT)) { res = 0; prev = atomic_fetchadd_long(&uip->ui_vmsize, -pincr); if (prev < pincr) panic("uip->ui_vmsize < incr on overcommit fail"); } } if (!res && ppsratecheck(&lastfail, &curfail, 1)) { printf("uid %d, pid %d: swap reservation for %jd bytes failed\n", uip->ui_uid, curproc->p_pid, incr); } #ifdef RACCT if (racct_enable && !res) { PROC_LOCK(curproc); racct_sub(curproc, RACCT_SWAP, incr); PROC_UNLOCK(curproc); } #endif return (res); } void swap_reserve_force(vm_ooffset_t incr) { struct uidinfo *uip; u_long pincr; KASSERT((incr & PAGE_MASK) == 0, ("%s: incr: %ju & PAGE_MASK", __func__, (uintmax_t)incr)); PROC_LOCK(curproc); #ifdef RACCT if (racct_enable) racct_add_force(curproc, RACCT_SWAP, incr); #endif pincr = atop(incr); atomic_add_long(&swap_reserved, pincr); uip = curproc->p_ucred->cr_ruidinfo; atomic_add_long(&uip->ui_vmsize, pincr); PROC_UNLOCK(curproc); } void swap_release(vm_ooffset_t decr) { struct ucred *cred; PROC_LOCK(curproc); cred = curproc->p_ucred; swap_release_by_cred(decr, cred); PROC_UNLOCK(curproc); } void swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred) { u_long prev, pdecr; struct uidinfo *uip; uip = cred->cr_ruidinfo; KASSERT((decr & PAGE_MASK) == 0, ("%s: decr: %ju & PAGE_MASK", __func__, (uintmax_t)decr)); pdecr = atop(decr); prev = atomic_fetchadd_long(&swap_reserved, -pdecr); if (prev < pdecr) panic("swap_reserved < decr"); prev = atomic_fetchadd_long(&uip->ui_vmsize, -pdecr); if (prev < pdecr) printf("negative vmsize for uid = %d\n", uip->ui_uid); #ifdef RACCT if (racct_enable) racct_sub_cred(cred, RACCT_SWAP, decr); #endif } static int swap_pager_full = 2; /* swap space exhaustion (task killing) */ static int swap_pager_almost_full = 1; /* swap space exhaustion (w/hysteresis)*/ static struct mtx swbuf_mtx; /* to sync nsw_wcount_async */ static int nsw_wcount_async; /* limit async write buffers */ static int nsw_wcount_async_max;/* assigned maximum */ static int nsw_cluster_max; /* maximum VOP I/O allowed */ static int sysctl_swap_async_max(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vm, OID_AUTO, swap_async_max, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, sysctl_swap_async_max, "I", "Maximum running async swap ops"); static int sysctl_swap_fragmentation(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vm, OID_AUTO, swap_fragmentation, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_swap_fragmentation, "A", "Swap Fragmentation Info"); static struct sx sw_alloc_sx; /* * "named" and "unnamed" anon region objects. Try to reduce the overhead * of searching a named list by hashing it just a little. */ #define NOBJLISTS 8 #define NOBJLIST(handle) \ (&swap_pager_object_list[((int)(intptr_t)handle >> 4) & (NOBJLISTS-1)]) static struct pagerlst swap_pager_object_list[NOBJLISTS]; static uma_zone_t swwbuf_zone; static uma_zone_t swrbuf_zone; static uma_zone_t swblk_zone; static uma_zone_t swpctrie_zone; /* * pagerops for OBJT_SWAP - "swap pager". Some ops are also global procedure * calls hooked from other parts of the VM system and do not appear here. * (see vm/swap_pager.h). */ static vm_object_t swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t offset, struct ucred *); static void swap_pager_dealloc(vm_object_t object); static int swap_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); static int swap_pager_getpages_async(vm_object_t, vm_page_t *, int, int *, int *, pgo_getpages_iodone_t, void *); static void swap_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *); static boolean_t swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after); static void swap_pager_init(void); static void swap_pager_unswapped(vm_page_t); static void swap_pager_swapoff(struct swdevt *sp); static void swap_pager_update_writecount(vm_object_t object, vm_offset_t start, vm_offset_t end); static void swap_pager_release_writecount(vm_object_t object, vm_offset_t start, vm_offset_t end); struct pagerops swappagerops = { .pgo_init = swap_pager_init, /* early system initialization of pager */ .pgo_alloc = swap_pager_alloc, /* allocate an OBJT_SWAP object */ .pgo_dealloc = swap_pager_dealloc, /* deallocate an OBJT_SWAP object */ .pgo_getpages = swap_pager_getpages, /* pagein */ .pgo_getpages_async = swap_pager_getpages_async, /* pagein (async) */ .pgo_putpages = swap_pager_putpages, /* pageout */ .pgo_haspage = swap_pager_haspage, /* get backing store status for page */ .pgo_pageunswapped = swap_pager_unswapped, /* remove swap related to page */ .pgo_update_writecount = swap_pager_update_writecount, .pgo_release_writecount = swap_pager_release_writecount, }; /* * swap_*() routines are externally accessible. swp_*() routines are * internal. */ static int nswap_lowat = 128; /* in pages, swap_pager_almost_full warn */ static int nswap_hiwat = 512; /* in pages, swap_pager_almost_full warn */ SYSCTL_INT(_vm, OID_AUTO, dmmax, CTLFLAG_RD, &nsw_cluster_max, 0, "Maximum size of a swap block in pages"); static void swp_sizecheck(void); static void swp_pager_async_iodone(struct buf *bp); static bool swp_pager_swblk_empty(struct swblk *sb, int start, int limit); static void swp_pager_free_empty_swblk(vm_object_t, struct swblk *sb); static int swapongeom(struct vnode *); static int swaponvp(struct thread *, struct vnode *, u_long); static int swapoff_one(struct swdevt *sp, struct ucred *cred); /* * Swap bitmap functions */ static void swp_pager_freeswapspace(daddr_t blk, daddr_t npages); static daddr_t swp_pager_getswapspace(int *npages, int limit); /* * Metadata functions */ static daddr_t swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t); static void swp_pager_meta_free(vm_object_t, vm_pindex_t, vm_pindex_t); static void swp_pager_meta_transfer(vm_object_t src, vm_object_t dst, vm_pindex_t pindex, vm_pindex_t count); static void swp_pager_meta_free_all(vm_object_t); static daddr_t swp_pager_meta_lookup(vm_object_t, vm_pindex_t); static void swp_pager_init_freerange(daddr_t *start, daddr_t *num) { *start = SWAPBLK_NONE; *num = 0; } static void swp_pager_update_freerange(daddr_t *start, daddr_t *num, daddr_t addr) { if (*start + *num == addr) { (*num)++; } else { swp_pager_freeswapspace(*start, *num); *start = addr; *num = 1; } } static void * swblk_trie_alloc(struct pctrie *ptree) { return (uma_zalloc(swpctrie_zone, M_NOWAIT | (curproc == pageproc ? M_USE_RESERVE : 0))); } static void swblk_trie_free(struct pctrie *ptree, void *node) { uma_zfree(swpctrie_zone, node); } PCTRIE_DEFINE(SWAP, swblk, p, swblk_trie_alloc, swblk_trie_free); /* * SWP_SIZECHECK() - update swap_pager_full indication * * update the swap_pager_almost_full indication and warn when we are * about to run out of swap space, using lowat/hiwat hysteresis. * * Clear swap_pager_full ( task killing ) indication when lowat is met. * * No restrictions on call * This routine may not block. */ static void swp_sizecheck(void) { if (swap_pager_avail < nswap_lowat) { if (swap_pager_almost_full == 0) { printf("swap_pager: out of swap space\n"); swap_pager_almost_full = 1; } } else { swap_pager_full = 0; if (swap_pager_avail > nswap_hiwat) swap_pager_almost_full = 0; } } /* * SWAP_PAGER_INIT() - initialize the swap pager! * * Expected to be started from system init. NOTE: This code is run * before much else so be careful what you depend on. Most of the VM * system has yet to be initialized at this point. */ static void swap_pager_init(void) { /* * Initialize object lists */ int i; for (i = 0; i < NOBJLISTS; ++i) TAILQ_INIT(&swap_pager_object_list[i]); mtx_init(&sw_dev_mtx, "swapdev", NULL, MTX_DEF); sx_init(&sw_alloc_sx, "swspsx"); sx_init(&swdev_syscall_lock, "swsysc"); } static void swap_pager_counters(void) { swap_free_deferred = counter_u64_alloc(M_WAITOK); swap_free_completed = counter_u64_alloc(M_WAITOK); } SYSINIT(swap_counters, SI_SUB_CPU, SI_ORDER_ANY, swap_pager_counters, NULL); /* * SWAP_PAGER_SWAP_INIT() - swap pager initialization from pageout process * * Expected to be started from pageout process once, prior to entering * its main loop. */ void swap_pager_swap_init(void) { unsigned long n, n2; /* * Number of in-transit swap bp operations. Don't * exhaust the pbufs completely. Make sure we * initialize workable values (0 will work for hysteresis * but it isn't very efficient). * * The nsw_cluster_max is constrained by the bp->b_pages[] * array, which has MAXPHYS / PAGE_SIZE entries, and our locally * defined MAX_PAGEOUT_CLUSTER. Also be aware that swap ops are * constrained by the swap device interleave stripe size. * * Currently we hardwire nsw_wcount_async to 4. This limit is * designed to prevent other I/O from having high latencies due to * our pageout I/O. The value 4 works well for one or two active swap * devices but is probably a little low if you have more. Even so, * a higher value would probably generate only a limited improvement * with three or four active swap devices since the system does not * typically have to pageout at extreme bandwidths. We will want * at least 2 per swap devices, and 4 is a pretty good value if you * have one NFS swap device due to the command/ack latency over NFS. * So it all works out pretty well. */ nsw_cluster_max = min(MAXPHYS / PAGE_SIZE, MAX_PAGEOUT_CLUSTER); nsw_wcount_async = 4; nsw_wcount_async_max = nsw_wcount_async; mtx_init(&swbuf_mtx, "async swbuf mutex", NULL, MTX_DEF); swwbuf_zone = pbuf_zsecond_create("swwbuf", nswbuf / 4); swrbuf_zone = pbuf_zsecond_create("swrbuf", nswbuf / 2); /* * Initialize our zone, taking the user's requested size or * estimating the number we need based on the number of pages * in the system. */ n = maxswzone != 0 ? maxswzone / sizeof(struct swblk) : vm_cnt.v_page_count / 2; swpctrie_zone = uma_zcreate("swpctrie", pctrie_node_size(), NULL, NULL, pctrie_zone_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM); if (swpctrie_zone == NULL) panic("failed to create swap pctrie zone."); swblk_zone = uma_zcreate("swblk", sizeof(struct swblk), NULL, NULL, NULL, NULL, _Alignof(struct swblk) - 1, UMA_ZONE_VM); if (swblk_zone == NULL) panic("failed to create swap blk zone."); n2 = n; do { if (uma_zone_reserve_kva(swblk_zone, n)) break; /* * if the allocation failed, try a zone two thirds the * size of the previous attempt. */ n -= ((n + 2) / 3); } while (n > 0); /* * Often uma_zone_reserve_kva() cannot reserve exactly the * requested size. Account for the difference when * calculating swap_maxpages. */ n = uma_zone_get_max(swblk_zone); if (n < n2) printf("Swap blk zone entries changed from %lu to %lu.\n", n2, n); /* absolute maximum we can handle assuming 100% efficiency */ swap_maxpages = n * SWAP_META_PAGES; swzone = n * sizeof(struct swblk); if (!uma_zone_reserve_kva(swpctrie_zone, n)) printf("Cannot reserve swap pctrie zone, " "reduce kern.maxswzone.\n"); } static vm_object_t swap_pager_alloc_init(void *handle, struct ucred *cred, vm_ooffset_t size, vm_ooffset_t offset) { vm_object_t object; if (cred != NULL) { if (!swap_reserve_by_cred(size, cred)) return (NULL); crhold(cred); } /* * The un_pager.swp.swp_blks trie is initialized by * vm_object_allocate() to ensure the correct order of * visibility to other threads. */ object = vm_object_allocate(OBJT_SWAP, OFF_TO_IDX(offset + PAGE_MASK + size)); object->un_pager.swp.writemappings = 0; object->handle = handle; if (cred != NULL) { object->cred = cred; object->charge = size; } return (object); } /* * SWAP_PAGER_ALLOC() - allocate a new OBJT_SWAP VM object and instantiate * its metadata structures. * * This routine is called from the mmap and fork code to create a new * OBJT_SWAP object. * * This routine must ensure that no live duplicate is created for * the named object request, which is protected against by * holding the sw_alloc_sx lock in case handle != NULL. */ static vm_object_t swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t offset, struct ucred *cred) { vm_object_t object; if (handle != NULL) { /* * Reference existing named region or allocate new one. There * should not be a race here against swp_pager_meta_build() * as called from vm_page_remove() in regards to the lookup * of the handle. */ sx_xlock(&sw_alloc_sx); object = vm_pager_object_lookup(NOBJLIST(handle), handle); if (object == NULL) { object = swap_pager_alloc_init(handle, cred, size, offset); if (object != NULL) { TAILQ_INSERT_TAIL(NOBJLIST(object->handle), object, pager_object_list); } } sx_xunlock(&sw_alloc_sx); } else { object = swap_pager_alloc_init(handle, cred, size, offset); } return (object); } /* * SWAP_PAGER_DEALLOC() - remove swap metadata from object * * The swap backing for the object is destroyed. The code is * designed such that we can reinstantiate it later, but this * routine is typically called only when the entire object is * about to be destroyed. * * The object must be locked. */ static void swap_pager_dealloc(vm_object_t object) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_DEAD) != 0, ("dealloc of reachable obj")); /* * Remove from list right away so lookups will fail if we block for * pageout completion. */ if ((object->flags & OBJ_ANON) == 0 && object->handle != NULL) { VM_OBJECT_WUNLOCK(object); sx_xlock(&sw_alloc_sx); TAILQ_REMOVE(NOBJLIST(object->handle), object, pager_object_list); sx_xunlock(&sw_alloc_sx); VM_OBJECT_WLOCK(object); } vm_object_pip_wait(object, "swpdea"); /* * Free all remaining metadata. We only bother to free it from * the swap meta data. We do not attempt to free swapblk's still * associated with vm_page_t's for this object. We do not care * if paging is still in progress on some objects. */ swp_pager_meta_free_all(object); object->handle = NULL; object->type = OBJT_DEAD; } /************************************************************************ * SWAP PAGER BITMAP ROUTINES * ************************************************************************/ /* * SWP_PAGER_GETSWAPSPACE() - allocate raw swap space * * Allocate swap for up to the requested number of pages, and at * least a minimum number of pages. The starting swap block number * (a page index) is returned or SWAPBLK_NONE if the allocation * failed. * * Also has the side effect of advising that somebody made a mistake * when they configured swap and didn't configure enough. * * This routine may not sleep. * * We allocate in round-robin fashion from the configured devices. */ static daddr_t swp_pager_getswapspace(int *io_npages, int limit) { daddr_t blk; struct swdevt *sp; int mpages, npages; blk = SWAPBLK_NONE; mpages = *io_npages; npages = imin(BLIST_MAX_ALLOC, mpages); mtx_lock(&sw_dev_mtx); sp = swdevhd; while (!TAILQ_EMPTY(&swtailq)) { if (sp == NULL) sp = TAILQ_FIRST(&swtailq); if ((sp->sw_flags & SW_CLOSING) == 0) blk = blist_alloc(sp->sw_blist, &npages, mpages); if (blk != SWAPBLK_NONE) break; sp = TAILQ_NEXT(sp, sw_list); if (swdevhd == sp) { if (npages <= limit) break; mpages = npages - 1; npages >>= 1; } } if (blk != SWAPBLK_NONE) { *io_npages = npages; blk += sp->sw_first; sp->sw_used += npages; swap_pager_avail -= npages; swp_sizecheck(); swdevhd = TAILQ_NEXT(sp, sw_list); } else { if (swap_pager_full != 2) { printf("swp_pager_getswapspace(%d): failed\n", *io_npages); swap_pager_full = 2; swap_pager_almost_full = 1; } swdevhd = NULL; } mtx_unlock(&sw_dev_mtx); return (blk); } static bool swp_pager_isondev(daddr_t blk, struct swdevt *sp) { return (blk >= sp->sw_first && blk < sp->sw_end); } static void swp_pager_strategy(struct buf *bp) { struct swdevt *sp; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (swp_pager_isondev(bp->b_blkno, sp)) { mtx_unlock(&sw_dev_mtx); if ((sp->sw_flags & SW_UNMAPPED) != 0 && unmapped_buf_allowed) { bp->b_data = unmapped_buf; bp->b_offset = 0; } else { pmap_qenter((vm_offset_t)bp->b_data, &bp->b_pages[0], bp->b_bcount / PAGE_SIZE); } sp->sw_strategy(bp, sp); return; } } panic("Swapdev not found"); } /* * SWP_PAGER_FREESWAPSPACE() - free raw swap space * * This routine returns the specified swap blocks back to the bitmap. * * This routine may not sleep. */ static void swp_pager_freeswapspace(daddr_t blk, daddr_t npages) { struct swdevt *sp; if (npages == 0) return; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (swp_pager_isondev(blk, sp)) { sp->sw_used -= npages; /* * If we are attempting to stop swapping on * this device, we don't want to mark any * blocks free lest they be reused. */ if ((sp->sw_flags & SW_CLOSING) == 0) { blist_free(sp->sw_blist, blk - sp->sw_first, npages); swap_pager_avail += npages; swp_sizecheck(); } mtx_unlock(&sw_dev_mtx); return; } } panic("Swapdev not found"); } /* * SYSCTL_SWAP_FRAGMENTATION() - produce raw swap space stats */ static int sysctl_swap_fragmentation(SYSCTL_HANDLER_ARGS) { struct sbuf sbuf; struct swdevt *sp; const char *devname; int error; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf_new_for_sysctl(&sbuf, NULL, 128, req); mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (vn_isdisk(sp->sw_vp, NULL)) devname = devtoname(sp->sw_vp->v_rdev); else devname = "[file]"; sbuf_printf(&sbuf, "\nFree space on device %s:\n", devname); blist_stats(sp->sw_blist, &sbuf); } mtx_unlock(&sw_dev_mtx); error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); } /* * SWAP_PAGER_FREESPACE() - frees swap blocks associated with a page * range within an object. * * This is a globally accessible routine. * * This routine removes swapblk assignments from swap metadata. * * The external callers of this routine typically have already destroyed * or renamed vm_page_t's associated with this range in the object so * we should be ok. * * The object must be locked. */ void swap_pager_freespace(vm_object_t object, vm_pindex_t start, vm_size_t size) { swp_pager_meta_free(object, start, size); } /* * SWAP_PAGER_RESERVE() - reserve swap blocks in object * * Assigns swap blocks to the specified range within the object. The * swap blocks are not zeroed. Any previous swap assignment is destroyed. * * Returns 0 on success, -1 on failure. */ int swap_pager_reserve(vm_object_t object, vm_pindex_t start, vm_size_t size) { daddr_t addr, blk, n_free, s_free; int i, j, n; swp_pager_init_freerange(&s_free, &n_free); VM_OBJECT_WLOCK(object); for (i = 0; i < size; i += n) { n = size - i; blk = swp_pager_getswapspace(&n, 1); if (blk == SWAPBLK_NONE) { swp_pager_meta_free(object, start, i); VM_OBJECT_WUNLOCK(object); return (-1); } for (j = 0; j < n; ++j) { addr = swp_pager_meta_build(object, start + i + j, blk + j); if (addr != SWAPBLK_NONE) swp_pager_update_freerange(&s_free, &n_free, addr); } } swp_pager_freeswapspace(s_free, n_free); VM_OBJECT_WUNLOCK(object); return (0); } static bool swp_pager_xfer_source(vm_object_t srcobject, vm_object_t dstobject, vm_pindex_t pindex, daddr_t addr) { daddr_t dstaddr; KASSERT(srcobject->type == OBJT_SWAP, ("%s: Srcobject not swappable", __func__)); if (dstobject->type == OBJT_SWAP && swp_pager_meta_lookup(dstobject, pindex) != SWAPBLK_NONE) { /* Caller should destroy the source block. */ return (false); } /* * Destination has no swapblk and is not resident, transfer source. * swp_pager_meta_build() can sleep. */ - vm_object_pip_add(srcobject, 1); VM_OBJECT_WUNLOCK(srcobject); - vm_object_pip_add(dstobject, 1); dstaddr = swp_pager_meta_build(dstobject, pindex, addr); KASSERT(dstaddr == SWAPBLK_NONE, ("Unexpected destination swapblk")); - vm_object_pip_wakeup(dstobject); VM_OBJECT_WLOCK(srcobject); - vm_object_pip_wakeup(srcobject); + return (true); } /* * SWAP_PAGER_COPY() - copy blocks from source pager to destination pager * and destroy the source. * * Copy any valid swapblks from the source to the destination. In * cases where both the source and destination have a valid swapblk, * we keep the destination's. * * This routine is allowed to sleep. It may sleep allocating metadata - * indirectly through swp_pager_meta_build() or if paging is still in - * progress on the source. + * indirectly through swp_pager_meta_build(). * * The source object contains no vm_page_t's (which is just as well) * * The source object is of type OBJT_SWAP. * * The source and destination objects must be locked. * Both object locks may temporarily be released. */ void swap_pager_copy(vm_object_t srcobject, vm_object_t dstobject, vm_pindex_t offset, int destroysource) { VM_OBJECT_ASSERT_WLOCKED(srcobject); VM_OBJECT_ASSERT_WLOCKED(dstobject); /* * If destroysource is set, we remove the source object from the * swap_pager internal queue now. */ if (destroysource && (srcobject->flags & OBJ_ANON) == 0 && srcobject->handle != NULL) { - vm_object_pip_add(srcobject, 1); VM_OBJECT_WUNLOCK(srcobject); - vm_object_pip_add(dstobject, 1); VM_OBJECT_WUNLOCK(dstobject); sx_xlock(&sw_alloc_sx); TAILQ_REMOVE(NOBJLIST(srcobject->handle), srcobject, pager_object_list); sx_xunlock(&sw_alloc_sx); VM_OBJECT_WLOCK(dstobject); - vm_object_pip_wakeup(dstobject); VM_OBJECT_WLOCK(srcobject); - vm_object_pip_wakeup(srcobject); } /* * Transfer source to destination. */ swp_pager_meta_transfer(srcobject, dstobject, offset, dstobject->size); /* * Free left over swap blocks in source. * * We have to revert the type to OBJT_DEFAULT so we do not accidentally * double-remove the object from the swap queues. */ if (destroysource) { swp_pager_meta_free_all(srcobject); /* * Reverting the type is not necessary, the caller is going * to destroy srcobject directly, but I'm doing it here * for consistency since we've removed the object from its * queues. */ srcobject->type = OBJT_DEFAULT; } } /* * SWAP_PAGER_HASPAGE() - determine if we have good backing store for * the requested page. * * We determine whether good backing store exists for the requested * page and return TRUE if it does, FALSE if it doesn't. * * If TRUE, we also try to determine how much valid, contiguous backing * store exists before and after the requested page. */ static boolean_t swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after) { daddr_t blk, blk0; int i; VM_OBJECT_ASSERT_LOCKED(object); KASSERT(object->type == OBJT_SWAP, ("%s: object not swappable", __func__)); /* * do we have good backing store at the requested index ? */ blk0 = swp_pager_meta_lookup(object, pindex); if (blk0 == SWAPBLK_NONE) { if (before) *before = 0; if (after) *after = 0; return (FALSE); } /* * find backwards-looking contiguous good backing store */ if (before != NULL) { for (i = 1; i < SWB_NPAGES; i++) { if (i > pindex) break; blk = swp_pager_meta_lookup(object, pindex - i); if (blk != blk0 - i) break; } *before = i - 1; } /* * find forward-looking contiguous good backing store */ if (after != NULL) { for (i = 1; i < SWB_NPAGES; i++) { blk = swp_pager_meta_lookup(object, pindex + i); if (blk != blk0 + i) break; } *after = i - 1; } return (TRUE); } /* * SWAP_PAGER_PAGE_UNSWAPPED() - remove swap backing store related to page * * This removes any associated swap backing store, whether valid or * not, from the page. * * This routine is typically called when a page is made dirty, at * which point any associated swap can be freed. MADV_FREE also * calls us in a special-case situation * * NOTE!!! If the page is clean and the swap was valid, the caller * should make the page dirty before calling this routine. This routine * does NOT change the m->dirty status of the page. Also: MADV_FREE * depends on it. * * This routine may not sleep. * * The object containing the page may be locked. */ static void swap_pager_unswapped(vm_page_t m) { struct swblk *sb; vm_object_t obj; /* * Handle enqueing deferred frees first. If we do not have the * object lock we wait for the page daemon to clear the space. */ obj = m->object; if (!VM_OBJECT_WOWNED(obj)) { VM_PAGE_OBJECT_BUSY_ASSERT(m); /* * The caller is responsible for synchronization but we * will harmlessly handle races. This is typically provided * by only calling unswapped() when a page transitions from * clean to dirty. */ if ((m->a.flags & (PGA_SWAP_SPACE | PGA_SWAP_FREE)) == PGA_SWAP_SPACE) { vm_page_aflag_set(m, PGA_SWAP_FREE); counter_u64_add(swap_free_deferred, 1); } return; } if ((m->a.flags & PGA_SWAP_FREE) != 0) counter_u64_add(swap_free_completed, 1); vm_page_aflag_clear(m, PGA_SWAP_FREE | PGA_SWAP_SPACE); /* * The meta data only exists if the object is OBJT_SWAP * and even then might not be allocated yet. */ KASSERT(m->object->type == OBJT_SWAP, ("Free object not swappable")); sb = SWAP_PCTRIE_LOOKUP(&m->object->un_pager.swp.swp_blks, rounddown(m->pindex, SWAP_META_PAGES)); if (sb == NULL) return; if (sb->d[m->pindex % SWAP_META_PAGES] == SWAPBLK_NONE) return; swp_pager_freeswapspace(sb->d[m->pindex % SWAP_META_PAGES], 1); sb->d[m->pindex % SWAP_META_PAGES] = SWAPBLK_NONE; swp_pager_free_empty_swblk(m->object, sb); } /* * swap_pager_getpages() - bring pages in from swap * * Attempt to page in the pages in array "ma" of length "count". The * caller may optionally specify that additional pages preceding and * succeeding the specified range be paged in. The number of such pages * is returned in the "rbehind" and "rahead" parameters, and they will * be in the inactive queue upon return. * * The pages in "ma" must be busied and will remain busied upon return. */ static int swap_pager_getpages(vm_object_t object, vm_page_t *ma, int count, int *rbehind, int *rahead) { struct buf *bp; vm_page_t bm, mpred, msucc, p; vm_pindex_t pindex; daddr_t blk; int i, maxahead, maxbehind, reqcount; + VM_OBJECT_WLOCK(object); reqcount = count; - /* - * Determine the final number of read-behind pages and - * allocate them BEFORE releasing the object lock. Otherwise, - * there can be a problematic race with vm_object_split(). - * Specifically, vm_object_split() might first transfer pages - * that precede ma[0] in the current object to a new object, - * and then this function incorrectly recreates those pages as - * read-behind pages in the current object. - */ KASSERT(object->type == OBJT_SWAP, ("%s: object not swappable", __func__)); - if (!swap_pager_haspage(object, ma[0]->pindex, &maxbehind, &maxahead)) + if (!swap_pager_haspage(object, ma[0]->pindex, &maxbehind, &maxahead)) { + VM_OBJECT_WUNLOCK(object); return (VM_PAGER_FAIL); + } + KASSERT(reqcount - 1 <= maxahead, + ("page count %d extends beyond swap block", reqcount)); + /* + * Do not transfer any pages other than those that are xbusied + * when running during a split or collapse operation. This + * prevents clustering from re-creating pages which are being + * moved into another object. + */ + if ((object->flags & (OBJ_SPLIT | OBJ_DEAD)) != 0) { + maxahead = reqcount - 1; + maxbehind = 0; + } + + /* * Clip the readahead and readbehind ranges to exclude resident pages. */ if (rahead != NULL) { - KASSERT(reqcount - 1 <= maxahead, - ("page count %d extends beyond swap block", reqcount)); *rahead = imin(*rahead, maxahead - (reqcount - 1)); pindex = ma[reqcount - 1]->pindex; msucc = TAILQ_NEXT(ma[reqcount - 1], listq); if (msucc != NULL && msucc->pindex - pindex - 1 < *rahead) *rahead = msucc->pindex - pindex - 1; } if (rbehind != NULL) { *rbehind = imin(*rbehind, maxbehind); pindex = ma[0]->pindex; mpred = TAILQ_PREV(ma[0], pglist, listq); if (mpred != NULL && pindex - mpred->pindex - 1 < *rbehind) *rbehind = pindex - mpred->pindex - 1; } bm = ma[0]; for (i = 0; i < count; i++) ma[i]->oflags |= VPO_SWAPINPROG; /* * Allocate readahead and readbehind pages. */ if (rbehind != NULL) { for (i = 1; i <= *rbehind; i++) { p = vm_page_alloc(object, ma[0]->pindex - i, VM_ALLOC_NORMAL); if (p == NULL) break; p->oflags |= VPO_SWAPINPROG; bm = p; } *rbehind = i - 1; } if (rahead != NULL) { for (i = 0; i < *rahead; i++) { p = vm_page_alloc(object, ma[reqcount - 1]->pindex + i + 1, VM_ALLOC_NORMAL); if (p == NULL) break; p->oflags |= VPO_SWAPINPROG; } *rahead = i; } if (rbehind != NULL) count += *rbehind; if (rahead != NULL) count += *rahead; vm_object_pip_add(object, count); pindex = bm->pindex; blk = swp_pager_meta_lookup(object, pindex); KASSERT(blk != SWAPBLK_NONE, ("no swap blocking containing %p(%jx)", object, (uintmax_t)pindex)); VM_OBJECT_WUNLOCK(object); bp = uma_zalloc(swrbuf_zone, M_WAITOK); /* Pages cannot leave the object while busy. */ for (i = 0, p = bm; i < count; i++, p = TAILQ_NEXT(p, listq)) { MPASS(p->pindex == bm->pindex + i); bp->b_pages[i] = p; } bp->b_flags |= B_PAGING; bp->b_iocmd = BIO_READ; bp->b_iodone = swp_pager_async_iodone; bp->b_rcred = crhold(thread0.td_ucred); bp->b_wcred = crhold(thread0.td_ucred); bp->b_blkno = blk; bp->b_bcount = PAGE_SIZE * count; bp->b_bufsize = PAGE_SIZE * count; bp->b_npages = count; bp->b_pgbefore = rbehind != NULL ? *rbehind : 0; bp->b_pgafter = rahead != NULL ? *rahead : 0; VM_CNT_INC(v_swapin); VM_CNT_ADD(v_swappgsin, count); /* * perform the I/O. NOTE!!! bp cannot be considered valid after * this point because we automatically release it on completion. * Instead, we look at the one page we are interested in which we * still hold a lock on even through the I/O completion. * * The other pages in our ma[] array are also released on completion, * so we cannot assume they are valid anymore either. * * NOTE: b_blkno is destroyed by the call to swapdev_strategy */ BUF_KERNPROC(bp); swp_pager_strategy(bp); /* * Wait for the pages we want to complete. VPO_SWAPINPROG is always * cleared on completion. If an I/O error occurs, SWAPBLK_NONE * is set in the metadata for each page in the request. */ VM_OBJECT_WLOCK(object); + /* This could be implemented more efficiently with aflags */ while ((ma[0]->oflags & VPO_SWAPINPROG) != 0) { ma[0]->oflags |= VPO_SWAPSLEEP; VM_CNT_INC(v_intrans); if (VM_OBJECT_SLEEP(object, &object->handle, PSWP, "swread", hz * 20)) { printf( "swap_pager: indefinite wait buffer: bufobj: %p, blkno: %jd, size: %ld\n", bp->b_bufobj, (intmax_t)bp->b_blkno, bp->b_bcount); } } + VM_OBJECT_WUNLOCK(object); /* * If we had an unrecoverable read error pages will not be valid. */ for (i = 0; i < reqcount; i++) if (ma[i]->valid != VM_PAGE_BITS_ALL) return (VM_PAGER_ERROR); return (VM_PAGER_OK); /* * A final note: in a low swap situation, we cannot deallocate swap * and mark a page dirty here because the caller is likely to mark * the page clean when we return, causing the page to possibly revert * to all-zero's later. */ } /* * swap_pager_getpages_async(): * * Right now this is emulation of asynchronous operation on top of * swap_pager_getpages(). */ static int swap_pager_getpages_async(vm_object_t object, vm_page_t *ma, int count, int *rbehind, int *rahead, pgo_getpages_iodone_t iodone, void *arg) { int r, error; r = swap_pager_getpages(object, ma, count, rbehind, rahead); - VM_OBJECT_WUNLOCK(object); switch (r) { case VM_PAGER_OK: error = 0; break; case VM_PAGER_ERROR: error = EIO; break; case VM_PAGER_FAIL: error = EINVAL; break; default: panic("unhandled swap_pager_getpages() error %d", r); } (iodone)(arg, ma, count, error); - VM_OBJECT_WLOCK(object); return (r); } /* * swap_pager_putpages: * * Assign swap (if necessary) and initiate I/O on the specified pages. * * We support both OBJT_DEFAULT and OBJT_SWAP objects. DEFAULT objects * are automatically converted to SWAP objects. * * In a low memory situation we may block in VOP_STRATEGY(), but the new * vm_page reservation system coupled with properly written VFS devices * should ensure that no low-memory deadlock occurs. This is an area * which needs work. * * The parent has N vm_object_pip_add() references prior to * calling us and will remove references for rtvals[] that are * not set to VM_PAGER_PEND. We need to remove the rest on I/O * completion. * * The parent has soft-busy'd the pages it passes us and will unbusy * those whose rtvals[] entry is not set to VM_PAGER_PEND on return. * We need to unbusy the rest on I/O completion. */ static void swap_pager_putpages(vm_object_t object, vm_page_t *ma, int count, int flags, int *rtvals) { struct buf *bp; daddr_t addr, blk, n_free, s_free; vm_page_t mreq; int i, j, n; bool async; KASSERT(count == 0 || ma[0]->object == object, ("%s: object mismatch %p/%p", __func__, object, ma[0]->object)); /* * Step 1 * * Turn object into OBJT_SWAP. Force sync if not a pageout process. */ if (object->type != OBJT_SWAP) { addr = swp_pager_meta_build(object, 0, SWAPBLK_NONE); KASSERT(addr == SWAPBLK_NONE, ("unexpected object swap block")); } VM_OBJECT_WUNLOCK(object); async = curproc == pageproc && (flags & VM_PAGER_PUT_SYNC) == 0; swp_pager_init_freerange(&s_free, &n_free); /* * Step 2 * * Assign swap blocks and issue I/O. We reallocate swap on the fly. * The page is left dirty until the pageout operation completes * successfully. */ for (i = 0; i < count; i += n) { /* Maximum I/O size is limited by maximum swap block size. */ n = min(count - i, nsw_cluster_max); /* Get a block of swap of size up to size n. */ blk = swp_pager_getswapspace(&n, 4); if (blk == SWAPBLK_NONE) { for (j = 0; j < n; ++j) rtvals[i + j] = VM_PAGER_FAIL; continue; } /* * All I/O parameters have been satisfied. Build the I/O * request and assign the swap space. */ if (async) { mtx_lock(&swbuf_mtx); while (nsw_wcount_async == 0) msleep(&nsw_wcount_async, &swbuf_mtx, PVM, "swbufa", 0); nsw_wcount_async--; mtx_unlock(&swbuf_mtx); } bp = uma_zalloc(swwbuf_zone, M_WAITOK); if (async) bp->b_flags = B_ASYNC; bp->b_flags |= B_PAGING; bp->b_iocmd = BIO_WRITE; bp->b_rcred = crhold(thread0.td_ucred); bp->b_wcred = crhold(thread0.td_ucred); bp->b_bcount = PAGE_SIZE * n; bp->b_bufsize = PAGE_SIZE * n; bp->b_blkno = blk; VM_OBJECT_WLOCK(object); for (j = 0; j < n; ++j) { mreq = ma[i + j]; vm_page_aflag_clear(mreq, PGA_SWAP_FREE); addr = swp_pager_meta_build(mreq->object, mreq->pindex, blk + j); if (addr != SWAPBLK_NONE) swp_pager_update_freerange(&s_free, &n_free, addr); MPASS(mreq->dirty == VM_PAGE_BITS_ALL); mreq->oflags |= VPO_SWAPINPROG; bp->b_pages[j] = mreq; } VM_OBJECT_WUNLOCK(object); bp->b_npages = n; /* * Must set dirty range for NFS to work. */ bp->b_dirtyoff = 0; bp->b_dirtyend = bp->b_bcount; VM_CNT_INC(v_swapout); VM_CNT_ADD(v_swappgsout, bp->b_npages); /* * We unconditionally set rtvals[] to VM_PAGER_PEND so that we * can call the async completion routine at the end of a * synchronous I/O operation. Otherwise, our caller would * perform duplicate unbusy and wakeup operations on the page * and object, respectively. */ for (j = 0; j < n; j++) rtvals[i + j] = VM_PAGER_PEND; /* * asynchronous * * NOTE: b_blkno is destroyed by the call to swapdev_strategy. */ if (async) { bp->b_iodone = swp_pager_async_iodone; BUF_KERNPROC(bp); swp_pager_strategy(bp); continue; } /* * synchronous * * NOTE: b_blkno is destroyed by the call to swapdev_strategy. */ bp->b_iodone = bdone; swp_pager_strategy(bp); /* * Wait for the sync I/O to complete. */ bwait(bp, PVM, "swwrt"); /* * Now that we are through with the bp, we can call the * normal async completion, which frees everything up. */ swp_pager_async_iodone(bp); } swp_pager_freeswapspace(s_free, n_free); VM_OBJECT_WLOCK(object); } /* * swp_pager_async_iodone: * * Completion routine for asynchronous reads and writes from/to swap. * Also called manually by synchronous code to finish up a bp. * * This routine may not sleep. */ static void swp_pager_async_iodone(struct buf *bp) { int i; vm_object_t object = NULL; /* * Report error - unless we ran out of memory, in which case * we've already logged it in swapgeom_strategy(). */ if (bp->b_ioflags & BIO_ERROR && bp->b_error != ENOMEM) { printf( "swap_pager: I/O error - %s failed; blkno %ld," "size %ld, error %d\n", ((bp->b_iocmd == BIO_READ) ? "pagein" : "pageout"), (long)bp->b_blkno, (long)bp->b_bcount, bp->b_error ); } /* * remove the mapping for kernel virtual */ if (buf_mapped(bp)) pmap_qremove((vm_offset_t)bp->b_data, bp->b_npages); else bp->b_data = bp->b_kvabase; if (bp->b_npages) { object = bp->b_pages[0]->object; VM_OBJECT_WLOCK(object); } /* * cleanup pages. If an error occurs writing to swap, we are in * very serious trouble. If it happens to be a disk error, though, * we may be able to recover by reassigning the swap later on. So * in this case we remove the m->swapblk assignment for the page * but do not free it in the rlist. The errornous block(s) are thus * never reallocated as swap. Redirty the page and continue. */ for (i = 0; i < bp->b_npages; ++i) { vm_page_t m = bp->b_pages[i]; m->oflags &= ~VPO_SWAPINPROG; if (m->oflags & VPO_SWAPSLEEP) { m->oflags &= ~VPO_SWAPSLEEP; wakeup(&object->handle); } /* We always have space after I/O, successful or not. */ vm_page_aflag_set(m, PGA_SWAP_SPACE); if (bp->b_ioflags & BIO_ERROR) { /* * If an error occurs I'd love to throw the swapblk * away without freeing it back to swapspace, so it * can never be used again. But I can't from an * interrupt. */ if (bp->b_iocmd == BIO_READ) { /* * NOTE: for reads, m->dirty will probably * be overridden by the original caller of * getpages so don't play cute tricks here. */ vm_page_invalid(m); } else { /* * If a write error occurs, reactivate page * so it doesn't clog the inactive list, * then finish the I/O. */ MPASS(m->dirty == VM_PAGE_BITS_ALL); /* PQ_UNSWAPPABLE? */ vm_page_activate(m); vm_page_sunbusy(m); } } else if (bp->b_iocmd == BIO_READ) { /* * NOTE: for reads, m->dirty will probably be * overridden by the original caller of getpages so * we cannot set them in order to free the underlying * swap in a low-swap situation. I don't think we'd * want to do that anyway, but it was an optimization * that existed in the old swapper for a time before * it got ripped out due to precisely this problem. */ KASSERT(!pmap_page_is_mapped(m), ("swp_pager_async_iodone: page %p is mapped", m)); KASSERT(m->dirty == 0, ("swp_pager_async_iodone: page %p is dirty", m)); vm_page_valid(m); if (i < bp->b_pgbefore || i >= bp->b_npages - bp->b_pgafter) vm_page_readahead_finish(m); } else { /* * For write success, clear the dirty * status, then finish the I/O ( which decrements the * busy count and possibly wakes waiter's up ). * A page is only written to swap after a period of * inactivity. Therefore, we do not expect it to be * reused. */ KASSERT(!pmap_page_is_write_mapped(m), ("swp_pager_async_iodone: page %p is not write" " protected", m)); vm_page_undirty(m); vm_page_deactivate_noreuse(m); vm_page_sunbusy(m); } } /* * adjust pip. NOTE: the original parent may still have its own * pip refs on the object. */ if (object != NULL) { vm_object_pip_wakeupn(object, bp->b_npages); VM_OBJECT_WUNLOCK(object); } /* * swapdev_strategy() manually sets b_vp and b_bufobj before calling * bstrategy(). Set them back to NULL now we're done with it, or we'll * trigger a KASSERT in relpbuf(). */ if (bp->b_vp) { bp->b_vp = NULL; bp->b_bufobj = NULL; } /* * release the physical I/O buffer */ if (bp->b_flags & B_ASYNC) { mtx_lock(&swbuf_mtx); if (++nsw_wcount_async == 1) wakeup(&nsw_wcount_async); mtx_unlock(&swbuf_mtx); } uma_zfree((bp->b_iocmd == BIO_READ) ? swrbuf_zone : swwbuf_zone, bp); } int swap_pager_nswapdev(void) { return (nswapdev); } static void swp_pager_force_dirty(vm_page_t m) { vm_page_dirty(m); #ifdef INVARIANTS if (!vm_page_wired(m) && m->a.queue == PQ_NONE) panic("page %p is neither wired nor queued", m); #endif vm_page_xunbusy(m); swap_pager_unswapped(m); } static void swp_pager_force_launder(vm_page_t m) { vm_page_dirty(m); vm_page_launder(m); vm_page_xunbusy(m); swap_pager_unswapped(m); } /* * SWP_PAGER_FORCE_PAGEIN() - force swap blocks to be paged in * * This routine dissociates pages starting at the given index within an * object from their backing store, paging them in if they do not reside * in memory. Pages that are paged in are marked dirty and placed in the * laundry queue. Pages are marked dirty because they no longer have * backing store. They are placed in the laundry queue because they have * not been accessed recently. Otherwise, they would already reside in * memory. */ static void swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex, int npages) { vm_page_t ma[npages]; int i, j; KASSERT(npages > 0, ("%s: No pages", __func__)); KASSERT(npages <= MAXPHYS / PAGE_SIZE, ("%s: Too many pages: %d", __func__, npages)); KASSERT(object->type == OBJT_SWAP, ("%s: Object not swappable", __func__)); vm_object_pip_add(object, npages); vm_page_grab_pages(object, pindex, VM_ALLOC_NORMAL, ma, npages); for (i = j = 0;; i++) { /* Count nonresident pages, to page-in all at once. */ if (i < npages && ma[i]->valid != VM_PAGE_BITS_ALL) continue; if (j < i) { + VM_OBJECT_WUNLOCK(object); /* Page-in nonresident pages. Mark for laundering. */ if (swap_pager_getpages(object, &ma[j], i - j, NULL, NULL) != VM_PAGER_OK) panic("%s: read from swap failed", __func__); + VM_OBJECT_WLOCK(object); do { swp_pager_force_launder(ma[j]); } while (++j < i); } if (i == npages) break; /* Mark dirty a resident page. */ swp_pager_force_dirty(ma[j++]); } vm_object_pip_wakeupn(object, npages); } /* * swap_pager_swapoff_object: * * Page in all of the pages that have been paged out for an object * to a swap device. */ static void swap_pager_swapoff_object(struct swdevt *sp, vm_object_t object) { struct swblk *sb; vm_pindex_t pi, s_pindex; daddr_t blk, n_blks, s_blk; int i; KASSERT(object->type == OBJT_SWAP, ("%s: Object not swappable", __func__)); n_blks = 0; for (pi = 0; (sb = SWAP_PCTRIE_LOOKUP_GE( &object->un_pager.swp.swp_blks, pi)) != NULL; ) { for (i = 0; i < SWAP_META_PAGES; i++) { blk = sb->d[i]; if (!swp_pager_isondev(blk, sp)) blk = SWAPBLK_NONE; /* * If there are no blocks/pages accumulated, start a new * accumulation here. */ if (n_blks == 0) { if (blk != SWAPBLK_NONE) { s_blk = blk; s_pindex = sb->p + i; n_blks = 1; } continue; } /* * If the accumulation can be extended without breaking * the sequence of consecutive blocks and pages that * swp_pager_force_pagein() depends on, do so. */ if (n_blks < MAXPHYS / PAGE_SIZE && s_blk + n_blks == blk && s_pindex + n_blks == sb->p + i) { ++n_blks; continue; } /* * The sequence of consecutive blocks and pages cannot * be extended, so page them all in here. Then, * because doing so involves releasing and reacquiring * a lock that protects the swap block pctrie, do not * rely on the current swap block. Break this loop and * re-fetch the same pindex from the pctrie again. */ swp_pager_force_pagein(object, s_pindex, n_blks); n_blks = 0; break; } if (i == SWAP_META_PAGES) pi = sb->p + SWAP_META_PAGES; } if (n_blks > 0) swp_pager_force_pagein(object, s_pindex, n_blks); } /* * swap_pager_swapoff: * * Page in all of the pages that have been paged out to the * given device. The corresponding blocks in the bitmap must be * marked as allocated and the device must be flagged SW_CLOSING. * There may be no processes swapped out to the device. * * This routine may block. */ static void swap_pager_swapoff(struct swdevt *sp) { vm_object_t object; int retries; sx_assert(&swdev_syscall_lock, SA_XLOCKED); retries = 0; full_rescan: mtx_lock(&vm_object_list_mtx); TAILQ_FOREACH(object, &vm_object_list, object_list) { if (object->type != OBJT_SWAP) continue; mtx_unlock(&vm_object_list_mtx); /* Depends on type-stability. */ VM_OBJECT_WLOCK(object); /* * Dead objects are eventually terminated on their own. */ if ((object->flags & OBJ_DEAD) != 0) goto next_obj; /* * Sync with fences placed after pctrie * initialization. We must not access pctrie below * unless we checked that our object is swap and not * dead. */ atomic_thread_fence_acq(); if (object->type != OBJT_SWAP) goto next_obj; swap_pager_swapoff_object(sp, object); next_obj: VM_OBJECT_WUNLOCK(object); mtx_lock(&vm_object_list_mtx); } mtx_unlock(&vm_object_list_mtx); if (sp->sw_used) { /* * Objects may be locked or paging to the device being * removed, so we will miss their pages and need to * make another pass. We have marked this device as * SW_CLOSING, so the activity should finish soon. */ retries++; if (retries > 100) { panic("swapoff: failed to locate %d swap blocks", sp->sw_used); } pause("swpoff", hz / 20); goto full_rescan; } EVENTHANDLER_INVOKE(swapoff, sp); } /************************************************************************ * SWAP META DATA * ************************************************************************ * * These routines manipulate the swap metadata stored in the * OBJT_SWAP object. * * Swap metadata is implemented with a global hash and not directly * linked into the object. Instead the object simply contains * appropriate tracking counters. */ /* * SWP_PAGER_SWBLK_EMPTY() - is a range of blocks free? */ static bool swp_pager_swblk_empty(struct swblk *sb, int start, int limit) { int i; MPASS(0 <= start && start <= limit && limit <= SWAP_META_PAGES); for (i = start; i < limit; i++) { if (sb->d[i] != SWAPBLK_NONE) return (false); } return (true); } /* * SWP_PAGER_FREE_EMPTY_SWBLK() - frees if a block is free * * Nothing is done if the block is still in use. */ static void swp_pager_free_empty_swblk(vm_object_t object, struct swblk *sb) { if (swp_pager_swblk_empty(sb, 0, SWAP_META_PAGES)) { SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, sb->p); uma_zfree(swblk_zone, sb); } } /* * SWP_PAGER_META_BUILD() - add swap block to swap meta data for object * * We first convert the object to a swap object if it is a default * object. * * The specified swapblk is added to the object's swap metadata. If * the swapblk is not valid, it is freed instead. Any previously * assigned swapblk is returned. */ static daddr_t swp_pager_meta_build(vm_object_t object, vm_pindex_t pindex, daddr_t swapblk) { static volatile int swblk_zone_exhausted, swpctrie_zone_exhausted; struct swblk *sb, *sb1; vm_pindex_t modpi, rdpi; daddr_t prev_swapblk; int error, i; VM_OBJECT_ASSERT_WLOCKED(object); /* * Convert default object to swap object if necessary */ if (object->type != OBJT_SWAP) { pctrie_init(&object->un_pager.swp.swp_blks); /* * Ensure that swap_pager_swapoff()'s iteration over * object_list does not see a garbage pctrie. */ atomic_thread_fence_rel(); object->type = OBJT_SWAP; object->un_pager.swp.writemappings = 0; KASSERT((object->flags & OBJ_ANON) != 0 || object->handle == NULL, ("default pager %p with handle %p", object, object->handle)); } rdpi = rounddown(pindex, SWAP_META_PAGES); sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, rdpi); if (sb == NULL) { if (swapblk == SWAPBLK_NONE) return (SWAPBLK_NONE); for (;;) { sb = uma_zalloc(swblk_zone, M_NOWAIT | (curproc == pageproc ? M_USE_RESERVE : 0)); if (sb != NULL) { sb->p = rdpi; for (i = 0; i < SWAP_META_PAGES; i++) sb->d[i] = SWAPBLK_NONE; if (atomic_cmpset_int(&swblk_zone_exhausted, 1, 0)) printf("swblk zone ok\n"); break; } VM_OBJECT_WUNLOCK(object); if (uma_zone_exhausted(swblk_zone)) { if (atomic_cmpset_int(&swblk_zone_exhausted, 0, 1)) printf("swap blk zone exhausted, " "increase kern.maxswzone\n"); vm_pageout_oom(VM_OOM_SWAPZ); pause("swzonxb", 10); } else uma_zwait(swblk_zone); VM_OBJECT_WLOCK(object); sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, rdpi); if (sb != NULL) /* * Somebody swapped out a nearby page, * allocating swblk at the rdpi index, * while we dropped the object lock. */ goto allocated; } for (;;) { error = SWAP_PCTRIE_INSERT( &object->un_pager.swp.swp_blks, sb); if (error == 0) { if (atomic_cmpset_int(&swpctrie_zone_exhausted, 1, 0)) printf("swpctrie zone ok\n"); break; } VM_OBJECT_WUNLOCK(object); if (uma_zone_exhausted(swpctrie_zone)) { if (atomic_cmpset_int(&swpctrie_zone_exhausted, 0, 1)) printf("swap pctrie zone exhausted, " "increase kern.maxswzone\n"); vm_pageout_oom(VM_OOM_SWAPZ); pause("swzonxp", 10); } else uma_zwait(swpctrie_zone); VM_OBJECT_WLOCK(object); sb1 = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, rdpi); if (sb1 != NULL) { uma_zfree(swblk_zone, sb); sb = sb1; goto allocated; } } } allocated: MPASS(sb->p == rdpi); modpi = pindex % SWAP_META_PAGES; /* Return prior contents of metadata. */ prev_swapblk = sb->d[modpi]; /* Enter block into metadata. */ sb->d[modpi] = swapblk; /* * Free the swblk if we end up with the empty page run. */ if (swapblk == SWAPBLK_NONE) swp_pager_free_empty_swblk(object, sb); return (prev_swapblk); } /* * SWP_PAGER_META_TRANSFER() - free a range of blocks in the srcobject's swap * metadata, or transfer it into dstobject. * * This routine will free swap metadata structures as they are cleaned * out. */ static void swp_pager_meta_transfer(vm_object_t srcobject, vm_object_t dstobject, vm_pindex_t pindex, vm_pindex_t count) { struct swblk *sb; daddr_t n_free, s_free; vm_pindex_t offset, last; int i, limit, start; VM_OBJECT_ASSERT_WLOCKED(srcobject); if (srcobject->type != OBJT_SWAP || count == 0) return; swp_pager_init_freerange(&s_free, &n_free); offset = pindex; last = pindex + count; for (;;) { sb = SWAP_PCTRIE_LOOKUP_GE(&srcobject->un_pager.swp.swp_blks, rounddown(pindex, SWAP_META_PAGES)); if (sb == NULL || sb->p >= last) break; start = pindex > sb->p ? pindex - sb->p : 0; limit = last - sb->p < SWAP_META_PAGES ? last - sb->p : SWAP_META_PAGES; for (i = start; i < limit; i++) { if (sb->d[i] == SWAPBLK_NONE) continue; if (dstobject == NULL || !swp_pager_xfer_source(srcobject, dstobject, sb->p + i - offset, sb->d[i])) { swp_pager_update_freerange(&s_free, &n_free, sb->d[i]); } sb->d[i] = SWAPBLK_NONE; } pindex = sb->p + SWAP_META_PAGES; if (swp_pager_swblk_empty(sb, 0, start) && swp_pager_swblk_empty(sb, limit, SWAP_META_PAGES)) { SWAP_PCTRIE_REMOVE(&srcobject->un_pager.swp.swp_blks, sb->p); uma_zfree(swblk_zone, sb); } } swp_pager_freeswapspace(s_free, n_free); } /* * SWP_PAGER_META_FREE() - free a range of blocks in the object's swap metadata * * The requested range of blocks is freed, with any associated swap * returned to the swap bitmap. * * This routine will free swap metadata structures as they are cleaned * out. This routine does *NOT* operate on swap metadata associated * with resident pages. */ static void swp_pager_meta_free(vm_object_t object, vm_pindex_t pindex, vm_pindex_t count) { swp_pager_meta_transfer(object, NULL, pindex, count); } /* * SWP_PAGER_META_FREE_ALL() - destroy all swap metadata associated with object * * This routine locates and destroys all swap metadata associated with * an object. */ static void swp_pager_meta_free_all(vm_object_t object) { struct swblk *sb; daddr_t n_free, s_free; vm_pindex_t pindex; int i; VM_OBJECT_ASSERT_WLOCKED(object); if (object->type != OBJT_SWAP) return; swp_pager_init_freerange(&s_free, &n_free); for (pindex = 0; (sb = SWAP_PCTRIE_LOOKUP_GE( &object->un_pager.swp.swp_blks, pindex)) != NULL;) { pindex = sb->p + SWAP_META_PAGES; for (i = 0; i < SWAP_META_PAGES; i++) { if (sb->d[i] == SWAPBLK_NONE) continue; swp_pager_update_freerange(&s_free, &n_free, sb->d[i]); } SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, sb->p); uma_zfree(swblk_zone, sb); } swp_pager_freeswapspace(s_free, n_free); } /* * SWP_PAGER_METACTL() - misc control of swap meta data. * * This routine is capable of looking up, or removing swapblk * assignments in the swap meta data. It returns the swapblk being * looked-up, popped, or SWAPBLK_NONE if the block was invalid. * * When acting on a busy resident page and paging is in progress, we * have to wait until paging is complete but otherwise can act on the * busy page. */ static daddr_t swp_pager_meta_lookup(vm_object_t object, vm_pindex_t pindex) { struct swblk *sb; VM_OBJECT_ASSERT_LOCKED(object); /* * The meta data only exists if the object is OBJT_SWAP * and even then might not be allocated yet. */ KASSERT(object->type == OBJT_SWAP, ("Lookup object not swappable")); sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, rounddown(pindex, SWAP_META_PAGES)); if (sb == NULL) return (SWAPBLK_NONE); return (sb->d[pindex % SWAP_META_PAGES]); } /* * Returns the least page index which is greater than or equal to the * parameter pindex and for which there is a swap block allocated. * Returns object's size if the object's type is not swap or if there * are no allocated swap blocks for the object after the requested * pindex. */ vm_pindex_t swap_pager_find_least(vm_object_t object, vm_pindex_t pindex) { struct swblk *sb; int i; VM_OBJECT_ASSERT_LOCKED(object); if (object->type != OBJT_SWAP) return (object->size); sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, rounddown(pindex, SWAP_META_PAGES)); if (sb == NULL) return (object->size); if (sb->p < pindex) { for (i = pindex % SWAP_META_PAGES; i < SWAP_META_PAGES; i++) { if (sb->d[i] != SWAPBLK_NONE) return (sb->p + i); } sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, roundup(pindex, SWAP_META_PAGES)); if (sb == NULL) return (object->size); } for (i = 0; i < SWAP_META_PAGES; i++) { if (sb->d[i] != SWAPBLK_NONE) return (sb->p + i); } /* * We get here if a swblk is present in the trie but it * doesn't map any blocks. */ MPASS(0); return (object->size); } /* * System call swapon(name) enables swapping on device name, * which must be in the swdevsw. Return EBUSY * if already swapping on this device. */ #ifndef _SYS_SYSPROTO_H_ struct swapon_args { char *name; }; #endif /* * MPSAFE */ /* ARGSUSED */ int sys_swapon(struct thread *td, struct swapon_args *uap) { struct vattr attr; struct vnode *vp; struct nameidata nd; int error; error = priv_check(td, PRIV_SWAPON); if (error) return (error); sx_xlock(&swdev_syscall_lock); /* * Swap metadata may not fit in the KVM if we have physical * memory of >1GB. */ if (swblk_zone == NULL) { error = ENOMEM; goto done; } NDINIT(&nd, LOOKUP, ISOPEN | FOLLOW | AUDITVNODE1, UIO_USERSPACE, uap->name, td); error = namei(&nd); if (error) goto done; NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; if (vn_isdisk(vp, &error)) { error = swapongeom(vp); } else if (vp->v_type == VREG && (vp->v_mount->mnt_vfc->vfc_flags & VFCF_NETWORK) != 0 && (error = VOP_GETATTR(vp, &attr, td->td_ucred)) == 0) { /* * Allow direct swapping to NFS regular files in the same * way that nfs_mountroot() sets up diskless swapping. */ error = swaponvp(td, vp, attr.va_size / DEV_BSIZE); } if (error) vrele(vp); done: sx_xunlock(&swdev_syscall_lock); return (error); } /* * Check that the total amount of swap currently configured does not * exceed half the theoretical maximum. If it does, print a warning * message. */ static void swapon_check_swzone(void) { /* recommend using no more than half that amount */ if (swap_total > swap_maxpages / 2) { printf("warning: total configured swap (%lu pages) " "exceeds maximum recommended amount (%lu pages).\n", swap_total, swap_maxpages / 2); printf("warning: increase kern.maxswzone " "or reduce amount of swap.\n"); } } static void swaponsomething(struct vnode *vp, void *id, u_long nblks, sw_strategy_t *strategy, sw_close_t *close, dev_t dev, int flags) { struct swdevt *sp, *tsp; swblk_t dvbase; u_long mblocks; /* * nblks is in DEV_BSIZE'd chunks, convert to PAGE_SIZE'd chunks. * First chop nblks off to page-align it, then convert. * * sw->sw_nblks is in page-sized chunks now too. */ nblks &= ~(ctodb(1) - 1); nblks = dbtoc(nblks); /* * If we go beyond this, we get overflows in the radix * tree bitmap code. */ mblocks = 0x40000000 / BLIST_META_RADIX; if (nblks > mblocks) { printf( "WARNING: reducing swap size to maximum of %luMB per unit\n", mblocks / 1024 / 1024 * PAGE_SIZE); nblks = mblocks; } sp = malloc(sizeof *sp, M_VMPGDATA, M_WAITOK | M_ZERO); sp->sw_vp = vp; sp->sw_id = id; sp->sw_dev = dev; sp->sw_nblks = nblks; sp->sw_used = 0; sp->sw_strategy = strategy; sp->sw_close = close; sp->sw_flags = flags; sp->sw_blist = blist_create(nblks, M_WAITOK); /* * Do not free the first blocks in order to avoid overwriting * any bsd label at the front of the partition */ blist_free(sp->sw_blist, howmany(BBSIZE, PAGE_SIZE), nblks - howmany(BBSIZE, PAGE_SIZE)); dvbase = 0; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(tsp, &swtailq, sw_list) { if (tsp->sw_end >= dvbase) { /* * We put one uncovered page between the devices * in order to definitively prevent any cross-device * I/O requests */ dvbase = tsp->sw_end + 1; } } sp->sw_first = dvbase; sp->sw_end = dvbase + nblks; TAILQ_INSERT_TAIL(&swtailq, sp, sw_list); nswapdev++; swap_pager_avail += nblks - howmany(BBSIZE, PAGE_SIZE); swap_total += nblks; swapon_check_swzone(); swp_sizecheck(); mtx_unlock(&sw_dev_mtx); EVENTHANDLER_INVOKE(swapon, sp); } /* * SYSCALL: swapoff(devname) * * Disable swapping on the given device. * * XXX: Badly designed system call: it should use a device index * rather than filename as specification. We keep sw_vp around * only to make this work. */ #ifndef _SYS_SYSPROTO_H_ struct swapoff_args { char *name; }; #endif /* * MPSAFE */ /* ARGSUSED */ int sys_swapoff(struct thread *td, struct swapoff_args *uap) { struct vnode *vp; struct nameidata nd; struct swdevt *sp; int error; error = priv_check(td, PRIV_SWAPOFF); if (error) return (error); sx_xlock(&swdev_syscall_lock); NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, UIO_USERSPACE, uap->name, td); error = namei(&nd); if (error) goto done; NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (sp->sw_vp == vp) break; } mtx_unlock(&sw_dev_mtx); if (sp == NULL) { error = EINVAL; goto done; } error = swapoff_one(sp, td->td_ucred); done: sx_xunlock(&swdev_syscall_lock); return (error); } static int swapoff_one(struct swdevt *sp, struct ucred *cred) { u_long nblks; #ifdef MAC int error; #endif sx_assert(&swdev_syscall_lock, SA_XLOCKED); #ifdef MAC (void) vn_lock(sp->sw_vp, LK_EXCLUSIVE | LK_RETRY); error = mac_system_check_swapoff(cred, sp->sw_vp); (void) VOP_UNLOCK(sp->sw_vp); if (error != 0) return (error); #endif nblks = sp->sw_nblks; /* * We can turn off this swap device safely only if the * available virtual memory in the system will fit the amount * of data we will have to page back in, plus an epsilon so * the system doesn't become critically low on swap space. */ if (vm_free_count() + swap_pager_avail < nblks + nswap_lowat) return (ENOMEM); /* * Prevent further allocations on this device. */ mtx_lock(&sw_dev_mtx); sp->sw_flags |= SW_CLOSING; swap_pager_avail -= blist_fill(sp->sw_blist, 0, nblks); swap_total -= nblks; mtx_unlock(&sw_dev_mtx); /* * Page in the contents of the device and close it. */ swap_pager_swapoff(sp); sp->sw_close(curthread, sp); mtx_lock(&sw_dev_mtx); sp->sw_id = NULL; TAILQ_REMOVE(&swtailq, sp, sw_list); nswapdev--; if (nswapdev == 0) { swap_pager_full = 2; swap_pager_almost_full = 1; } if (swdevhd == sp) swdevhd = NULL; mtx_unlock(&sw_dev_mtx); blist_destroy(sp->sw_blist); free(sp, M_VMPGDATA); return (0); } void swapoff_all(void) { struct swdevt *sp, *spt; const char *devname; int error; sx_xlock(&swdev_syscall_lock); mtx_lock(&sw_dev_mtx); TAILQ_FOREACH_SAFE(sp, &swtailq, sw_list, spt) { mtx_unlock(&sw_dev_mtx); if (vn_isdisk(sp->sw_vp, NULL)) devname = devtoname(sp->sw_vp->v_rdev); else devname = "[file]"; error = swapoff_one(sp, thread0.td_ucred); if (error != 0) { printf("Cannot remove swap device %s (error=%d), " "skipping.\n", devname, error); } else if (bootverbose) { printf("Swap device %s removed.\n", devname); } mtx_lock(&sw_dev_mtx); } mtx_unlock(&sw_dev_mtx); sx_xunlock(&swdev_syscall_lock); } void swap_pager_status(int *total, int *used) { struct swdevt *sp; *total = 0; *used = 0; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { *total += sp->sw_nblks; *used += sp->sw_used; } mtx_unlock(&sw_dev_mtx); } int swap_dev_info(int name, struct xswdev *xs, char *devname, size_t len) { struct swdevt *sp; const char *tmp_devname; int error, n; n = 0; error = ENOENT; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (n != name) { n++; continue; } xs->xsw_version = XSWDEV_VERSION; xs->xsw_dev = sp->sw_dev; xs->xsw_flags = sp->sw_flags; xs->xsw_nblks = sp->sw_nblks; xs->xsw_used = sp->sw_used; if (devname != NULL) { if (vn_isdisk(sp->sw_vp, NULL)) tmp_devname = devtoname(sp->sw_vp->v_rdev); else tmp_devname = "[file]"; strncpy(devname, tmp_devname, len); } error = 0; break; } mtx_unlock(&sw_dev_mtx); return (error); } #if defined(COMPAT_FREEBSD11) #define XSWDEV_VERSION_11 1 struct xswdev11 { u_int xsw_version; uint32_t xsw_dev; int xsw_flags; int xsw_nblks; int xsw_used; }; #endif #if defined(__amd64__) && defined(COMPAT_FREEBSD32) struct xswdev32 { u_int xsw_version; u_int xsw_dev1, xsw_dev2; int xsw_flags; int xsw_nblks; int xsw_used; }; #endif static int sysctl_vm_swap_info(SYSCTL_HANDLER_ARGS) { struct xswdev xs; #if defined(__amd64__) && defined(COMPAT_FREEBSD32) struct xswdev32 xs32; #endif #if defined(COMPAT_FREEBSD11) struct xswdev11 xs11; #endif int error; if (arg2 != 1) /* name length */ return (EINVAL); error = swap_dev_info(*(int *)arg1, &xs, NULL, 0); if (error != 0) return (error); #if defined(__amd64__) && defined(COMPAT_FREEBSD32) if (req->oldlen == sizeof(xs32)) { xs32.xsw_version = XSWDEV_VERSION; xs32.xsw_dev1 = xs.xsw_dev; xs32.xsw_dev2 = xs.xsw_dev >> 32; xs32.xsw_flags = xs.xsw_flags; xs32.xsw_nblks = xs.xsw_nblks; xs32.xsw_used = xs.xsw_used; error = SYSCTL_OUT(req, &xs32, sizeof(xs32)); return (error); } #endif #if defined(COMPAT_FREEBSD11) if (req->oldlen == sizeof(xs11)) { xs11.xsw_version = XSWDEV_VERSION_11; xs11.xsw_dev = xs.xsw_dev; /* truncation */ xs11.xsw_flags = xs.xsw_flags; xs11.xsw_nblks = xs.xsw_nblks; xs11.xsw_used = xs.xsw_used; error = SYSCTL_OUT(req, &xs11, sizeof(xs11)); return (error); } #endif error = SYSCTL_OUT(req, &xs, sizeof(xs)); return (error); } SYSCTL_INT(_vm, OID_AUTO, nswapdev, CTLFLAG_RD, &nswapdev, 0, "Number of swap devices"); SYSCTL_NODE(_vm, OID_AUTO, swap_info, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_vm_swap_info, "Swap statistics by device"); /* * Count the approximate swap usage in pages for a vmspace. The * shadowed or not yet copied on write swap blocks are not accounted. * The map must be locked. */ long vmspace_swap_count(struct vmspace *vmspace) { vm_map_t map; vm_map_entry_t cur; vm_object_t object; struct swblk *sb; vm_pindex_t e, pi; long count; int i; map = &vmspace->vm_map; count = 0; VM_MAP_ENTRY_FOREACH(cur, map) { if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) continue; object = cur->object.vm_object; if (object == NULL || object->type != OBJT_SWAP) continue; VM_OBJECT_RLOCK(object); if (object->type != OBJT_SWAP) goto unlock; pi = OFF_TO_IDX(cur->offset); e = pi + OFF_TO_IDX(cur->end - cur->start); for (;; pi = sb->p + SWAP_META_PAGES) { sb = SWAP_PCTRIE_LOOKUP_GE( &object->un_pager.swp.swp_blks, pi); if (sb == NULL || sb->p >= e) break; for (i = 0; i < SWAP_META_PAGES; i++) { if (sb->p + i < e && sb->d[i] != SWAPBLK_NONE) count++; } } unlock: VM_OBJECT_RUNLOCK(object); } return (count); } /* * GEOM backend * * Swapping onto disk devices. * */ static g_orphan_t swapgeom_orphan; static struct g_class g_swap_class = { .name = "SWAP", .version = G_VERSION, .orphan = swapgeom_orphan, }; DECLARE_GEOM_CLASS(g_swap_class, g_class); static void swapgeom_close_ev(void *arg, int flags) { struct g_consumer *cp; cp = arg; g_access(cp, -1, -1, 0); g_detach(cp); g_destroy_consumer(cp); } /* * Add a reference to the g_consumer for an inflight transaction. */ static void swapgeom_acquire(struct g_consumer *cp) { mtx_assert(&sw_dev_mtx, MA_OWNED); cp->index++; } /* * Remove a reference from the g_consumer. Post a close event if all * references go away, since the function might be called from the * biodone context. */ static void swapgeom_release(struct g_consumer *cp, struct swdevt *sp) { mtx_assert(&sw_dev_mtx, MA_OWNED); cp->index--; if (cp->index == 0) { if (g_post_event(swapgeom_close_ev, cp, M_NOWAIT, NULL) == 0) sp->sw_id = NULL; } } static void swapgeom_done(struct bio *bp2) { struct swdevt *sp; struct buf *bp; struct g_consumer *cp; bp = bp2->bio_caller2; cp = bp2->bio_from; bp->b_ioflags = bp2->bio_flags; if (bp2->bio_error) bp->b_ioflags |= BIO_ERROR; bp->b_resid = bp->b_bcount - bp2->bio_completed; bp->b_error = bp2->bio_error; bp->b_caller1 = NULL; bufdone(bp); sp = bp2->bio_caller1; mtx_lock(&sw_dev_mtx); swapgeom_release(cp, sp); mtx_unlock(&sw_dev_mtx); g_destroy_bio(bp2); } static void swapgeom_strategy(struct buf *bp, struct swdevt *sp) { struct bio *bio; struct g_consumer *cp; mtx_lock(&sw_dev_mtx); cp = sp->sw_id; if (cp == NULL) { mtx_unlock(&sw_dev_mtx); bp->b_error = ENXIO; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return; } swapgeom_acquire(cp); mtx_unlock(&sw_dev_mtx); if (bp->b_iocmd == BIO_WRITE) bio = g_new_bio(); else bio = g_alloc_bio(); if (bio == NULL) { mtx_lock(&sw_dev_mtx); swapgeom_release(cp, sp); mtx_unlock(&sw_dev_mtx); bp->b_error = ENOMEM; bp->b_ioflags |= BIO_ERROR; printf("swap_pager: cannot allocate bio\n"); bufdone(bp); return; } bp->b_caller1 = bio; bio->bio_caller1 = sp; bio->bio_caller2 = bp; bio->bio_cmd = bp->b_iocmd; bio->bio_offset = (bp->b_blkno - sp->sw_first) * PAGE_SIZE; bio->bio_length = bp->b_bcount; bio->bio_done = swapgeom_done; if (!buf_mapped(bp)) { bio->bio_ma = bp->b_pages; bio->bio_data = unmapped_buf; bio->bio_ma_offset = (vm_offset_t)bp->b_offset & PAGE_MASK; bio->bio_ma_n = bp->b_npages; bio->bio_flags |= BIO_UNMAPPED; } else { bio->bio_data = bp->b_data; bio->bio_ma = NULL; } g_io_request(bio, cp); return; } static void swapgeom_orphan(struct g_consumer *cp) { struct swdevt *sp; int destroy; mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (sp->sw_id == cp) { sp->sw_flags |= SW_CLOSING; break; } } /* * Drop reference we were created with. Do directly since we're in a * special context where we don't have to queue the call to * swapgeom_close_ev(). */ cp->index--; destroy = ((sp != NULL) && (cp->index == 0)); if (destroy) sp->sw_id = NULL; mtx_unlock(&sw_dev_mtx); if (destroy) swapgeom_close_ev(cp, 0); } static void swapgeom_close(struct thread *td, struct swdevt *sw) { struct g_consumer *cp; mtx_lock(&sw_dev_mtx); cp = sw->sw_id; sw->sw_id = NULL; mtx_unlock(&sw_dev_mtx); /* * swapgeom_close() may be called from the biodone context, * where we cannot perform topology changes. Delegate the * work to the events thread. */ if (cp != NULL) g_waitfor_event(swapgeom_close_ev, cp, M_WAITOK, NULL); } static int swapongeom_locked(struct cdev *dev, struct vnode *vp) { struct g_provider *pp; struct g_consumer *cp; static struct g_geom *gp; struct swdevt *sp; u_long nblks; int error; pp = g_dev_getprovider(dev); if (pp == NULL) return (ENODEV); mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { cp = sp->sw_id; if (cp != NULL && cp->provider == pp) { mtx_unlock(&sw_dev_mtx); return (EBUSY); } } mtx_unlock(&sw_dev_mtx); if (gp == NULL) gp = g_new_geomf(&g_swap_class, "swap"); cp = g_new_consumer(gp); cp->index = 1; /* Number of active I/Os, plus one for being active. */ cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; g_attach(cp, pp); /* * XXX: Every time you think you can improve the margin for * footshooting, somebody depends on the ability to do so: * savecore(8) wants to write to our swapdev so we cannot * set an exclusive count :-( */ error = g_access(cp, 1, 1, 0); if (error != 0) { g_detach(cp); g_destroy_consumer(cp); return (error); } nblks = pp->mediasize / DEV_BSIZE; swaponsomething(vp, cp, nblks, swapgeom_strategy, swapgeom_close, dev2udev(dev), (pp->flags & G_PF_ACCEPT_UNMAPPED) != 0 ? SW_UNMAPPED : 0); return (0); } static int swapongeom(struct vnode *vp) { int error; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_type != VCHR || VN_IS_DOOMED(vp)) { error = ENOENT; } else { g_topology_lock(); error = swapongeom_locked(vp->v_rdev, vp); g_topology_unlock(); } VOP_UNLOCK(vp); return (error); } /* * VNODE backend * * This is used mainly for network filesystem (read: probably only tested * with NFS) swapfiles. * */ static void swapdev_strategy(struct buf *bp, struct swdevt *sp) { struct vnode *vp2; bp->b_blkno = ctodb(bp->b_blkno - sp->sw_first); vp2 = sp->sw_id; vhold(vp2); if (bp->b_iocmd == BIO_WRITE) { if (bp->b_bufobj) bufobj_wdrop(bp->b_bufobj); bufobj_wref(&vp2->v_bufobj); } if (bp->b_bufobj != &vp2->v_bufobj) bp->b_bufobj = &vp2->v_bufobj; bp->b_vp = vp2; bp->b_iooffset = dbtob(bp->b_blkno); bstrategy(bp); return; } static void swapdev_close(struct thread *td, struct swdevt *sp) { VOP_CLOSE(sp->sw_vp, FREAD | FWRITE, td->td_ucred, td); vrele(sp->sw_vp); } static int swaponvp(struct thread *td, struct vnode *vp, u_long nblks) { struct swdevt *sp; int error; if (nblks == 0) return (ENXIO); mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { if (sp->sw_id == vp) { mtx_unlock(&sw_dev_mtx); return (EBUSY); } } mtx_unlock(&sw_dev_mtx); (void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); #ifdef MAC error = mac_system_check_swapon(td->td_ucred, vp); if (error == 0) #endif error = VOP_OPEN(vp, FREAD | FWRITE, td->td_ucred, td, NULL); (void) VOP_UNLOCK(vp); if (error) return (error); swaponsomething(vp, vp, nblks, swapdev_strategy, swapdev_close, NODEV, 0); return (0); } static int sysctl_swap_async_max(SYSCTL_HANDLER_ARGS) { int error, new, n; new = nsw_wcount_async_max; error = sysctl_handle_int(oidp, &new, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (new > nswbuf / 2 || new < 1) return (EINVAL); mtx_lock(&swbuf_mtx); while (nsw_wcount_async_max != new) { /* * Adjust difference. If the current async count is too low, * we will need to sqeeze our update slowly in. Sleep with a * higher priority than getpbuf() to finish faster. */ n = new - nsw_wcount_async_max; if (nsw_wcount_async + n >= 0) { nsw_wcount_async += n; nsw_wcount_async_max += n; wakeup(&nsw_wcount_async); } else { nsw_wcount_async_max -= nsw_wcount_async; nsw_wcount_async = 0; msleep(&nsw_wcount_async, &swbuf_mtx, PSWP, "swpsysctl", 0); } } mtx_unlock(&swbuf_mtx); return (0); } static void swap_pager_update_writecount(vm_object_t object, vm_offset_t start, vm_offset_t end) { VM_OBJECT_WLOCK(object); KASSERT((object->flags & OBJ_ANON) == 0, ("Splittable object with writecount")); object->un_pager.swp.writemappings += (vm_ooffset_t)end - start; VM_OBJECT_WUNLOCK(object); } static void swap_pager_release_writecount(vm_object_t object, vm_offset_t start, vm_offset_t end) { VM_OBJECT_WLOCK(object); KASSERT((object->flags & OBJ_ANON) == 0, ("Splittable object with writecount")); object->un_pager.swp.writemappings -= (vm_ooffset_t)end - start; VM_OBJECT_WUNLOCK(object); } Index: projects/clang1000-import/sys/vm/uma_core.c =================================================================== --- projects/clang1000-import/sys/vm/uma_core.c (revision 356919) +++ projects/clang1000-import/sys/vm/uma_core.c (revision 356920) @@ -1,5012 +1,5016 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002-2019 Jeffrey Roberson * Copyright (c) 2004, 2005 Bosko Milekic * Copyright (c) 2004-2006 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * uma_core.c Implementation of the Universal Memory allocator * * This allocator is intended to replace the multitude of similar object caches * in the standard FreeBSD kernel. The intent is to be flexible as well as * efficient. A primary design goal is to return unused memory to the rest of * the system. This will make the system as a whole more flexible due to the * ability to move memory to subsystems which most need it instead of leaving * pools of reserved memory unused. * * The basic ideas stem from similar slab/zone based allocators whose algorithms * are well known. * */ /* * TODO: * - Improve memory usage for large allocations * - Investigate cache size adjustments */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_param.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEBUG_MEMGUARD #include #endif #include /* * This is the zone and keg from which all zones are spawned. */ static uma_zone_t kegs; static uma_zone_t zones; /* * These are the two zones from which all offpage uma_slab_ts are allocated. * * One zone is for slab headers that can represent a larger number of items, * making the slabs themselves more efficient, and the other zone is for * headers that are smaller and represent fewer items, making the headers more * efficient. */ #define SLABZONE_SIZE(setsize) \ (sizeof(struct uma_hash_slab) + BITSET_SIZE(setsize) * SLAB_BITSETS) #define SLABZONE0_SETSIZE (PAGE_SIZE / 16) #define SLABZONE1_SETSIZE SLAB_MAX_SETSIZE #define SLABZONE0_SIZE SLABZONE_SIZE(SLABZONE0_SETSIZE) #define SLABZONE1_SIZE SLABZONE_SIZE(SLABZONE1_SETSIZE) static uma_zone_t slabzones[2]; /* * The initial hash tables come out of this zone so they can be allocated * prior to malloc coming up. */ static uma_zone_t hashzone; /* The boot-time adjusted value for cache line alignment. */ int uma_align_cache = 64 - 1; static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets"); static MALLOC_DEFINE(M_UMA, "UMA", "UMA Misc"); /* * Are we allowed to allocate buckets? */ static int bucketdisable = 1; /* Linked list of all kegs in the system */ static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs); /* Linked list of all cache-only zones in the system */ static LIST_HEAD(,uma_zone) uma_cachezones = LIST_HEAD_INITIALIZER(uma_cachezones); /* This RW lock protects the keg list */ static struct rwlock_padalign __exclusive_cache_line uma_rwlock; /* * First available virual address for boot time allocations. */ static vm_offset_t bootstart; static vm_offset_t bootmem; static struct sx uma_reclaim_lock; /* * kmem soft limit, initialized by uma_set_limit(). Ensure that early * allocations don't trigger a wakeup of the reclaim thread. */ unsigned long uma_kmem_limit = LONG_MAX; SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_limit, CTLFLAG_RD, &uma_kmem_limit, 0, "UMA kernel memory soft limit"); unsigned long uma_kmem_total; SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_total, CTLFLAG_RD, &uma_kmem_total, 0, "UMA kernel memory usage"); /* Is the VM done starting up? */ static enum { BOOT_COLD, BOOT_KVA, BOOT_RUNNING, BOOT_SHUTDOWN, } booted = BOOT_COLD; /* * This is the handle used to schedule events that need to happen * outside of the allocation fast path. */ static struct callout uma_callout; #define UMA_TIMEOUT 20 /* Seconds for callout interval. */ /* * This structure is passed as the zone ctor arg so that I don't have to create * a special allocation function just for zones. */ struct uma_zctor_args { const char *name; size_t size; uma_ctor ctor; uma_dtor dtor; uma_init uminit; uma_fini fini; uma_import import; uma_release release; void *arg; uma_keg_t keg; int align; uint32_t flags; }; struct uma_kctor_args { uma_zone_t zone; size_t size; uma_init uminit; uma_fini fini; int align; uint32_t flags; }; struct uma_bucket_zone { uma_zone_t ubz_zone; char *ubz_name; int ubz_entries; /* Number of items it can hold. */ int ubz_maxsize; /* Maximum allocation size per-item. */ }; /* * Compute the actual number of bucket entries to pack them in power * of two sizes for more efficient space utilization. */ #define BUCKET_SIZE(n) \ (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *)) #define BUCKET_MAX BUCKET_SIZE(256) #define BUCKET_MIN BUCKET_SIZE(4) struct uma_bucket_zone bucket_zones[] = { { NULL, "4 Bucket", BUCKET_SIZE(4), 4096 }, { NULL, "6 Bucket", BUCKET_SIZE(6), 3072 }, { NULL, "8 Bucket", BUCKET_SIZE(8), 2048 }, { NULL, "12 Bucket", BUCKET_SIZE(12), 1536 }, { NULL, "16 Bucket", BUCKET_SIZE(16), 1024 }, { NULL, "32 Bucket", BUCKET_SIZE(32), 512 }, { NULL, "64 Bucket", BUCKET_SIZE(64), 256 }, { NULL, "128 Bucket", BUCKET_SIZE(128), 128 }, { NULL, "256 Bucket", BUCKET_SIZE(256), 64 }, { NULL, NULL, 0} }; /* * Flags and enumerations to be passed to internal functions. */ enum zfreeskip { SKIP_NONE = 0, SKIP_CNT = 0x00000001, SKIP_DTOR = 0x00010000, SKIP_FINI = 0x00020000, }; /* Prototypes.. */ void uma_startup1(vm_offset_t); void uma_startup2(void); static void *noobj_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int); static void *page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int); static void *pcpu_page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int); static void *startup_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int); static void page_free(void *, vm_size_t, uint8_t); static void pcpu_page_free(void *, vm_size_t, uint8_t); static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int, int); static void cache_drain(uma_zone_t); static void bucket_drain(uma_zone_t, uma_bucket_t); static void bucket_cache_reclaim(uma_zone_t zone, bool); static int keg_ctor(void *, int, void *, int); static void keg_dtor(void *, int, void *); static int zone_ctor(void *, int, void *, int); static void zone_dtor(void *, int, void *); static int zero_init(void *, int, int); static void zone_foreach(void (*zfunc)(uma_zone_t, void *), void *); static void zone_foreach_unlocked(void (*zfunc)(uma_zone_t, void *), void *); static void zone_timeout(uma_zone_t zone, void *); static int hash_alloc(struct uma_hash *, u_int); static int hash_expand(struct uma_hash *, struct uma_hash *); static void hash_free(struct uma_hash *hash); static void uma_timeout(void *); static void uma_startup3(void); static void uma_shutdown(void); static void *zone_alloc_item(uma_zone_t, void *, int, int); static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip); static int zone_alloc_limit(uma_zone_t zone, int count, int flags); static void zone_free_limit(uma_zone_t zone, int count); static void bucket_enable(void); static void bucket_init(void); static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int); static void bucket_free(uma_zone_t zone, uma_bucket_t, void *); static void bucket_zone_drain(void); static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int); static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab); static void slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item); static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini, int align, uint32_t flags); static int zone_import(void *, void **, int, int, int); static void zone_release(void *, void **, int); static bool cache_alloc(uma_zone_t, uma_cache_t, void *, int); static bool cache_free(uma_zone_t, uma_cache_t, void *, void *, int); static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS); static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS); static int sysctl_handle_uma_zone_allocs(SYSCTL_HANDLER_ARGS); static int sysctl_handle_uma_zone_frees(SYSCTL_HANDLER_ARGS); static int sysctl_handle_uma_zone_flags(SYSCTL_HANDLER_ARGS); static int sysctl_handle_uma_slab_efficiency(SYSCTL_HANDLER_ARGS); static int sysctl_handle_uma_zone_items(SYSCTL_HANDLER_ARGS); static uint64_t uma_zone_get_allocs(uma_zone_t zone); #ifdef INVARIANTS static uint64_t uma_keg_get_allocs(uma_keg_t zone); static inline struct noslabbits *slab_dbg_bits(uma_slab_t slab, uma_keg_t keg); static bool uma_dbg_kskip(uma_keg_t keg, void *mem); static bool uma_dbg_zskip(uma_zone_t zone, void *mem); static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item); static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item); static SYSCTL_NODE(_vm, OID_AUTO, debug, CTLFLAG_RD, 0, "Memory allocation debugging"); static u_int dbg_divisor = 1; SYSCTL_UINT(_vm_debug, OID_AUTO, divisor, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &dbg_divisor, 0, "Debug & thrash every this item in memory allocator"); static counter_u64_t uma_dbg_cnt = EARLY_COUNTER; static counter_u64_t uma_skip_cnt = EARLY_COUNTER; SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, trashed, CTLFLAG_RD, &uma_dbg_cnt, "memory items debugged"); SYSCTL_COUNTER_U64(_vm_debug, OID_AUTO, skipped, CTLFLAG_RD, &uma_skip_cnt, "memory items skipped, not debugged"); #endif SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL); SYSCTL_NODE(_vm, OID_AUTO, uma, CTLFLAG_RW, 0, "Universal Memory Allocator"); SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLTYPE_INT, 0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones"); SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLTYPE_STRUCT, 0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats"); static int zone_warnings = 1; SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0, "Warn when UMA zones becomes full"); /* * Select the slab zone for an offpage slab with the given maximum item count. */ static inline uma_zone_t slabzone(int ipers) { return (slabzones[ipers > SLABZONE0_SETSIZE]); } /* * This routine checks to see whether or not it's safe to enable buckets. */ static void bucket_enable(void) { KASSERT(booted >= BOOT_KVA, ("Bucket enable before init")); bucketdisable = vm_page_count_min(); } /* * Initialize bucket_zones, the array of zones of buckets of various sizes. * * For each zone, calculate the memory required for each bucket, consisting * of the header and an array of pointers. */ static void bucket_init(void) { struct uma_bucket_zone *ubz; int size; for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) { size = roundup(sizeof(struct uma_bucket), sizeof(void *)); size += sizeof(void *) * ubz->ubz_entries; ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET | UMA_ZONE_FIRSTTOUCH); } } /* * Given a desired number of entries for a bucket, return the zone from which * to allocate the bucket. */ static struct uma_bucket_zone * bucket_zone_lookup(int entries) { struct uma_bucket_zone *ubz; for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) if (ubz->ubz_entries >= entries) return (ubz); ubz--; return (ubz); } static struct uma_bucket_zone * bucket_zone_max(uma_zone_t zone, int nitems) { struct uma_bucket_zone *ubz; int bpcpu; bpcpu = 2; if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0) /* Count the cross-domain bucket. */ bpcpu++; for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) if (ubz->ubz_entries * bpcpu * mp_ncpus > nitems) break; if (ubz == &bucket_zones[0]) ubz = NULL; else ubz--; return (ubz); } static int bucket_select(int size) { struct uma_bucket_zone *ubz; ubz = &bucket_zones[0]; if (size > ubz->ubz_maxsize) return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1); for (; ubz->ubz_entries != 0; ubz++) if (ubz->ubz_maxsize < size) break; ubz--; return (ubz->ubz_entries); } static uma_bucket_t bucket_alloc(uma_zone_t zone, void *udata, int flags) { struct uma_bucket_zone *ubz; uma_bucket_t bucket; /* * Don't allocate buckets in low memory situations. */ if (bucketdisable) return (NULL); /* * To limit bucket recursion we store the original zone flags * in a cookie passed via zalloc_arg/zfree_arg. This allows the * NOVM flag to persist even through deep recursions. We also * store ZFLAG_BUCKET once we have recursed attempting to allocate * a bucket for a bucket zone so we do not allow infinite bucket * recursion. This cookie will even persist to frees of unused * buckets via the allocation path or bucket allocations in the * free path. */ if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0) udata = (void *)(uintptr_t)zone->uz_flags; else { if ((uintptr_t)udata & UMA_ZFLAG_BUCKET) return (NULL); udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET); } if ((uintptr_t)udata & UMA_ZFLAG_CACHEONLY) flags |= M_NOVM; ubz = bucket_zone_lookup(zone->uz_bucket_size); if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0) ubz++; bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags); if (bucket) { #ifdef INVARIANTS bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries); #endif bucket->ub_cnt = 0; bucket->ub_entries = ubz->ubz_entries; } return (bucket); } static void bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata) { struct uma_bucket_zone *ubz; KASSERT(bucket->ub_cnt == 0, ("bucket_free: Freeing a non free bucket.")); if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0) udata = (void *)(uintptr_t)zone->uz_flags; ubz = bucket_zone_lookup(bucket->ub_entries); uma_zfree_arg(ubz->ubz_zone, bucket, udata); } static void bucket_zone_drain(void) { struct uma_bucket_zone *ubz; for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) uma_zone_reclaim(ubz->ubz_zone, UMA_RECLAIM_DRAIN); } /* * Attempt to satisfy an allocation by retrieving a full bucket from one of the * zone's caches. */ static uma_bucket_t zone_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom) { uma_bucket_t bucket; ZONE_LOCK_ASSERT(zone); if ((bucket = TAILQ_FIRST(&zdom->uzd_buckets)) != NULL) { MPASS(zdom->uzd_nitems >= bucket->ub_cnt); TAILQ_REMOVE(&zdom->uzd_buckets, bucket, ub_link); zdom->uzd_nitems -= bucket->ub_cnt; if (zdom->uzd_imin > zdom->uzd_nitems) zdom->uzd_imin = zdom->uzd_nitems; zone->uz_bkt_count -= bucket->ub_cnt; } return (bucket); } /* * Insert a full bucket into the specified cache. The "ws" parameter indicates * whether the bucket's contents should be counted as part of the zone's working * set. */ static void zone_put_bucket(uma_zone_t zone, uma_zone_domain_t zdom, uma_bucket_t bucket, const bool ws) { ZONE_LOCK_ASSERT(zone); KASSERT(!ws || zone->uz_bkt_count < zone->uz_bkt_max, ("%s: zone %p overflow", __func__, zone)); if (ws) TAILQ_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link); else TAILQ_INSERT_TAIL(&zdom->uzd_buckets, bucket, ub_link); zdom->uzd_nitems += bucket->ub_cnt; if (ws && zdom->uzd_imax < zdom->uzd_nitems) zdom->uzd_imax = zdom->uzd_nitems; zone->uz_bkt_count += bucket->ub_cnt; } /* Pops an item out of a per-cpu cache bucket. */ static inline void * cache_bucket_pop(uma_cache_t cache, uma_cache_bucket_t bucket) { void *item; CRITICAL_ASSERT(curthread); bucket->ucb_cnt--; item = bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt]; #ifdef INVARIANTS bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt] = NULL; KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled.")); #endif cache->uc_allocs++; return (item); } /* Pushes an item into a per-cpu cache bucket. */ static inline void cache_bucket_push(uma_cache_t cache, uma_cache_bucket_t bucket, void *item) { CRITICAL_ASSERT(curthread); KASSERT(bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt] == NULL, ("uma_zfree: Freeing to non free bucket index.")); bucket->ucb_bucket->ub_bucket[bucket->ucb_cnt] = item; bucket->ucb_cnt++; cache->uc_frees++; } /* * Unload a UMA bucket from a per-cpu cache. */ static inline uma_bucket_t cache_bucket_unload(uma_cache_bucket_t bucket) { uma_bucket_t b; b = bucket->ucb_bucket; if (b != NULL) { MPASS(b->ub_entries == bucket->ucb_entries); b->ub_cnt = bucket->ucb_cnt; bucket->ucb_bucket = NULL; bucket->ucb_entries = bucket->ucb_cnt = 0; } return (b); } static inline uma_bucket_t cache_bucket_unload_alloc(uma_cache_t cache) { return (cache_bucket_unload(&cache->uc_allocbucket)); } static inline uma_bucket_t cache_bucket_unload_free(uma_cache_t cache) { return (cache_bucket_unload(&cache->uc_freebucket)); } static inline uma_bucket_t cache_bucket_unload_cross(uma_cache_t cache) { return (cache_bucket_unload(&cache->uc_crossbucket)); } /* * Load a bucket into a per-cpu cache bucket. */ static inline void cache_bucket_load(uma_cache_bucket_t bucket, uma_bucket_t b) { CRITICAL_ASSERT(curthread); MPASS(bucket->ucb_bucket == NULL); bucket->ucb_bucket = b; bucket->ucb_cnt = b->ub_cnt; bucket->ucb_entries = b->ub_entries; } static inline void cache_bucket_load_alloc(uma_cache_t cache, uma_bucket_t b) { cache_bucket_load(&cache->uc_allocbucket, b); } static inline void cache_bucket_load_free(uma_cache_t cache, uma_bucket_t b) { cache_bucket_load(&cache->uc_freebucket, b); } #ifdef NUMA static inline void cache_bucket_load_cross(uma_cache_t cache, uma_bucket_t b) { cache_bucket_load(&cache->uc_crossbucket, b); } #endif /* * Copy and preserve ucb_spare. */ static inline void cache_bucket_copy(uma_cache_bucket_t b1, uma_cache_bucket_t b2) { b1->ucb_bucket = b2->ucb_bucket; b1->ucb_entries = b2->ucb_entries; b1->ucb_cnt = b2->ucb_cnt; } /* * Swap two cache buckets. */ static inline void cache_bucket_swap(uma_cache_bucket_t b1, uma_cache_bucket_t b2) { struct uma_cache_bucket b3; CRITICAL_ASSERT(curthread); cache_bucket_copy(&b3, b1); cache_bucket_copy(b1, b2); cache_bucket_copy(b2, &b3); } static void zone_log_warning(uma_zone_t zone) { static const struct timeval warninterval = { 300, 0 }; if (!zone_warnings || zone->uz_warning == NULL) return; if (ratecheck(&zone->uz_ratecheck, &warninterval)) printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning); } static inline void zone_maxaction(uma_zone_t zone) { if (zone->uz_maxaction.ta_func != NULL) taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction); } /* * Routine called by timeout which is used to fire off some time interval * based calculations. (stats, hash size, etc.) * * Arguments: * arg Unused * * Returns: * Nothing */ static void uma_timeout(void *unused) { bucket_enable(); zone_foreach(zone_timeout, NULL); /* Reschedule this event */ callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL); } /* * Update the working set size estimate for the zone's bucket cache. * The constants chosen here are somewhat arbitrary. With an update period of * 20s (UMA_TIMEOUT), this estimate is dominated by zone activity over the * last 100s. */ static void zone_domain_update_wss(uma_zone_domain_t zdom) { long wss; MPASS(zdom->uzd_imax >= zdom->uzd_imin); wss = zdom->uzd_imax - zdom->uzd_imin; zdom->uzd_imax = zdom->uzd_imin = zdom->uzd_nitems; zdom->uzd_wss = (4 * wss + zdom->uzd_wss) / 5; } /* * Routine to perform timeout driven calculations. This expands the * hashes and does per cpu statistics aggregation. * * Returns nothing. */ static void zone_timeout(uma_zone_t zone, void *unused) { uma_keg_t keg; u_int slabs, pages; if ((zone->uz_flags & UMA_ZFLAG_HASH) == 0) goto update_wss; keg = zone->uz_keg; /* * Hash zones are non-numa by definition so the first domain * is the only one present. */ KEG_LOCK(keg, 0); pages = keg->uk_domain[0].ud_pages; /* * Expand the keg hash table. * * This is done if the number of slabs is larger than the hash size. * What I'm trying to do here is completely reduce collisions. This * may be a little aggressive. Should I allow for two collisions max? */ if ((slabs = pages / keg->uk_ppera) > keg->uk_hash.uh_hashsize) { struct uma_hash newhash; struct uma_hash oldhash; int ret; /* * This is so involved because allocating and freeing * while the keg lock is held will lead to deadlock. * I have to do everything in stages and check for * races. */ KEG_UNLOCK(keg, 0); ret = hash_alloc(&newhash, 1 << fls(slabs)); KEG_LOCK(keg, 0); if (ret) { if (hash_expand(&keg->uk_hash, &newhash)) { oldhash = keg->uk_hash; keg->uk_hash = newhash; } else oldhash = newhash; KEG_UNLOCK(keg, 0); hash_free(&oldhash); goto update_wss; } } KEG_UNLOCK(keg, 0); update_wss: ZONE_LOCK(zone); for (int i = 0; i < vm_ndomains; i++) zone_domain_update_wss(&zone->uz_domain[i]); ZONE_UNLOCK(zone); } /* * Allocate and zero fill the next sized hash table from the appropriate * backing store. * * Arguments: * hash A new hash structure with the old hash size in uh_hashsize * * Returns: * 1 on success and 0 on failure. */ static int hash_alloc(struct uma_hash *hash, u_int size) { size_t alloc; KASSERT(powerof2(size), ("hash size must be power of 2")); if (size > UMA_HASH_SIZE_INIT) { hash->uh_hashsize = size; alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize; hash->uh_slab_hash = malloc(alloc, M_UMAHASH, M_NOWAIT); } else { alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT; hash->uh_slab_hash = zone_alloc_item(hashzone, NULL, UMA_ANYDOMAIN, M_WAITOK); hash->uh_hashsize = UMA_HASH_SIZE_INIT; } if (hash->uh_slab_hash) { bzero(hash->uh_slab_hash, alloc); hash->uh_hashmask = hash->uh_hashsize - 1; return (1); } return (0); } /* * Expands the hash table for HASH zones. This is done from zone_timeout * to reduce collisions. This must not be done in the regular allocation * path, otherwise, we can recurse on the vm while allocating pages. * * Arguments: * oldhash The hash you want to expand * newhash The hash structure for the new table * * Returns: * Nothing * * Discussion: */ static int hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash) { uma_hash_slab_t slab; u_int hval; u_int idx; if (!newhash->uh_slab_hash) return (0); if (oldhash->uh_hashsize >= newhash->uh_hashsize) return (0); /* * I need to investigate hash algorithms for resizing without a * full rehash. */ for (idx = 0; idx < oldhash->uh_hashsize; idx++) while (!LIST_EMPTY(&oldhash->uh_slab_hash[idx])) { slab = LIST_FIRST(&oldhash->uh_slab_hash[idx]); LIST_REMOVE(slab, uhs_hlink); hval = UMA_HASH(newhash, slab->uhs_data); LIST_INSERT_HEAD(&newhash->uh_slab_hash[hval], slab, uhs_hlink); } return (1); } /* * Free the hash bucket to the appropriate backing store. * * Arguments: * slab_hash The hash bucket we're freeing * hashsize The number of entries in that hash bucket * * Returns: * Nothing */ static void hash_free(struct uma_hash *hash) { if (hash->uh_slab_hash == NULL) return; if (hash->uh_hashsize == UMA_HASH_SIZE_INIT) zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE); else free(hash->uh_slab_hash, M_UMAHASH); } /* * Frees all outstanding items in a bucket * * Arguments: * zone The zone to free to, must be unlocked. * bucket The free/alloc bucket with items. * * Returns: * Nothing */ static void bucket_drain(uma_zone_t zone, uma_bucket_t bucket) { int i; if (bucket == NULL || bucket->ub_cnt == 0) return; if (zone->uz_fini) for (i = 0; i < bucket->ub_cnt; i++) zone->uz_fini(bucket->ub_bucket[i], zone->uz_size); zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt); if (zone->uz_max_items > 0) zone_free_limit(zone, bucket->ub_cnt); bucket->ub_cnt = 0; } /* * Drains the per cpu caches for a zone. * * NOTE: This may only be called while the zone is being torn down, and not * during normal operation. This is necessary in order that we do not have * to migrate CPUs to drain the per-CPU caches. * * Arguments: * zone The zone to drain, must be unlocked. * * Returns: * Nothing */ static void cache_drain(uma_zone_t zone) { uma_cache_t cache; uma_bucket_t bucket; int cpu; /* * XXX: It is safe to not lock the per-CPU caches, because we're * tearing down the zone anyway. I.e., there will be no further use * of the caches at this point. * * XXX: It would good to be able to assert that the zone is being * torn down to prevent improper use of cache_drain(). */ CPU_FOREACH(cpu) { cache = &zone->uz_cpu[cpu]; bucket = cache_bucket_unload_alloc(cache); if (bucket != NULL) { bucket_drain(zone, bucket); bucket_free(zone, bucket, NULL); } bucket = cache_bucket_unload_free(cache); if (bucket != NULL) { bucket_drain(zone, bucket); bucket_free(zone, bucket, NULL); } bucket = cache_bucket_unload_cross(cache); if (bucket != NULL) { bucket_drain(zone, bucket); bucket_free(zone, bucket, NULL); } } bucket_cache_reclaim(zone, true); } static void cache_shrink(uma_zone_t zone, void *unused) { if (zone->uz_flags & UMA_ZFLAG_INTERNAL) return; ZONE_LOCK(zone); zone->uz_bucket_size = (zone->uz_bucket_size_min + zone->uz_bucket_size) / 2; ZONE_UNLOCK(zone); } static void cache_drain_safe_cpu(uma_zone_t zone, void *unused) { uma_cache_t cache; uma_bucket_t b1, b2, b3; int domain; if (zone->uz_flags & UMA_ZFLAG_INTERNAL) return; b1 = b2 = b3 = NULL; ZONE_LOCK(zone); critical_enter(); if (zone->uz_flags & UMA_ZONE_FIRSTTOUCH) domain = PCPU_GET(domain); else domain = 0; cache = &zone->uz_cpu[curcpu]; b1 = cache_bucket_unload_alloc(cache); if (b1 != NULL && b1->ub_cnt != 0) { zone_put_bucket(zone, &zone->uz_domain[domain], b1, false); b1 = NULL; } b2 = cache_bucket_unload_free(cache); if (b2 != NULL && b2->ub_cnt != 0) { zone_put_bucket(zone, &zone->uz_domain[domain], b2, false); b2 = NULL; } b3 = cache_bucket_unload_cross(cache); critical_exit(); ZONE_UNLOCK(zone); if (b1) bucket_free(zone, b1, NULL); if (b2) bucket_free(zone, b2, NULL); if (b3) { bucket_drain(zone, b3); bucket_free(zone, b3, NULL); } } /* * Safely drain per-CPU caches of a zone(s) to alloc bucket. * This is an expensive call because it needs to bind to all CPUs * one by one and enter a critical section on each of them in order * to safely access their cache buckets. * Zone lock must not be held on call this function. */ static void pcpu_cache_drain_safe(uma_zone_t zone) { int cpu; /* * Polite bucket sizes shrinking was not enough, shrink aggressively. */ if (zone) cache_shrink(zone, NULL); else zone_foreach(cache_shrink, NULL); CPU_FOREACH(cpu) { thread_lock(curthread); sched_bind(curthread, cpu); thread_unlock(curthread); if (zone) cache_drain_safe_cpu(zone, NULL); else zone_foreach(cache_drain_safe_cpu, NULL); } thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); } /* * Reclaim cached buckets from a zone. All buckets are reclaimed if the caller * requested a drain, otherwise the per-domain caches are trimmed to either * estimated working set size. */ static void bucket_cache_reclaim(uma_zone_t zone, bool drain) { uma_zone_domain_t zdom; uma_bucket_t bucket; long target, tofree; int i; for (i = 0; i < vm_ndomains; i++) { /* * The cross bucket is partially filled and not part of * the item count. Reclaim it individually here. */ zdom = &zone->uz_domain[i]; ZONE_CROSS_LOCK(zone); bucket = zdom->uzd_cross; zdom->uzd_cross = NULL; ZONE_CROSS_UNLOCK(zone); if (bucket != NULL) { bucket_drain(zone, bucket); bucket_free(zone, bucket, NULL); } /* * Shrink the zone bucket size to ensure that the per-CPU caches * don't grow too large. */ ZONE_LOCK(zone); if (i == 0 && zone->uz_bucket_size > zone->uz_bucket_size_min) zone->uz_bucket_size--; /* * If we were asked to drain the zone, we are done only once * this bucket cache is empty. Otherwise, we reclaim items in * excess of the zone's estimated working set size. If the * difference nitems - imin is larger than the WSS estimate, * then the estimate will grow at the end of this interval and * we ignore the historical average. */ target = drain ? 0 : lmax(zdom->uzd_wss, zdom->uzd_nitems - zdom->uzd_imin); while (zdom->uzd_nitems > target) { bucket = TAILQ_LAST(&zdom->uzd_buckets, uma_bucketlist); if (bucket == NULL) break; tofree = bucket->ub_cnt; TAILQ_REMOVE(&zdom->uzd_buckets, bucket, ub_link); zdom->uzd_nitems -= tofree; /* * Shift the bounds of the current WSS interval to avoid * perturbing the estimate. */ zdom->uzd_imax -= lmin(zdom->uzd_imax, tofree); zdom->uzd_imin -= lmin(zdom->uzd_imin, tofree); ZONE_UNLOCK(zone); bucket_drain(zone, bucket); bucket_free(zone, bucket, NULL); ZONE_LOCK(zone); } ZONE_UNLOCK(zone); } } static void keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start) { uint8_t *mem; int i; uint8_t flags; CTR4(KTR_UMA, "keg_free_slab keg %s(%p) slab %p, returning %d bytes", keg->uk_name, keg, slab, PAGE_SIZE * keg->uk_ppera); mem = slab_data(slab, keg); flags = slab->us_flags; i = start; if (keg->uk_fini != NULL) { for (i--; i > -1; i--) #ifdef INVARIANTS /* * trash_fini implies that dtor was trash_dtor. trash_fini * would check that memory hasn't been modified since free, * which executed trash_dtor. * That's why we need to run uma_dbg_kskip() check here, * albeit we don't make skip check for other init/fini * invocations. */ if (!uma_dbg_kskip(keg, slab_item(slab, keg, i)) || keg->uk_fini != trash_fini) #endif keg->uk_fini(slab_item(slab, keg, i), keg->uk_size); } if (keg->uk_flags & UMA_ZFLAG_OFFPAGE) zone_free_item(slabzone(keg->uk_ipers), slab_tohashslab(slab), NULL, SKIP_NONE); keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags); uma_total_dec(PAGE_SIZE * keg->uk_ppera); } /* * Frees pages from a keg back to the system. This is done on demand from * the pageout daemon. * * Returns nothing. */ static void keg_drain(uma_keg_t keg) { struct slabhead freeslabs = { 0 }; uma_domain_t dom; uma_slab_t slab, tmp; int i, n; /* * We don't want to take pages from statically allocated kegs at this * time */ if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL) return; for (i = 0; i < vm_ndomains; i++) { CTR4(KTR_UMA, "keg_drain %s(%p) domain %d free items: %u", keg->uk_name, keg, i, dom->ud_free); n = 0; dom = &keg->uk_domain[i]; KEG_LOCK(keg, i); LIST_FOREACH_SAFE(slab, &dom->ud_free_slab, us_link, tmp) { if (keg->uk_flags & UMA_ZFLAG_HASH) UMA_HASH_REMOVE(&keg->uk_hash, slab); n++; LIST_REMOVE(slab, us_link); LIST_INSERT_HEAD(&freeslabs, slab, us_link); } dom->ud_pages -= n * keg->uk_ppera; dom->ud_free -= n * keg->uk_ipers; KEG_UNLOCK(keg, i); } while ((slab = LIST_FIRST(&freeslabs)) != NULL) { LIST_REMOVE(slab, us_link); keg_free_slab(keg, slab, keg->uk_ipers); } } static void zone_reclaim(uma_zone_t zone, int waitok, bool drain) { /* * Set draining to interlock with zone_dtor() so we can release our * locks as we go. Only dtor() should do a WAITOK call since it * is the only call that knows the structure will still be available * when it wakes up. */ ZONE_LOCK(zone); while (zone->uz_flags & UMA_ZFLAG_RECLAIMING) { if (waitok == M_NOWAIT) goto out; msleep(zone, &zone->uz_lock, PVM, "zonedrain", 1); } zone->uz_flags |= UMA_ZFLAG_RECLAIMING; ZONE_UNLOCK(zone); bucket_cache_reclaim(zone, drain); /* * The DRAINING flag protects us from being freed while * we're running. Normally the uma_rwlock would protect us but we * must be able to release and acquire the right lock for each keg. */ if ((zone->uz_flags & UMA_ZFLAG_CACHE) == 0) keg_drain(zone->uz_keg); ZONE_LOCK(zone); zone->uz_flags &= ~UMA_ZFLAG_RECLAIMING; wakeup(zone); out: ZONE_UNLOCK(zone); } static void zone_drain(uma_zone_t zone, void *unused) { zone_reclaim(zone, M_NOWAIT, true); } static void zone_trim(uma_zone_t zone, void *unused) { zone_reclaim(zone, M_NOWAIT, false); } /* * Allocate a new slab for a keg and inserts it into the partial slab list. * The keg should be unlocked on entry. If the allocation succeeds it will * be locked on return. * * Arguments: * flags Wait flags for the item initialization routine * aflags Wait flags for the slab allocation * * Returns: * The slab that was allocated or NULL if there is no memory and the * caller specified M_NOWAIT. */ static uma_slab_t keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int flags, int aflags) { uma_domain_t dom; uma_alloc allocf; uma_slab_t slab; unsigned long size; uint8_t *mem; uint8_t sflags; int i; KASSERT(domain >= 0 && domain < vm_ndomains, ("keg_alloc_slab: domain %d out of range", domain)); allocf = keg->uk_allocf; slab = NULL; mem = NULL; if (keg->uk_flags & UMA_ZFLAG_OFFPAGE) { uma_hash_slab_t hslab; hslab = zone_alloc_item(slabzone(keg->uk_ipers), NULL, domain, aflags); if (hslab == NULL) goto fail; slab = &hslab->uhs_slab; } /* * This reproduces the old vm_zone behavior of zero filling pages the * first time they are added to a zone. * * Malloced items are zeroed in uma_zalloc. */ if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0) aflags |= M_ZERO; else aflags &= ~M_ZERO; if (keg->uk_flags & UMA_ZONE_NODUMP) aflags |= M_NODUMP; /* zone is passed for legacy reasons. */ size = keg->uk_ppera * PAGE_SIZE; mem = allocf(zone, size, domain, &sflags, aflags); if (mem == NULL) { if (keg->uk_flags & UMA_ZFLAG_OFFPAGE) zone_free_item(slabzone(keg->uk_ipers), slab_tohashslab(slab), NULL, SKIP_NONE); goto fail; } uma_total_inc(size); /* For HASH zones all pages go to the same uma_domain. */ if ((keg->uk_flags & UMA_ZFLAG_HASH) != 0) domain = 0; /* Point the slab into the allocated memory */ if (!(keg->uk_flags & UMA_ZFLAG_OFFPAGE)) slab = (uma_slab_t )(mem + keg->uk_pgoff); else slab_tohashslab(slab)->uhs_data = mem; if (keg->uk_flags & UMA_ZFLAG_VTOSLAB) for (i = 0; i < keg->uk_ppera; i++) vsetzoneslab((vm_offset_t)mem + (i * PAGE_SIZE), zone, slab); slab->us_freecount = keg->uk_ipers; slab->us_flags = sflags; slab->us_domain = domain; BIT_FILL(keg->uk_ipers, &slab->us_free); #ifdef INVARIANTS BIT_ZERO(keg->uk_ipers, slab_dbg_bits(slab, keg)); #endif if (keg->uk_init != NULL) { for (i = 0; i < keg->uk_ipers; i++) if (keg->uk_init(slab_item(slab, keg, i), keg->uk_size, flags) != 0) break; if (i != keg->uk_ipers) { keg_free_slab(keg, slab, i); goto fail; } } KEG_LOCK(keg, domain); CTR3(KTR_UMA, "keg_alloc_slab: allocated slab %p for %s(%p)", slab, keg->uk_name, keg); if (keg->uk_flags & UMA_ZFLAG_HASH) UMA_HASH_INSERT(&keg->uk_hash, slab, mem); /* * If we got a slab here it's safe to mark it partially used * and return. We assume that the caller is going to remove * at least one item. */ dom = &keg->uk_domain[domain]; LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link); dom->ud_pages += keg->uk_ppera; dom->ud_free += keg->uk_ipers; return (slab); fail: return (NULL); } /* * This function is intended to be used early on in place of page_alloc() so * that we may use the boot time page cache to satisfy allocations before * the VM is ready. */ static void * startup_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag, int wait) { vm_paddr_t pa; vm_page_t m; void *mem; int pages; int i; pages = howmany(bytes, PAGE_SIZE); KASSERT(pages > 0, ("%s can't reserve 0 pages", __func__)); *pflag = UMA_SLAB_BOOT; m = vm_page_alloc_contig_domain(NULL, 0, domain, malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED, pages, (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT); if (m == NULL) return (NULL); pa = VM_PAGE_TO_PHYS(m); for (i = 0; i < pages; i++, pa += PAGE_SIZE) { #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \ defined(__riscv) || defined(__powerpc64__) if ((wait & M_NODUMP) == 0) dump_add_page(pa); #endif } /* Allocate KVA and indirectly advance bootmem. */ mem = (void *)pmap_map(&bootmem, m->phys_addr, m->phys_addr + (pages * PAGE_SIZE), VM_PROT_READ | VM_PROT_WRITE); if ((wait & M_ZERO) != 0) bzero(mem, pages * PAGE_SIZE); return (mem); } static void startup_free(void *mem, vm_size_t bytes) { vm_offset_t va; vm_page_t m; va = (vm_offset_t)mem; m = PHYS_TO_VM_PAGE(pmap_kextract(va)); pmap_remove(kernel_pmap, va, va + bytes); for (; bytes != 0; bytes -= PAGE_SIZE, m++) { #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \ defined(__riscv) || defined(__powerpc64__) dump_drop_page(VM_PAGE_TO_PHYS(m)); #endif vm_page_unwire_noq(m); vm_page_free(m); } } /* * Allocates a number of pages from the system * * Arguments: * bytes The number of bytes requested * wait Shall we wait? * * Returns: * A pointer to the alloced memory or possibly * NULL if M_NOWAIT is set. */ static void * page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag, int wait) { void *p; /* Returned page */ *pflag = UMA_SLAB_KERNEL; p = (void *)kmem_malloc_domainset(DOMAINSET_FIXED(domain), bytes, wait); return (p); } static void * pcpu_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag, int wait) { struct pglist alloctail; vm_offset_t addr, zkva; int cpu, flags; vm_page_t p, p_next; #ifdef NUMA struct pcpu *pc; #endif MPASS(bytes == (mp_maxid + 1) * PAGE_SIZE); TAILQ_INIT(&alloctail); flags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ | malloc2vm_flags(wait); *pflag = UMA_SLAB_KERNEL; for (cpu = 0; cpu <= mp_maxid; cpu++) { if (CPU_ABSENT(cpu)) { p = vm_page_alloc(NULL, 0, flags); } else { #ifndef NUMA p = vm_page_alloc(NULL, 0, flags); #else pc = pcpu_find(cpu); - p = vm_page_alloc_domain(NULL, 0, pc->pc_domain, flags); + if (__predict_false(VM_DOMAIN_EMPTY(pc->pc_domain))) + p = NULL; + else + p = vm_page_alloc_domain(NULL, 0, + pc->pc_domain, flags); if (__predict_false(p == NULL)) p = vm_page_alloc(NULL, 0, flags); #endif } if (__predict_false(p == NULL)) goto fail; TAILQ_INSERT_TAIL(&alloctail, p, listq); } if ((addr = kva_alloc(bytes)) == 0) goto fail; zkva = addr; TAILQ_FOREACH(p, &alloctail, listq) { pmap_qenter(zkva, &p, 1); zkva += PAGE_SIZE; } return ((void*)addr); fail: TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) { vm_page_unwire_noq(p); vm_page_free(p); } return (NULL); } /* * Allocates a number of pages from within an object * * Arguments: * bytes The number of bytes requested * wait Shall we wait? * * Returns: * A pointer to the alloced memory or possibly * NULL if M_NOWAIT is set. */ static void * noobj_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags, int wait) { TAILQ_HEAD(, vm_page) alloctail; u_long npages; vm_offset_t retkva, zkva; vm_page_t p, p_next; uma_keg_t keg; TAILQ_INIT(&alloctail); keg = zone->uz_keg; npages = howmany(bytes, PAGE_SIZE); while (npages > 0) { p = vm_page_alloc_domain(NULL, 0, domain, VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ | ((wait & M_WAITOK) != 0 ? VM_ALLOC_WAITOK : VM_ALLOC_NOWAIT)); if (p != NULL) { /* * Since the page does not belong to an object, its * listq is unused. */ TAILQ_INSERT_TAIL(&alloctail, p, listq); npages--; continue; } /* * Page allocation failed, free intermediate pages and * exit. */ TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) { vm_page_unwire_noq(p); vm_page_free(p); } return (NULL); } *flags = UMA_SLAB_PRIV; zkva = keg->uk_kva + atomic_fetchadd_long(&keg->uk_offset, round_page(bytes)); retkva = zkva; TAILQ_FOREACH(p, &alloctail, listq) { pmap_qenter(zkva, &p, 1); zkva += PAGE_SIZE; } return ((void *)retkva); } /* * Frees a number of pages to the system * * Arguments: * mem A pointer to the memory to be freed * size The size of the memory being freed * flags The original p->us_flags field * * Returns: * Nothing */ static void page_free(void *mem, vm_size_t size, uint8_t flags) { if ((flags & UMA_SLAB_BOOT) != 0) { startup_free(mem, size); return; } if ((flags & UMA_SLAB_KERNEL) == 0) panic("UMA: page_free used with invalid flags %x", flags); kmem_free((vm_offset_t)mem, size); } /* * Frees pcpu zone allocations * * Arguments: * mem A pointer to the memory to be freed * size The size of the memory being freed * flags The original p->us_flags field * * Returns: * Nothing */ static void pcpu_page_free(void *mem, vm_size_t size, uint8_t flags) { vm_offset_t sva, curva; vm_paddr_t paddr; vm_page_t m; MPASS(size == (mp_maxid+1)*PAGE_SIZE); sva = (vm_offset_t)mem; for (curva = sva; curva < sva + size; curva += PAGE_SIZE) { paddr = pmap_kextract(curva); m = PHYS_TO_VM_PAGE(paddr); vm_page_unwire_noq(m); vm_page_free(m); } pmap_qremove(sva, size >> PAGE_SHIFT); kva_free(sva, size); } /* * Zero fill initializer * * Arguments/Returns follow uma_init specifications */ static int zero_init(void *mem, int size, int flags) { bzero(mem, size); return (0); } #ifdef INVARIANTS struct noslabbits * slab_dbg_bits(uma_slab_t slab, uma_keg_t keg) { return ((void *)((char *)&slab->us_free + BITSET_SIZE(keg->uk_ipers))); } #endif /* * Actual size of embedded struct slab (!OFFPAGE). */ size_t slab_sizeof(int nitems) { size_t s; s = sizeof(struct uma_slab) + BITSET_SIZE(nitems) * SLAB_BITSETS; return (roundup(s, UMA_ALIGN_PTR + 1)); } /* * Size of memory for embedded slabs (!OFFPAGE). */ size_t slab_space(int nitems) { return (UMA_SLAB_SIZE - slab_sizeof(nitems)); } #define UMA_FIXPT_SHIFT 31 #define UMA_FRAC_FIXPT(n, d) \ ((uint32_t)(((uint64_t)(n) << UMA_FIXPT_SHIFT) / (d))) #define UMA_FIXPT_PCT(f) \ ((u_int)(((uint64_t)100 * (f)) >> UMA_FIXPT_SHIFT)) #define UMA_PCT_FIXPT(pct) UMA_FRAC_FIXPT((pct), 100) #define UMA_MIN_EFF UMA_PCT_FIXPT(100 - UMA_MAX_WASTE) /* * Compute the number of items that will fit in a slab. If hdr is true, the * item count may be limited to provide space in the slab for an inline slab * header. Otherwise, all slab space will be provided for item storage. */ static u_int slab_ipers_hdr(u_int size, u_int rsize, u_int slabsize, bool hdr) { u_int ipers; u_int padpi; /* The padding between items is not needed after the last item. */ padpi = rsize - size; if (hdr) { /* * Start with the maximum item count and remove items until * the slab header first alongside the allocatable memory. */ for (ipers = MIN(SLAB_MAX_SETSIZE, (slabsize + padpi - slab_sizeof(1)) / rsize); ipers > 0 && ipers * rsize - padpi + slab_sizeof(ipers) > slabsize; ipers--) continue; } else { ipers = MIN((slabsize + padpi) / rsize, SLAB_MAX_SETSIZE); } return (ipers); } /* * Compute the number of items that will fit in a slab for a startup zone. */ int slab_ipers(size_t size, int align) { int rsize; rsize = roundup(size, align + 1); /* Assume no CACHESPREAD */ return (slab_ipers_hdr(size, rsize, UMA_SLAB_SIZE, true)); } /* * Determine the format of a uma keg. This determines where the slab header * will be placed (inline or offpage) and calculates ipers, rsize, and ppera. * * Arguments * keg The zone we should initialize * * Returns * Nothing */ static void keg_layout(uma_keg_t keg) { u_int alignsize; u_int eff; u_int eff_offpage; u_int format; u_int ipers; u_int ipers_offpage; u_int pages; u_int rsize; u_int slabsize; KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 || (keg->uk_size <= UMA_PCPU_ALLOC_SIZE && (keg->uk_flags & UMA_ZONE_CACHESPREAD) == 0), ("%s: cannot configure for PCPU: keg=%s, size=%u, flags=0x%b", __func__, keg->uk_name, keg->uk_size, keg->uk_flags, PRINT_UMA_ZFLAGS)); KASSERT((keg->uk_flags & (UMA_ZFLAG_INTERNAL | UMA_ZFLAG_CACHEONLY)) == 0 || (keg->uk_flags & (UMA_ZONE_NOTOUCH | UMA_ZONE_PCPU)) == 0, ("%s: incompatible flags 0x%b", __func__, keg->uk_flags, PRINT_UMA_ZFLAGS)); alignsize = keg->uk_align + 1; format = 0; ipers = 0; /* * Calculate the size of each allocation (rsize) according to * alignment. If the requested size is smaller than we have * allocation bits for we round it up. */ rsize = MAX(keg->uk_size, UMA_SMALLEST_UNIT); rsize = roundup2(rsize, alignsize); if ((keg->uk_flags & UMA_ZONE_PCPU) != 0) { slabsize = UMA_PCPU_ALLOC_SIZE; pages = mp_maxid + 1; } else if ((keg->uk_flags & UMA_ZONE_CACHESPREAD) != 0) { /* * We want one item to start on every align boundary in a page. * To do this we will span pages. We will also extend the item * by the size of align if it is an even multiple of align. * Otherwise, it would fall on the same boundary every time. */ if ((rsize & alignsize) == 0) rsize += alignsize; slabsize = rsize * (PAGE_SIZE / alignsize); slabsize = MIN(slabsize, rsize * SLAB_MAX_SETSIZE); slabsize = MIN(slabsize, UMA_CACHESPREAD_MAX_SIZE); pages = howmany(slabsize, PAGE_SIZE); slabsize = ptoa(pages); } else { /* * Choose a slab size of as many pages as it takes to represent * a single item. We will then try to fit as many additional * items into the slab as possible. At some point, we may want * to increase the slab size for awkward item sizes in order to * increase efficiency. */ pages = howmany(keg->uk_size, PAGE_SIZE); slabsize = ptoa(pages); } /* Evaluate an inline slab layout. */ if ((keg->uk_flags & (UMA_ZONE_NOTOUCH | UMA_ZONE_PCPU)) == 0) ipers = slab_ipers_hdr(keg->uk_size, rsize, slabsize, true); /* TODO: vm_page-embedded slab. */ /* * We can't do OFFPAGE if we're internal or if we've been * asked to not go to the VM for buckets. If we do this we * may end up going to the VM for slabs which we do not * want to do if we're UMA_ZFLAG_CACHEONLY as a result * of UMA_ZONE_VM, which clearly forbids it. */ if ((keg->uk_flags & (UMA_ZFLAG_INTERNAL | UMA_ZFLAG_CACHEONLY)) != 0) { if (ipers == 0) { /* We need an extra page for the slab header. */ pages++; slabsize = ptoa(pages); ipers = slab_ipers_hdr(keg->uk_size, rsize, slabsize, true); } goto out; } /* * See if using an OFFPAGE slab will improve our efficiency. * Only do this if we are below our efficiency threshold. * * XXX We could try growing slabsize to limit max waste as well. * Historically this was not done because the VM could not * efficiently handle contiguous allocations. */ eff = UMA_FRAC_FIXPT(ipers * rsize, slabsize); ipers_offpage = slab_ipers_hdr(keg->uk_size, rsize, slabsize, false); eff_offpage = UMA_FRAC_FIXPT(ipers_offpage * rsize, slabsize + slabzone(ipers_offpage)->uz_keg->uk_rsize); if (ipers == 0 || (eff < UMA_MIN_EFF && eff < eff_offpage)) { CTR5(KTR_UMA, "UMA decided we need offpage slab headers for " "keg: %s(%p), minimum efficiency allowed = %u%%, " "old efficiency = %u%%, offpage efficiency = %u%%", keg->uk_name, keg, UMA_FIXPT_PCT(UMA_MIN_EFF), UMA_FIXPT_PCT(eff), UMA_FIXPT_PCT(eff_offpage)); format = UMA_ZFLAG_OFFPAGE; ipers = ipers_offpage; } out: /* * How do we find the slab header if it is offpage or if not all item * start addresses are in the same page? We could solve the latter * case with vaddr alignment, but we don't. */ if ((format & UMA_ZFLAG_OFFPAGE) != 0 || (ipers - 1) * rsize >= PAGE_SIZE) { if ((keg->uk_flags & UMA_ZONE_NOTPAGE) != 0) format |= UMA_ZFLAG_HASH; else format |= UMA_ZFLAG_VTOSLAB; } keg->uk_ipers = ipers; keg->uk_rsize = rsize; keg->uk_flags |= format; keg->uk_ppera = pages; CTR6(KTR_UMA, "%s: keg=%s, flags=%#x, rsize=%u, ipers=%u, ppera=%u", __func__, keg->uk_name, keg->uk_flags, rsize, ipers, pages); KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_MAX_SETSIZE, ("%s: keg=%s, flags=0x%b, rsize=%u, ipers=%u, ppera=%u", __func__, keg->uk_name, keg->uk_flags, PRINT_UMA_ZFLAGS, rsize, ipers, pages)); } /* * Keg header ctor. This initializes all fields, locks, etc. And inserts * the keg onto the global keg list. * * Arguments/Returns follow uma_ctor specifications * udata Actually uma_kctor_args */ static int keg_ctor(void *mem, int size, void *udata, int flags) { struct uma_kctor_args *arg = udata; uma_keg_t keg = mem; uma_zone_t zone; int i; bzero(keg, size); keg->uk_size = arg->size; keg->uk_init = arg->uminit; keg->uk_fini = arg->fini; keg->uk_align = arg->align; keg->uk_reserve = 0; keg->uk_flags = arg->flags; /* * We use a global round-robin policy by default. Zones with * UMA_ZONE_FIRSTTOUCH set will use first-touch instead, in which * case the iterator is never run. */ keg->uk_dr.dr_policy = DOMAINSET_RR(); keg->uk_dr.dr_iter = 0; /* * The master zone is passed to us at keg-creation time. */ zone = arg->zone; keg->uk_name = zone->uz_name; if (arg->flags & UMA_ZONE_VM) keg->uk_flags |= UMA_ZFLAG_CACHEONLY; if (arg->flags & UMA_ZONE_ZINIT) keg->uk_init = zero_init; if (arg->flags & UMA_ZONE_MALLOC) keg->uk_flags |= UMA_ZFLAG_VTOSLAB; #ifndef SMP keg->uk_flags &= ~UMA_ZONE_PCPU; #endif keg_layout(keg); /* * Use a first-touch NUMA policy for all kegs that pmap_extract() * will work on with the exception of critical VM structures * necessary for paging. * * Zones may override the default by specifying either. */ #ifdef NUMA if ((keg->uk_flags & (UMA_ZFLAG_HASH | UMA_ZONE_VM | UMA_ZONE_ROUNDROBIN)) == 0) keg->uk_flags |= UMA_ZONE_FIRSTTOUCH; else if ((keg->uk_flags & UMA_ZONE_FIRSTTOUCH) == 0) keg->uk_flags |= UMA_ZONE_ROUNDROBIN; #endif /* * If we haven't booted yet we need allocations to go through the * startup cache until the vm is ready. */ #ifdef UMA_MD_SMALL_ALLOC if (keg->uk_ppera == 1) keg->uk_allocf = uma_small_alloc; else #endif if (booted < BOOT_KVA) keg->uk_allocf = startup_alloc; else if (keg->uk_flags & UMA_ZONE_PCPU) keg->uk_allocf = pcpu_page_alloc; else keg->uk_allocf = page_alloc; #ifdef UMA_MD_SMALL_ALLOC if (keg->uk_ppera == 1) keg->uk_freef = uma_small_free; else #endif if (keg->uk_flags & UMA_ZONE_PCPU) keg->uk_freef = pcpu_page_free; else keg->uk_freef = page_free; /* * Initialize keg's locks. */ for (i = 0; i < vm_ndomains; i++) KEG_LOCK_INIT(keg, i, (arg->flags & UMA_ZONE_MTXCLASS)); /* * If we're putting the slab header in the actual page we need to * figure out where in each page it goes. See slab_sizeof * definition. */ if (!(keg->uk_flags & UMA_ZFLAG_OFFPAGE)) { size_t shsize; shsize = slab_sizeof(keg->uk_ipers); keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - shsize; /* * The only way the following is possible is if with our * UMA_ALIGN_PTR adjustments we are now bigger than * UMA_SLAB_SIZE. I haven't checked whether this is * mathematically possible for all cases, so we make * sure here anyway. */ KASSERT(keg->uk_pgoff + shsize <= PAGE_SIZE * keg->uk_ppera, ("zone %s ipers %d rsize %d size %d slab won't fit", zone->uz_name, keg->uk_ipers, keg->uk_rsize, keg->uk_size)); } if (keg->uk_flags & UMA_ZFLAG_HASH) hash_alloc(&keg->uk_hash, 0); CTR3(KTR_UMA, "keg_ctor %p zone %s(%p)", keg, zone->uz_name, zone); LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link); rw_wlock(&uma_rwlock); LIST_INSERT_HEAD(&uma_kegs, keg, uk_link); rw_wunlock(&uma_rwlock); return (0); } static void zone_kva_available(uma_zone_t zone, void *unused) { uma_keg_t keg; if ((zone->uz_flags & UMA_ZFLAG_CACHE) != 0) return; KEG_GET(zone, keg); if (keg->uk_allocf == startup_alloc) keg->uk_allocf = page_alloc; } static void zone_alloc_counters(uma_zone_t zone, void *unused) { zone->uz_allocs = counter_u64_alloc(M_WAITOK); zone->uz_frees = counter_u64_alloc(M_WAITOK); zone->uz_fails = counter_u64_alloc(M_WAITOK); } static void zone_alloc_sysctl(uma_zone_t zone, void *unused) { uma_zone_domain_t zdom; uma_domain_t dom; uma_keg_t keg; struct sysctl_oid *oid, *domainoid; int domains, i, cnt; static const char *nokeg = "cache zone"; char *c; /* * Make a sysctl safe copy of the zone name by removing * any special characters and handling dups by appending * an index. */ if (zone->uz_namecnt != 0) { /* Count the number of decimal digits and '_' separator. */ for (i = 1, cnt = zone->uz_namecnt; cnt != 0; i++) cnt /= 10; zone->uz_ctlname = malloc(strlen(zone->uz_name) + i + 1, M_UMA, M_WAITOK); sprintf(zone->uz_ctlname, "%s_%d", zone->uz_name, zone->uz_namecnt); } else zone->uz_ctlname = strdup(zone->uz_name, M_UMA); for (c = zone->uz_ctlname; *c != '\0'; c++) if (strchr("./\\ -", *c) != NULL) *c = '_'; /* * Basic parameters at the root. */ zone->uz_oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_vm_uma), OID_AUTO, zone->uz_ctlname, CTLFLAG_RD, NULL, ""); oid = zone->uz_oid; SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "size", CTLFLAG_RD, &zone->uz_size, 0, "Allocation size"); SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "flags", CTLFLAG_RD | CTLTYPE_STRING | CTLFLAG_MPSAFE, zone, 0, sysctl_handle_uma_zone_flags, "A", "Allocator configuration flags"); SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "bucket_size", CTLFLAG_RD, &zone->uz_bucket_size, 0, "Desired per-cpu cache size"); SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "bucket_size_max", CTLFLAG_RD, &zone->uz_bucket_size_max, 0, "Maximum allowed per-cpu cache size"); /* * keg if present. */ if ((zone->uz_flags & UMA_ZFLAG_HASH) == 0) domains = vm_ndomains; else domains = 1; oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO, "keg", CTLFLAG_RD, NULL, ""); keg = zone->uz_keg; if ((zone->uz_flags & UMA_ZFLAG_CACHE) == 0) { SYSCTL_ADD_CONST_STRING(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "name", CTLFLAG_RD, keg->uk_name, "Keg name"); SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "rsize", CTLFLAG_RD, &keg->uk_rsize, 0, "Real object size with alignment"); SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "ppera", CTLFLAG_RD, &keg->uk_ppera, 0, "pages per-slab allocation"); SYSCTL_ADD_U16(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "ipers", CTLFLAG_RD, &keg->uk_ipers, 0, "items available per-slab"); SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "align", CTLFLAG_RD, &keg->uk_align, 0, "item alignment mask"); SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "efficiency", CTLFLAG_RD | CTLTYPE_INT | CTLFLAG_MPSAFE, keg, 0, sysctl_handle_uma_slab_efficiency, "I", "Slab utilization (100 - internal fragmentation %)"); domainoid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "domain", CTLFLAG_RD, NULL, ""); for (i = 0; i < domains; i++) { dom = &keg->uk_domain[i]; oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(domainoid), OID_AUTO, VM_DOMAIN(i)->vmd_name, CTLFLAG_RD, NULL, ""); SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "pages", CTLFLAG_RD, &dom->ud_pages, 0, "Total pages currently allocated from VM"); SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "free", CTLFLAG_RD, &dom->ud_free, 0, "items free in the slab layer"); } } else SYSCTL_ADD_CONST_STRING(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "name", CTLFLAG_RD, nokeg, "Keg name"); /* * Information about zone limits. */ oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO, "limit", CTLFLAG_RD, NULL, ""); SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "items", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE, zone, 0, sysctl_handle_uma_zone_items, "QU", "current number of allocated items if limit is set"); SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "max_items", CTLFLAG_RD, &zone->uz_max_items, 0, "Maximum number of cached items"); SYSCTL_ADD_U32(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "sleepers", CTLFLAG_RD, &zone->uz_sleepers, 0, "Number of threads sleeping at limit"); SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "sleeps", CTLFLAG_RD, &zone->uz_sleeps, 0, "Total zone limit sleeps"); SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "bucket_max", CTLFLAG_RD, &zone->uz_bkt_max, 0, "Maximum number of items in the bucket cache"); SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "bucket_cnt", CTLFLAG_RD, &zone->uz_bkt_count, 0, "Number of items in the bucket cache"); /* * Per-domain zone information. */ domainoid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO, "domain", CTLFLAG_RD, NULL, ""); if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) == 0) domains = 1; for (i = 0; i < domains; i++) { zdom = &zone->uz_domain[i]; oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(domainoid), OID_AUTO, VM_DOMAIN(i)->vmd_name, CTLFLAG_RD, NULL, ""); SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "nitems", CTLFLAG_RD, &zdom->uzd_nitems, "number of items in this domain"); SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "imax", CTLFLAG_RD, &zdom->uzd_imax, "maximum item count in this period"); SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "imin", CTLFLAG_RD, &zdom->uzd_imin, "minimum item count in this period"); SYSCTL_ADD_LONG(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "wss", CTLFLAG_RD, &zdom->uzd_wss, "Working set size"); } /* * General statistics. */ oid = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(zone->uz_oid), OID_AUTO, "stats", CTLFLAG_RD, NULL, ""); SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "current", CTLFLAG_RD | CTLTYPE_INT | CTLFLAG_MPSAFE, zone, 1, sysctl_handle_uma_zone_cur, "I", "Current number of allocated items"); SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "allocs", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE, zone, 0, sysctl_handle_uma_zone_allocs, "QU", "Total allocation calls"); SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "frees", CTLFLAG_RD | CTLTYPE_U64 | CTLFLAG_MPSAFE, zone, 0, sysctl_handle_uma_zone_frees, "QU", "Total free calls"); SYSCTL_ADD_COUNTER_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "fails", CTLFLAG_RD, &zone->uz_fails, "Number of allocation failures"); SYSCTL_ADD_U64(NULL, SYSCTL_CHILDREN(oid), OID_AUTO, "xdomain", CTLFLAG_RD, &zone->uz_xdomain, 0, "Free calls from the wrong domain"); } struct uma_zone_count { const char *name; int count; }; static void zone_count(uma_zone_t zone, void *arg) { struct uma_zone_count *cnt; cnt = arg; /* * Some zones are rapidly created with identical names and * destroyed out of order. This can lead to gaps in the count. * Use one greater than the maximum observed for this name. */ if (strcmp(zone->uz_name, cnt->name) == 0) cnt->count = MAX(cnt->count, zone->uz_namecnt + 1); } static void zone_update_caches(uma_zone_t zone) { int i; for (i = 0; i <= mp_maxid; i++) { cache_set_uz_size(&zone->uz_cpu[i], zone->uz_size); cache_set_uz_flags(&zone->uz_cpu[i], zone->uz_flags); } } /* * Zone header ctor. This initializes all fields, locks, etc. * * Arguments/Returns follow uma_ctor specifications * udata Actually uma_zctor_args */ static int zone_ctor(void *mem, int size, void *udata, int flags) { struct uma_zone_count cnt; struct uma_zctor_args *arg = udata; uma_zone_t zone = mem; uma_zone_t z; uma_keg_t keg; int i; bzero(zone, size); zone->uz_name = arg->name; zone->uz_ctor = arg->ctor; zone->uz_dtor = arg->dtor; zone->uz_init = NULL; zone->uz_fini = NULL; zone->uz_sleeps = 0; zone->uz_xdomain = 0; zone->uz_bucket_size = 0; zone->uz_bucket_size_min = 0; zone->uz_bucket_size_max = BUCKET_MAX; zone->uz_flags = 0; zone->uz_warning = NULL; /* The domain structures follow the cpu structures. */ zone->uz_domain = (struct uma_zone_domain *)&zone->uz_cpu[mp_ncpus]; zone->uz_bkt_max = ULONG_MAX; timevalclear(&zone->uz_ratecheck); /* Count the number of duplicate names. */ cnt.name = arg->name; cnt.count = 0; zone_foreach(zone_count, &cnt); zone->uz_namecnt = cnt.count; ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS)); ZONE_CROSS_LOCK_INIT(zone); for (i = 0; i < vm_ndomains; i++) TAILQ_INIT(&zone->uz_domain[i].uzd_buckets); #ifdef INVARIANTS if (arg->uminit == trash_init && arg->fini == trash_fini) zone->uz_flags |= UMA_ZFLAG_TRASH | UMA_ZFLAG_CTORDTOR; #endif /* * This is a pure cache zone, no kegs. */ if (arg->import) { KASSERT((arg->flags & UMA_ZFLAG_CACHE) != 0, ("zone_ctor: Import specified for non-cache zone.")); if (arg->flags & UMA_ZONE_VM) arg->flags |= UMA_ZFLAG_CACHEONLY; zone->uz_flags = arg->flags; zone->uz_size = arg->size; zone->uz_import = arg->import; zone->uz_release = arg->release; zone->uz_arg = arg->arg; rw_wlock(&uma_rwlock); LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link); rw_wunlock(&uma_rwlock); goto out; } /* * Use the regular zone/keg/slab allocator. */ zone->uz_import = zone_import; zone->uz_release = zone_release; zone->uz_arg = zone; keg = arg->keg; if (arg->flags & UMA_ZONE_SECONDARY) { KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0, ("Secondary zone requested UMA_ZFLAG_INTERNAL")); KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg")); zone->uz_init = arg->uminit; zone->uz_fini = arg->fini; zone->uz_flags |= UMA_ZONE_SECONDARY; rw_wlock(&uma_rwlock); ZONE_LOCK(zone); LIST_FOREACH(z, &keg->uk_zones, uz_link) { if (LIST_NEXT(z, uz_link) == NULL) { LIST_INSERT_AFTER(z, zone, uz_link); break; } } ZONE_UNLOCK(zone); rw_wunlock(&uma_rwlock); } else if (keg == NULL) { if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini, arg->align, arg->flags)) == NULL) return (ENOMEM); } else { struct uma_kctor_args karg; int error; /* We should only be here from uma_startup() */ karg.size = arg->size; karg.uminit = arg->uminit; karg.fini = arg->fini; karg.align = arg->align; karg.flags = arg->flags; karg.zone = zone; error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg, flags); if (error) return (error); } /* Inherit properties from the keg. */ zone->uz_keg = keg; zone->uz_size = keg->uk_size; zone->uz_flags |= (keg->uk_flags & (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT)); out: if (__predict_true(booted >= BOOT_RUNNING)) { zone_alloc_counters(zone, NULL); zone_alloc_sysctl(zone, NULL); } else { zone->uz_allocs = EARLY_COUNTER; zone->uz_frees = EARLY_COUNTER; zone->uz_fails = EARLY_COUNTER; } KASSERT((arg->flags & (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET)) != (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET), ("Invalid zone flag combination")); if (arg->flags & UMA_ZFLAG_INTERNAL) zone->uz_bucket_size_max = zone->uz_bucket_size = 0; if ((arg->flags & UMA_ZONE_MAXBUCKET) != 0) zone->uz_bucket_size = BUCKET_MAX; else if ((arg->flags & UMA_ZONE_MINBUCKET) != 0) zone->uz_bucket_size_max = zone->uz_bucket_size = BUCKET_MIN; else if ((arg->flags & UMA_ZONE_NOBUCKET) != 0) zone->uz_bucket_size = 0; else zone->uz_bucket_size = bucket_select(zone->uz_size); zone->uz_bucket_size_min = zone->uz_bucket_size; if (zone->uz_dtor != NULL || zone->uz_ctor != NULL) zone->uz_flags |= UMA_ZFLAG_CTORDTOR; zone_update_caches(zone); return (0); } /* * Keg header dtor. This frees all data, destroys locks, frees the hash * table and removes the keg from the global list. * * Arguments/Returns follow uma_dtor specifications * udata unused */ static void keg_dtor(void *arg, int size, void *udata) { uma_keg_t keg; uint32_t free, pages; int i; keg = (uma_keg_t)arg; free = pages = 0; for (i = 0; i < vm_ndomains; i++) { free += keg->uk_domain[i].ud_free; pages += keg->uk_domain[i].ud_pages; KEG_LOCK_FINI(keg, i); } if (free != 0) printf("Freed UMA keg (%s) was not empty (%u items). " " Lost %u pages of memory.\n", keg->uk_name ? keg->uk_name : "", free, pages); hash_free(&keg->uk_hash); } /* * Zone header dtor. * * Arguments/Returns follow uma_dtor specifications * udata unused */ static void zone_dtor(void *arg, int size, void *udata) { uma_zone_t zone; uma_keg_t keg; zone = (uma_zone_t)arg; sysctl_remove_oid(zone->uz_oid, 1, 1); if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) cache_drain(zone); rw_wlock(&uma_rwlock); LIST_REMOVE(zone, uz_link); rw_wunlock(&uma_rwlock); /* * XXX there are some races here where * the zone can be drained but zone lock * released and then refilled before we * remove it... we dont care for now */ zone_reclaim(zone, M_WAITOK, true); /* * We only destroy kegs from non secondary/non cache zones. */ if ((zone->uz_flags & (UMA_ZONE_SECONDARY | UMA_ZFLAG_CACHE)) == 0) { keg = zone->uz_keg; rw_wlock(&uma_rwlock); LIST_REMOVE(keg, uk_link); rw_wunlock(&uma_rwlock); zone_free_item(kegs, keg, NULL, SKIP_NONE); } counter_u64_free(zone->uz_allocs); counter_u64_free(zone->uz_frees); counter_u64_free(zone->uz_fails); free(zone->uz_ctlname, M_UMA); ZONE_LOCK_FINI(zone); ZONE_CROSS_LOCK_FINI(zone); } static void zone_foreach_unlocked(void (*zfunc)(uma_zone_t, void *arg), void *arg) { uma_keg_t keg; uma_zone_t zone; LIST_FOREACH(keg, &uma_kegs, uk_link) { LIST_FOREACH(zone, &keg->uk_zones, uz_link) zfunc(zone, arg); } LIST_FOREACH(zone, &uma_cachezones, uz_link) zfunc(zone, arg); } /* * Traverses every zone in the system and calls a callback * * Arguments: * zfunc A pointer to a function which accepts a zone * as an argument. * * Returns: * Nothing */ static void zone_foreach(void (*zfunc)(uma_zone_t, void *arg), void *arg) { rw_rlock(&uma_rwlock); zone_foreach_unlocked(zfunc, arg); rw_runlock(&uma_rwlock); } /* * Initialize the kernel memory allocator. This is done after pages can be * allocated but before general KVA is available. */ void uma_startup1(vm_offset_t virtual_avail) { struct uma_zctor_args args; size_t ksize, zsize, size; uma_keg_t masterkeg; uintptr_t m; uint8_t pflag; bootstart = bootmem = virtual_avail; rw_init(&uma_rwlock, "UMA lock"); sx_init(&uma_reclaim_lock, "umareclaim"); ksize = sizeof(struct uma_keg) + (sizeof(struct uma_domain) * vm_ndomains); ksize = roundup(ksize, UMA_SUPER_ALIGN); zsize = sizeof(struct uma_zone) + (sizeof(struct uma_cache) * (mp_maxid + 1)) + (sizeof(struct uma_zone_domain) * vm_ndomains); zsize = roundup(zsize, UMA_SUPER_ALIGN); /* Allocate the zone of zones, zone of kegs, and zone of zones keg. */ size = (zsize * 2) + ksize; m = (uintptr_t)startup_alloc(NULL, size, 0, &pflag, M_NOWAIT | M_ZERO); zones = (uma_zone_t)m; m += zsize; kegs = (uma_zone_t)m; m += zsize; masterkeg = (uma_keg_t)m; /* "manually" create the initial zone */ memset(&args, 0, sizeof(args)); args.name = "UMA Kegs"; args.size = ksize; args.ctor = keg_ctor; args.dtor = keg_dtor; args.uminit = zero_init; args.fini = NULL; args.keg = masterkeg; args.align = UMA_SUPER_ALIGN - 1; args.flags = UMA_ZFLAG_INTERNAL; zone_ctor(kegs, zsize, &args, M_WAITOK); args.name = "UMA Zones"; args.size = zsize; args.ctor = zone_ctor; args.dtor = zone_dtor; args.uminit = zero_init; args.fini = NULL; args.keg = NULL; args.align = UMA_SUPER_ALIGN - 1; args.flags = UMA_ZFLAG_INTERNAL; zone_ctor(zones, zsize, &args, M_WAITOK); /* Now make zones for slab headers */ slabzones[0] = uma_zcreate("UMA Slabs 0", SLABZONE0_SIZE, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); slabzones[1] = uma_zcreate("UMA Slabs 1", SLABZONE1_SIZE, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); hashzone = uma_zcreate("UMA Hash", sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); bucket_init(); } #ifndef UMA_MD_SMALL_ALLOC extern void vm_radix_reserve_kva(void); #endif /* * Advertise the availability of normal kva allocations and switch to * the default back-end allocator. Marks the KVA we consumed on startup * as used in the map. */ void uma_startup2(void) { if (!PMAP_HAS_DMAP) { vm_map_lock(kernel_map); (void)vm_map_insert(kernel_map, NULL, 0, bootstart, bootmem, VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT); vm_map_unlock(kernel_map); } #ifndef UMA_MD_SMALL_ALLOC /* Set up radix zone to use noobj_alloc. */ vm_radix_reserve_kva(); #endif booted = BOOT_KVA; zone_foreach_unlocked(zone_kva_available, NULL); bucket_enable(); } /* * Finish our initialization steps. */ static void uma_startup3(void) { #ifdef INVARIANTS TUNABLE_INT_FETCH("vm.debug.divisor", &dbg_divisor); uma_dbg_cnt = counter_u64_alloc(M_WAITOK); uma_skip_cnt = counter_u64_alloc(M_WAITOK); #endif zone_foreach_unlocked(zone_alloc_counters, NULL); zone_foreach_unlocked(zone_alloc_sysctl, NULL); callout_init(&uma_callout, 1); callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL); booted = BOOT_RUNNING; EVENTHANDLER_REGISTER(shutdown_post_sync, uma_shutdown, NULL, EVENTHANDLER_PRI_FIRST); } static void uma_shutdown(void) { booted = BOOT_SHUTDOWN; } static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini, int align, uint32_t flags) { struct uma_kctor_args args; args.size = size; args.uminit = uminit; args.fini = fini; args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align; args.flags = flags; args.zone = zone; return (zone_alloc_item(kegs, &args, UMA_ANYDOMAIN, M_WAITOK)); } /* Public functions */ /* See uma.h */ void uma_set_align(int align) { if (align != UMA_ALIGN_CACHE) uma_align_cache = align; } /* See uma.h */ uma_zone_t uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor, uma_init uminit, uma_fini fini, int align, uint32_t flags) { struct uma_zctor_args args; uma_zone_t res; KASSERT(powerof2(align + 1), ("invalid zone alignment %d for \"%s\"", align, name)); /* This stuff is essential for the zone ctor */ memset(&args, 0, sizeof(args)); args.name = name; args.size = size; args.ctor = ctor; args.dtor = dtor; args.uminit = uminit; args.fini = fini; #ifdef INVARIANTS /* * Inject procedures which check for memory use after free if we are * allowed to scramble the memory while it is not allocated. This * requires that: UMA is actually able to access the memory, no init * or fini procedures, no dependency on the initial value of the * memory, and no (legitimate) use of the memory after free. Note, * the ctor and dtor do not need to be empty. */ if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOTOUCH | UMA_ZONE_NOFREE))) && uminit == NULL && fini == NULL) { args.uminit = trash_init; args.fini = trash_fini; } #endif args.align = align; args.flags = flags; args.keg = NULL; sx_slock(&uma_reclaim_lock); res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK); sx_sunlock(&uma_reclaim_lock); return (res); } /* See uma.h */ uma_zone_t uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor, uma_init zinit, uma_fini zfini, uma_zone_t master) { struct uma_zctor_args args; uma_keg_t keg; uma_zone_t res; keg = master->uz_keg; memset(&args, 0, sizeof(args)); args.name = name; args.size = keg->uk_size; args.ctor = ctor; args.dtor = dtor; args.uminit = zinit; args.fini = zfini; args.align = keg->uk_align; args.flags = keg->uk_flags | UMA_ZONE_SECONDARY; args.keg = keg; sx_slock(&uma_reclaim_lock); res = zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK); sx_sunlock(&uma_reclaim_lock); return (res); } /* See uma.h */ uma_zone_t uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor, uma_init zinit, uma_fini zfini, uma_import zimport, uma_release zrelease, void *arg, int flags) { struct uma_zctor_args args; memset(&args, 0, sizeof(args)); args.name = name; args.size = size; args.ctor = ctor; args.dtor = dtor; args.uminit = zinit; args.fini = zfini; args.import = zimport; args.release = zrelease; args.arg = arg; args.align = 0; args.flags = flags | UMA_ZFLAG_CACHE; return (zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK)); } /* See uma.h */ void uma_zdestroy(uma_zone_t zone) { /* * Large slabs are expensive to reclaim, so don't bother doing * unnecessary work if we're shutting down. */ if (booted == BOOT_SHUTDOWN && zone->uz_fini == NULL && zone->uz_release == zone_release) return; sx_slock(&uma_reclaim_lock); zone_free_item(zones, zone, NULL, SKIP_NONE); sx_sunlock(&uma_reclaim_lock); } void uma_zwait(uma_zone_t zone) { void *item; item = uma_zalloc_arg(zone, NULL, M_WAITOK); uma_zfree(zone, item); } void * uma_zalloc_pcpu_arg(uma_zone_t zone, void *udata, int flags) { void *item; #ifdef SMP int i; MPASS(zone->uz_flags & UMA_ZONE_PCPU); #endif item = uma_zalloc_arg(zone, udata, flags & ~M_ZERO); if (item != NULL && (flags & M_ZERO)) { #ifdef SMP for (i = 0; i <= mp_maxid; i++) bzero(zpcpu_get_cpu(item, i), zone->uz_size); #else bzero(item, zone->uz_size); #endif } return (item); } /* * A stub while both regular and pcpu cases are identical. */ void uma_zfree_pcpu_arg(uma_zone_t zone, void *item, void *udata) { #ifdef SMP MPASS(zone->uz_flags & UMA_ZONE_PCPU); #endif uma_zfree_arg(zone, item, udata); } #ifdef INVARIANTS #define UMA_ALWAYS_CTORDTOR 1 #else #define UMA_ALWAYS_CTORDTOR 0 #endif static void * item_ctor(uma_zone_t zone, int size, void *udata, int flags, void *item) { #ifdef INVARIANTS bool skipdbg; skipdbg = uma_dbg_zskip(zone, item); if (!skipdbg && (zone->uz_flags & UMA_ZFLAG_TRASH) != 0 && zone->uz_ctor != trash_ctor) trash_ctor(item, size, udata, flags); #endif if (__predict_false(zone->uz_ctor != NULL) && zone->uz_ctor(item, size, udata, flags) != 0) { counter_u64_add(zone->uz_fails, 1); zone_free_item(zone, item, udata, SKIP_DTOR | SKIP_CNT); return (NULL); } #ifdef INVARIANTS if (!skipdbg) uma_dbg_alloc(zone, NULL, item); #endif if (flags & M_ZERO) bzero(item, size); return (item); } static inline void item_dtor(uma_zone_t zone, void *item, int size, void *udata, enum zfreeskip skip) { #ifdef INVARIANTS bool skipdbg; skipdbg = uma_dbg_zskip(zone, item); if (skip == SKIP_NONE && !skipdbg) { if ((zone->uz_flags & UMA_ZONE_MALLOC) != 0) uma_dbg_free(zone, udata, item); else uma_dbg_free(zone, NULL, item); } #endif if (__predict_true(skip < SKIP_DTOR)) { if (zone->uz_dtor != NULL) zone->uz_dtor(item, size, udata); #ifdef INVARIANTS if (!skipdbg && (zone->uz_flags & UMA_ZFLAG_TRASH) != 0 && zone->uz_dtor != trash_dtor) trash_dtor(item, size, udata); #endif } } /* See uma.h */ void * uma_zalloc_arg(uma_zone_t zone, void *udata, int flags) { uma_cache_bucket_t bucket; uma_cache_t cache; void *item; int domain, size, uz_flags; /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */ random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA); /* This is the fast path allocation */ CTR3(KTR_UMA, "uma_zalloc_arg zone %s(%p) flags %d", zone->uz_name, zone, flags); #ifdef WITNESS if (flags & M_WAITOK) { WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "uma_zalloc_arg: zone \"%s\"", zone->uz_name); } #endif #ifdef INVARIANTS KASSERT((flags & M_EXEC) == 0, ("uma_zalloc_arg: called with M_EXEC")); KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(), ("uma_zalloc_arg: called with spinlock or critical section held")); if (zone->uz_flags & UMA_ZONE_PCPU) KASSERT((flags & M_ZERO) == 0, ("allocating from a pcpu zone " "with M_ZERO passed")); #endif #ifdef DEBUG_MEMGUARD if (memguard_cmp_zone(zone)) { item = memguard_alloc(zone->uz_size, flags); if (item != NULL) { if (zone->uz_init != NULL && zone->uz_init(item, zone->uz_size, flags) != 0) return (NULL); if (zone->uz_ctor != NULL && zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) { counter_u64_add(zone->uz_fails, 1); zone->uz_fini(item, zone->uz_size); return (NULL); } return (item); } /* This is unfortunate but should not be fatal. */ } #endif /* * If possible, allocate from the per-CPU cache. There are two * requirements for safe access to the per-CPU cache: (1) the thread * accessing the cache must not be preempted or yield during access, * and (2) the thread must not migrate CPUs without switching which * cache it accesses. We rely on a critical section to prevent * preemption and migration. We release the critical section in * order to acquire the zone mutex if we are unable to allocate from * the current cache; when we re-acquire the critical section, we * must detect and handle migration if it has occurred. */ critical_enter(); do { cache = &zone->uz_cpu[curcpu]; bucket = &cache->uc_allocbucket; size = cache_uz_size(cache); uz_flags = cache_uz_flags(cache); if (__predict_true(bucket->ucb_cnt != 0)) { item = cache_bucket_pop(cache, bucket); critical_exit(); if (__predict_false((uz_flags & UMA_ZFLAG_CTORDTOR) != 0 || UMA_ALWAYS_CTORDTOR)) return (item_ctor(zone, size, udata, flags, item)); if (flags & M_ZERO) bzero(item, size); return (item); } } while (cache_alloc(zone, cache, udata, flags)); critical_exit(); /* * We can not get a bucket so try to return a single item. */ if (uz_flags & UMA_ZONE_FIRSTTOUCH) domain = PCPU_GET(domain); else domain = UMA_ANYDOMAIN; return (zone_alloc_item(zone, udata, domain, flags)); } /* * Replenish an alloc bucket and possibly restore an old one. Called in * a critical section. Returns in a critical section. * * A false return value indicates an allocation failure. * A true return value indicates success and the caller should retry. */ static __noinline bool cache_alloc(uma_zone_t zone, uma_cache_t cache, void *udata, int flags) { uma_zone_domain_t zdom; uma_bucket_t bucket; int domain; bool lockfail; CRITICAL_ASSERT(curthread); /* * If we have run out of items in our alloc bucket see * if we can switch with the free bucket. */ if (cache->uc_freebucket.ucb_cnt != 0) { cache_bucket_swap(&cache->uc_freebucket, &cache->uc_allocbucket); return (true); } /* * Discard any empty allocation bucket while we hold no locks. */ bucket = cache_bucket_unload_alloc(cache); critical_exit(); if (bucket != NULL) bucket_free(zone, bucket, udata); /* Short-circuit for zones without buckets and low memory. */ if (zone->uz_bucket_size == 0 || bucketdisable) { critical_enter(); return (false); } /* * Attempt to retrieve the item from the per-CPU cache has failed, so * we must go back to the zone. This requires the zone lock, so we * must drop the critical section, then re-acquire it when we go back * to the cache. Since the critical section is released, we may be * preempted or migrate. As such, make sure not to maintain any * thread-local state specific to the cache from prior to releasing * the critical section. */ lockfail = 0; if (ZONE_TRYLOCK(zone) == 0) { /* Record contention to size the buckets. */ ZONE_LOCK(zone); lockfail = 1; } /* See if we lost the race to fill the cache. */ critical_enter(); cache = &zone->uz_cpu[curcpu]; if (cache->uc_allocbucket.ucb_bucket != NULL) { ZONE_UNLOCK(zone); return (true); } /* * Check the zone's cache of buckets. */ if (zone->uz_flags & UMA_ZONE_FIRSTTOUCH) { domain = PCPU_GET(domain); zdom = &zone->uz_domain[domain]; } else { domain = UMA_ANYDOMAIN; zdom = &zone->uz_domain[0]; } if ((bucket = zone_fetch_bucket(zone, zdom)) != NULL) { ZONE_UNLOCK(zone); KASSERT(bucket->ub_cnt != 0, ("uma_zalloc_arg: Returning an empty bucket.")); cache_bucket_load_alloc(cache, bucket); return (true); } /* We are no longer associated with this CPU. */ critical_exit(); /* * We bump the uz count when the cache size is insufficient to * handle the working set. */ if (lockfail && zone->uz_bucket_size < zone->uz_bucket_size_max) zone->uz_bucket_size++; ZONE_UNLOCK(zone); /* * Fill a bucket and attempt to use it as the alloc bucket. */ bucket = zone_alloc_bucket(zone, udata, domain, flags); CTR3(KTR_UMA, "uma_zalloc: zone %s(%p) bucket zone returned %p", zone->uz_name, zone, bucket); if (bucket == NULL) { critical_enter(); return (false); } /* * See if we lost the race or were migrated. Cache the * initialized bucket to make this less likely or claim * the memory directly. */ ZONE_LOCK(zone); critical_enter(); cache = &zone->uz_cpu[curcpu]; if (cache->uc_allocbucket.ucb_bucket == NULL && ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) == 0 || domain == PCPU_GET(domain))) { cache_bucket_load_alloc(cache, bucket); zdom->uzd_imax += bucket->ub_cnt; } else if (zone->uz_bkt_count >= zone->uz_bkt_max) { critical_exit(); ZONE_UNLOCK(zone); bucket_drain(zone, bucket); bucket_free(zone, bucket, udata); critical_enter(); return (true); } else zone_put_bucket(zone, zdom, bucket, false); ZONE_UNLOCK(zone); return (true); } void * uma_zalloc_domain(uma_zone_t zone, void *udata, int domain, int flags) { /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */ random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA); /* This is the fast path allocation */ CTR4(KTR_UMA, "uma_zalloc_domain zone %s(%p) domain %d flags %d", zone->uz_name, zone, domain, flags); if (flags & M_WAITOK) { WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "uma_zalloc_domain: zone \"%s\"", zone->uz_name); } KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(), ("uma_zalloc_domain: called with spinlock or critical section held")); return (zone_alloc_item(zone, udata, domain, flags)); } /* * Find a slab with some space. Prefer slabs that are partially used over those * that are totally full. This helps to reduce fragmentation. * * If 'rr' is 1, search all domains starting from 'domain'. Otherwise check * only 'domain'. */ static uma_slab_t keg_first_slab(uma_keg_t keg, int domain, bool rr) { uma_domain_t dom; uma_slab_t slab; int start; KASSERT(domain >= 0 && domain < vm_ndomains, ("keg_first_slab: domain %d out of range", domain)); KEG_LOCK_ASSERT(keg, domain); slab = NULL; start = domain; do { dom = &keg->uk_domain[domain]; if (!LIST_EMPTY(&dom->ud_part_slab)) return (LIST_FIRST(&dom->ud_part_slab)); if (!LIST_EMPTY(&dom->ud_free_slab)) { slab = LIST_FIRST(&dom->ud_free_slab); LIST_REMOVE(slab, us_link); LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link); return (slab); } if (rr) domain = (domain + 1) % vm_ndomains; } while (domain != start); return (NULL); } /* * Fetch an existing slab from a free or partial list. Returns with the * keg domain lock held if a slab was found or unlocked if not. */ static uma_slab_t keg_fetch_free_slab(uma_keg_t keg, int domain, bool rr, int flags) { uma_slab_t slab; uint32_t reserve; /* HASH has a single free list. */ if ((keg->uk_flags & UMA_ZFLAG_HASH) != 0) domain = 0; KEG_LOCK(keg, domain); reserve = (flags & M_USE_RESERVE) != 0 ? 0 : keg->uk_reserve; if (keg->uk_domain[domain].ud_free <= reserve || (slab = keg_first_slab(keg, domain, rr)) == NULL) { KEG_UNLOCK(keg, domain); return (NULL); } return (slab); } static uma_slab_t keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, const int flags) { struct vm_domainset_iter di; uma_slab_t slab; int aflags, domain; bool rr; restart: /* * Use the keg's policy if upper layers haven't already specified a * domain (as happens with first-touch zones). * * To avoid races we run the iterator with the keg lock held, but that * means that we cannot allow the vm_domainset layer to sleep. Thus, * clear M_WAITOK and handle low memory conditions locally. */ rr = rdomain == UMA_ANYDOMAIN; if (rr) { aflags = (flags & ~M_WAITOK) | M_NOWAIT; vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, &aflags); } else { aflags = flags; domain = rdomain; } for (;;) { slab = keg_fetch_free_slab(keg, domain, rr, flags); if (slab != NULL) return (slab); /* * M_NOVM means don't ask at all! */ if (flags & M_NOVM) break; slab = keg_alloc_slab(keg, zone, domain, flags, aflags); if (slab != NULL) return (slab); if (!rr && (flags & M_WAITOK) == 0) break; if (rr && vm_domainset_iter_policy(&di, &domain) != 0) { if ((flags & M_WAITOK) != 0) { vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask); goto restart; } break; } } /* * We might not have been able to get a slab but another cpu * could have while we were unlocked. Check again before we * fail. */ if ((slab = keg_fetch_free_slab(keg, domain, rr, flags)) != NULL) return (slab); return (NULL); } static void * slab_alloc_item(uma_keg_t keg, uma_slab_t slab) { uma_domain_t dom; void *item; int freei; KEG_LOCK_ASSERT(keg, slab->us_domain); dom = &keg->uk_domain[slab->us_domain]; freei = BIT_FFS(keg->uk_ipers, &slab->us_free) - 1; BIT_CLR(keg->uk_ipers, freei, &slab->us_free); item = slab_item(slab, keg, freei); slab->us_freecount--; dom->ud_free--; /* Move this slab to the full list */ if (slab->us_freecount == 0) { LIST_REMOVE(slab, us_link); LIST_INSERT_HEAD(&dom->ud_full_slab, slab, us_link); } return (item); } static int zone_import(void *arg, void **bucket, int max, int domain, int flags) { uma_domain_t dom; uma_zone_t zone; uma_slab_t slab; uma_keg_t keg; #ifdef NUMA int stripe; #endif int i; zone = arg; slab = NULL; keg = zone->uz_keg; /* Try to keep the buckets totally full */ for (i = 0; i < max; ) { if ((slab = keg_fetch_slab(keg, zone, domain, flags)) == NULL) break; #ifdef NUMA stripe = howmany(max, vm_ndomains); #endif dom = &keg->uk_domain[slab->us_domain]; while (slab->us_freecount && i < max) { bucket[i++] = slab_alloc_item(keg, slab); if (dom->ud_free <= keg->uk_reserve) break; #ifdef NUMA /* * If the zone is striped we pick a new slab for every * N allocations. Eliminating this conditional will * instead pick a new domain for each bucket rather * than stripe within each bucket. The current option * produces more fragmentation and requires more cpu * time but yields better distribution. */ if ((zone->uz_flags & UMA_ZONE_ROUNDROBIN) != 0 && vm_ndomains > 1 && --stripe == 0) break; #endif } KEG_UNLOCK(keg, slab->us_domain); /* Don't block if we allocated any successfully. */ flags &= ~M_WAITOK; flags |= M_NOWAIT; } return i; } static int zone_alloc_limit_hard(uma_zone_t zone, int count, int flags) { uint64_t old, new, total, max; /* * The hard case. We're going to sleep because there were existing * sleepers or because we ran out of items. This routine enforces * fairness by keeping fifo order. * * First release our ill gotten gains and make some noise. */ for (;;) { zone_free_limit(zone, count); zone_log_warning(zone); zone_maxaction(zone); if (flags & M_NOWAIT) return (0); /* * We need to allocate an item or set ourself as a sleeper * while the sleepq lock is held to avoid wakeup races. This * is essentially a home rolled semaphore. */ sleepq_lock(&zone->uz_max_items); old = zone->uz_items; do { MPASS(UZ_ITEMS_SLEEPERS(old) < UZ_ITEMS_SLEEPERS_MAX); /* Cache the max since we will evaluate twice. */ max = zone->uz_max_items; if (UZ_ITEMS_SLEEPERS(old) != 0 || UZ_ITEMS_COUNT(old) >= max) new = old + UZ_ITEMS_SLEEPER; else new = old + MIN(count, max - old); } while (atomic_fcmpset_64(&zone->uz_items, &old, new) == 0); /* We may have successfully allocated under the sleepq lock. */ if (UZ_ITEMS_SLEEPERS(new) == 0) { sleepq_release(&zone->uz_max_items); return (new - old); } /* * This is in a different cacheline from uz_items so that we * don't constantly invalidate the fastpath cacheline when we * adjust item counts. This could be limited to toggling on * transitions. */ atomic_add_32(&zone->uz_sleepers, 1); atomic_add_64(&zone->uz_sleeps, 1); /* * We have added ourselves as a sleeper. The sleepq lock * protects us from wakeup races. Sleep now and then retry. */ sleepq_add(&zone->uz_max_items, NULL, "zonelimit", 0, 0); sleepq_wait(&zone->uz_max_items, PVM); /* * After wakeup, remove ourselves as a sleeper and try * again. We no longer have the sleepq lock for protection. * * Subract ourselves as a sleeper while attempting to add * our count. */ atomic_subtract_32(&zone->uz_sleepers, 1); old = atomic_fetchadd_64(&zone->uz_items, -(UZ_ITEMS_SLEEPER - count)); /* We're no longer a sleeper. */ old -= UZ_ITEMS_SLEEPER; /* * If we're still at the limit, restart. Notably do not * block on other sleepers. Cache the max value to protect * against changes via sysctl. */ total = UZ_ITEMS_COUNT(old); max = zone->uz_max_items; if (total >= max) continue; /* Truncate if necessary, otherwise wake other sleepers. */ if (total + count > max) { zone_free_limit(zone, total + count - max); count = max - total; } else if (total + count < max && UZ_ITEMS_SLEEPERS(old) != 0) wakeup_one(&zone->uz_max_items); return (count); } } /* * Allocate 'count' items from our max_items limit. Returns the number * available. If M_NOWAIT is not specified it will sleep until at least * one item can be allocated. */ static int zone_alloc_limit(uma_zone_t zone, int count, int flags) { uint64_t old; uint64_t max; max = zone->uz_max_items; MPASS(max > 0); /* * We expect normal allocations to succeed with a simple * fetchadd. */ old = atomic_fetchadd_64(&zone->uz_items, count); if (__predict_true(old + count <= max)) return (count); /* * If we had some items and no sleepers just return the * truncated value. We have to release the excess space * though because that may wake sleepers who weren't woken * because we were temporarily over the limit. */ if (old < max) { zone_free_limit(zone, (old + count) - max); return (max - old); } return (zone_alloc_limit_hard(zone, count, flags)); } /* * Free a number of items back to the limit. */ static void zone_free_limit(uma_zone_t zone, int count) { uint64_t old; MPASS(count > 0); /* * In the common case we either have no sleepers or * are still over the limit and can just return. */ old = atomic_fetchadd_64(&zone->uz_items, -count); if (__predict_true(UZ_ITEMS_SLEEPERS(old) == 0 || UZ_ITEMS_COUNT(old) - count >= zone->uz_max_items)) return; /* * Moderate the rate of wakeups. Sleepers will continue * to generate wakeups if necessary. */ wakeup_one(&zone->uz_max_items); } static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags) { uma_bucket_t bucket; int maxbucket, cnt; CTR3(KTR_UMA, "zone_alloc_bucket zone %s(%p) domain %d", zone->uz_name, zone, domain); /* Avoid allocs targeting empty domains. */ if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain)) domain = UMA_ANYDOMAIN; if (zone->uz_max_items > 0) maxbucket = zone_alloc_limit(zone, zone->uz_bucket_size, M_NOWAIT); else maxbucket = zone->uz_bucket_size; if (maxbucket == 0) return (false); /* Don't wait for buckets, preserve caller's NOVM setting. */ bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM)); if (bucket == NULL) { cnt = 0; goto out; } bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket, MIN(maxbucket, bucket->ub_entries), domain, flags); /* * Initialize the memory if necessary. */ if (bucket->ub_cnt != 0 && zone->uz_init != NULL) { int i; for (i = 0; i < bucket->ub_cnt; i++) if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size, flags) != 0) break; /* * If we couldn't initialize the whole bucket, put the * rest back onto the freelist. */ if (i != bucket->ub_cnt) { zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i], bucket->ub_cnt - i); #ifdef INVARIANTS bzero(&bucket->ub_bucket[i], sizeof(void *) * (bucket->ub_cnt - i)); #endif bucket->ub_cnt = i; } } cnt = bucket->ub_cnt; if (bucket->ub_cnt == 0) { bucket_free(zone, bucket, udata); counter_u64_add(zone->uz_fails, 1); bucket = NULL; } out: if (zone->uz_max_items > 0 && cnt < maxbucket) zone_free_limit(zone, maxbucket - cnt); return (bucket); } /* * Allocates a single item from a zone. * * Arguments * zone The zone to alloc for. * udata The data to be passed to the constructor. * domain The domain to allocate from or UMA_ANYDOMAIN. * flags M_WAITOK, M_NOWAIT, M_ZERO. * * Returns * NULL if there is no memory and M_NOWAIT is set * An item if successful */ static void * zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags) { void *item; if (zone->uz_max_items > 0 && zone_alloc_limit(zone, 1, flags) == 0) return (NULL); /* Avoid allocs targeting empty domains. */ if (domain != UMA_ANYDOMAIN && VM_DOMAIN_EMPTY(domain)) domain = UMA_ANYDOMAIN; if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1) goto fail_cnt; /* * We have to call both the zone's init (not the keg's init) * and the zone's ctor. This is because the item is going from * a keg slab directly to the user, and the user is expecting it * to be both zone-init'd as well as zone-ctor'd. */ if (zone->uz_init != NULL) { if (zone->uz_init(item, zone->uz_size, flags) != 0) { zone_free_item(zone, item, udata, SKIP_FINI | SKIP_CNT); goto fail_cnt; } } item = item_ctor(zone, zone->uz_size, udata, flags, item); if (item == NULL) goto fail; counter_u64_add(zone->uz_allocs, 1); CTR3(KTR_UMA, "zone_alloc_item item %p from %s(%p)", item, zone->uz_name, zone); return (item); fail_cnt: counter_u64_add(zone->uz_fails, 1); fail: if (zone->uz_max_items > 0) zone_free_limit(zone, 1); CTR2(KTR_UMA, "zone_alloc_item failed from %s(%p)", zone->uz_name, zone); return (NULL); } /* See uma.h */ void uma_zfree_arg(uma_zone_t zone, void *item, void *udata) { uma_cache_t cache; uma_cache_bucket_t bucket; int domain, itemdomain, uz_flags; /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */ random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA); CTR2(KTR_UMA, "uma_zfree_arg zone %s(%p)", zone->uz_name, zone); KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(), ("uma_zfree_arg: called with spinlock or critical section held")); /* uma_zfree(..., NULL) does nothing, to match free(9). */ if (item == NULL) return; #ifdef DEBUG_MEMGUARD if (is_memguard_addr(item)) { if (zone->uz_dtor != NULL) zone->uz_dtor(item, zone->uz_size, udata); if (zone->uz_fini != NULL) zone->uz_fini(item, zone->uz_size); memguard_free(item); return; } #endif /* * We are accessing the per-cpu cache without a critical section to * fetch size and flags. This is acceptable, if we are preempted we * will simply read another cpu's line. */ cache = &zone->uz_cpu[curcpu]; uz_flags = cache_uz_flags(cache); if (__predict_false((uz_flags & UMA_ZFLAG_CTORDTOR) != 0 || UMA_ALWAYS_CTORDTOR)) item_dtor(zone, item, cache_uz_size(cache), udata, SKIP_NONE); /* * The race here is acceptable. If we miss it we'll just have to wait * a little longer for the limits to be reset. */ if (__predict_false(uz_flags & UMA_ZFLAG_LIMIT)) { if (zone->uz_sleepers > 0) goto zfree_item; } /* * If possible, free to the per-CPU cache. There are two * requirements for safe access to the per-CPU cache: (1) the thread * accessing the cache must not be preempted or yield during access, * and (2) the thread must not migrate CPUs without switching which * cache it accesses. We rely on a critical section to prevent * preemption and migration. We release the critical section in * order to acquire the zone mutex if we are unable to free to the * current cache; when we re-acquire the critical section, we must * detect and handle migration if it has occurred. */ domain = itemdomain = 0; #ifdef NUMA if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0) itemdomain = _vm_phys_domain(pmap_kextract((vm_offset_t)item)); #endif critical_enter(); do { cache = &zone->uz_cpu[curcpu]; #ifdef NUMA domain = PCPU_GET(domain); if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0 && domain != itemdomain) { bucket = &cache->uc_crossbucket; } else #endif { /* * Try to free into the allocbucket first to give LIFO * ordering for cache-hot datastructures. Spill over * into the freebucket if necessary. Alloc will swap * them if one runs dry. */ bucket = &cache->uc_allocbucket; if (__predict_false(bucket->ucb_cnt >= bucket->ucb_entries)) bucket = &cache->uc_freebucket; } if (__predict_true(bucket->ucb_cnt < bucket->ucb_entries)) { cache_bucket_push(cache, bucket, item); critical_exit(); return; } } while (cache_free(zone, cache, udata, item, itemdomain)); critical_exit(); /* * If nothing else caught this, we'll just do an internal free. */ zfree_item: zone_free_item(zone, item, udata, SKIP_DTOR); } #ifdef NUMA /* * sort crossdomain free buckets to domain correct buckets and cache * them. */ static void zone_free_cross(uma_zone_t zone, uma_bucket_t bucket, void *udata) { struct uma_bucketlist fullbuckets; uma_zone_domain_t zdom; uma_bucket_t b; void *item; int domain; CTR3(KTR_UMA, "uma_zfree: zone %s(%p) draining cross bucket %p", zone->uz_name, zone, bucket); TAILQ_INIT(&fullbuckets); /* * To avoid having ndomain * ndomain buckets for sorting we have a * lock on the current crossfree bucket. A full matrix with * per-domain locking could be used if necessary. */ ZONE_CROSS_LOCK(zone); while (bucket->ub_cnt > 0) { item = bucket->ub_bucket[bucket->ub_cnt - 1]; domain = _vm_phys_domain(pmap_kextract((vm_offset_t)item)); zdom = &zone->uz_domain[domain]; if (zdom->uzd_cross == NULL) { zdom->uzd_cross = bucket_alloc(zone, udata, M_NOWAIT); if (zdom->uzd_cross == NULL) break; } zdom->uzd_cross->ub_bucket[zdom->uzd_cross->ub_cnt++] = item; if (zdom->uzd_cross->ub_cnt == zdom->uzd_cross->ub_entries) { TAILQ_INSERT_HEAD(&fullbuckets, zdom->uzd_cross, ub_link); zdom->uzd_cross = NULL; } bucket->ub_cnt--; } ZONE_CROSS_UNLOCK(zone); if (!TAILQ_EMPTY(&fullbuckets)) { ZONE_LOCK(zone); while ((b = TAILQ_FIRST(&fullbuckets)) != NULL) { TAILQ_REMOVE(&fullbuckets, b, ub_link); if (zone->uz_bkt_count >= zone->uz_bkt_max) { ZONE_UNLOCK(zone); bucket_drain(zone, b); bucket_free(zone, b, udata); ZONE_LOCK(zone); } else { domain = _vm_phys_domain( pmap_kextract( (vm_offset_t)b->ub_bucket[0])); zdom = &zone->uz_domain[domain]; zone_put_bucket(zone, zdom, b, true); } } ZONE_UNLOCK(zone); } if (bucket->ub_cnt != 0) bucket_drain(zone, bucket); bucket_free(zone, bucket, udata); } #endif static void zone_free_bucket(uma_zone_t zone, uma_bucket_t bucket, void *udata, int domain, int itemdomain) { uma_zone_domain_t zdom; #ifdef NUMA /* * Buckets coming from the wrong domain will be entirely for the * only other domain on two domain systems. In this case we can * simply cache them. Otherwise we need to sort them back to * correct domains. */ if (domain != itemdomain && vm_ndomains > 2) { zone_free_cross(zone, bucket, udata); return; } #endif /* * Attempt to save the bucket in the zone's domain bucket cache. * * We bump the uz count when the cache size is insufficient to * handle the working set. */ if (ZONE_TRYLOCK(zone) == 0) { /* Record contention to size the buckets. */ ZONE_LOCK(zone); if (zone->uz_bucket_size < zone->uz_bucket_size_max) zone->uz_bucket_size++; } CTR3(KTR_UMA, "uma_zfree: zone %s(%p) putting bucket %p on free list", zone->uz_name, zone, bucket); /* ub_cnt is pointing to the last free item */ KASSERT(bucket->ub_cnt == bucket->ub_entries, ("uma_zfree: Attempting to insert partial bucket onto the full list.\n")); if (zone->uz_bkt_count >= zone->uz_bkt_max) { ZONE_UNLOCK(zone); bucket_drain(zone, bucket); bucket_free(zone, bucket, udata); } else { zdom = &zone->uz_domain[itemdomain]; zone_put_bucket(zone, zdom, bucket, true); ZONE_UNLOCK(zone); } } /* * Populate a free or cross bucket for the current cpu cache. Free any * existing full bucket either to the zone cache or back to the slab layer. * * Enters and returns in a critical section. false return indicates that * we can not satisfy this free in the cache layer. true indicates that * the caller should retry. */ static __noinline bool cache_free(uma_zone_t zone, uma_cache_t cache, void *udata, void *item, int itemdomain) { uma_cache_bucket_t cbucket; uma_bucket_t bucket; int domain; CRITICAL_ASSERT(curthread); if (zone->uz_bucket_size == 0 || bucketdisable) return false; cache = &zone->uz_cpu[curcpu]; /* * FIRSTTOUCH domains need to free to the correct zdom. When * enabled this is the zdom of the item. The bucket is the * cross bucket if the current domain and itemdomain do not match. */ cbucket = &cache->uc_freebucket; #ifdef NUMA if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0) { domain = PCPU_GET(domain); if (domain != itemdomain) { cbucket = &cache->uc_crossbucket; if (cbucket->ucb_cnt != 0) atomic_add_64(&zone->uz_xdomain, cbucket->ucb_cnt); } } else #endif itemdomain = domain = 0; bucket = cache_bucket_unload(cbucket); /* We are no longer associated with this CPU. */ critical_exit(); if (bucket != NULL) zone_free_bucket(zone, bucket, udata, domain, itemdomain); bucket = bucket_alloc(zone, udata, M_NOWAIT); CTR3(KTR_UMA, "uma_zfree: zone %s(%p) allocated bucket %p", zone->uz_name, zone, bucket); critical_enter(); if (bucket == NULL) return (false); cache = &zone->uz_cpu[curcpu]; #ifdef NUMA /* * Check to see if we should be populating the cross bucket. If it * is already populated we will fall through and attempt to populate * the free bucket. */ if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0) { domain = PCPU_GET(domain); if (domain != itemdomain && cache->uc_crossbucket.ucb_bucket == NULL) { cache_bucket_load_cross(cache, bucket); return (true); } } #endif /* * We may have lost the race to fill the bucket or switched CPUs. */ if (cache->uc_freebucket.ucb_bucket != NULL) { critical_exit(); bucket_free(zone, bucket, udata); critical_enter(); } else cache_bucket_load_free(cache, bucket); return (true); } void uma_zfree_domain(uma_zone_t zone, void *item, void *udata) { /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */ random_harvest_fast_uma(&zone, sizeof(zone), RANDOM_UMA); CTR2(KTR_UMA, "uma_zfree_domain zone %s(%p)", zone->uz_name, zone); KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(), ("uma_zfree_domain: called with spinlock or critical section held")); /* uma_zfree(..., NULL) does nothing, to match free(9). */ if (item == NULL) return; zone_free_item(zone, item, udata, SKIP_NONE); } static void slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item) { uma_keg_t keg; uma_domain_t dom; int freei; keg = zone->uz_keg; KEG_LOCK_ASSERT(keg, slab->us_domain); /* Do we need to remove from any lists? */ dom = &keg->uk_domain[slab->us_domain]; if (slab->us_freecount+1 == keg->uk_ipers) { LIST_REMOVE(slab, us_link); LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link); } else if (slab->us_freecount == 0) { LIST_REMOVE(slab, us_link); LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link); } /* Slab management. */ freei = slab_item_index(slab, keg, item); BIT_SET(keg->uk_ipers, freei, &slab->us_free); slab->us_freecount++; /* Keg statistics. */ dom->ud_free++; } static void zone_release(void *arg, void **bucket, int cnt) { struct mtx *lock; uma_zone_t zone; uma_slab_t slab; uma_keg_t keg; uint8_t *mem; void *item; int i; zone = arg; keg = zone->uz_keg; lock = NULL; if (__predict_false((zone->uz_flags & UMA_ZFLAG_HASH) != 0)) lock = KEG_LOCK(keg, 0); for (i = 0; i < cnt; i++) { item = bucket[i]; if (__predict_true((zone->uz_flags & UMA_ZFLAG_VTOSLAB) != 0)) { slab = vtoslab((vm_offset_t)item); } else { mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK)); if ((zone->uz_flags & UMA_ZFLAG_HASH) != 0) slab = hash_sfind(&keg->uk_hash, mem); else slab = (uma_slab_t)(mem + keg->uk_pgoff); } if (lock != KEG_LOCKPTR(keg, slab->us_domain)) { if (lock != NULL) mtx_unlock(lock); lock = KEG_LOCK(keg, slab->us_domain); } slab_free_item(zone, slab, item); } if (lock != NULL) mtx_unlock(lock); } /* * Frees a single item to any zone. * * Arguments: * zone The zone to free to * item The item we're freeing * udata User supplied data for the dtor * skip Skip dtors and finis */ static void zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip) { item_dtor(zone, item, zone->uz_size, udata, skip); if (skip < SKIP_FINI && zone->uz_fini) zone->uz_fini(item, zone->uz_size); zone->uz_release(zone->uz_arg, &item, 1); if (skip & SKIP_CNT) return; counter_u64_add(zone->uz_frees, 1); if (zone->uz_max_items > 0) zone_free_limit(zone, 1); } /* See uma.h */ int uma_zone_set_max(uma_zone_t zone, int nitems) { struct uma_bucket_zone *ubz; int count; /* * XXX This can misbehave if the zone has any allocations with * no limit and a limit is imposed. There is currently no * way to clear a limit. */ ZONE_LOCK(zone); ubz = bucket_zone_max(zone, nitems); count = ubz != NULL ? ubz->ubz_entries : 0; zone->uz_bucket_size_max = zone->uz_bucket_size = count; if (zone->uz_bucket_size_min > zone->uz_bucket_size_max) zone->uz_bucket_size_min = zone->uz_bucket_size_max; zone->uz_max_items = nitems; zone->uz_flags |= UMA_ZFLAG_LIMIT; zone_update_caches(zone); /* We may need to wake waiters. */ wakeup(&zone->uz_max_items); ZONE_UNLOCK(zone); return (nitems); } /* See uma.h */ void uma_zone_set_maxcache(uma_zone_t zone, int nitems) { struct uma_bucket_zone *ubz; int bpcpu; ZONE_LOCK(zone); ubz = bucket_zone_max(zone, nitems); if (ubz != NULL) { bpcpu = 2; if ((zone->uz_flags & UMA_ZONE_FIRSTTOUCH) != 0) /* Count the cross-domain bucket. */ bpcpu++; nitems -= ubz->ubz_entries * bpcpu * mp_ncpus; zone->uz_bucket_size_max = ubz->ubz_entries; } else { zone->uz_bucket_size_max = zone->uz_bucket_size = 0; } if (zone->uz_bucket_size_min > zone->uz_bucket_size_max) zone->uz_bucket_size_min = zone->uz_bucket_size_max; zone->uz_bkt_max = nitems; ZONE_UNLOCK(zone); } /* See uma.h */ int uma_zone_get_max(uma_zone_t zone) { int nitems; nitems = atomic_load_64(&zone->uz_max_items); return (nitems); } /* See uma.h */ void uma_zone_set_warning(uma_zone_t zone, const char *warning) { ZONE_ASSERT_COLD(zone); zone->uz_warning = warning; } /* See uma.h */ void uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction) { ZONE_ASSERT_COLD(zone); TASK_INIT(&zone->uz_maxaction, 0, (task_fn_t *)maxaction, zone); } /* See uma.h */ int uma_zone_get_cur(uma_zone_t zone) { int64_t nitems; u_int i; nitems = 0; if (zone->uz_allocs != EARLY_COUNTER && zone->uz_frees != EARLY_COUNTER) nitems = counter_u64_fetch(zone->uz_allocs) - counter_u64_fetch(zone->uz_frees); CPU_FOREACH(i) nitems += atomic_load_64(&zone->uz_cpu[i].uc_allocs) - atomic_load_64(&zone->uz_cpu[i].uc_frees); return (nitems < 0 ? 0 : nitems); } static uint64_t uma_zone_get_allocs(uma_zone_t zone) { uint64_t nitems; u_int i; nitems = 0; if (zone->uz_allocs != EARLY_COUNTER) nitems = counter_u64_fetch(zone->uz_allocs); CPU_FOREACH(i) nitems += atomic_load_64(&zone->uz_cpu[i].uc_allocs); return (nitems); } static uint64_t uma_zone_get_frees(uma_zone_t zone) { uint64_t nitems; u_int i; nitems = 0; if (zone->uz_frees != EARLY_COUNTER) nitems = counter_u64_fetch(zone->uz_frees); CPU_FOREACH(i) nitems += atomic_load_64(&zone->uz_cpu[i].uc_frees); return (nitems); } #ifdef INVARIANTS /* Used only for KEG_ASSERT_COLD(). */ static uint64_t uma_keg_get_allocs(uma_keg_t keg) { uma_zone_t z; uint64_t nitems; nitems = 0; LIST_FOREACH(z, &keg->uk_zones, uz_link) nitems += uma_zone_get_allocs(z); return (nitems); } #endif /* See uma.h */ void uma_zone_set_init(uma_zone_t zone, uma_init uminit) { uma_keg_t keg; KEG_GET(zone, keg); KEG_ASSERT_COLD(keg); keg->uk_init = uminit; } /* See uma.h */ void uma_zone_set_fini(uma_zone_t zone, uma_fini fini) { uma_keg_t keg; KEG_GET(zone, keg); KEG_ASSERT_COLD(keg); keg->uk_fini = fini; } /* See uma.h */ void uma_zone_set_zinit(uma_zone_t zone, uma_init zinit) { ZONE_ASSERT_COLD(zone); zone->uz_init = zinit; } /* See uma.h */ void uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini) { ZONE_ASSERT_COLD(zone); zone->uz_fini = zfini; } /* See uma.h */ void uma_zone_set_freef(uma_zone_t zone, uma_free freef) { uma_keg_t keg; KEG_GET(zone, keg); KEG_ASSERT_COLD(keg); keg->uk_freef = freef; } /* See uma.h */ void uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf) { uma_keg_t keg; KEG_GET(zone, keg); KEG_ASSERT_COLD(keg); keg->uk_allocf = allocf; } /* See uma.h */ void uma_zone_reserve(uma_zone_t zone, int items) { uma_keg_t keg; KEG_GET(zone, keg); KEG_ASSERT_COLD(keg); keg->uk_reserve = items; } /* See uma.h */ int uma_zone_reserve_kva(uma_zone_t zone, int count) { uma_keg_t keg; vm_offset_t kva; u_int pages; KEG_GET(zone, keg); KEG_ASSERT_COLD(keg); ZONE_ASSERT_COLD(zone); pages = howmany(count, keg->uk_ipers) * keg->uk_ppera; #ifdef UMA_MD_SMALL_ALLOC if (keg->uk_ppera > 1) { #else if (1) { #endif kva = kva_alloc((vm_size_t)pages * PAGE_SIZE); if (kva == 0) return (0); } else kva = 0; ZONE_LOCK(zone); MPASS(keg->uk_kva == 0); keg->uk_kva = kva; keg->uk_offset = 0; zone->uz_max_items = pages * keg->uk_ipers; #ifdef UMA_MD_SMALL_ALLOC keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc; #else keg->uk_allocf = noobj_alloc; #endif keg->uk_flags |= UMA_ZFLAG_LIMIT | UMA_ZONE_NOFREE; zone->uz_flags |= UMA_ZFLAG_LIMIT | UMA_ZONE_NOFREE; zone_update_caches(zone); ZONE_UNLOCK(zone); return (1); } /* See uma.h */ void uma_prealloc(uma_zone_t zone, int items) { struct vm_domainset_iter di; uma_domain_t dom; uma_slab_t slab; uma_keg_t keg; int aflags, domain, slabs; KEG_GET(zone, keg); slabs = howmany(items, keg->uk_ipers); while (slabs-- > 0) { aflags = M_NOWAIT; vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, &aflags); for (;;) { slab = keg_alloc_slab(keg, zone, domain, M_WAITOK, aflags); if (slab != NULL) { dom = &keg->uk_domain[slab->us_domain]; LIST_REMOVE(slab, us_link); LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link); KEG_UNLOCK(keg, slab->us_domain); break; } if (vm_domainset_iter_policy(&di, &domain) != 0) vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask); } } } /* See uma.h */ void uma_reclaim(int req) { CTR0(KTR_UMA, "UMA: vm asked us to release pages!"); sx_xlock(&uma_reclaim_lock); bucket_enable(); switch (req) { case UMA_RECLAIM_TRIM: zone_foreach(zone_trim, NULL); break; case UMA_RECLAIM_DRAIN: case UMA_RECLAIM_DRAIN_CPU: zone_foreach(zone_drain, NULL); if (req == UMA_RECLAIM_DRAIN_CPU) { pcpu_cache_drain_safe(NULL); zone_foreach(zone_drain, NULL); } break; default: panic("unhandled reclamation request %d", req); } /* * Some slabs may have been freed but this zone will be visited early * we visit again so that we can free pages that are empty once other * zones are drained. We have to do the same for buckets. */ zone_drain(slabzones[0], NULL); zone_drain(slabzones[1], NULL); bucket_zone_drain(); sx_xunlock(&uma_reclaim_lock); } static volatile int uma_reclaim_needed; void uma_reclaim_wakeup(void) { if (atomic_fetchadd_int(&uma_reclaim_needed, 1) == 0) wakeup(uma_reclaim); } void uma_reclaim_worker(void *arg __unused) { for (;;) { sx_xlock(&uma_reclaim_lock); while (atomic_load_int(&uma_reclaim_needed) == 0) sx_sleep(uma_reclaim, &uma_reclaim_lock, PVM, "umarcl", hz); sx_xunlock(&uma_reclaim_lock); EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM); uma_reclaim(UMA_RECLAIM_DRAIN_CPU); atomic_store_int(&uma_reclaim_needed, 0); /* Don't fire more than once per-second. */ pause("umarclslp", hz); } } /* See uma.h */ void uma_zone_reclaim(uma_zone_t zone, int req) { switch (req) { case UMA_RECLAIM_TRIM: zone_trim(zone, NULL); break; case UMA_RECLAIM_DRAIN: zone_drain(zone, NULL); break; case UMA_RECLAIM_DRAIN_CPU: pcpu_cache_drain_safe(zone); zone_drain(zone, NULL); break; default: panic("unhandled reclamation request %d", req); } } /* See uma.h */ int uma_zone_exhausted(uma_zone_t zone) { return (atomic_load_32(&zone->uz_sleepers) > 0); } unsigned long uma_limit(void) { return (uma_kmem_limit); } void uma_set_limit(unsigned long limit) { uma_kmem_limit = limit; } unsigned long uma_size(void) { return (atomic_load_long(&uma_kmem_total)); } long uma_avail(void) { return (uma_kmem_limit - uma_size()); } #ifdef DDB /* * Generate statistics across both the zone and its per-cpu cache's. Return * desired statistics if the pointer is non-NULL for that statistic. * * Note: does not update the zone statistics, as it can't safely clear the * per-CPU cache statistic. * */ static void uma_zone_sumstat(uma_zone_t z, long *cachefreep, uint64_t *allocsp, uint64_t *freesp, uint64_t *sleepsp, uint64_t *xdomainp) { uma_cache_t cache; uint64_t allocs, frees, sleeps, xdomain; int cachefree, cpu; allocs = frees = sleeps = xdomain = 0; cachefree = 0; CPU_FOREACH(cpu) { cache = &z->uz_cpu[cpu]; cachefree += cache->uc_allocbucket.ucb_cnt; cachefree += cache->uc_freebucket.ucb_cnt; xdomain += cache->uc_crossbucket.ucb_cnt; cachefree += cache->uc_crossbucket.ucb_cnt; allocs += cache->uc_allocs; frees += cache->uc_frees; } allocs += counter_u64_fetch(z->uz_allocs); frees += counter_u64_fetch(z->uz_frees); sleeps += z->uz_sleeps; xdomain += z->uz_xdomain; if (cachefreep != NULL) *cachefreep = cachefree; if (allocsp != NULL) *allocsp = allocs; if (freesp != NULL) *freesp = frees; if (sleepsp != NULL) *sleepsp = sleeps; if (xdomainp != NULL) *xdomainp = xdomain; } #endif /* DDB */ static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS) { uma_keg_t kz; uma_zone_t z; int count; count = 0; rw_rlock(&uma_rwlock); LIST_FOREACH(kz, &uma_kegs, uk_link) { LIST_FOREACH(z, &kz->uk_zones, uz_link) count++; } LIST_FOREACH(z, &uma_cachezones, uz_link) count++; rw_runlock(&uma_rwlock); return (sysctl_handle_int(oidp, &count, 0, req)); } static void uma_vm_zone_stats(struct uma_type_header *uth, uma_zone_t z, struct sbuf *sbuf, struct uma_percpu_stat *ups, bool internal) { uma_zone_domain_t zdom; uma_cache_t cache; int i; for (i = 0; i < vm_ndomains; i++) { zdom = &z->uz_domain[i]; uth->uth_zone_free += zdom->uzd_nitems; } uth->uth_allocs = counter_u64_fetch(z->uz_allocs); uth->uth_frees = counter_u64_fetch(z->uz_frees); uth->uth_fails = counter_u64_fetch(z->uz_fails); uth->uth_sleeps = z->uz_sleeps; uth->uth_xdomain = z->uz_xdomain; /* * While it is not normally safe to access the cache bucket pointers * while not on the CPU that owns the cache, we only allow the pointers * to be exchanged without the zone lock held, not invalidated, so * accept the possible race associated with bucket exchange during * monitoring. Use atomic_load_ptr() to ensure that the bucket pointers * are loaded only once. */ for (i = 0; i < mp_maxid + 1; i++) { bzero(&ups[i], sizeof(*ups)); if (internal || CPU_ABSENT(i)) continue; cache = &z->uz_cpu[i]; ups[i].ups_cache_free += cache->uc_allocbucket.ucb_cnt; ups[i].ups_cache_free += cache->uc_freebucket.ucb_cnt; ups[i].ups_cache_free += cache->uc_crossbucket.ucb_cnt; ups[i].ups_allocs = cache->uc_allocs; ups[i].ups_frees = cache->uc_frees; } } static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS) { struct uma_stream_header ush; struct uma_type_header uth; struct uma_percpu_stat *ups; struct sbuf sbuf; uma_keg_t kz; uma_zone_t z; uint64_t items; uint32_t kfree, pages; int count, error, i; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf_new_for_sysctl(&sbuf, NULL, 128, req); sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL); ups = malloc((mp_maxid + 1) * sizeof(*ups), M_TEMP, M_WAITOK); count = 0; rw_rlock(&uma_rwlock); LIST_FOREACH(kz, &uma_kegs, uk_link) { LIST_FOREACH(z, &kz->uk_zones, uz_link) count++; } LIST_FOREACH(z, &uma_cachezones, uz_link) count++; /* * Insert stream header. */ bzero(&ush, sizeof(ush)); ush.ush_version = UMA_STREAM_VERSION; ush.ush_maxcpus = (mp_maxid + 1); ush.ush_count = count; (void)sbuf_bcat(&sbuf, &ush, sizeof(ush)); LIST_FOREACH(kz, &uma_kegs, uk_link) { kfree = pages = 0; for (i = 0; i < vm_ndomains; i++) { kfree += kz->uk_domain[i].ud_free; pages += kz->uk_domain[i].ud_pages; } LIST_FOREACH(z, &kz->uk_zones, uz_link) { bzero(&uth, sizeof(uth)); ZONE_LOCK(z); strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME); uth.uth_align = kz->uk_align; uth.uth_size = kz->uk_size; uth.uth_rsize = kz->uk_rsize; if (z->uz_max_items > 0) { items = UZ_ITEMS_COUNT(z->uz_items); uth.uth_pages = (items / kz->uk_ipers) * kz->uk_ppera; } else uth.uth_pages = pages; uth.uth_maxpages = (z->uz_max_items / kz->uk_ipers) * kz->uk_ppera; uth.uth_limit = z->uz_max_items; uth.uth_keg_free = kfree; /* * A zone is secondary is it is not the first entry * on the keg's zone list. */ if ((z->uz_flags & UMA_ZONE_SECONDARY) && (LIST_FIRST(&kz->uk_zones) != z)) uth.uth_zone_flags = UTH_ZONE_SECONDARY; uma_vm_zone_stats(&uth, z, &sbuf, ups, kz->uk_flags & UMA_ZFLAG_INTERNAL); ZONE_UNLOCK(z); (void)sbuf_bcat(&sbuf, &uth, sizeof(uth)); for (i = 0; i < mp_maxid + 1; i++) (void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i])); } } LIST_FOREACH(z, &uma_cachezones, uz_link) { bzero(&uth, sizeof(uth)); ZONE_LOCK(z); strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME); uth.uth_size = z->uz_size; uma_vm_zone_stats(&uth, z, &sbuf, ups, false); ZONE_UNLOCK(z); (void)sbuf_bcat(&sbuf, &uth, sizeof(uth)); for (i = 0; i < mp_maxid + 1; i++) (void)sbuf_bcat(&sbuf, &ups[i], sizeof(ups[i])); } rw_runlock(&uma_rwlock); error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); free(ups, M_TEMP); return (error); } int sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS) { uma_zone_t zone = *(uma_zone_t *)arg1; int error, max; max = uma_zone_get_max(zone); error = sysctl_handle_int(oidp, &max, 0, req); if (error || !req->newptr) return (error); uma_zone_set_max(zone, max); return (0); } int sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS) { uma_zone_t zone; int cur; /* * Some callers want to add sysctls for global zones that * may not yet exist so they pass a pointer to a pointer. */ if (arg2 == 0) zone = *(uma_zone_t *)arg1; else zone = arg1; cur = uma_zone_get_cur(zone); return (sysctl_handle_int(oidp, &cur, 0, req)); } static int sysctl_handle_uma_zone_allocs(SYSCTL_HANDLER_ARGS) { uma_zone_t zone = arg1; uint64_t cur; cur = uma_zone_get_allocs(zone); return (sysctl_handle_64(oidp, &cur, 0, req)); } static int sysctl_handle_uma_zone_frees(SYSCTL_HANDLER_ARGS) { uma_zone_t zone = arg1; uint64_t cur; cur = uma_zone_get_frees(zone); return (sysctl_handle_64(oidp, &cur, 0, req)); } static int sysctl_handle_uma_zone_flags(SYSCTL_HANDLER_ARGS) { struct sbuf sbuf; uma_zone_t zone = arg1; int error; sbuf_new_for_sysctl(&sbuf, NULL, 0, req); if (zone->uz_flags != 0) sbuf_printf(&sbuf, "0x%b", zone->uz_flags, PRINT_UMA_ZFLAGS); else sbuf_printf(&sbuf, "0"); error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); } static int sysctl_handle_uma_slab_efficiency(SYSCTL_HANDLER_ARGS) { uma_keg_t keg = arg1; int avail, effpct, total; total = keg->uk_ppera * PAGE_SIZE; if ((keg->uk_flags & UMA_ZFLAG_OFFPAGE) != 0) total += slabzone(keg->uk_ipers)->uz_keg->uk_rsize; /* * We consider the client's requested size and alignment here, not the * real size determination uk_rsize, because we also adjust the real * size for internal implementation reasons (max bitset size). */ avail = keg->uk_ipers * roundup2(keg->uk_size, keg->uk_align + 1); if ((keg->uk_flags & UMA_ZONE_PCPU) != 0) avail *= mp_maxid + 1; effpct = 100 * avail / total; return (sysctl_handle_int(oidp, &effpct, 0, req)); } static int sysctl_handle_uma_zone_items(SYSCTL_HANDLER_ARGS) { uma_zone_t zone = arg1; uint64_t cur; cur = UZ_ITEMS_COUNT(atomic_load_64(&zone->uz_items)); return (sysctl_handle_64(oidp, &cur, 0, req)); } #ifdef INVARIANTS static uma_slab_t uma_dbg_getslab(uma_zone_t zone, void *item) { uma_slab_t slab; uma_keg_t keg; uint8_t *mem; /* * It is safe to return the slab here even though the * zone is unlocked because the item's allocation state * essentially holds a reference. */ mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK)); if ((zone->uz_flags & UMA_ZFLAG_CACHE) != 0) return (NULL); if (zone->uz_flags & UMA_ZFLAG_VTOSLAB) return (vtoslab((vm_offset_t)mem)); keg = zone->uz_keg; if ((keg->uk_flags & UMA_ZFLAG_HASH) == 0) return ((uma_slab_t)(mem + keg->uk_pgoff)); KEG_LOCK(keg, 0); slab = hash_sfind(&keg->uk_hash, mem); KEG_UNLOCK(keg, 0); return (slab); } static bool uma_dbg_zskip(uma_zone_t zone, void *mem) { if ((zone->uz_flags & UMA_ZFLAG_CACHE) != 0) return (true); return (uma_dbg_kskip(zone->uz_keg, mem)); } static bool uma_dbg_kskip(uma_keg_t keg, void *mem) { uintptr_t idx; if (dbg_divisor == 0) return (true); if (dbg_divisor == 1) return (false); idx = (uintptr_t)mem >> PAGE_SHIFT; if (keg->uk_ipers > 1) { idx *= keg->uk_ipers; idx += ((uintptr_t)mem & PAGE_MASK) / keg->uk_rsize; } if ((idx / dbg_divisor) * dbg_divisor != idx) { counter_u64_add(uma_skip_cnt, 1); return (true); } counter_u64_add(uma_dbg_cnt, 1); return (false); } /* * Set up the slab's freei data such that uma_dbg_free can function. * */ static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item) { uma_keg_t keg; int freei; if (slab == NULL) { slab = uma_dbg_getslab(zone, item); if (slab == NULL) panic("uma: item %p did not belong to zone %s\n", item, zone->uz_name); } keg = zone->uz_keg; freei = slab_item_index(slab, keg, item); if (BIT_ISSET(keg->uk_ipers, freei, slab_dbg_bits(slab, keg))) panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)\n", item, zone, zone->uz_name, slab, freei); BIT_SET_ATOMIC(keg->uk_ipers, freei, slab_dbg_bits(slab, keg)); } /* * Verifies freed addresses. Checks for alignment, valid slab membership * and duplicate frees. * */ static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item) { uma_keg_t keg; int freei; if (slab == NULL) { slab = uma_dbg_getslab(zone, item); if (slab == NULL) panic("uma: Freed item %p did not belong to zone %s\n", item, zone->uz_name); } keg = zone->uz_keg; freei = slab_item_index(slab, keg, item); if (freei >= keg->uk_ipers) panic("Invalid free of %p from zone %p(%s) slab %p(%d)\n", item, zone, zone->uz_name, slab, freei); if (slab_item(slab, keg, freei) != item) panic("Unaligned free of %p from zone %p(%s) slab %p(%d)\n", item, zone, zone->uz_name, slab, freei); if (!BIT_ISSET(keg->uk_ipers, freei, slab_dbg_bits(slab, keg))) panic("Duplicate free of %p from zone %p(%s) slab %p(%d)\n", item, zone, zone->uz_name, slab, freei); BIT_CLR_ATOMIC(keg->uk_ipers, freei, slab_dbg_bits(slab, keg)); } #endif /* INVARIANTS */ #ifdef DDB static int64_t get_uma_stats(uma_keg_t kz, uma_zone_t z, uint64_t *allocs, uint64_t *used, uint64_t *sleeps, long *cachefree, uint64_t *xdomain) { uint64_t frees; int i; if (kz->uk_flags & UMA_ZFLAG_INTERNAL) { *allocs = counter_u64_fetch(z->uz_allocs); frees = counter_u64_fetch(z->uz_frees); *sleeps = z->uz_sleeps; *cachefree = 0; *xdomain = 0; } else uma_zone_sumstat(z, cachefree, allocs, &frees, sleeps, xdomain); for (i = 0; i < vm_ndomains; i++) { *cachefree += z->uz_domain[i].uzd_nitems; if (!((z->uz_flags & UMA_ZONE_SECONDARY) && (LIST_FIRST(&kz->uk_zones) != z))) *cachefree += kz->uk_domain[i].ud_free; } *used = *allocs - frees; return (((int64_t)*used + *cachefree) * kz->uk_size); } DB_SHOW_COMMAND(uma, db_show_uma) { const char *fmt_hdr, *fmt_entry; uma_keg_t kz; uma_zone_t z; uint64_t allocs, used, sleeps, xdomain; long cachefree; /* variables for sorting */ uma_keg_t cur_keg; uma_zone_t cur_zone, last_zone; int64_t cur_size, last_size, size; int ties; /* /i option produces machine-parseable CSV output */ if (modif[0] == 'i') { fmt_hdr = "%s,%s,%s,%s,%s,%s,%s,%s,%s\n"; fmt_entry = "\"%s\",%ju,%jd,%ld,%ju,%ju,%u,%jd,%ju\n"; } else { fmt_hdr = "%18s %6s %7s %7s %11s %7s %7s %10s %8s\n"; fmt_entry = "%18s %6ju %7jd %7ld %11ju %7ju %7u %10jd %8ju\n"; } db_printf(fmt_hdr, "Zone", "Size", "Used", "Free", "Requests", "Sleeps", "Bucket", "Total Mem", "XFree"); /* Sort the zones with largest size first. */ last_zone = NULL; last_size = INT64_MAX; for (;;) { cur_zone = NULL; cur_size = -1; ties = 0; LIST_FOREACH(kz, &uma_kegs, uk_link) { LIST_FOREACH(z, &kz->uk_zones, uz_link) { /* * In the case of size ties, print out zones * in the order they are encountered. That is, * when we encounter the most recently output * zone, we have already printed all preceding * ties, and we must print all following ties. */ if (z == last_zone) { ties = 1; continue; } size = get_uma_stats(kz, z, &allocs, &used, &sleeps, &cachefree, &xdomain); if (size > cur_size && size < last_size + ties) { cur_size = size; cur_zone = z; cur_keg = kz; } } } if (cur_zone == NULL) break; size = get_uma_stats(cur_keg, cur_zone, &allocs, &used, &sleeps, &cachefree, &xdomain); db_printf(fmt_entry, cur_zone->uz_name, (uintmax_t)cur_keg->uk_size, (intmax_t)used, cachefree, (uintmax_t)allocs, (uintmax_t)sleeps, (unsigned)cur_zone->uz_bucket_size, (intmax_t)size, xdomain); if (db_pager_quit) return; last_zone = cur_zone; last_size = cur_size; } } DB_SHOW_COMMAND(umacache, db_show_umacache) { uma_zone_t z; uint64_t allocs, frees; long cachefree; int i; db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free", "Requests", "Bucket"); LIST_FOREACH(z, &uma_cachezones, uz_link) { uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL, NULL); for (i = 0; i < vm_ndomains; i++) cachefree += z->uz_domain[i].uzd_nitems; db_printf("%18s %8ju %8jd %8ld %12ju %8u\n", z->uz_name, (uintmax_t)z->uz_size, (intmax_t)(allocs - frees), cachefree, (uintmax_t)allocs, z->uz_bucket_size); if (db_pager_quit) return; } } #endif /* DDB */ Index: projects/clang1000-import/sys/vm/vm_fault.c =================================================================== --- projects/clang1000-import/sys/vm/vm_fault.c (revision 356919) +++ projects/clang1000-import/sys/vm/vm_fault.c (revision 356920) @@ -1,1903 +1,1905 @@ /*- * SPDX-License-Identifier: (BSD-4-Clause AND MIT-CMU) * * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_fault.c 8.4 (Berkeley) 1/12/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Page fault handling module. */ #include __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include #include #include #define PFBAK 4 #define PFFOR 4 #define VM_FAULT_READ_DEFAULT (1 + VM_FAULT_READ_AHEAD_INIT) #define VM_FAULT_READ_MAX (1 + VM_FAULT_READ_AHEAD_MAX) #define VM_FAULT_DONTNEED_MIN 1048576 struct faultstate { vm_page_t m; vm_page_t m_cow; vm_object_t object; vm_pindex_t pindex; vm_page_t first_m; vm_object_t first_object; vm_pindex_t first_pindex; vm_map_t map; vm_map_entry_t entry; int map_generation; bool lookup_still_valid; struct vnode *vp; }; static void vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr, int ahead); static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra, int backward, int forward, bool obj_locked); static int vm_pfault_oom_attempts = 3; SYSCTL_INT(_vm, OID_AUTO, pfault_oom_attempts, CTLFLAG_RWTUN, &vm_pfault_oom_attempts, 0, "Number of page allocation attempts in page fault handler before it " "triggers OOM handling"); static int vm_pfault_oom_wait = 10; SYSCTL_INT(_vm, OID_AUTO, pfault_oom_wait, CTLFLAG_RWTUN, &vm_pfault_oom_wait, 0, "Number of seconds to wait for free pages before retrying " "the page fault handler"); static inline void fault_page_release(vm_page_t *mp) { vm_page_t m; m = *mp; if (m != NULL) { /* * We are likely to loop around again and attempt to busy * this page. Deactivating it leaves it available for * pageout while optimizing fault restarts. */ vm_page_deactivate(m); vm_page_xunbusy(m); *mp = NULL; } } static inline void fault_page_free(vm_page_t *mp) { vm_page_t m; m = *mp; if (m != NULL) { VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_wired(m)) vm_page_free(m); else vm_page_xunbusy(m); *mp = NULL; } } static inline void unlock_map(struct faultstate *fs) { if (fs->lookup_still_valid) { vm_map_lookup_done(fs->map, fs->entry); fs->lookup_still_valid = false; } } static void unlock_vp(struct faultstate *fs) { if (fs->vp != NULL) { vput(fs->vp); fs->vp = NULL; } } static void fault_deallocate(struct faultstate *fs) { fault_page_release(&fs->m_cow); fault_page_release(&fs->m); vm_object_pip_wakeup(fs->object); if (fs->object != fs->first_object) { VM_OBJECT_WLOCK(fs->first_object); fault_page_free(&fs->first_m); VM_OBJECT_WUNLOCK(fs->first_object); vm_object_pip_wakeup(fs->first_object); } vm_object_deallocate(fs->first_object); unlock_map(fs); unlock_vp(fs); } static void unlock_and_deallocate(struct faultstate *fs) { VM_OBJECT_WUNLOCK(fs->object); fault_deallocate(fs); } static void vm_fault_dirty(vm_map_entry_t entry, vm_page_t m, vm_prot_t prot, vm_prot_t fault_type, int fault_flags) { bool need_dirty; if (((prot & VM_PROT_WRITE) == 0 && (fault_flags & VM_FAULT_DIRTY) == 0) || (m->oflags & VPO_UNMANAGED) != 0) return; VM_PAGE_OBJECT_BUSY_ASSERT(m); need_dirty = ((fault_type & VM_PROT_WRITE) != 0 && (fault_flags & VM_FAULT_WIRE) == 0) || (fault_flags & VM_FAULT_DIRTY) != 0; vm_object_set_writeable_dirty(m->object); /* * If the fault is a write, we know that this page is being * written NOW so dirty it explicitly to save on * pmap_is_modified() calls later. * * Also, since the page is now dirty, we can possibly tell * the pager to release any swap backing the page. */ if (need_dirty && vm_page_set_dirty(m) == 0) { /* * If this is a NOSYNC mmap we do not want to set PGA_NOSYNC * if the page is already dirty to prevent data written with * the expectation of being synced from not being synced. * Likewise if this entry does not request NOSYNC then make * sure the page isn't marked NOSYNC. Applications sharing * data should use the same flags to avoid ping ponging. */ if ((entry->eflags & MAP_ENTRY_NOSYNC) != 0) vm_page_aflag_set(m, PGA_NOSYNC); else vm_page_aflag_clear(m, PGA_NOSYNC); } } /* * Unlocks fs.first_object and fs.map on success. */ static int vm_fault_soft_fast(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot, int fault_type, int fault_flags, boolean_t wired, vm_page_t *m_hold) { vm_page_t m, m_map; #if (defined(__aarch64__) || defined(__amd64__) || (defined(__arm__) && \ __ARM_ARCH >= 6) || defined(__i386__) || defined(__riscv)) && \ VM_NRESERVLEVEL > 0 vm_page_t m_super; int flags; #endif int psind, rv; MPASS(fs->vp == NULL); vm_object_busy(fs->first_object); m = vm_page_lookup(fs->first_object, fs->first_pindex); /* A busy page can be mapped for read|execute access. */ if (m == NULL || ((prot & VM_PROT_WRITE) != 0 && vm_page_busied(m)) || !vm_page_all_valid(m)) { rv = KERN_FAILURE; goto out; } m_map = m; psind = 0; #if (defined(__aarch64__) || defined(__amd64__) || (defined(__arm__) && \ __ARM_ARCH >= 6) || defined(__i386__) || defined(__riscv)) && \ VM_NRESERVLEVEL > 0 if ((m->flags & PG_FICTITIOUS) == 0 && (m_super = vm_reserv_to_superpage(m)) != NULL && rounddown2(vaddr, pagesizes[m_super->psind]) >= fs->entry->start && roundup2(vaddr + 1, pagesizes[m_super->psind]) <= fs->entry->end && (vaddr & (pagesizes[m_super->psind] - 1)) == (VM_PAGE_TO_PHYS(m) & (pagesizes[m_super->psind] - 1)) && !wired && pmap_ps_enabled(fs->map->pmap)) { flags = PS_ALL_VALID; if ((prot & VM_PROT_WRITE) != 0) { /* * Create a superpage mapping allowing write access * only if none of the constituent pages are busy and * all of them are already dirty (except possibly for * the page that was faulted on). */ flags |= PS_NONE_BUSY; if ((fs->first_object->flags & OBJ_UNMANAGED) == 0) flags |= PS_ALL_DIRTY; } if (vm_page_ps_test(m_super, flags, m)) { m_map = m_super; psind = m_super->psind; vaddr = rounddown2(vaddr, pagesizes[psind]); /* Preset the modified bit for dirty superpages. */ if ((flags & PS_ALL_DIRTY) != 0) fault_type |= VM_PROT_WRITE; } } #endif rv = pmap_enter(fs->map->pmap, vaddr, m_map, prot, fault_type | PMAP_ENTER_NOSLEEP | (wired ? PMAP_ENTER_WIRED : 0), psind); if (rv != KERN_SUCCESS) goto out; if (m_hold != NULL) { *m_hold = m; vm_page_wire(m); } vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags); if (psind == 0 && !wired) vm_fault_prefault(fs, vaddr, PFBAK, PFFOR, true); VM_OBJECT_RUNLOCK(fs->first_object); vm_map_lookup_done(fs->map, fs->entry); curthread->td_ru.ru_minflt++; out: vm_object_unbusy(fs->first_object); return (rv); } static void vm_fault_restore_map_lock(struct faultstate *fs) { VM_OBJECT_ASSERT_WLOCKED(fs->first_object); MPASS(REFCOUNT_COUNT(fs->first_object->paging_in_progress) > 0); if (!vm_map_trylock_read(fs->map)) { VM_OBJECT_WUNLOCK(fs->first_object); vm_map_lock_read(fs->map); VM_OBJECT_WLOCK(fs->first_object); } fs->lookup_still_valid = true; } static void vm_fault_populate_check_page(vm_page_t m) { /* * Check each page to ensure that the pager is obeying the * interface: the page must be installed in the object, fully * valid, and exclusively busied. */ MPASS(m != NULL); MPASS(vm_page_all_valid(m)); MPASS(vm_page_xbusied(m)); } static void vm_fault_populate_cleanup(vm_object_t object, vm_pindex_t first, vm_pindex_t last) { vm_page_t m; vm_pindex_t pidx; VM_OBJECT_ASSERT_WLOCKED(object); MPASS(first <= last); for (pidx = first, m = vm_page_lookup(object, pidx); pidx <= last; pidx++, m = vm_page_next(m)) { vm_fault_populate_check_page(m); vm_page_deactivate(m); vm_page_xunbusy(m); } } static int vm_fault_populate(struct faultstate *fs, vm_prot_t prot, int fault_type, int fault_flags, boolean_t wired, vm_page_t *m_hold) { vm_offset_t vaddr; vm_page_t m; vm_pindex_t map_first, map_last, pager_first, pager_last, pidx; int i, npages, psind, rv; MPASS(fs->object == fs->first_object); VM_OBJECT_ASSERT_WLOCKED(fs->first_object); MPASS(REFCOUNT_COUNT(fs->first_object->paging_in_progress) > 0); MPASS(fs->first_object->backing_object == NULL); MPASS(fs->lookup_still_valid); pager_first = OFF_TO_IDX(fs->entry->offset); pager_last = pager_first + atop(fs->entry->end - fs->entry->start) - 1; unlock_map(fs); unlock_vp(fs); /* * Call the pager (driver) populate() method. * * There is no guarantee that the method will be called again * if the current fault is for read, and a future fault is * for write. Report the entry's maximum allowed protection * to the driver. */ rv = vm_pager_populate(fs->first_object, fs->first_pindex, fault_type, fs->entry->max_protection, &pager_first, &pager_last); VM_OBJECT_ASSERT_WLOCKED(fs->first_object); if (rv == VM_PAGER_BAD) { /* * VM_PAGER_BAD is the backdoor for a pager to request * normal fault handling. */ vm_fault_restore_map_lock(fs); if (fs->map->timestamp != fs->map_generation) return (KERN_RESOURCE_SHORTAGE); /* RetryFault */ return (KERN_NOT_RECEIVER); } if (rv != VM_PAGER_OK) return (KERN_FAILURE); /* AKA SIGSEGV */ /* Ensure that the driver is obeying the interface. */ MPASS(pager_first <= pager_last); MPASS(fs->first_pindex <= pager_last); MPASS(fs->first_pindex >= pager_first); MPASS(pager_last < fs->first_object->size); vm_fault_restore_map_lock(fs); if (fs->map->timestamp != fs->map_generation) { vm_fault_populate_cleanup(fs->first_object, pager_first, pager_last); return (KERN_RESOURCE_SHORTAGE); /* RetryFault */ } /* * The map is unchanged after our last unlock. Process the fault. * * The range [pager_first, pager_last] that is given to the * pager is only a hint. The pager may populate any range * within the object that includes the requested page index. * In case the pager expanded the range, clip it to fit into * the map entry. */ map_first = OFF_TO_IDX(fs->entry->offset); if (map_first > pager_first) { vm_fault_populate_cleanup(fs->first_object, pager_first, map_first - 1); pager_first = map_first; } map_last = map_first + atop(fs->entry->end - fs->entry->start) - 1; if (map_last < pager_last) { vm_fault_populate_cleanup(fs->first_object, map_last + 1, pager_last); pager_last = map_last; } for (pidx = pager_first, m = vm_page_lookup(fs->first_object, pidx); pidx <= pager_last; pidx += npages, m = vm_page_next(&m[npages - 1])) { vaddr = fs->entry->start + IDX_TO_OFF(pidx) - fs->entry->offset; #if defined(__aarch64__) || defined(__amd64__) || (defined(__arm__) && \ __ARM_ARCH >= 6) || defined(__i386__) || defined(__riscv) psind = m->psind; if (psind > 0 && ((vaddr & (pagesizes[psind] - 1)) != 0 || pidx + OFF_TO_IDX(pagesizes[psind]) - 1 > pager_last || !pmap_ps_enabled(fs->map->pmap) || wired)) psind = 0; #else psind = 0; #endif npages = atop(pagesizes[psind]); for (i = 0; i < npages; i++) { vm_fault_populate_check_page(&m[i]); vm_fault_dirty(fs->entry, &m[i], prot, fault_type, fault_flags); } VM_OBJECT_WUNLOCK(fs->first_object); rv = pmap_enter(fs->map->pmap, vaddr, m, prot, fault_type | (wired ? PMAP_ENTER_WIRED : 0), psind); #if defined(__amd64__) if (psind > 0 && rv == KERN_FAILURE) { for (i = 0; i < npages; i++) { rv = pmap_enter(fs->map->pmap, vaddr + ptoa(i), &m[i], prot, fault_type | (wired ? PMAP_ENTER_WIRED : 0), 0); MPASS(rv == KERN_SUCCESS); } } #else MPASS(rv == KERN_SUCCESS); #endif VM_OBJECT_WLOCK(fs->first_object); for (i = 0; i < npages; i++) { if ((fault_flags & VM_FAULT_WIRE) != 0) vm_page_wire(&m[i]); else vm_page_activate(&m[i]); if (m_hold != NULL && m[i].pindex == fs->first_pindex) { *m_hold = &m[i]; vm_page_wire(&m[i]); } vm_page_xunbusy(&m[i]); } } curthread->td_ru.ru_majflt++; return (KERN_SUCCESS); } static int prot_fault_translation; SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RWTUN, &prot_fault_translation, 0, "Control signal to deliver on protection fault"); /* compat definition to keep common code for signal translation */ #define UCODE_PAGEFLT 12 #ifdef T_PAGEFLT _Static_assert(UCODE_PAGEFLT == T_PAGEFLT, "T_PAGEFLT"); #endif /* * vm_fault_trap: * * Handle a page fault occurring at the given address, * requiring the given permissions, in the map specified. * If successful, the page is inserted into the * associated physical map. * * NOTE: the given address should be truncated to the * proper page address. * * KERN_SUCCESS is returned if the page fault is handled; otherwise, * a standard error specifying why the fault is fatal is returned. * * The map in question must be referenced, and remains so. * Caller may hold no locks. */ int vm_fault_trap(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags, int *signo, int *ucode) { int result; MPASS(signo == NULL || ucode != NULL); #ifdef KTRACE if (map != kernel_map && KTRPOINT(curthread, KTR_FAULT)) ktrfault(vaddr, fault_type); #endif result = vm_fault(map, trunc_page(vaddr), fault_type, fault_flags, NULL); KASSERT(result == KERN_SUCCESS || result == KERN_FAILURE || result == KERN_INVALID_ADDRESS || result == KERN_RESOURCE_SHORTAGE || result == KERN_PROTECTION_FAILURE || result == KERN_OUT_OF_BOUNDS, ("Unexpected Mach error %d from vm_fault()", result)); #ifdef KTRACE if (map != kernel_map && KTRPOINT(curthread, KTR_FAULTEND)) ktrfaultend(result); #endif if (result != KERN_SUCCESS && signo != NULL) { switch (result) { case KERN_FAILURE: case KERN_INVALID_ADDRESS: *signo = SIGSEGV; *ucode = SEGV_MAPERR; break; case KERN_RESOURCE_SHORTAGE: *signo = SIGBUS; *ucode = BUS_OOMERR; break; case KERN_OUT_OF_BOUNDS: *signo = SIGBUS; *ucode = BUS_OBJERR; break; case KERN_PROTECTION_FAILURE: if (prot_fault_translation == 0) { /* * Autodetect. This check also covers * the images without the ABI-tag ELF * note. */ if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && curproc->p_osrel >= P_OSREL_SIGSEGV) { *signo = SIGSEGV; *ucode = SEGV_ACCERR; } else { *signo = SIGBUS; *ucode = UCODE_PAGEFLT; } } else if (prot_fault_translation == 1) { /* Always compat mode. */ *signo = SIGBUS; *ucode = UCODE_PAGEFLT; } else { /* Always SIGSEGV mode. */ *signo = SIGSEGV; *ucode = SEGV_ACCERR; } break; default: KASSERT(0, ("Unexpected Mach error %d from vm_fault()", result)); break; } } return (result); } static int vm_fault_lock_vnode(struct faultstate *fs) { struct vnode *vp; int error, locked; if (fs->object->type != OBJT_VNODE) return (KERN_SUCCESS); vp = fs->object->handle; if (vp == fs->vp) { ASSERT_VOP_LOCKED(vp, "saved vnode is not locked"); return (KERN_SUCCESS); } /* * Perform an unlock in case the desired vnode changed while * the map was unlocked during a retry. */ unlock_vp(fs); locked = VOP_ISLOCKED(vp); if (locked != LK_EXCLUSIVE) locked = LK_SHARED; /* * We must not sleep acquiring the vnode lock while we have * the page exclusive busied or the object's * paging-in-progress count incremented. Otherwise, we could * deadlock. */ error = vget(vp, locked | LK_CANRECURSE | LK_NOWAIT, curthread); if (error == 0) { fs->vp = vp; return (KERN_SUCCESS); } vhold(vp); unlock_and_deallocate(fs); error = vget(vp, locked | LK_RETRY | LK_CANRECURSE, curthread); vdrop(vp); fs->vp = vp; KASSERT(error == 0, ("vm_fault: vget failed %d", error)); return (KERN_RESOURCE_SHORTAGE); } /* * Wait/Retry if the page is busy. We have to do this if the page is * either exclusive or shared busy because the vm_pager may be using * read busy for pageouts (and even pageins if it is the vnode pager), * and we could end up trying to pagein and pageout the same page * simultaneously. * * We can theoretically allow the busy case on a read fault if the page * is marked valid, but since such pages are typically already pmap'd, * putting that special case in might be more effort then it is worth. * We cannot under any circumstances mess around with a shared busied * page except, perhaps, to pmap it. */ static void vm_fault_busy_sleep(struct faultstate *fs) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. */ vm_page_aflag_set(fs->m, PGA_REFERENCED); if (fs->object != fs->first_object) { fault_page_release(&fs->first_m); vm_object_pip_wakeup(fs->first_object); } vm_object_pip_wakeup(fs->object); unlock_map(fs); if (fs->m == vm_page_lookup(fs->object, fs->pindex)) vm_page_busy_sleep(fs->m, "vmpfw", false); else VM_OBJECT_WUNLOCK(fs->object); VM_CNT_INC(v_intrans); vm_object_deallocate(fs->first_object); } int vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags, vm_page_t *m_hold) { struct faultstate fs; struct domainset *dset; vm_object_t next_object, retry_object; vm_offset_t e_end, e_start; vm_pindex_t retry_pindex; vm_prot_t prot, retry_prot; int ahead, alloc_req, behind, cluster_offset, era, faultcount; int nera, oom, result, rv; u_char behavior; boolean_t wired; /* Passed by reference. */ bool dead, hardfault, is_first_object_locked; VM_CNT_INC(v_vm_faults); if ((curthread->td_pflags & TDP_NOFAULTING) != 0) return (KERN_PROTECTION_FAILURE); fs.vp = NULL; faultcount = 0; nera = -1; hardfault = false; RetryFault: oom = 0; RetryFault_oom: /* * Find the backing store object and offset into it to begin the * search. */ fs.map = map; result = vm_map_lookup(&fs.map, vaddr, fault_type | VM_PROT_FAULT_LOOKUP, &fs.entry, &fs.first_object, &fs.first_pindex, &prot, &wired); if (result != KERN_SUCCESS) { unlock_vp(&fs); return (result); } fs.map_generation = fs.map->timestamp; if (fs.entry->eflags & MAP_ENTRY_NOFAULT) { panic("%s: fault on nofault entry, addr: %#lx", __func__, (u_long)vaddr); } if (fs.entry->eflags & MAP_ENTRY_IN_TRANSITION && fs.entry->wiring_thread != curthread) { vm_map_unlock_read(fs.map); vm_map_lock(fs.map); if (vm_map_lookup_entry(fs.map, vaddr, &fs.entry) && (fs.entry->eflags & MAP_ENTRY_IN_TRANSITION)) { unlock_vp(&fs); fs.entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; vm_map_unlock_and_wait(fs.map, 0); } else vm_map_unlock(fs.map); goto RetryFault; } MPASS((fs.entry->eflags & MAP_ENTRY_GUARD) == 0); if (wired) fault_type = prot | (fault_type & VM_PROT_COPY); else KASSERT((fault_flags & VM_FAULT_WIRE) == 0, ("!wired && VM_FAULT_WIRE")); /* * Try to avoid lock contention on the top-level object through * special-case handling of some types of page faults, specifically, * those that are mapping an existing page from the top-level object. * Under this condition, a read lock on the object suffices, allowing * multiple page faults of a similar type to run in parallel. */ if (fs.vp == NULL /* avoid locked vnode leak */ && (fault_flags & (VM_FAULT_WIRE | VM_FAULT_DIRTY)) == 0) { VM_OBJECT_RLOCK(fs.first_object); rv = vm_fault_soft_fast(&fs, vaddr, prot, fault_type, fault_flags, wired, m_hold); if (rv == KERN_SUCCESS) return (rv); if (!VM_OBJECT_TRYUPGRADE(fs.first_object)) { VM_OBJECT_RUNLOCK(fs.first_object); VM_OBJECT_WLOCK(fs.first_object); } } else { VM_OBJECT_WLOCK(fs.first_object); } /* * Make a reference to this object to prevent its disposal while we * are messing with it. Once we have the reference, the map is free * to be diddled. Since objects reference their shadows (and copies), * they will stay around as well. * * Bump the paging-in-progress count to prevent size changes (e.g. * truncation operations) during I/O. */ vm_object_reference_locked(fs.first_object); vm_object_pip_add(fs.first_object, 1); fs.lookup_still_valid = true; fs.m_cow = fs.m = fs.first_m = NULL; /* * Search for the page at object/offset. */ fs.object = fs.first_object; fs.pindex = fs.first_pindex; while (TRUE) { KASSERT(fs.m == NULL, ("page still set %p at loop start", fs.m)); /* * If the object is marked for imminent termination, * we retry here, since the collapse pass has raced * with us. Otherwise, if we see terminally dead * object, return fail. */ if ((fs.object->flags & OBJ_DEAD) != 0) { dead = fs.object->type == OBJT_DEAD; unlock_and_deallocate(&fs); if (dead) return (KERN_PROTECTION_FAILURE); pause("vmf_de", 1); goto RetryFault; } /* * See if page is resident */ fs.m = vm_page_lookup(fs.object, fs.pindex); if (fs.m != NULL) { if (vm_page_tryxbusy(fs.m) == 0) { vm_fault_busy_sleep(&fs); goto RetryFault; } /* * The page is marked busy for other processes and the * pagedaemon. If it still isn't completely valid * (readable), jump to readrest, else break-out ( we * found the page ). */ if (!vm_page_all_valid(fs.m)) goto readrest; break; /* break to PAGE HAS BEEN FOUND */ } KASSERT(fs.m == NULL, ("fs.m should be NULL, not %p", fs.m)); /* * Page is not resident. If the pager might contain the page * or this is the beginning of the search, allocate a new * page. (Default objects are zero-fill, so there is no real * pager for them.) */ if (fs.object->type != OBJT_DEFAULT || fs.object == fs.first_object) { if ((fs.object->flags & OBJ_SIZEVNLOCK) != 0) { rv = vm_fault_lock_vnode(&fs); MPASS(rv == KERN_SUCCESS || rv == KERN_RESOURCE_SHORTAGE); if (rv == KERN_RESOURCE_SHORTAGE) goto RetryFault; } if (fs.pindex >= fs.object->size) { unlock_and_deallocate(&fs); return (KERN_OUT_OF_BOUNDS); } if (fs.object == fs.first_object && (fs.first_object->flags & OBJ_POPULATE) != 0 && fs.first_object->shadow_count == 0) { rv = vm_fault_populate(&fs, prot, fault_type, fault_flags, wired, m_hold); switch (rv) { case KERN_SUCCESS: case KERN_FAILURE: unlock_and_deallocate(&fs); return (rv); case KERN_RESOURCE_SHORTAGE: unlock_and_deallocate(&fs); goto RetryFault; case KERN_NOT_RECEIVER: /* * Pager's populate() method * returned VM_PAGER_BAD. */ break; default: panic("inconsistent return codes"); } } /* * Allocate a new page for this object/offset pair. * * Unlocked read of the p_flag is harmless. At * worst, the P_KILLED might be not observed * there, and allocation can fail, causing * restart and new reading of the p_flag. */ dset = fs.object->domain.dr_policy; if (dset == NULL) dset = curthread->td_domain.dr_policy; if (!vm_page_count_severe_set(&dset->ds_mask) || P_KILLED(curproc)) { #if VM_NRESERVLEVEL > 0 vm_object_color(fs.object, atop(vaddr) - fs.pindex); #endif alloc_req = P_KILLED(curproc) ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL; if (fs.object->type != OBJT_VNODE && fs.object->backing_object == NULL) alloc_req |= VM_ALLOC_ZERO; fs.m = vm_page_alloc(fs.object, fs.pindex, alloc_req); } if (fs.m == NULL) { unlock_and_deallocate(&fs); if (vm_pfault_oom_attempts < 0 || oom < vm_pfault_oom_attempts) { oom++; vm_waitpfault(dset, vm_pfault_oom_wait * hz); goto RetryFault_oom; } if (bootverbose) printf( "proc %d (%s) failed to alloc page on fault, starting OOM\n", curproc->p_pid, curproc->p_comm); vm_pageout_oom(VM_OOM_MEM_PF); goto RetryFault; } } readrest: /* * At this point, we have either allocated a new page or found * an existing page that is only partially valid. * * We hold a reference on the current object and the page is * exclusive busied. */ /* * If the pager for the current object might have the page, * then determine the number of additional pages to read and * potentially reprioritize previously read pages for earlier * reclamation. These operations should only be performed * once per page fault. Even if the current pager doesn't * have the page, the number of additional pages to read will * apply to subsequent objects in the shadow chain. */ if (fs.object->type != OBJT_DEFAULT && nera == -1 && !P_KILLED(curproc)) { KASSERT(fs.lookup_still_valid, ("map unlocked")); era = fs.entry->read_ahead; behavior = vm_map_entry_behavior(fs.entry); if (behavior == MAP_ENTRY_BEHAV_RANDOM) { nera = 0; } else if (behavior == MAP_ENTRY_BEHAV_SEQUENTIAL) { nera = VM_FAULT_READ_AHEAD_MAX; if (vaddr == fs.entry->next_read) vm_fault_dontneed(&fs, vaddr, nera); } else if (vaddr == fs.entry->next_read) { /* * This is a sequential fault. Arithmetically * increase the requested number of pages in * the read-ahead window. The requested * number of pages is "# of sequential faults * x (read ahead min + 1) + read ahead min" */ nera = VM_FAULT_READ_AHEAD_MIN; if (era > 0) { nera += era + 1; if (nera > VM_FAULT_READ_AHEAD_MAX) nera = VM_FAULT_READ_AHEAD_MAX; } if (era == VM_FAULT_READ_AHEAD_MAX) vm_fault_dontneed(&fs, vaddr, nera); } else { /* * This is a non-sequential fault. */ nera = 0; } if (era != nera) { /* * A read lock on the map suffices to update * the read ahead count safely. */ fs.entry->read_ahead = nera; } /* * Prepare for unlocking the map. Save the map * entry's start and end addresses, which are used to * optimize the size of the pager operation below. * Even if the map entry's addresses change after * unlocking the map, using the saved addresses is * safe. */ e_start = fs.entry->start; e_end = fs.entry->end; } /* * Call the pager to retrieve the page if there is a chance * that the pager has it, and potentially retrieve additional * pages at the same time. */ if (fs.object->type != OBJT_DEFAULT) { /* * Release the map lock before locking the vnode or * sleeping in the pager. (If the current object has * a shadow, then an earlier iteration of this loop * may have already unlocked the map.) */ unlock_map(&fs); rv = vm_fault_lock_vnode(&fs); MPASS(rv == KERN_SUCCESS || rv == KERN_RESOURCE_SHORTAGE); if (rv == KERN_RESOURCE_SHORTAGE) goto RetryFault; KASSERT(fs.vp == NULL || !fs.map->system_map, ("vm_fault: vnode-backed object mapped by system map")); /* * Page in the requested page and hint the pager, * that it may bring up surrounding pages. */ if (nera == -1 || behavior == MAP_ENTRY_BEHAV_RANDOM || P_KILLED(curproc)) { behind = 0; ahead = 0; } else { /* Is this a sequential fault? */ if (nera > 0) { behind = 0; ahead = nera; } else { /* * Request a cluster of pages that is * aligned to a VM_FAULT_READ_DEFAULT * page offset boundary within the * object. Alignment to a page offset * boundary is more likely to coincide * with the underlying file system * block than alignment to a virtual * address boundary. */ cluster_offset = fs.pindex % VM_FAULT_READ_DEFAULT; behind = ulmin(cluster_offset, atop(vaddr - e_start)); ahead = VM_FAULT_READ_DEFAULT - 1 - cluster_offset; } ahead = ulmin(ahead, atop(e_end - vaddr) - 1); } + VM_OBJECT_WUNLOCK(fs.object); rv = vm_pager_get_pages(fs.object, &fs.m, 1, &behind, &ahead); + VM_OBJECT_WLOCK(fs.object); if (rv == VM_PAGER_OK) { faultcount = behind + 1 + ahead; hardfault = true; break; /* break to PAGE HAS BEEN FOUND */ } if (rv == VM_PAGER_ERROR) printf("vm_fault: pager read error, pid %d (%s)\n", curproc->p_pid, curproc->p_comm); /* * If an I/O error occurred or the requested page was * outside the range of the pager, clean up and return * an error. */ if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) { fault_page_free(&fs.m); unlock_and_deallocate(&fs); return (KERN_OUT_OF_BOUNDS); } } /* * The requested page does not exist at this object/ * offset. Remove the invalid page from the object, * waking up anyone waiting for it, and continue on to * the next object. However, if this is the top-level * object, we must leave the busy page in place to * prevent another process from rushing past us, and * inserting the page in that object at the same time * that we are. */ if (fs.object == fs.first_object) { fs.first_m = fs.m; fs.m = NULL; } else fault_page_free(&fs.m); /* * Move on to the next object. Lock the next object before * unlocking the current one. */ next_object = fs.object->backing_object; if (next_object == NULL) { /* * If there's no object left, fill the page in the top * object with zeros. */ if (fs.object != fs.first_object) { vm_object_pip_wakeup(fs.object); VM_OBJECT_WUNLOCK(fs.object); fs.object = fs.first_object; fs.pindex = fs.first_pindex; VM_OBJECT_WLOCK(fs.object); } MPASS(fs.first_m != NULL); MPASS(fs.m == NULL); fs.m = fs.first_m; fs.first_m = NULL; /* * Zero the page if necessary and mark it valid. */ if ((fs.m->flags & PG_ZERO) == 0) { pmap_zero_page(fs.m); } else { VM_CNT_INC(v_ozfod); } VM_CNT_INC(v_zfod); vm_page_valid(fs.m); /* Don't try to prefault neighboring pages. */ faultcount = 1; break; /* break to PAGE HAS BEEN FOUND */ } else { MPASS(fs.first_m != NULL); KASSERT(fs.object != next_object, ("object loop %p", next_object)); VM_OBJECT_WLOCK(next_object); vm_object_pip_add(next_object, 1); if (fs.object != fs.first_object) vm_object_pip_wakeup(fs.object); fs.pindex += OFF_TO_IDX(fs.object->backing_object_offset); VM_OBJECT_WUNLOCK(fs.object); fs.object = next_object; } } vm_page_assert_xbusied(fs.m); /* * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock * is held.] */ /* * If the page is being written, but isn't already owned by the * top-level object, we have to copy it into a new page owned by the * top-level object. */ if (fs.object != fs.first_object) { /* * We only really need to copy if we want to write it. */ if ((fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) { /* * This allows pages to be virtually copied from a * backing_object into the first_object, where the * backing object has no other refs to it, and cannot * gain any more refs. Instead of a bcopy, we just * move the page from the backing object to the * first object. Note that we must mark the page * dirty in the first object so that it will go out * to swap when needed. */ is_first_object_locked = false; if ( /* * Only one shadow object */ (fs.object->shadow_count == 1) && /* * No COW refs, except us */ (fs.object->ref_count == 1) && /* * No one else can look this object up */ (fs.object->handle == NULL) && /* * No other ways to look the object up */ ((fs.object->flags & OBJ_ANON) != 0) && (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs.first_object)) && /* * We don't chase down the shadow chain */ fs.object == fs.first_object->backing_object) { /* * Remove but keep xbusy for replace. fs.m is * moved into fs.first_object and left busy * while fs.first_m is conditionally freed. */ vm_page_remove_xbusy(fs.m); vm_page_replace(fs.m, fs.first_object, fs.first_pindex, fs.first_m); vm_page_dirty(fs.m); #if VM_NRESERVLEVEL > 0 /* * Rename the reservation. */ vm_reserv_rename(fs.m, fs.first_object, fs.object, OFF_TO_IDX( fs.first_object->backing_object_offset)); #endif VM_OBJECT_WUNLOCK(fs.object); fs.first_m = fs.m; fs.m = NULL; VM_CNT_INC(v_cow_optim); } else { VM_OBJECT_WUNLOCK(fs.object); /* * Oh, well, lets copy it. */ pmap_copy_page(fs.m, fs.first_m); vm_page_valid(fs.first_m); if (wired && (fault_flags & VM_FAULT_WIRE) == 0) { vm_page_wire(fs.first_m); vm_page_unwire(fs.m, PQ_INACTIVE); } /* * Save the cow page to be released after * pmap_enter is complete. */ fs.m_cow = fs.m; fs.m = NULL; } /* * fs.object != fs.first_object due to above * conditional */ vm_object_pip_wakeup(fs.object); /* * We only try to prefault read-only mappings to the * neighboring pages when this copy-on-write fault is * a hard fault. In other cases, trying to prefault * is typically wasted effort. */ if (faultcount == 0) faultcount = 1; /* * Only use the new page below... */ fs.object = fs.first_object; fs.pindex = fs.first_pindex; fs.m = fs.first_m; if (!is_first_object_locked) VM_OBJECT_WLOCK(fs.object); VM_CNT_INC(v_cow_faults); curthread->td_cow++; } else { prot &= ~VM_PROT_WRITE; } } /* * We must verify that the maps have not changed since our last * lookup. */ if (!fs.lookup_still_valid) { if (!vm_map_trylock_read(fs.map)) { unlock_and_deallocate(&fs); goto RetryFault; } fs.lookup_still_valid = true; if (fs.map->timestamp != fs.map_generation) { result = vm_map_lookup_locked(&fs.map, vaddr, fault_type, &fs.entry, &retry_object, &retry_pindex, &retry_prot, &wired); /* * If we don't need the page any longer, put it on the inactive * list (the easiest thing to do here). If no one needs it, * pageout will grab it eventually. */ if (result != KERN_SUCCESS) { unlock_and_deallocate(&fs); /* * If retry of map lookup would have blocked then * retry fault from start. */ if (result == KERN_FAILURE) goto RetryFault; return (result); } if ((retry_object != fs.first_object) || (retry_pindex != fs.first_pindex)) { unlock_and_deallocate(&fs); goto RetryFault; } /* * Check whether the protection has changed or the object has * been copied while we left the map unlocked. Changing from * read to write permission is OK - we leave the page * write-protected, and catch the write fault. Changing from * write to read permission means that we can't mark the page * write-enabled after all. */ prot &= retry_prot; fault_type &= retry_prot; if (prot == 0) { unlock_and_deallocate(&fs); goto RetryFault; } /* Reassert because wired may have changed. */ KASSERT(wired || (fault_flags & VM_FAULT_WIRE) == 0, ("!wired && VM_FAULT_WIRE")); } } /* * If the page was filled by a pager, save the virtual address that * should be faulted on next under a sequential access pattern to the * map entry. A read lock on the map suffices to update this address * safely. */ if (hardfault) fs.entry->next_read = vaddr + ptoa(ahead) + PAGE_SIZE; vm_page_assert_xbusied(fs.m); vm_fault_dirty(fs.entry, fs.m, prot, fault_type, fault_flags); /* * Page must be completely valid or it is not fit to * map into user space. vm_pager_get_pages() ensures this. */ KASSERT(vm_page_all_valid(fs.m), ("vm_fault: page %p partially invalid", fs.m)); VM_OBJECT_WUNLOCK(fs.object); /* * Put this page into the physical map. We had to do the unlock above * because pmap_enter() may sleep. We don't put the page * back on the active queue until later so that the pageout daemon * won't find it (yet). */ pmap_enter(fs.map->pmap, vaddr, fs.m, prot, fault_type | (wired ? PMAP_ENTER_WIRED : 0), 0); if (faultcount != 1 && (fault_flags & VM_FAULT_WIRE) == 0 && wired == 0) vm_fault_prefault(&fs, vaddr, faultcount > 0 ? behind : PFBAK, faultcount > 0 ? ahead : PFFOR, false); /* * If the page is not wired down, then put it where the pageout daemon * can find it. */ if ((fault_flags & VM_FAULT_WIRE) != 0) vm_page_wire(fs.m); else vm_page_activate(fs.m); if (m_hold != NULL) { *m_hold = fs.m; vm_page_wire(fs.m); } vm_page_xunbusy(fs.m); fs.m = NULL; /* * Unlock everything, and return */ fault_deallocate(&fs); if (hardfault) { VM_CNT_INC(v_io_faults); curthread->td_ru.ru_majflt++; #ifdef RACCT if (racct_enable && fs.object->type == OBJT_VNODE) { PROC_LOCK(curproc); if ((fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) { racct_add_force(curproc, RACCT_WRITEBPS, PAGE_SIZE + behind * PAGE_SIZE); racct_add_force(curproc, RACCT_WRITEIOPS, 1); } else { racct_add_force(curproc, RACCT_READBPS, PAGE_SIZE + ahead * PAGE_SIZE); racct_add_force(curproc, RACCT_READIOPS, 1); } PROC_UNLOCK(curproc); } #endif } else curthread->td_ru.ru_minflt++; return (KERN_SUCCESS); } /* * Speed up the reclamation of pages that precede the faulting pindex within * the first object of the shadow chain. Essentially, perform the equivalent * to madvise(..., MADV_DONTNEED) on a large cluster of pages that precedes * the faulting pindex by the cluster size when the pages read by vm_fault() * cross a cluster-size boundary. The cluster size is the greater of the * smallest superpage size and VM_FAULT_DONTNEED_MIN. * * When "fs->first_object" is a shadow object, the pages in the backing object * that precede the faulting pindex are deactivated by vm_fault(). So, this * function must only be concerned with pages in the first object. */ static void vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr, int ahead) { vm_map_entry_t entry; vm_object_t first_object, object; vm_offset_t end, start; vm_page_t m, m_next; vm_pindex_t pend, pstart; vm_size_t size; object = fs->object; VM_OBJECT_ASSERT_WLOCKED(object); first_object = fs->first_object; if (first_object != object) { if (!VM_OBJECT_TRYWLOCK(first_object)) { VM_OBJECT_WUNLOCK(object); VM_OBJECT_WLOCK(first_object); VM_OBJECT_WLOCK(object); } } /* Neither fictitious nor unmanaged pages can be reclaimed. */ if ((first_object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0) { size = VM_FAULT_DONTNEED_MIN; if (MAXPAGESIZES > 1 && size < pagesizes[1]) size = pagesizes[1]; end = rounddown2(vaddr, size); if (vaddr - end >= size - PAGE_SIZE - ptoa(ahead) && (entry = fs->entry)->start < end) { if (end - entry->start < size) start = entry->start; else start = end - size; pmap_advise(fs->map->pmap, start, end, MADV_DONTNEED); pstart = OFF_TO_IDX(entry->offset) + atop(start - entry->start); m_next = vm_page_find_least(first_object, pstart); pend = OFF_TO_IDX(entry->offset) + atop(end - entry->start); while ((m = m_next) != NULL && m->pindex < pend) { m_next = TAILQ_NEXT(m, listq); if (!vm_page_all_valid(m) || vm_page_busied(m)) continue; /* * Don't clear PGA_REFERENCED, since it would * likely represent a reference by a different * process. * * Typically, at this point, prefetched pages * are still in the inactive queue. Only * pages that triggered page faults are in the * active queue. The test for whether the page * is in the inactive queue is racy; in the * worst case we will requeue the page * unnecessarily. */ if (!vm_page_inactive(m)) vm_page_deactivate(m); } } } if (first_object != object) VM_OBJECT_WUNLOCK(first_object); } /* * vm_fault_prefault provides a quick way of clustering * pagefaults into a processes address space. It is a "cousin" * of vm_map_pmap_enter, except it runs at page fault time instead * of mmap time. */ static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra, int backward, int forward, bool obj_locked) { pmap_t pmap; vm_map_entry_t entry; vm_object_t backing_object, lobject; vm_offset_t addr, starta; vm_pindex_t pindex; vm_page_t m; int i; pmap = fs->map->pmap; if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) return; entry = fs->entry; if (addra < backward * PAGE_SIZE) { starta = entry->start; } else { starta = addra - backward * PAGE_SIZE; if (starta < entry->start) starta = entry->start; } /* * Generate the sequence of virtual addresses that are candidates for * prefaulting in an outward spiral from the faulting virtual address, * "addra". Specifically, the sequence is "addra - PAGE_SIZE", "addra * + PAGE_SIZE", "addra - 2 * PAGE_SIZE", "addra + 2 * PAGE_SIZE", ... * If the candidate address doesn't have a backing physical page, then * the loop immediately terminates. */ for (i = 0; i < 2 * imax(backward, forward); i++) { addr = addra + ((i >> 1) + 1) * ((i & 1) == 0 ? -PAGE_SIZE : PAGE_SIZE); if (addr > addra + forward * PAGE_SIZE) addr = 0; if (addr < starta || addr >= entry->end) continue; if (!pmap_is_prefaultable(pmap, addr)) continue; pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; lobject = entry->object.vm_object; if (!obj_locked) VM_OBJECT_RLOCK(lobject); while ((m = vm_page_lookup(lobject, pindex)) == NULL && lobject->type == OBJT_DEFAULT && (backing_object = lobject->backing_object) != NULL) { KASSERT((lobject->backing_object_offset & PAGE_MASK) == 0, ("vm_fault_prefault: unaligned object offset")); pindex += lobject->backing_object_offset >> PAGE_SHIFT; VM_OBJECT_RLOCK(backing_object); if (!obj_locked || lobject != entry->object.vm_object) VM_OBJECT_RUNLOCK(lobject); lobject = backing_object; } if (m == NULL) { if (!obj_locked || lobject != entry->object.vm_object) VM_OBJECT_RUNLOCK(lobject); break; } if (vm_page_all_valid(m) && (m->flags & PG_FICTITIOUS) == 0) pmap_enter_quick(pmap, addr, m, entry->protection); if (!obj_locked || lobject != entry->object.vm_object) VM_OBJECT_RUNLOCK(lobject); } } /* * Hold each of the physical pages that are mapped by the specified range of * virtual addresses, ["addr", "addr" + "len"), if those mappings are valid * and allow the specified types of access, "prot". If all of the implied * pages are successfully held, then the number of held pages is returned * together with pointers to those pages in the array "ma". However, if any * of the pages cannot be held, -1 is returned. */ int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, vm_prot_t prot, vm_page_t *ma, int max_count) { vm_offset_t end, va; vm_page_t *mp; int count; boolean_t pmap_failed; if (len == 0) return (0); end = round_page(addr + len); addr = trunc_page(addr); /* * Check for illegal addresses. */ if (addr < vm_map_min(map) || addr > end || end > vm_map_max(map)) return (-1); if (atop(end - addr) > max_count) panic("vm_fault_quick_hold_pages: count > max_count"); count = atop(end - addr); /* * Most likely, the physical pages are resident in the pmap, so it is * faster to try pmap_extract_and_hold() first. */ pmap_failed = FALSE; for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) { *mp = pmap_extract_and_hold(map->pmap, va, prot); if (*mp == NULL) pmap_failed = TRUE; else if ((prot & VM_PROT_WRITE) != 0 && (*mp)->dirty != VM_PAGE_BITS_ALL) { /* * Explicitly dirty the physical page. Otherwise, the * caller's changes may go unnoticed because they are * performed through an unmanaged mapping or by a DMA * operation. * * The object lock is not held here. * See vm_page_clear_dirty_mask(). */ vm_page_dirty(*mp); } } if (pmap_failed) { /* * One or more pages could not be held by the pmap. Either no * page was mapped at the specified virtual address or that * mapping had insufficient permissions. Attempt to fault in * and hold these pages. * * If vm_fault_disable_pagefaults() was called, * i.e., TDP_NOFAULTING is set, we must not sleep nor * acquire MD VM locks, which means we must not call * vm_fault(). Some (out of tree) callers mark * too wide a code area with vm_fault_disable_pagefaults() * already, use the VM_PROT_QUICK_NOFAULT flag to request * the proper behaviour explicitly. */ if ((prot & VM_PROT_QUICK_NOFAULT) != 0 && (curthread->td_pflags & TDP_NOFAULTING) != 0) goto error; for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) if (*mp == NULL && vm_fault(map, va, prot, VM_FAULT_NORMAL, mp) != KERN_SUCCESS) goto error; } return (count); error: for (mp = ma; mp < ma + count; mp++) if (*mp != NULL) vm_page_unwire(*mp, PQ_INACTIVE); return (-1); } /* * Routine: * vm_fault_copy_entry * Function: * Create new shadow object backing dst_entry with private copy of * all underlying pages. When src_entry is equal to dst_entry, * function implements COW for wired-down map entry. Otherwise, * it forks wired entry into dst_map. * * In/out conditions: * The source and destination maps must be locked for write. * The source map entry must be wired down (or be a sharing map * entry corresponding to a main map entry that is wired down). */ void vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map, vm_map_entry_t dst_entry, vm_map_entry_t src_entry, vm_ooffset_t *fork_charge) { vm_object_t backing_object, dst_object, object, src_object; vm_pindex_t dst_pindex, pindex, src_pindex; vm_prot_t access, prot; vm_offset_t vaddr; vm_page_t dst_m; vm_page_t src_m; boolean_t upgrade; #ifdef lint src_map++; #endif /* lint */ upgrade = src_entry == dst_entry; access = prot = dst_entry->protection; src_object = src_entry->object.vm_object; src_pindex = OFF_TO_IDX(src_entry->offset); if (upgrade && (dst_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) { dst_object = src_object; vm_object_reference(dst_object); } else { /* * Create the top-level object for the destination entry. * Doesn't actually shadow anything - we copy the pages * directly. */ dst_object = vm_object_allocate_anon(atop(dst_entry->end - dst_entry->start), NULL, NULL, 0); #if VM_NRESERVLEVEL > 0 dst_object->flags |= OBJ_COLORED; dst_object->pg_color = atop(dst_entry->start); #endif dst_object->domain = src_object->domain; dst_object->charge = dst_entry->end - dst_entry->start; } VM_OBJECT_WLOCK(dst_object); KASSERT(upgrade || dst_entry->object.vm_object == NULL, ("vm_fault_copy_entry: vm_object not NULL")); if (src_object != dst_object) { dst_entry->object.vm_object = dst_object; dst_entry->offset = 0; dst_entry->eflags &= ~MAP_ENTRY_VN_EXEC; } if (fork_charge != NULL) { KASSERT(dst_entry->cred == NULL, ("vm_fault_copy_entry: leaked swp charge")); dst_object->cred = curthread->td_ucred; crhold(dst_object->cred); *fork_charge += dst_object->charge; } else if ((dst_object->type == OBJT_DEFAULT || dst_object->type == OBJT_SWAP) && dst_object->cred == NULL) { KASSERT(dst_entry->cred != NULL, ("no cred for entry %p", dst_entry)); dst_object->cred = dst_entry->cred; dst_entry->cred = NULL; } /* * If not an upgrade, then enter the mappings in the pmap as * read and/or execute accesses. Otherwise, enter them as * write accesses. * * A writeable large page mapping is only created if all of * the constituent small page mappings are modified. Marking * PTEs as modified on inception allows promotion to happen * without taking potentially large number of soft faults. */ if (!upgrade) access &= ~VM_PROT_WRITE; /* * Loop through all of the virtual pages within the entry's * range, copying each page from the source object to the * destination object. Since the source is wired, those pages * must exist. In contrast, the destination is pageable. * Since the destination object doesn't share any backing storage * with the source object, all of its pages must be dirtied, * regardless of whether they can be written. */ for (vaddr = dst_entry->start, dst_pindex = 0; vaddr < dst_entry->end; vaddr += PAGE_SIZE, dst_pindex++) { again: /* * Find the page in the source object, and copy it in. * Because the source is wired down, the page will be * in memory. */ if (src_object != dst_object) VM_OBJECT_RLOCK(src_object); object = src_object; pindex = src_pindex + dst_pindex; while ((src_m = vm_page_lookup(object, pindex)) == NULL && (backing_object = object->backing_object) != NULL) { /* * Unless the source mapping is read-only or * it is presently being upgraded from * read-only, the first object in the shadow * chain should provide all of the pages. In * other words, this loop body should never be * executed when the source mapping is already * read/write. */ KASSERT((src_entry->protection & VM_PROT_WRITE) == 0 || upgrade, ("vm_fault_copy_entry: main object missing page")); VM_OBJECT_RLOCK(backing_object); pindex += OFF_TO_IDX(object->backing_object_offset); if (object != dst_object) VM_OBJECT_RUNLOCK(object); object = backing_object; } KASSERT(src_m != NULL, ("vm_fault_copy_entry: page missing")); if (object != dst_object) { /* * Allocate a page in the destination object. */ dst_m = vm_page_alloc(dst_object, (src_object == dst_object ? src_pindex : 0) + dst_pindex, VM_ALLOC_NORMAL); if (dst_m == NULL) { VM_OBJECT_WUNLOCK(dst_object); VM_OBJECT_RUNLOCK(object); vm_wait(dst_object); VM_OBJECT_WLOCK(dst_object); goto again; } pmap_copy_page(src_m, dst_m); VM_OBJECT_RUNLOCK(object); dst_m->dirty = dst_m->valid = src_m->valid; } else { dst_m = src_m; if (vm_page_busy_acquire(dst_m, VM_ALLOC_WAITFAIL) == 0) goto again; if (dst_m->pindex >= dst_object->size) { /* * We are upgrading. Index can occur * out of bounds if the object type is * vnode and the file was truncated. */ vm_page_xunbusy(dst_m); break; } } VM_OBJECT_WUNLOCK(dst_object); /* * Enter it in the pmap. If a wired, copy-on-write * mapping is being replaced by a write-enabled * mapping, then wire that new mapping. * * The page can be invalid if the user called * msync(MS_INVALIDATE) or truncated the backing vnode * or shared memory object. In this case, do not * insert it into pmap, but still do the copy so that * all copies of the wired map entry have similar * backing pages. */ if (vm_page_all_valid(dst_m)) { pmap_enter(dst_map->pmap, vaddr, dst_m, prot, access | (upgrade ? PMAP_ENTER_WIRED : 0), 0); } /* * Mark it no longer busy, and put it on the active list. */ VM_OBJECT_WLOCK(dst_object); if (upgrade) { if (src_m != dst_m) { vm_page_unwire(src_m, PQ_INACTIVE); vm_page_wire(dst_m); } else { KASSERT(vm_page_wired(dst_m), ("dst_m %p is not wired", dst_m)); } } else { vm_page_activate(dst_m); } vm_page_xunbusy(dst_m); } VM_OBJECT_WUNLOCK(dst_object); if (upgrade) { dst_entry->eflags &= ~(MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY); vm_object_deallocate(src_object); } } /* * Block entry into the machine-independent layer's page fault handler by * the calling thread. Subsequent calls to vm_fault() by that thread will * return KERN_PROTECTION_FAILURE. Enable machine-dependent handling of * spurious page faults. */ int vm_fault_disable_pagefaults(void) { return (curthread_pflags_set(TDP_NOFAULTING | TDP_RESETSPUR)); } void vm_fault_enable_pagefaults(int save) { curthread_pflags_restore(save); } Index: projects/clang1000-import/sys/vm/vm_object.c =================================================================== --- projects/clang1000-import/sys/vm/vm_object.c (revision 356919) +++ projects/clang1000-import/sys/vm/vm_object.c (revision 356920) @@ -1,2763 +1,2816 @@ /*- * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU) * * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_object.c 8.5 (Berkeley) 3/22/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Virtual memory object module. */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include /* for curproc, pageproc */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int old_msync; SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0, "Use old (insecure) msync behavior"); static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags, int flags, boolean_t *allclean, boolean_t *eio); static boolean_t vm_object_page_remove_write(vm_page_t p, int flags, boolean_t *allclean); -static void vm_object_qcollapse(vm_object_t object); -static void vm_object_vndeallocate(vm_object_t object); static void vm_object_backing_remove(vm_object_t object); /* * Virtual memory objects maintain the actual data * associated with allocated virtual memory. A given * page of memory exists within exactly one object. * * An object is only deallocated when all "references" * are given up. Only one "reference" to a given * region of an object should be writeable. * * Associated with each object is a list of all resident * memory pages belonging to that object; this list is * maintained by the "vm_page" module, and locked by the object's * lock. * * Each object also records a "pager" routine which is * used to retrieve (and store) pages to the proper backing * storage. In addition, objects may be backed by other * objects from which they were virtual-copied. * * The only items within the object structure which are * modified after time of creation are: * reference count locked by object's lock * pager routine locked by object's lock * */ struct object_q vm_object_list; struct mtx vm_object_list_mtx; /* lock for object list and count */ struct vm_object kernel_object_store; static SYSCTL_NODE(_vm_stats, OID_AUTO, object, CTLFLAG_RD, 0, "VM object stats"); static counter_u64_t object_collapses = EARLY_COUNTER; SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, collapses, CTLFLAG_RD, &object_collapses, "VM object collapses"); static counter_u64_t object_bypasses = EARLY_COUNTER; SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, bypasses, CTLFLAG_RD, &object_bypasses, "VM object bypasses"); +static counter_u64_t object_collapse_waits = EARLY_COUNTER; +SYSCTL_COUNTER_U64(_vm_stats_object, OID_AUTO, collapse_waits, CTLFLAG_RD, + &object_collapse_waits, + "Number of sleeps for collapse"); + static void counter_startup(void) { object_collapses = counter_u64_alloc(M_WAITOK); object_bypasses = counter_u64_alloc(M_WAITOK); + object_collapse_waits = counter_u64_alloc(M_WAITOK); } SYSINIT(object_counters, SI_SUB_CPU, SI_ORDER_ANY, counter_startup, NULL); static uma_zone_t obj_zone; static int vm_object_zinit(void *mem, int size, int flags); #ifdef INVARIANTS static void vm_object_zdtor(void *mem, int size, void *arg); static void vm_object_zdtor(void *mem, int size, void *arg) { vm_object_t object; object = (vm_object_t)mem; KASSERT(object->ref_count == 0, ("object %p ref_count = %d", object, object->ref_count)); KASSERT(TAILQ_EMPTY(&object->memq), ("object %p has resident pages in its memq", object)); KASSERT(vm_radix_is_empty(&object->rtree), ("object %p has resident pages in its trie", object)); #if VM_NRESERVLEVEL > 0 KASSERT(LIST_EMPTY(&object->rvq), ("object %p has reservations", object)); #endif KASSERT(REFCOUNT_COUNT(object->paging_in_progress) == 0, ("object %p paging_in_progress = %d", object, REFCOUNT_COUNT(object->paging_in_progress))); KASSERT(object->busy == 0, ("object %p busy = %d", object, object->busy)); KASSERT(object->resident_page_count == 0, ("object %p resident_page_count = %d", object, object->resident_page_count)); KASSERT(object->shadow_count == 0, ("object %p shadow_count = %d", object, object->shadow_count)); KASSERT(object->type == OBJT_DEAD, ("object %p has non-dead type %d", object, object->type)); } #endif static int vm_object_zinit(void *mem, int size, int flags) { vm_object_t object; object = (vm_object_t)mem; rw_init_flags(&object->lock, "vm object", RW_DUPOK | RW_NEW); /* These are true for any object that has been freed */ object->type = OBJT_DEAD; vm_radix_init(&object->rtree); refcount_init(&object->ref_count, 0); refcount_init(&object->paging_in_progress, 0); refcount_init(&object->busy, 0); object->resident_page_count = 0; object->shadow_count = 0; object->flags = OBJ_DEAD; mtx_lock(&vm_object_list_mtx); TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); mtx_unlock(&vm_object_list_mtx); return (0); } static void _vm_object_allocate(objtype_t type, vm_pindex_t size, u_short flags, vm_object_t object, void *handle) { TAILQ_INIT(&object->memq); LIST_INIT(&object->shadow_head); object->type = type; if (type == OBJT_SWAP) pctrie_init(&object->un_pager.swp.swp_blks); /* * Ensure that swap_pager_swapoff() iteration over object_list * sees up to date type and pctrie head if it observed * non-dead object. */ atomic_thread_fence_rel(); object->pg_color = 0; object->flags = flags; object->size = size; object->domain.dr_policy = NULL; object->generation = 1; object->cleangeneration = 1; refcount_init(&object->ref_count, 1); object->memattr = VM_MEMATTR_DEFAULT; object->cred = NULL; object->charge = 0; object->handle = handle; object->backing_object = NULL; object->backing_object_offset = (vm_ooffset_t) 0; #if VM_NRESERVLEVEL > 0 LIST_INIT(&object->rvq); #endif umtx_shm_object_init(object); } /* * vm_object_init: * * Initialize the VM objects module. */ void vm_object_init(void) { TAILQ_INIT(&vm_object_list); mtx_init(&vm_object_list_mtx, "vm object_list", NULL, MTX_DEF); rw_init(&kernel_object->lock, "kernel vm object"); _vm_object_allocate(OBJT_PHYS, atop(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), OBJ_UNMANAGED, kernel_object, NULL); #if VM_NRESERVLEVEL > 0 kernel_object->flags |= OBJ_COLORED; kernel_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS); #endif /* * The lock portion of struct vm_object must be type stable due * to vm_pageout_fallback_object_lock locking a vm object * without holding any references to it. */ obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL, #ifdef INVARIANTS vm_object_zdtor, #else NULL, #endif vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); vm_radix_zinit(); } void vm_object_clear_flag(vm_object_t object, u_short bits) { VM_OBJECT_ASSERT_WLOCKED(object); object->flags &= ~bits; } /* * Sets the default memory attribute for the specified object. Pages * that are allocated to this object are by default assigned this memory * attribute. * * Presently, this function must be called before any pages are allocated * to the object. In the future, this requirement may be relaxed for * "default" and "swap" objects. */ int vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr) { VM_OBJECT_ASSERT_WLOCKED(object); switch (object->type) { case OBJT_DEFAULT: case OBJT_DEVICE: case OBJT_MGTDEVICE: case OBJT_PHYS: case OBJT_SG: case OBJT_SWAP: case OBJT_VNODE: if (!TAILQ_EMPTY(&object->memq)) return (KERN_FAILURE); break; case OBJT_DEAD: return (KERN_INVALID_ARGUMENT); default: panic("vm_object_set_memattr: object %p is of undefined type", object); } object->memattr = memattr; return (KERN_SUCCESS); } void vm_object_pip_add(vm_object_t object, short i) { refcount_acquiren(&object->paging_in_progress, i); } void vm_object_pip_wakeup(vm_object_t object) { refcount_release(&object->paging_in_progress); } void vm_object_pip_wakeupn(vm_object_t object, short i) { refcount_releasen(&object->paging_in_progress, i); } +/* + * Atomically drop the interlock and wait for pip to drain. This protects + * from sleep/wakeup races due to identity changes. The lock is not + * re-acquired on return. + */ +static void +vm_object_pip_sleep(vm_object_t object, char *waitid) +{ + + refcount_sleep_interlock(&object->paging_in_progress, + &object->lock, waitid, PVM); +} + void vm_object_pip_wait(vm_object_t object, char *waitid) { VM_OBJECT_ASSERT_WLOCKED(object); while (REFCOUNT_COUNT(object->paging_in_progress) > 0) { - VM_OBJECT_WUNLOCK(object); - refcount_wait(&object->paging_in_progress, waitid, PVM); + vm_object_pip_sleep(object, waitid); VM_OBJECT_WLOCK(object); } } void vm_object_pip_wait_unlocked(vm_object_t object, char *waitid) { VM_OBJECT_ASSERT_UNLOCKED(object); while (REFCOUNT_COUNT(object->paging_in_progress) > 0) refcount_wait(&object->paging_in_progress, waitid, PVM); } /* * vm_object_allocate: * * Returns a new object with the given size. */ vm_object_t vm_object_allocate(objtype_t type, vm_pindex_t size) { vm_object_t object; u_short flags; switch (type) { case OBJT_DEAD: panic("vm_object_allocate: can't create OBJT_DEAD"); case OBJT_DEFAULT: case OBJT_SWAP: flags = OBJ_COLORED; break; case OBJT_DEVICE: case OBJT_SG: flags = OBJ_FICTITIOUS | OBJ_UNMANAGED; break; case OBJT_MGTDEVICE: flags = OBJ_FICTITIOUS; break; case OBJT_PHYS: flags = OBJ_UNMANAGED; break; case OBJT_VNODE: flags = 0; break; default: panic("vm_object_allocate: type %d is undefined", type); } object = (vm_object_t)uma_zalloc(obj_zone, M_WAITOK); _vm_object_allocate(type, size, flags, object, NULL); return (object); } /* * vm_object_allocate_anon: * * Returns a new default object of the given size and marked as * anonymous memory for special split/collapse handling. Color * to be initialized by the caller. */ vm_object_t vm_object_allocate_anon(vm_pindex_t size, vm_object_t backing_object, struct ucred *cred, vm_size_t charge) { vm_object_t handle, object; if (backing_object == NULL) handle = NULL; else if ((backing_object->flags & OBJ_ANON) != 0) handle = backing_object->handle; else handle = backing_object; object = uma_zalloc(obj_zone, M_WAITOK); _vm_object_allocate(OBJT_DEFAULT, size, OBJ_ANON | OBJ_ONEMAPPING, object, handle); object->cred = cred; object->charge = cred != NULL ? charge : 0; return (object); } - -/* - * vm_object_reference: - * - * Gets another reference to the given object. Note: OBJ_DEAD - * objects can be referenced during final cleaning. - */ -void -vm_object_reference(vm_object_t object) +static void +vm_object_reference_vnode(vm_object_t object) { struct vnode *vp; u_int old; - if (object == NULL) - return; - /* - * Many places assume exclusive access to objects with a single - * ref. vm_object_collapse() in particular will directly mainpulate - * references for objects in this state. vnode objects only need - * the lock for the first ref to reference the vnode. + * vnode objects need the lock for the first reference + * to serialize with vnode_object_deallocate(). */ - if (!refcount_acquire_if_gt(&object->ref_count, - object->type == OBJT_VNODE ? 0 : 1)) { + if (!refcount_acquire_if_gt(&object->ref_count, 0)) { VM_OBJECT_RLOCK(object); old = refcount_acquire(&object->ref_count); if (object->type == OBJT_VNODE && old == 0) { vp = object->handle; vref(vp); } VM_OBJECT_RUNLOCK(object); } } /* + * vm_object_reference: + * + * Acquires a reference to the given object. + */ +void +vm_object_reference(vm_object_t object) +{ + + if (object == NULL) + return; + + if (object->type == OBJT_VNODE) + vm_object_reference_vnode(object); + else + refcount_acquire(&object->ref_count); + KASSERT((object->flags & OBJ_DEAD) == 0, + ("vm_object_reference: Referenced dead object.")); +} + +/* * vm_object_reference_locked: * * Gets another reference to the given object. * * The object must be locked. */ void vm_object_reference_locked(vm_object_t object) { struct vnode *vp; u_int old; VM_OBJECT_ASSERT_LOCKED(object); old = refcount_acquire(&object->ref_count); if (object->type == OBJT_VNODE && old == 0) { - vp = object->handle; - vref(vp); - } + vp = object->handle; vref(vp); } + KASSERT((object->flags & OBJ_DEAD) == 0, + ("vm_object_reference: Referenced dead object.")); } /* * Handle deallocating an object of type OBJT_VNODE. */ static void -vm_object_vndeallocate(vm_object_t object) +vm_object_deallocate_vnode(vm_object_t object) { struct vnode *vp = (struct vnode *) object->handle; bool last; KASSERT(object->type == OBJT_VNODE, - ("vm_object_vndeallocate: not a vnode object")); - KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp")); + ("vm_object_deallocate_vnode: not a vnode object")); + KASSERT(vp != NULL, ("vm_object_deallocate_vnode: missing vp")); /* Object lock to protect handle lookup. */ last = refcount_release(&object->ref_count); VM_OBJECT_RUNLOCK(object); if (!last) return; if (!umtx_shm_vnobj_persistent) umtx_shm_object_terminated(object); /* vrele may need the vnode lock. */ vrele(vp); } + /* + * We dropped a reference on an object and discovered that it had a + * single remaining shadow. This is a sibling of the reference we + * dropped. Attempt to collapse the sibling and backing object. + */ +static vm_object_t +vm_object_deallocate_anon(vm_object_t backing_object) +{ + vm_object_t object; + + /* Fetch the final shadow. */ + object = LIST_FIRST(&backing_object->shadow_head); + KASSERT(object != NULL && backing_object->shadow_count == 1, + ("vm_object_anon_deallocate: ref_count: %d, shadow_count: %d", + backing_object->ref_count, backing_object->shadow_count)); + KASSERT((object->flags & (OBJ_TMPFS_NODE | OBJ_ANON)) == OBJ_ANON, + ("invalid shadow object %p", object)); + + if (!VM_OBJECT_TRYWLOCK(object)) { + /* + * Prevent object from disappearing since we do not have a + * reference. + */ + vm_object_pip_add(object, 1); + VM_OBJECT_WUNLOCK(backing_object); + VM_OBJECT_WLOCK(object); + vm_object_pip_wakeup(object); + } else + VM_OBJECT_WUNLOCK(backing_object); + + /* + * Check for a collapse/terminate race with the last reference holder. + */ + if ((object->flags & (OBJ_DEAD | OBJ_COLLAPSING)) != 0 || + !refcount_acquire_if_not_zero(&object->ref_count)) { + VM_OBJECT_WUNLOCK(object); + return (NULL); + } + backing_object = object->backing_object; + if (backing_object != NULL && (backing_object->flags & OBJ_ANON) != 0) + vm_object_collapse(object); + VM_OBJECT_WUNLOCK(object); + + return (object); +} + +/* * vm_object_deallocate: * * Release a reference to the specified object, * gained either through a vm_object_allocate * or a vm_object_reference call. When all references * are gone, storage associated with this object * may be relinquished. * * No object may be locked. */ void vm_object_deallocate(vm_object_t object) { - vm_object_t robject, temp; + vm_object_t temp; bool released; while (object != NULL) { /* * If the reference count goes to 0 we start calling * vm_object_terminate() on the object chain. A ref count * of 1 may be a special case depending on the shadow count * being 0 or 1. These cases require a write lock on the * object. */ if ((object->flags & OBJ_ANON) == 0) released = refcount_release_if_gt(&object->ref_count, 1); else released = refcount_release_if_gt(&object->ref_count, 2); if (released) return; if (object->type == OBJT_VNODE) { VM_OBJECT_RLOCK(object); if (object->type == OBJT_VNODE) { - vm_object_vndeallocate(object); + vm_object_deallocate_vnode(object); return; } VM_OBJECT_RUNLOCK(object); } VM_OBJECT_WLOCK(object); KASSERT(object->ref_count > 0, ("vm_object_deallocate: object deallocated too many times: %d", object->type)); - if (refcount_release(&object->ref_count)) - goto doterm; - if (object->ref_count > 1) { - VM_OBJECT_WUNLOCK(object); - return; - } else if (object->ref_count == 1) { - if (object->shadow_count == 0 && - (object->flags & OBJ_ANON) != 0) { - vm_object_set_flag(object, OBJ_ONEMAPPING); - } else if (object->shadow_count == 1) { - KASSERT((object->flags & OBJ_ANON) != 0, - ("obj %p with shadow_count > 0 is not anon", - object)); - robject = LIST_FIRST(&object->shadow_head); - KASSERT(robject != NULL, - ("vm_object_deallocate: ref_count: %d, " - "shadow_count: %d", object->ref_count, - object->shadow_count)); - KASSERT((robject->flags & OBJ_TMPFS_NODE) == 0, - ("shadowed tmpfs v_object %p", object)); - if (!VM_OBJECT_TRYWLOCK(robject)) { - /* - * Avoid a potential deadlock. - */ - refcount_acquire(&object->ref_count); - VM_OBJECT_WUNLOCK(object); - /* - * More likely than not the thread - * holding robject's lock has lower - * priority than the current thread. - * Let the lower priority thread run. - */ - pause("vmo_de", 1); - continue; - } - /* - * Collapse object into its shadow unless its - * shadow is dead. In that case, object will - * be deallocated by the thread that is - * deallocating its shadow. - */ - if ((robject->flags & - (OBJ_DEAD | OBJ_ANON)) == OBJ_ANON) { - - refcount_acquire(&robject->ref_count); -retry: - if (REFCOUNT_COUNT(robject->paging_in_progress) > 0) { - VM_OBJECT_WUNLOCK(object); - vm_object_pip_wait(robject, - "objde1"); - temp = robject->backing_object; - if (object == temp) { - VM_OBJECT_WLOCK(object); - goto retry; - } - } else if (REFCOUNT_COUNT(object->paging_in_progress) > 0) { - VM_OBJECT_WUNLOCK(robject); - VM_OBJECT_WUNLOCK(object); - refcount_wait( - &object->paging_in_progress, - "objde2", PVM); - VM_OBJECT_WLOCK(robject); - temp = robject->backing_object; - if (object == temp) { - VM_OBJECT_WLOCK(object); - goto retry; - } - } else - VM_OBJECT_WUNLOCK(object); - - if (robject->ref_count == 1) { - refcount_release(&robject->ref_count); - object = robject; - goto doterm; - } - object = robject; - vm_object_collapse(object); - VM_OBJECT_WUNLOCK(object); - continue; - } - VM_OBJECT_WUNLOCK(robject); + /* + * If this is not the final reference to an anonymous + * object we may need to collapse the shadow chain. + */ + if (!refcount_release(&object->ref_count)) { + if (object->ref_count > 1 || + object->shadow_count == 0) { + if ((object->flags & OBJ_ANON) != 0 && + object->ref_count == 1) + vm_object_set_flag(object, + OBJ_ONEMAPPING); + VM_OBJECT_WUNLOCK(object); + return; } - VM_OBJECT_WUNLOCK(object); - return; + + /* Handle collapsing last ref on anonymous objects. */ + object = vm_object_deallocate_anon(object); + continue; } -doterm: + + /* + * Handle the final reference to an object. We restart + * the loop with the backing object to avoid recursion. + */ umtx_shm_object_terminated(object); temp = object->backing_object; if (temp != NULL) { KASSERT((object->flags & OBJ_TMPFS_NODE) == 0, ("shadowed tmpfs v_object 2 %p", object)); vm_object_backing_remove(object); } - /* - * Don't double-terminate, we could be in a termination - * recursion due to the terminate having to sync data - * to disk. - */ - if ((object->flags & OBJ_DEAD) == 0) { - vm_object_set_flag(object, OBJ_DEAD); - vm_object_terminate(object); - } else - VM_OBJECT_WUNLOCK(object); + + KASSERT((object->flags & OBJ_DEAD) == 0, + ("vm_object_deallocate: Terminating dead object.")); + vm_object_set_flag(object, OBJ_DEAD); + vm_object_terminate(object); object = temp; } } /* * vm_object_destroy removes the object from the global object list * and frees the space for the object. */ void vm_object_destroy(vm_object_t object) { /* * Release the allocation charge. */ if (object->cred != NULL) { swap_release_by_cred(object->charge, object->cred); object->charge = 0; crfree(object->cred); object->cred = NULL; } /* * Free the space for the object. */ uma_zfree(obj_zone, object); } static void vm_object_backing_remove_locked(vm_object_t object) { vm_object_t backing_object; backing_object = object->backing_object; VM_OBJECT_ASSERT_WLOCKED(object); VM_OBJECT_ASSERT_WLOCKED(backing_object); + KASSERT((object->flags & OBJ_COLLAPSING) == 0, + ("vm_object_backing_remove: Removing collapsing object.")); + if ((object->flags & OBJ_SHADOWLIST) != 0) { LIST_REMOVE(object, shadow_list); backing_object->shadow_count--; object->flags &= ~OBJ_SHADOWLIST; } object->backing_object = NULL; } static void vm_object_backing_remove(vm_object_t object) { vm_object_t backing_object; VM_OBJECT_ASSERT_WLOCKED(object); if ((object->flags & OBJ_SHADOWLIST) != 0) { backing_object = object->backing_object; VM_OBJECT_WLOCK(backing_object); vm_object_backing_remove_locked(object); VM_OBJECT_WUNLOCK(backing_object); } else object->backing_object = NULL; } static void vm_object_backing_insert_locked(vm_object_t object, vm_object_t backing_object) { VM_OBJECT_ASSERT_WLOCKED(object); if ((backing_object->flags & OBJ_ANON) != 0) { VM_OBJECT_ASSERT_WLOCKED(backing_object); LIST_INSERT_HEAD(&backing_object->shadow_head, object, shadow_list); backing_object->shadow_count++; object->flags |= OBJ_SHADOWLIST; } object->backing_object = backing_object; } static void vm_object_backing_insert(vm_object_t object, vm_object_t backing_object) { VM_OBJECT_ASSERT_WLOCKED(object); if ((backing_object->flags & OBJ_ANON) != 0) { VM_OBJECT_WLOCK(backing_object); vm_object_backing_insert_locked(object, backing_object); VM_OBJECT_WUNLOCK(backing_object); } else object->backing_object = backing_object; } +/* + * Insert an object into a backing_object's shadow list with an additional + * reference to the backing_object added. + */ +static void +vm_object_backing_insert_ref(vm_object_t object, vm_object_t backing_object) +{ + VM_OBJECT_ASSERT_WLOCKED(object); + + if ((backing_object->flags & OBJ_ANON) != 0) { + VM_OBJECT_WLOCK(backing_object); + KASSERT((backing_object->flags & OBJ_DEAD) == 0, + ("shadowing dead anonymous object")); + vm_object_reference_locked(backing_object); + vm_object_backing_insert_locked(object, backing_object); + vm_object_clear_flag(backing_object, OBJ_ONEMAPPING); + VM_OBJECT_WUNLOCK(backing_object); + } else { + vm_object_reference(backing_object); + object->backing_object = backing_object; + } +} + /* + * Transfer a backing reference from backing_object to object. + */ +static void +vm_object_backing_transfer(vm_object_t object, vm_object_t backing_object) +{ + vm_object_t new_backing_object; + + /* + * Note that the reference to backing_object->backing_object + * moves from within backing_object to within object. + */ + vm_object_backing_remove_locked(object); + new_backing_object = backing_object->backing_object; + if (new_backing_object == NULL) + return; + if ((new_backing_object->flags & OBJ_ANON) != 0) { + VM_OBJECT_WLOCK(new_backing_object); + vm_object_backing_remove_locked(backing_object); + vm_object_backing_insert_locked(object, new_backing_object); + VM_OBJECT_WUNLOCK(new_backing_object); + } else { + object->backing_object = new_backing_object; + backing_object->backing_object = NULL; + } +} + +/* + * Wait for a concurrent collapse to settle. + */ +static void +vm_object_collapse_wait(vm_object_t object) +{ + + VM_OBJECT_ASSERT_WLOCKED(object); + + while ((object->flags & OBJ_COLLAPSING) != 0) { + vm_object_pip_wait(object, "vmcolwait"); + counter_u64_add(object_collapse_waits, 1); + } +} + +/* + * Waits for a backing object to clear a pending collapse and returns + * it locked if it is an ANON object. + */ +static vm_object_t +vm_object_backing_collapse_wait(vm_object_t object) +{ + vm_object_t backing_object; + + VM_OBJECT_ASSERT_WLOCKED(object); + + for (;;) { + backing_object = object->backing_object; + if (backing_object == NULL || + (backing_object->flags & OBJ_ANON) == 0) + return (NULL); + VM_OBJECT_WLOCK(backing_object); + if ((backing_object->flags & (OBJ_DEAD | OBJ_COLLAPSING)) == 0) + break; + VM_OBJECT_WUNLOCK(object); + vm_object_pip_sleep(backing_object, "vmbckwait"); + counter_u64_add(object_collapse_waits, 1); + VM_OBJECT_WLOCK(object); + } + return (backing_object); +} + +/* * vm_object_terminate_pages removes any remaining pageable pages * from the object and resets the object to an empty state. */ static void vm_object_terminate_pages(vm_object_t object) { vm_page_t p, p_next; VM_OBJECT_ASSERT_WLOCKED(object); /* * Free any remaining pageable pages. This also removes them from the * paging queues. However, don't free wired pages, just remove them * from the object. Rather than incrementally removing each page from * the object, the page and object are reset to any empty state. */ TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) { vm_page_assert_unbusied(p); KASSERT(p->object == object && (p->ref_count & VPRC_OBJREF) != 0, ("vm_object_terminate_pages: page %p is inconsistent", p)); p->object = NULL; if (vm_page_drop(p, VPRC_OBJREF) == VPRC_OBJREF) { VM_CNT_INC(v_pfree); vm_page_free(p); } } /* * If the object contained any pages, then reset it to an empty state. * None of the object's fields, including "resident_page_count", were * modified by the preceding loop. */ if (object->resident_page_count != 0) { vm_radix_reclaim_allnodes(&object->rtree); TAILQ_INIT(&object->memq); object->resident_page_count = 0; if (object->type == OBJT_VNODE) vdrop(object->handle); } } /* * vm_object_terminate actually destroys the specified object, freeing * up all previously used resources. * * The object must be locked. * This routine may block. */ void vm_object_terminate(vm_object_t object) { + VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_DEAD) != 0, ("terminating non-dead obj %p", object)); + KASSERT((object->flags & OBJ_COLLAPSING) == 0, + ("terminating collapsing obj %p", object)); + KASSERT(object->backing_object == NULL, + ("terminating shadow obj %p", object)); /* * wait for the pageout daemon to be done with the object */ vm_object_pip_wait(object, "objtrm"); KASSERT(!REFCOUNT_COUNT(object->paging_in_progress), - ("vm_object_terminate: pageout in progress")); + ("vm_object_terminate: pageout in progress")); - KASSERT(object->ref_count == 0, - ("vm_object_terminate: object with references, ref_count=%d", - object->ref_count)); + KASSERT(object->ref_count == 0, + ("vm_object_terminate: object with references, ref_count=%d", + object->ref_count)); if ((object->flags & OBJ_PG_DTOR) == 0) vm_object_terminate_pages(object); #if VM_NRESERVLEVEL > 0 if (__predict_false(!LIST_EMPTY(&object->rvq))) vm_reserv_break_all(object); #endif KASSERT(object->cred == NULL || object->type == OBJT_DEFAULT || object->type == OBJT_SWAP, ("%s: non-swap obj %p has cred", __func__, object)); /* * Let the pager know object is dead. */ vm_pager_deallocate(object); VM_OBJECT_WUNLOCK(object); vm_object_destroy(object); } /* * Make the page read-only so that we can clear the object flags. However, if * this is a nosync mmap then the object is likely to stay dirty so do not * mess with the page and do not clear the object flags. Returns TRUE if the * page should be flushed, and FALSE otherwise. */ static boolean_t vm_object_page_remove_write(vm_page_t p, int flags, boolean_t *allclean) { vm_page_assert_busied(p); /* * If we have been asked to skip nosync pages and this is a * nosync page, skip it. Note that the object flags were not * cleared in this case so we do not have to set them. */ if ((flags & OBJPC_NOSYNC) != 0 && (p->a.flags & PGA_NOSYNC) != 0) { *allclean = FALSE; return (FALSE); } else { pmap_remove_write(p); return (p->dirty != 0); } } /* * vm_object_page_clean * * Clean all dirty pages in the specified range of object. Leaves page * on whatever queue it is currently on. If NOSYNC is set then do not * write out pages with PGA_NOSYNC set (originally comes from MAP_NOSYNC), * leaving the object dirty. * * When stuffing pages asynchronously, allow clustering. XXX we need a * synchronous clustering mode implementation. * * Odd semantics: if start == end, we clean everything. * * The object must be locked. * * Returns FALSE if some page from the range was not written, as * reported by the pager, and TRUE otherwise. */ boolean_t vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end, int flags) { vm_page_t np, p; vm_pindex_t pi, tend, tstart; int curgeneration, n, pagerflags; boolean_t eio, res, allclean; VM_OBJECT_ASSERT_WLOCKED(object); if (object->type != OBJT_VNODE || !vm_object_mightbedirty(object) || object->resident_page_count == 0) return (TRUE); pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) != 0 ? VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK; pagerflags |= (flags & OBJPC_INVAL) != 0 ? VM_PAGER_PUT_INVAL : 0; tstart = OFF_TO_IDX(start); tend = (end == 0) ? object->size : OFF_TO_IDX(end + PAGE_MASK); allclean = tstart == 0 && tend >= object->size; res = TRUE; rescan: curgeneration = object->generation; for (p = vm_page_find_least(object, tstart); p != NULL; p = np) { pi = p->pindex; if (pi >= tend) break; np = TAILQ_NEXT(p, listq); if (vm_page_none_valid(p)) continue; if (vm_page_busy_acquire(p, VM_ALLOC_WAITFAIL) == 0) { if (object->generation != curgeneration && (flags & OBJPC_SYNC) != 0) goto rescan; np = vm_page_find_least(object, pi); continue; } if (!vm_object_page_remove_write(p, flags, &allclean)) { vm_page_xunbusy(p); continue; } n = vm_object_page_collect_flush(object, p, pagerflags, flags, &allclean, &eio); if (eio) { res = FALSE; allclean = FALSE; } if (object->generation != curgeneration && (flags & OBJPC_SYNC) != 0) goto rescan; /* * If the VOP_PUTPAGES() did a truncated write, so * that even the first page of the run is not fully * written, vm_pageout_flush() returns 0 as the run * length. Since the condition that caused truncated * write may be permanent, e.g. exhausted free space, * accepting n == 0 would cause an infinite loop. * * Forwarding the iterator leaves the unwritten page * behind, but there is not much we can do there if * filesystem refuses to write it. */ if (n == 0) { n = 1; allclean = FALSE; } np = vm_page_find_least(object, pi + n); } #if 0 VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC) ? MNT_WAIT : 0); #endif if (allclean) object->cleangeneration = curgeneration; return (res); } static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags, int flags, boolean_t *allclean, boolean_t *eio) { vm_page_t ma[vm_pageout_page_count], p_first, tp; int count, i, mreq, runlen; vm_page_lock_assert(p, MA_NOTOWNED); vm_page_assert_xbusied(p); VM_OBJECT_ASSERT_WLOCKED(object); count = 1; mreq = 0; for (tp = p; count < vm_pageout_page_count; count++) { tp = vm_page_next(tp); if (tp == NULL || vm_page_tryxbusy(tp) == 0) break; if (!vm_object_page_remove_write(tp, flags, allclean)) { vm_page_xunbusy(tp); break; } } for (p_first = p; count < vm_pageout_page_count; count++) { tp = vm_page_prev(p_first); if (tp == NULL || vm_page_tryxbusy(tp) == 0) break; if (!vm_object_page_remove_write(tp, flags, allclean)) { vm_page_xunbusy(tp); break; } p_first = tp; mreq++; } for (tp = p_first, i = 0; i < count; tp = TAILQ_NEXT(tp, listq), i++) ma[i] = tp; vm_pageout_flush(ma, count, pagerflags, mreq, &runlen, eio); return (runlen); } /* * Note that there is absolutely no sense in writing out * anonymous objects, so we track down the vnode object * to write out. * We invalidate (remove) all pages from the address space * for semantic correctness. * * If the backing object is a device object with unmanaged pages, then any * mappings to the specified range of pages must be removed before this * function is called. * * Note: certain anonymous maps, such as MAP_NOSYNC maps, * may start out with a NULL object. */ boolean_t vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size, boolean_t syncio, boolean_t invalidate) { vm_object_t backing_object; struct vnode *vp; struct mount *mp; int error, flags, fsync_after; boolean_t res; if (object == NULL) return (TRUE); res = TRUE; error = 0; VM_OBJECT_WLOCK(object); while ((backing_object = object->backing_object) != NULL) { VM_OBJECT_WLOCK(backing_object); offset += object->backing_object_offset; VM_OBJECT_WUNLOCK(object); object = backing_object; if (object->size < OFF_TO_IDX(offset + size)) size = IDX_TO_OFF(object->size) - offset; } /* * Flush pages if writing is allowed, invalidate them * if invalidation requested. Pages undergoing I/O * will be ignored by vm_object_page_remove(). * * We cannot lock the vnode and then wait for paging * to complete without deadlocking against vm_fault. * Instead we simply call vm_object_page_remove() and * allow it to block internally on a page-by-page * basis when it encounters pages undergoing async * I/O. */ if (object->type == OBJT_VNODE && vm_object_mightbedirty(object) != 0 && ((vp = object->handle)->v_vflag & VV_NOSYNC) == 0) { VM_OBJECT_WUNLOCK(object); (void) vn_start_write(vp, &mp, V_WAIT); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (syncio && !invalidate && offset == 0 && atop(size) == object->size) { /* * If syncing the whole mapping of the file, * it is faster to schedule all the writes in * async mode, also allowing the clustering, * and then wait for i/o to complete. */ flags = 0; fsync_after = TRUE; } else { flags = (syncio || invalidate) ? OBJPC_SYNC : 0; flags |= invalidate ? (OBJPC_SYNC | OBJPC_INVAL) : 0; fsync_after = FALSE; } VM_OBJECT_WLOCK(object); res = vm_object_page_clean(object, offset, offset + size, flags); VM_OBJECT_WUNLOCK(object); if (fsync_after) error = VOP_FSYNC(vp, MNT_WAIT, curthread); VOP_UNLOCK(vp); vn_finished_write(mp); if (error != 0) res = FALSE; VM_OBJECT_WLOCK(object); } if ((object->type == OBJT_VNODE || object->type == OBJT_DEVICE) && invalidate) { if (object->type == OBJT_DEVICE) /* * The option OBJPR_NOTMAPPED must be passed here * because vm_object_page_remove() cannot remove * unmanaged mappings. */ flags = OBJPR_NOTMAPPED; else if (old_msync) flags = 0; else flags = OBJPR_CLEANONLY; vm_object_page_remove(object, OFF_TO_IDX(offset), OFF_TO_IDX(offset + size + PAGE_MASK), flags); } VM_OBJECT_WUNLOCK(object); return (res); } /* * Determine whether the given advice can be applied to the object. Advice is * not applied to unmanaged pages since they never belong to page queues, and * since MADV_FREE is destructive, it can apply only to anonymous pages that * have been mapped at most once. */ static bool vm_object_advice_applies(vm_object_t object, int advice) { if ((object->flags & OBJ_UNMANAGED) != 0) return (false); if (advice != MADV_FREE) return (true); return ((object->flags & (OBJ_ONEMAPPING | OBJ_ANON)) == (OBJ_ONEMAPPING | OBJ_ANON)); } static void vm_object_madvise_freespace(vm_object_t object, int advice, vm_pindex_t pindex, vm_size_t size) { if (advice == MADV_FREE && object->type == OBJT_SWAP) swap_pager_freespace(object, pindex, size); } /* * vm_object_madvise: * * Implements the madvise function at the object/page level. * * MADV_WILLNEED (any object) * * Activate the specified pages if they are resident. * * MADV_DONTNEED (any object) * * Deactivate the specified pages if they are resident. * * MADV_FREE (OBJT_DEFAULT/OBJT_SWAP objects, * OBJ_ONEMAPPING only) * * Deactivate and clean the specified pages if they are * resident. This permits the process to reuse the pages * without faulting or the kernel to reclaim the pages * without I/O. */ void vm_object_madvise(vm_object_t object, vm_pindex_t pindex, vm_pindex_t end, int advice) { vm_pindex_t tpindex; vm_object_t backing_object, tobject; vm_page_t m, tm; if (object == NULL) return; relookup: VM_OBJECT_WLOCK(object); if (!vm_object_advice_applies(object, advice)) { VM_OBJECT_WUNLOCK(object); return; } for (m = vm_page_find_least(object, pindex); pindex < end; pindex++) { tobject = object; /* * If the next page isn't resident in the top-level object, we * need to search the shadow chain. When applying MADV_FREE, we * take care to release any swap space used to store * non-resident pages. */ if (m == NULL || pindex < m->pindex) { /* * Optimize a common case: if the top-level object has * no backing object, we can skip over the non-resident * range in constant time. */ if (object->backing_object == NULL) { tpindex = (m != NULL && m->pindex < end) ? m->pindex : end; vm_object_madvise_freespace(object, advice, pindex, tpindex - pindex); if ((pindex = tpindex) == end) break; goto next_page; } tpindex = pindex; do { vm_object_madvise_freespace(tobject, advice, tpindex, 1); /* * Prepare to search the next object in the * chain. */ backing_object = tobject->backing_object; if (backing_object == NULL) goto next_pindex; VM_OBJECT_WLOCK(backing_object); tpindex += OFF_TO_IDX(tobject->backing_object_offset); if (tobject != object) VM_OBJECT_WUNLOCK(tobject); tobject = backing_object; if (!vm_object_advice_applies(tobject, advice)) goto next_pindex; } while ((tm = vm_page_lookup(tobject, tpindex)) == NULL); } else { next_page: tm = m; m = TAILQ_NEXT(m, listq); } /* * If the page is not in a normal state, skip it. The page * can not be invalidated while the object lock is held. */ if (!vm_page_all_valid(tm) || vm_page_wired(tm)) goto next_pindex; KASSERT((tm->flags & PG_FICTITIOUS) == 0, ("vm_object_madvise: page %p is fictitious", tm)); KASSERT((tm->oflags & VPO_UNMANAGED) == 0, ("vm_object_madvise: page %p is not managed", tm)); if (vm_page_tryxbusy(tm) == 0) { if (object != tobject) VM_OBJECT_WUNLOCK(object); if (advice == MADV_WILLNEED) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. */ vm_page_aflag_set(tm, PGA_REFERENCED); } vm_page_busy_sleep(tm, "madvpo", false); goto relookup; } vm_page_advise(tm, advice); vm_page_xunbusy(tm); vm_object_madvise_freespace(tobject, advice, tm->pindex, 1); next_pindex: if (tobject != object) VM_OBJECT_WUNLOCK(tobject); } VM_OBJECT_WUNLOCK(object); } /* * vm_object_shadow: * * Create a new object which is backed by the * specified existing object range. The source * object reference is deallocated. * * The new object and offset into that object * are returned in the source parameters. */ void vm_object_shadow(vm_object_t *object, vm_ooffset_t *offset, vm_size_t length, struct ucred *cred, bool shared) { vm_object_t source; vm_object_t result; source = *object; /* * Don't create the new object if the old object isn't shared. * * If we hold the only reference we can guarantee that it won't * increase while we have the map locked. Otherwise the race is * harmless and we will end up with an extra shadow object that * will be collapsed later. */ if (source != NULL && source->ref_count == 1 && (source->flags & OBJ_ANON) != 0) return; /* * Allocate a new object with the given length. */ result = vm_object_allocate_anon(atop(length), source, cred, length); /* * Store the offset into the source object, and fix up the offset into * the new object. */ result->backing_object_offset = *offset; if (shared || source != NULL) { VM_OBJECT_WLOCK(result); /* * The new object shadows the source object, adding a * reference to it. Our caller changes his reference * to point to the new object, removing a reference to * the source object. Net result: no change of * reference count, unless the caller needs to add one * more reference due to forking a shared map entry. */ if (shared) { vm_object_reference_locked(result); vm_object_clear_flag(result, OBJ_ONEMAPPING); } /* * Try to optimize the result object's page color when * shadowing in order to maintain page coloring * consistency in the combined shadowed object. */ if (source != NULL) { vm_object_backing_insert(result, source); result->domain = source->domain; #if VM_NRESERVLEVEL > 0 result->flags |= source->flags & OBJ_COLORED; result->pg_color = (source->pg_color + OFF_TO_IDX(*offset)) & ((1 << (VM_NFREEORDER - 1)) - 1); #endif } VM_OBJECT_WUNLOCK(result); } /* * Return the new things */ *offset = 0; *object = result; } /* * vm_object_split: * * Split the pages in a map entry into a new object. This affords * easier removal of unused pages, and keeps object inheritance from * being a negative impact on memory usage. */ void vm_object_split(vm_map_entry_t entry) { vm_page_t m, m_next; - vm_object_t orig_object, new_object, source; + vm_object_t orig_object, new_object, backing_object; vm_pindex_t idx, offidxstart; vm_size_t size; orig_object = entry->object.vm_object; + KASSERT((orig_object->flags & OBJ_ONEMAPPING) != 0, + ("vm_object_split: Splitting object with multiple mappings.")); if ((orig_object->flags & OBJ_ANON) == 0) return; if (orig_object->ref_count <= 1) return; VM_OBJECT_WUNLOCK(orig_object); offidxstart = OFF_TO_IDX(entry->offset); size = atop(entry->end - entry->start); /* * If swap_pager_copy() is later called, it will convert new_object * into a swap object. */ new_object = vm_object_allocate_anon(size, orig_object, orig_object->cred, ptoa(size)); /* + * We must wait for the orig_object to complete any in-progress + * collapse so that the swap blocks are stable below. The + * additional reference on backing_object by new object will + * prevent further collapse operations until split completes. + */ + VM_OBJECT_WLOCK(orig_object); + vm_object_collapse_wait(orig_object); + + /* * At this point, the new object is still private, so the order in * which the original and new objects are locked does not matter. */ VM_OBJECT_WLOCK(new_object); - VM_OBJECT_WLOCK(orig_object); new_object->domain = orig_object->domain; - source = orig_object->backing_object; - if (source != NULL) { - if ((source->flags & (OBJ_ANON | OBJ_DEAD)) != 0) { - VM_OBJECT_WLOCK(source); - if ((source->flags & OBJ_DEAD) != 0) { - VM_OBJECT_WUNLOCK(source); - VM_OBJECT_WUNLOCK(orig_object); - VM_OBJECT_WUNLOCK(new_object); - new_object->cred = NULL; - vm_object_deallocate(new_object); - VM_OBJECT_WLOCK(orig_object); - return; - } - vm_object_backing_insert_locked(new_object, source); - vm_object_reference_locked(source); /* for new_object */ - vm_object_clear_flag(source, OBJ_ONEMAPPING); - VM_OBJECT_WUNLOCK(source); - } else { - vm_object_backing_insert(new_object, source); - vm_object_reference(source); - } + backing_object = orig_object->backing_object; + if (backing_object != NULL) { + vm_object_backing_insert_ref(new_object, backing_object); new_object->backing_object_offset = - orig_object->backing_object_offset + entry->offset; + orig_object->backing_object_offset + entry->offset; } if (orig_object->cred != NULL) { crhold(orig_object->cred); KASSERT(orig_object->charge >= ptoa(size), ("orig_object->charge < 0")); orig_object->charge -= ptoa(size); } + + /* + * Mark the split operation so that swap_pager_getpages() knows + * that the object is in transition. + */ + vm_object_set_flag(orig_object, OBJ_SPLIT); retry: m = vm_page_find_least(orig_object, offidxstart); for (; m != NULL && (idx = m->pindex - offidxstart) < size; m = m_next) { m_next = TAILQ_NEXT(m, listq); /* * We must wait for pending I/O to complete before we can * rename the page. * * We do not have to VM_PROT_NONE the page as mappings should * not be changed by this operation. */ if (vm_page_tryxbusy(m) == 0) { VM_OBJECT_WUNLOCK(new_object); vm_page_sleep_if_busy(m, "spltwt"); VM_OBJECT_WLOCK(new_object); goto retry; } /* * The page was left invalid. Likely placed there by * an incomplete fault. Just remove and ignore. */ if (vm_page_none_valid(m)) { if (vm_page_remove(m)) vm_page_free(m); continue; } /* vm_page_rename() will dirty the page. */ if (vm_page_rename(m, new_object, idx)) { vm_page_xunbusy(m); VM_OBJECT_WUNLOCK(new_object); VM_OBJECT_WUNLOCK(orig_object); vm_radix_wait(); VM_OBJECT_WLOCK(orig_object); VM_OBJECT_WLOCK(new_object); goto retry; } #if VM_NRESERVLEVEL > 0 /* * If some of the reservation's allocated pages remain with * the original object, then transferring the reservation to * the new object is neither particularly beneficial nor * particularly harmful as compared to leaving the reservation * with the original object. If, however, all of the * reservation's allocated pages are transferred to the new * object, then transferring the reservation is typically * beneficial. Determining which of these two cases applies * would be more costly than unconditionally renaming the * reservation. */ vm_reserv_rename(m, new_object, orig_object, offidxstart); #endif if (orig_object->type != OBJT_SWAP) vm_page_xunbusy(m); } if (orig_object->type == OBJT_SWAP) { /* * swap_pager_copy() can sleep, in which case the orig_object's * and new_object's locks are released and reacquired. */ swap_pager_copy(orig_object, new_object, offidxstart, 0); TAILQ_FOREACH(m, &new_object->memq, listq) vm_page_xunbusy(m); } + vm_object_clear_flag(orig_object, OBJ_SPLIT); VM_OBJECT_WUNLOCK(orig_object); VM_OBJECT_WUNLOCK(new_object); entry->object.vm_object = new_object; entry->offset = 0LL; vm_object_deallocate(orig_object); VM_OBJECT_WLOCK(new_object); } -#define OBSC_COLLAPSE_NOWAIT 0x0002 -#define OBSC_COLLAPSE_WAIT 0x0004 - static vm_page_t -vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p, vm_page_t next, - int op) +vm_object_collapse_scan_wait(vm_object_t object, vm_page_t p) { vm_object_t backing_object; VM_OBJECT_ASSERT_WLOCKED(object); backing_object = object->backing_object; VM_OBJECT_ASSERT_WLOCKED(backing_object); KASSERT(p == NULL || p->object == object || p->object == backing_object, ("invalid ownership %p %p %p", p, object, backing_object)); - if ((op & OBSC_COLLAPSE_NOWAIT) != 0) - return (next); /* The page is only NULL when rename fails. */ if (p == NULL) { VM_OBJECT_WUNLOCK(object); VM_OBJECT_WUNLOCK(backing_object); vm_radix_wait(); } else { if (p->object == object) VM_OBJECT_WUNLOCK(backing_object); else VM_OBJECT_WUNLOCK(object); vm_page_busy_sleep(p, "vmocol", false); } VM_OBJECT_WLOCK(object); VM_OBJECT_WLOCK(backing_object); return (TAILQ_FIRST(&backing_object->memq)); } static bool vm_object_scan_all_shadowed(vm_object_t object) { vm_object_t backing_object; vm_page_t p, pp; vm_pindex_t backing_offset_index, new_pindex, pi, ps; VM_OBJECT_ASSERT_WLOCKED(object); VM_OBJECT_ASSERT_WLOCKED(object->backing_object); backing_object = object->backing_object; if ((backing_object->flags & OBJ_ANON) == 0) return (false); pi = backing_offset_index = OFF_TO_IDX(object->backing_object_offset); p = vm_page_find_least(backing_object, pi); ps = swap_pager_find_least(backing_object, pi); /* * Only check pages inside the parent object's range and * inside the parent object's mapping of the backing object. */ for (;; pi++) { if (p != NULL && p->pindex < pi) p = TAILQ_NEXT(p, listq); if (ps < pi) ps = swap_pager_find_least(backing_object, pi); if (p == NULL && ps >= backing_object->size) break; else if (p == NULL) pi = ps; else pi = MIN(p->pindex, ps); new_pindex = pi - backing_offset_index; if (new_pindex >= object->size) break; /* * If the backing object page is busy a grandparent or older * page may still be undergoing CoW. It is not safe to * collapse the backing object until it is quiesced. */ if (p != NULL && vm_page_busied(p)) return (false); /* * See if the parent has the page or if the parent's object * pager has the page. If the parent has the page but the page * is not valid, the parent's object pager must have the page. * * If this fails, the parent does not completely shadow the * object and we might as well give up now. */ pp = vm_page_lookup(object, new_pindex); /* * The valid check here is stable due to object lock being * required to clear valid and initiate paging. */ if ((pp == NULL || vm_page_none_valid(pp)) && !vm_pager_has_page(object, new_pindex, NULL, NULL)) return (false); } return (true); } -static bool -vm_object_collapse_scan(vm_object_t object, int op) +static void +vm_object_collapse_scan(vm_object_t object) { vm_object_t backing_object; vm_page_t next, p, pp; vm_pindex_t backing_offset_index, new_pindex; VM_OBJECT_ASSERT_WLOCKED(object); VM_OBJECT_ASSERT_WLOCKED(object->backing_object); backing_object = object->backing_object; backing_offset_index = OFF_TO_IDX(object->backing_object_offset); /* - * Initial conditions - */ - if ((op & OBSC_COLLAPSE_WAIT) != 0) - vm_object_set_flag(backing_object, OBJ_DEAD); - - /* * Our scan */ for (p = TAILQ_FIRST(&backing_object->memq); p != NULL; p = next) { next = TAILQ_NEXT(p, listq); new_pindex = p->pindex - backing_offset_index; /* * Check for busy page */ if (vm_page_tryxbusy(p) == 0) { - next = vm_object_collapse_scan_wait(object, p, next, op); + next = vm_object_collapse_scan_wait(object, p); continue; } + KASSERT(object->backing_object == backing_object, + ("vm_object_collapse_scan: backing object mismatch %p != %p", + object->backing_object, backing_object)); KASSERT(p->object == backing_object, - ("vm_object_collapse_scan: object mismatch")); + ("vm_object_collapse_scan: object mismatch %p != %p", + p->object, backing_object)); if (p->pindex < backing_offset_index || new_pindex >= object->size) { if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, p->pindex, 1); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (vm_page_remove(p)) vm_page_free(p); continue; } pp = vm_page_lookup(object, new_pindex); if (pp != NULL && vm_page_tryxbusy(pp) == 0) { vm_page_xunbusy(p); /* * The page in the parent is busy and possibly not * (yet) valid. Until its state is finalized by the * busy bit owner, we can't tell whether it shadows the - * original page. Therefore, we must either skip it - * and the original (backing_object) page or wait for - * its state to be finalized. - * - * This is due to a race with vm_fault() where we must - * unbusy the original (backing_obj) page before we can - * (re)lock the parent. Hence we can get here. + * original page. */ - next = vm_object_collapse_scan_wait(object, pp, next, - op); + next = vm_object_collapse_scan_wait(object, pp); continue; } if (pp != NULL && vm_page_none_valid(pp)) { /* * The page was invalid in the parent. Likely placed * there by an incomplete fault. Just remove and * ignore. p can replace it. */ if (vm_page_remove(pp)) vm_page_free(pp); pp = NULL; } if (pp != NULL || vm_pager_has_page(object, new_pindex, NULL, NULL)) { /* * The page already exists in the parent OR swap exists * for this location in the parent. Leave the parent's * page alone. Destroy the original page from the * backing object. */ if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, p->pindex, 1); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (vm_page_remove(p)) vm_page_free(p); if (pp != NULL) vm_page_xunbusy(pp); continue; } /* * Page does not exist in parent, rename the page from the * backing object to the main object. * * If the page was mapped to a process, it can remain mapped * through the rename. vm_page_rename() will dirty the page. */ if (vm_page_rename(p, object, new_pindex)) { vm_page_xunbusy(p); - if (pp != NULL) - vm_page_xunbusy(pp); - next = vm_object_collapse_scan_wait(object, NULL, next, - op); + next = vm_object_collapse_scan_wait(object, NULL); continue; } /* Use the old pindex to free the right page. */ if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, new_pindex + backing_offset_index, 1); #if VM_NRESERVLEVEL > 0 /* * Rename the reservation. */ vm_reserv_rename(p, object, backing_object, backing_offset_index); #endif vm_page_xunbusy(p); } - return (true); + return; } - /* - * this version of collapse allows the operation to occur earlier and - * when paging_in_progress is true for an object... This is not a complete - * operation, but should plug 99.9% of the rest of the leaks. - */ -static void -vm_object_qcollapse(vm_object_t object) -{ - vm_object_t backing_object = object->backing_object; - - VM_OBJECT_ASSERT_WLOCKED(object); - VM_OBJECT_ASSERT_WLOCKED(backing_object); - - if (backing_object->ref_count != 1) - return; - - vm_object_collapse_scan(object, OBSC_COLLAPSE_NOWAIT); -} - -/* * vm_object_collapse: * * Collapse an object with the object backing it. * Pages in the backing object are moved into the * parent, and the backing object is deallocated. */ void vm_object_collapse(vm_object_t object) { vm_object_t backing_object, new_backing_object; VM_OBJECT_ASSERT_WLOCKED(object); while (TRUE) { - /* - * Verify that the conditions are right for collapse: - * - * The object exists and the backing object exists. - */ - if ((backing_object = object->backing_object) == NULL) - break; + KASSERT((object->flags & (OBJ_DEAD | OBJ_ANON)) == OBJ_ANON, + ("collapsing invalid object")); /* - * we check the backing object first, because it is most likely - * not collapsable. + * Wait for the backing_object to finish any pending + * collapse so that the caller sees the shortest possible + * shadow chain. */ - if ((backing_object->flags & OBJ_ANON) == 0) - break; - VM_OBJECT_WLOCK(backing_object); - if ((backing_object->flags & OBJ_DEAD) != 0 || - (object->flags & (OBJ_DEAD | OBJ_ANON)) != OBJ_ANON) { - VM_OBJECT_WUNLOCK(backing_object); - break; - } + backing_object = vm_object_backing_collapse_wait(object); + if (backing_object == NULL) + return; - if (REFCOUNT_COUNT(object->paging_in_progress) > 0 || - REFCOUNT_COUNT(backing_object->paging_in_progress) > 0) { - vm_object_qcollapse(object); - VM_OBJECT_WUNLOCK(backing_object); - break; - } + KASSERT(object->ref_count > 0 && + object->ref_count > object->shadow_count, + ("collapse with invalid ref %d or shadow %d count.", + object->ref_count, object->shadow_count)); + KASSERT((backing_object->flags & + (OBJ_COLLAPSING | OBJ_DEAD)) == 0, + ("vm_object_collapse: Backing object already collapsing.")); + KASSERT((object->flags & (OBJ_COLLAPSING | OBJ_DEAD)) == 0, + ("vm_object_collapse: object is already collapsing.")); /* - * We know that we can either collapse the backing object (if - * the parent is the only reference to it) or (perhaps) have + * We know that we can either collapse the backing object if + * the parent is the only reference to it, or (perhaps) have * the parent bypass the object if the parent happens to shadow * all the resident pages in the entire backing object. - * - * This is ignoring pager-backed pages such as swap pages. - * vm_object_collapse_scan fails the shadowing test in this - * case. */ if (backing_object->ref_count == 1) { + KASSERT(backing_object->shadow_count == 1, + ("vm_object_collapse: shadow_count: %d", + backing_object->shadow_count)); vm_object_pip_add(object, 1); + vm_object_set_flag(object, OBJ_COLLAPSING); vm_object_pip_add(backing_object, 1); + vm_object_set_flag(backing_object, OBJ_DEAD); /* * If there is exactly one reference to the backing * object, we can collapse it into the parent. */ - vm_object_collapse_scan(object, OBSC_COLLAPSE_WAIT); + vm_object_collapse_scan(object); #if VM_NRESERVLEVEL > 0 /* * Break any reservations from backing_object. */ if (__predict_false(!LIST_EMPTY(&backing_object->rvq))) vm_reserv_break_all(backing_object); #endif /* * Move the pager from backing_object to object. */ if (backing_object->type == OBJT_SWAP) { /* * swap_pager_copy() can sleep, in which case * the backing_object's and object's locks are * released and reacquired. * Since swap_pager_copy() is being asked to - * destroy the source, it will change the - * backing_object's type to OBJT_DEFAULT. + * destroy backing_object, it will change the + * type to OBJT_DEFAULT. */ swap_pager_copy( backing_object, object, OFF_TO_IDX(object->backing_object_offset), TRUE); } + /* * Object now shadows whatever backing_object did. - * Note that the reference to - * backing_object->backing_object moves from within - * backing_object to within object. */ - vm_object_backing_remove_locked(object); - new_backing_object = backing_object->backing_object; - if (new_backing_object != NULL) { - VM_OBJECT_WLOCK(new_backing_object); - vm_object_backing_remove_locked(backing_object); - vm_object_backing_insert_locked(object, - new_backing_object); - VM_OBJECT_WUNLOCK(new_backing_object); - } + vm_object_clear_flag(object, OBJ_COLLAPSING); + vm_object_backing_transfer(object, backing_object); object->backing_object_offset += backing_object->backing_object_offset; + VM_OBJECT_WUNLOCK(object); + vm_object_pip_wakeup(object); /* * Discard backing_object. * * Since the backing object has no pages, no pager left, * and no object references within it, all that is * necessary is to dispose of it. */ KASSERT(backing_object->ref_count == 1, ( "backing_object %p was somehow re-referenced during collapse!", backing_object)); vm_object_pip_wakeup(backing_object); - backing_object->type = OBJT_DEAD; - refcount_release(&backing_object->ref_count); - VM_OBJECT_WUNLOCK(backing_object); - vm_object_destroy(backing_object); - - vm_object_pip_wakeup(object); + (void)refcount_release(&backing_object->ref_count); + vm_object_terminate(backing_object); counter_u64_add(object_collapses, 1); + VM_OBJECT_WLOCK(object); } else { /* * If we do not entirely shadow the backing object, * there is nothing we can do so we give up. + * + * The object lock and backing_object lock must not + * be dropped during this sequence. */ if (!vm_object_scan_all_shadowed(object)) { VM_OBJECT_WUNLOCK(backing_object); break; } /* * Make the parent shadow the next object in the * chain. Deallocating backing_object will not remove * it, since its reference count is at least 2. */ vm_object_backing_remove_locked(object); - new_backing_object = backing_object->backing_object; if (new_backing_object != NULL) { - vm_object_backing_insert(object, + vm_object_backing_insert_ref(object, new_backing_object); - vm_object_reference(new_backing_object); object->backing_object_offset += - backing_object->backing_object_offset; + backing_object->backing_object_offset; } /* * Drop the reference count on backing_object. Since * its ref_count was at least 2, it will not vanish. */ - refcount_release(&backing_object->ref_count); + (void)refcount_release(&backing_object->ref_count); + KASSERT(backing_object->ref_count >= 1, ( +"backing_object %p was somehow dereferenced during collapse!", + backing_object)); VM_OBJECT_WUNLOCK(backing_object); counter_u64_add(object_bypasses, 1); } /* * Try again with this object's new backing object. */ } } /* * vm_object_page_remove: * * For the given object, either frees or invalidates each of the * specified pages. In general, a page is freed. However, if a page is * wired for any reason other than the existence of a managed, wired * mapping, then it may be invalidated but not removed from the object. * Pages are specified by the given range ["start", "end") and the option * OBJPR_CLEANONLY. As a special case, if "end" is zero, then the range * extends from "start" to the end of the object. If the option * OBJPR_CLEANONLY is specified, then only the non-dirty pages within the * specified range are affected. If the option OBJPR_NOTMAPPED is * specified, then the pages within the specified range must have no * mappings. Otherwise, if this option is not specified, any mappings to * the specified pages are removed before the pages are freed or * invalidated. * * In general, this operation should only be performed on objects that * contain managed pages. There are, however, two exceptions. First, it * is performed on the kernel and kmem objects by vm_map_entry_delete(). * Second, it is used by msync(..., MS_INVALIDATE) to invalidate device- * backed pages. In both of these cases, the option OBJPR_CLEANONLY must * not be specified and the option OBJPR_NOTMAPPED must be specified. * * The object must be locked. */ void vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int options) { vm_page_t p, next; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_UNMANAGED) == 0 || (options & (OBJPR_CLEANONLY | OBJPR_NOTMAPPED)) == OBJPR_NOTMAPPED, ("vm_object_page_remove: illegal options for object %p", object)); if (object->resident_page_count == 0) return; vm_object_pip_add(object, 1); again: p = vm_page_find_least(object, start); /* * Here, the variable "p" is either (1) the page with the least pindex * greater than or equal to the parameter "start" or (2) NULL. */ for (; p != NULL && (p->pindex < end || end == 0); p = next) { next = TAILQ_NEXT(p, listq); /* * If the page is wired for any reason besides the existence * of managed, wired mappings, then it cannot be freed. For * example, fictitious pages, which represent device memory, * are inherently wired and cannot be freed. They can, * however, be invalidated if the option OBJPR_CLEANONLY is * not specified. */ if (vm_page_tryxbusy(p) == 0) { vm_page_sleep_if_busy(p, "vmopar"); goto again; } if (vm_page_wired(p)) { wired: if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0) pmap_remove_all(p); if ((options & OBJPR_CLEANONLY) == 0) { vm_page_invalid(p); vm_page_undirty(p); } vm_page_xunbusy(p); continue; } KASSERT((p->flags & PG_FICTITIOUS) == 0, ("vm_object_page_remove: page %p is fictitious", p)); if ((options & OBJPR_CLEANONLY) != 0 && !vm_page_none_valid(p)) { if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0 && !vm_page_try_remove_write(p)) goto wired; if (p->dirty != 0) { vm_page_xunbusy(p); continue; } } if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0 && !vm_page_try_remove_all(p)) goto wired; vm_page_free(p); } vm_object_pip_wakeup(object); } /* * vm_object_page_noreuse: * * For the given object, attempt to move the specified pages to * the head of the inactive queue. This bypasses regular LRU * operation and allows the pages to be reused quickly under memory * pressure. If a page is wired for any reason, then it will not * be queued. Pages are specified by the range ["start", "end"). * As a special case, if "end" is zero, then the range extends from * "start" to the end of the object. * * This operation should only be performed on objects that * contain non-fictitious, managed pages. * * The object must be locked. */ void vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { vm_page_t p, next; VM_OBJECT_ASSERT_LOCKED(object); KASSERT((object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0, ("vm_object_page_noreuse: illegal object %p", object)); if (object->resident_page_count == 0) return; p = vm_page_find_least(object, start); /* * Here, the variable "p" is either (1) the page with the least pindex * greater than or equal to the parameter "start" or (2) NULL. */ for (; p != NULL && (p->pindex < end || end == 0); p = next) { next = TAILQ_NEXT(p, listq); vm_page_deactivate_noreuse(p); } } /* * Populate the specified range of the object with valid pages. Returns * TRUE if the range is successfully populated and FALSE otherwise. * * Note: This function should be optimized to pass a larger array of * pages to vm_pager_get_pages() before it is applied to a non- * OBJT_DEVICE object. * * The object must be locked. */ boolean_t vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { vm_page_t m; vm_pindex_t pindex; int rv; VM_OBJECT_ASSERT_WLOCKED(object); for (pindex = start; pindex < end; pindex++) { rv = vm_page_grab_valid(&m, object, pindex, VM_ALLOC_NORMAL); if (rv != VM_PAGER_OK) break; /* * Keep "m" busy because a subsequent iteration may unlock * the object. */ } if (pindex > start) { m = vm_page_lookup(object, start); while (m != NULL && m->pindex < pindex) { vm_page_xunbusy(m); m = TAILQ_NEXT(m, listq); } } return (pindex == end); } /* * Routine: vm_object_coalesce * Function: Coalesces two objects backing up adjoining * regions of memory into a single object. * * returns TRUE if objects were combined. * * NOTE: Only works at the moment if the second object is NULL - * if it's not, which object do we lock first? * * Parameters: * prev_object First object to coalesce * prev_offset Offset into prev_object * prev_size Size of reference to prev_object * next_size Size of reference to the second object * reserved Indicator that extension region has * swap accounted for * * Conditions: * The object must *not* be locked. */ boolean_t vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset, vm_size_t prev_size, vm_size_t next_size, boolean_t reserved) { vm_pindex_t next_pindex; if (prev_object == NULL) return (TRUE); if ((prev_object->flags & OBJ_ANON) == 0) return (FALSE); VM_OBJECT_WLOCK(prev_object); /* - * Try to collapse the object first + * Try to collapse the object first. */ vm_object_collapse(prev_object); /* * Can't coalesce if: . more than one reference . paged out . shadows * another object . has a copy elsewhere (any of which mean that the * pages not mapped to prev_entry may be in use anyway) */ if (prev_object->backing_object != NULL) { VM_OBJECT_WUNLOCK(prev_object); return (FALSE); } prev_size >>= PAGE_SHIFT; next_size >>= PAGE_SHIFT; next_pindex = OFF_TO_IDX(prev_offset) + prev_size; if (prev_object->ref_count > 1 && prev_object->size != next_pindex && (prev_object->flags & OBJ_ONEMAPPING) == 0) { VM_OBJECT_WUNLOCK(prev_object); return (FALSE); } /* * Account for the charge. */ if (prev_object->cred != NULL) { /* * If prev_object was charged, then this mapping, * although not charged now, may become writable * later. Non-NULL cred in the object would prevent * swap reservation during enabling of the write * access, so reserve swap now. Failed reservation * cause allocation of the separate object for the map * entry, and swap reservation for this entry is * managed in appropriate time. */ if (!reserved && !swap_reserve_by_cred(ptoa(next_size), prev_object->cred)) { VM_OBJECT_WUNLOCK(prev_object); return (FALSE); } prev_object->charge += ptoa(next_size); } /* * Remove any pages that may still be in the object from a previous * deallocation. */ if (next_pindex < prev_object->size) { vm_object_page_remove(prev_object, next_pindex, next_pindex + next_size, 0); if (prev_object->type == OBJT_SWAP) swap_pager_freespace(prev_object, next_pindex, next_size); #if 0 if (prev_object->cred != NULL) { KASSERT(prev_object->charge >= ptoa(prev_object->size - next_pindex), ("object %p overcharged 1 %jx %jx", prev_object, (uintmax_t)next_pindex, (uintmax_t)next_size)); prev_object->charge -= ptoa(prev_object->size - next_pindex); } #endif } /* * Extend the object if necessary. */ if (next_pindex + next_size > prev_object->size) prev_object->size = next_pindex + next_size; VM_OBJECT_WUNLOCK(prev_object); return (TRUE); } void vm_object_set_writeable_dirty(vm_object_t object) { /* Only set for vnodes & tmpfs */ if (object->type != OBJT_VNODE && (object->flags & OBJ_TMPFS_NODE) == 0) return; atomic_add_int(&object->generation, 1); } /* * vm_object_unwire: * * For each page offset within the specified range of the given object, * find the highest-level page in the shadow chain and unwire it. A page * must exist at every page offset, and the highest-level page must be * wired. */ void vm_object_unwire(vm_object_t object, vm_ooffset_t offset, vm_size_t length, uint8_t queue) { vm_object_t tobject, t1object; vm_page_t m, tm; vm_pindex_t end_pindex, pindex, tpindex; int depth, locked_depth; KASSERT((offset & PAGE_MASK) == 0, ("vm_object_unwire: offset is not page aligned")); KASSERT((length & PAGE_MASK) == 0, ("vm_object_unwire: length is not a multiple of PAGE_SIZE")); /* The wired count of a fictitious page never changes. */ if ((object->flags & OBJ_FICTITIOUS) != 0) return; pindex = OFF_TO_IDX(offset); end_pindex = pindex + atop(length); again: locked_depth = 1; VM_OBJECT_RLOCK(object); m = vm_page_find_least(object, pindex); while (pindex < end_pindex) { if (m == NULL || pindex < m->pindex) { /* * The first object in the shadow chain doesn't * contain a page at the current index. Therefore, * the page must exist in a backing object. */ tobject = object; tpindex = pindex; depth = 0; do { tpindex += OFF_TO_IDX(tobject->backing_object_offset); tobject = tobject->backing_object; KASSERT(tobject != NULL, ("vm_object_unwire: missing page")); if ((tobject->flags & OBJ_FICTITIOUS) != 0) goto next_page; depth++; if (depth == locked_depth) { locked_depth++; VM_OBJECT_RLOCK(tobject); } } while ((tm = vm_page_lookup(tobject, tpindex)) == NULL); } else { tm = m; m = TAILQ_NEXT(m, listq); } if (vm_page_trysbusy(tm) == 0) { for (tobject = object; locked_depth >= 1; locked_depth--) { t1object = tobject->backing_object; if (tm->object != tobject) VM_OBJECT_RUNLOCK(tobject); tobject = t1object; } vm_page_busy_sleep(tm, "unwbo", true); goto again; } vm_page_unwire(tm, queue); vm_page_sunbusy(tm); next_page: pindex++; } /* Release the accumulated object locks. */ for (tobject = object; locked_depth >= 1; locked_depth--) { t1object = tobject->backing_object; VM_OBJECT_RUNLOCK(tobject); tobject = t1object; } } /* * Return the vnode for the given object, or NULL if none exists. * For tmpfs objects, the function may return NULL if there is * no vnode allocated at the time of the call. */ struct vnode * vm_object_vnode(vm_object_t object) { struct vnode *vp; VM_OBJECT_ASSERT_LOCKED(object); if (object->type == OBJT_VNODE) { vp = object->handle; KASSERT(vp != NULL, ("%s: OBJT_VNODE has no vnode", __func__)); } else if (object->type == OBJT_SWAP && (object->flags & OBJ_TMPFS) != 0) { vp = object->un_pager.swp.swp_tmpfs; KASSERT(vp != NULL, ("%s: OBJT_TMPFS has no vnode", __func__)); } else { vp = NULL; } return (vp); } /* * Busy the vm object. This prevents new pages belonging to the object from * becoming busy. Existing pages persist as busy. Callers are responsible * for checking page state before proceeding. */ void vm_object_busy(vm_object_t obj) { VM_OBJECT_ASSERT_LOCKED(obj); refcount_acquire(&obj->busy); /* The fence is required to order loads of page busy. */ atomic_thread_fence_acq_rel(); } void vm_object_unbusy(vm_object_t obj) { refcount_release(&obj->busy); } void vm_object_busy_wait(vm_object_t obj, const char *wmesg) { VM_OBJECT_ASSERT_UNLOCKED(obj); if (obj->busy) refcount_sleep(&obj->busy, wmesg, PVM); } /* * Return the kvme type of the given object. * If vpp is not NULL, set it to the object's vm_object_vnode() or NULL. */ int vm_object_kvme_type(vm_object_t object, struct vnode **vpp) { VM_OBJECT_ASSERT_LOCKED(object); if (vpp != NULL) *vpp = vm_object_vnode(object); switch (object->type) { case OBJT_DEFAULT: return (KVME_TYPE_DEFAULT); case OBJT_VNODE: return (KVME_TYPE_VNODE); case OBJT_SWAP: if ((object->flags & OBJ_TMPFS_NODE) != 0) return (KVME_TYPE_VNODE); return (KVME_TYPE_SWAP); case OBJT_DEVICE: return (KVME_TYPE_DEVICE); case OBJT_PHYS: return (KVME_TYPE_PHYS); case OBJT_DEAD: return (KVME_TYPE_DEAD); case OBJT_SG: return (KVME_TYPE_SG); case OBJT_MGTDEVICE: return (KVME_TYPE_MGTDEVICE); default: return (KVME_TYPE_UNKNOWN); } } static int sysctl_vm_object_list(SYSCTL_HANDLER_ARGS) { struct kinfo_vmobject *kvo; char *fullpath, *freepath; struct vnode *vp; struct vattr va; vm_object_t obj; vm_page_t m; int count, error; if (req->oldptr == NULL) { /* * If an old buffer has not been provided, generate an * estimate of the space needed for a subsequent call. */ mtx_lock(&vm_object_list_mtx); count = 0; TAILQ_FOREACH(obj, &vm_object_list, object_list) { if (obj->type == OBJT_DEAD) continue; count++; } mtx_unlock(&vm_object_list_mtx); return (SYSCTL_OUT(req, NULL, sizeof(struct kinfo_vmobject) * count * 11 / 10)); } kvo = malloc(sizeof(*kvo), M_TEMP, M_WAITOK); error = 0; /* * VM objects are type stable and are never removed from the * list once added. This allows us to safely read obj->object_list * after reacquiring the VM object lock. */ mtx_lock(&vm_object_list_mtx); TAILQ_FOREACH(obj, &vm_object_list, object_list) { if (obj->type == OBJT_DEAD) continue; VM_OBJECT_RLOCK(obj); if (obj->type == OBJT_DEAD) { VM_OBJECT_RUNLOCK(obj); continue; } mtx_unlock(&vm_object_list_mtx); kvo->kvo_size = ptoa(obj->size); kvo->kvo_resident = obj->resident_page_count; kvo->kvo_ref_count = obj->ref_count; kvo->kvo_shadow_count = obj->shadow_count; kvo->kvo_memattr = obj->memattr; kvo->kvo_active = 0; kvo->kvo_inactive = 0; TAILQ_FOREACH(m, &obj->memq, listq) { /* * A page may belong to the object but be * dequeued and set to PQ_NONE while the * object lock is not held. This makes the * reads of m->queue below racy, and we do not * count pages set to PQ_NONE. However, this * sysctl is only meant to give an * approximation of the system anyway. */ if (m->a.queue == PQ_ACTIVE) kvo->kvo_active++; else if (m->a.queue == PQ_INACTIVE) kvo->kvo_inactive++; } kvo->kvo_vn_fileid = 0; kvo->kvo_vn_fsid = 0; kvo->kvo_vn_fsid_freebsd11 = 0; freepath = NULL; fullpath = ""; kvo->kvo_type = vm_object_kvme_type(obj, &vp); if (vp != NULL) vref(vp); VM_OBJECT_RUNLOCK(obj); if (vp != NULL) { vn_fullpath(curthread, vp, &fullpath, &freepath); vn_lock(vp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(vp, &va, curthread->td_ucred) == 0) { kvo->kvo_vn_fileid = va.va_fileid; kvo->kvo_vn_fsid = va.va_fsid; kvo->kvo_vn_fsid_freebsd11 = va.va_fsid; /* truncate */ } vput(vp); } strlcpy(kvo->kvo_path, fullpath, sizeof(kvo->kvo_path)); if (freepath != NULL) free(freepath, M_TEMP); /* Pack record size down */ kvo->kvo_structsize = offsetof(struct kinfo_vmobject, kvo_path) + strlen(kvo->kvo_path) + 1; kvo->kvo_structsize = roundup(kvo->kvo_structsize, sizeof(uint64_t)); error = SYSCTL_OUT(req, kvo, kvo->kvo_structsize); mtx_lock(&vm_object_list_mtx); if (error) break; } mtx_unlock(&vm_object_list_mtx); free(kvo, M_TEMP); return (error); } SYSCTL_PROC(_vm, OID_AUTO, objects, CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_SKIP | CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_object_list, "S,kinfo_vmobject", "List of VM objects"); #include "opt_ddb.h" #ifdef DDB #include #include #include static int _vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry) { vm_map_t tmpm; vm_map_entry_t tmpe; vm_object_t obj; if (map == 0) return 0; if (entry == 0) { VM_MAP_ENTRY_FOREACH(tmpe, map) { if (_vm_object_in_map(map, object, tmpe)) { return 1; } } } else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { tmpm = entry->object.sub_map; VM_MAP_ENTRY_FOREACH(tmpe, tmpm) { if (_vm_object_in_map(tmpm, object, tmpe)) { return 1; } } } else if ((obj = entry->object.vm_object) != NULL) { for (; obj; obj = obj->backing_object) if (obj == object) { return 1; } } return 0; } static int vm_object_in_map(vm_object_t object) { struct proc *p; /* sx_slock(&allproc_lock); */ FOREACH_PROC_IN_SYSTEM(p) { if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */) continue; if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) { /* sx_sunlock(&allproc_lock); */ return 1; } } /* sx_sunlock(&allproc_lock); */ if (_vm_object_in_map(kernel_map, object, 0)) return 1; return 0; } DB_SHOW_COMMAND(vmochk, vm_object_check) { vm_object_t object; /* * make sure that internal objs are in a map somewhere * and none have zero ref counts. */ TAILQ_FOREACH(object, &vm_object_list, object_list) { if ((object->flags & OBJ_ANON) != 0) { if (object->ref_count == 0) { db_printf("vmochk: internal obj has zero ref count: %ld\n", (long)object->size); } if (!vm_object_in_map(object)) { db_printf( "vmochk: internal obj is not in a map: " "ref: %d, size: %lu: 0x%lx, backing_object: %p\n", object->ref_count, (u_long)object->size, (u_long)object->size, (void *)object->backing_object); } } } } /* * vm_object_print: [ debug ] */ DB_SHOW_COMMAND(object, vm_object_print_static) { /* XXX convert args. */ vm_object_t object = (vm_object_t)addr; boolean_t full = have_addr; vm_page_t p; /* XXX count is an (unused) arg. Avoid shadowing it. */ #define count was_count int count; if (object == NULL) return; db_iprintf( "Object %p: type=%d, size=0x%jx, res=%d, ref=%d, flags=0x%x ruid %d charge %jx\n", object, (int)object->type, (uintmax_t)object->size, object->resident_page_count, object->ref_count, object->flags, object->cred ? object->cred->cr_ruid : -1, (uintmax_t)object->charge); db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%jx\n", object->shadow_count, object->backing_object ? object->backing_object->ref_count : 0, object->backing_object, (uintmax_t)object->backing_object_offset); if (!full) return; db_indent += 2; count = 0; TAILQ_FOREACH(p, &object->memq, listq) { if (count == 0) db_iprintf("memory:="); else if (count == 6) { db_printf("\n"); db_iprintf(" ..."); count = 0; } else db_printf(","); count++; db_printf("(off=0x%jx,page=0x%jx)", (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p)); } if (count != 0) db_printf("\n"); db_indent -= 2; } /* XXX. */ #undef count /* XXX need this non-static entry for calling from vm_map_print. */ void vm_object_print( /* db_expr_t */ long addr, boolean_t have_addr, /* db_expr_t */ long count, char *modif) { vm_object_print_static(addr, have_addr, count, modif); } DB_SHOW_COMMAND(vmopag, vm_object_print_pages) { vm_object_t object; vm_pindex_t fidx; vm_paddr_t pa; vm_page_t m, prev_m; int rcount, nl, c; nl = 0; TAILQ_FOREACH(object, &vm_object_list, object_list) { db_printf("new object: %p\n", (void *)object); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; rcount = 0; fidx = 0; pa = -1; TAILQ_FOREACH(m, &object->memq, listq) { if (m->pindex > 128) break; if ((prev_m = TAILQ_PREV(m, pglist, listq)) != NULL && prev_m->pindex + 1 != m->pindex) { if (rcount) { db_printf(" index(%ld)run(%d)pa(0x%lx)\n", (long)fidx, rcount, (long)pa); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; rcount = 0; } } if (rcount && (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) { ++rcount; continue; } if (rcount) { db_printf(" index(%ld)run(%d)pa(0x%lx)\n", (long)fidx, rcount, (long)pa); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; } fidx = m->pindex; pa = VM_PAGE_TO_PHYS(m); rcount = 1; } if (rcount) { db_printf(" index(%ld)run(%d)pa(0x%lx)\n", (long)fidx, rcount, (long)pa); if (nl > 18) { c = cngetc(); if (c != ' ') return; nl = 0; } nl++; } } } #endif /* DDB */ Index: projects/clang1000-import/sys/vm/vm_object.h =================================================================== --- projects/clang1000-import/sys/vm/vm_object.h (revision 356919) +++ projects/clang1000-import/sys/vm/vm_object.h (revision 356920) @@ -1,377 +1,386 @@ /*- * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU) * * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_object.h 8.3 (Berkeley) 1/12/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * * $FreeBSD$ */ /* * Virtual memory object module definitions. */ #ifndef _VM_OBJECT_ #define _VM_OBJECT_ #include #include #include #include #include #include #include /* * Types defined: * * vm_object_t Virtual memory object. * * List of locks * (a) atomic * (c) const until freed * (o) per-object lock * (f) free pages queue mutex * */ #ifndef VM_PAGE_HAVE_PGLIST TAILQ_HEAD(pglist, vm_page); #define VM_PAGE_HAVE_PGLIST #endif struct vm_object { struct rwlock lock; TAILQ_ENTRY(vm_object) object_list; /* list of all objects */ LIST_HEAD(, vm_object) shadow_head; /* objects that this is a shadow for */ LIST_ENTRY(vm_object) shadow_list; /* chain of shadow objects */ struct pglist memq; /* list of resident pages */ struct vm_radix rtree; /* root of the resident page radix trie*/ vm_pindex_t size; /* Object size */ struct domainset_ref domain; /* NUMA policy. */ volatile int generation; /* generation ID */ int cleangeneration; /* Generation at clean time */ volatile u_int ref_count; /* How many refs?? */ int shadow_count; /* how many objects that this is a shadow for */ vm_memattr_t memattr; /* default memory attribute for pages */ objtype_t type; /* type of pager */ u_short flags; /* see below */ u_short pg_color; /* (c) color of first page in obj */ volatile u_int paging_in_progress; /* Paging (in or out) so don't collapse or destroy */ volatile u_int busy; /* (a) object is busy, disallow page busy. */ int resident_page_count; /* number of resident pages */ struct vm_object *backing_object; /* object that I'm a shadow of */ vm_ooffset_t backing_object_offset;/* Offset in backing object */ TAILQ_ENTRY(vm_object) pager_object_list; /* list of all objects of this pager type */ LIST_HEAD(, vm_reserv) rvq; /* list of reservations */ void *handle; union { /* * VNode pager * * vnp_size - current size of file */ struct { off_t vnp_size; vm_ooffset_t writemappings; } vnp; /* * Device pager * * devp_pglist - list of allocated pages */ struct { TAILQ_HEAD(, vm_page) devp_pglist; struct cdev_pager_ops *ops; struct cdev *dev; } devp; /* * SG pager * * sgp_pglist - list of allocated pages */ struct { TAILQ_HEAD(, vm_page) sgp_pglist; } sgp; /* * Swap pager * * swp_tmpfs - back-pointer to the tmpfs vnode, * if any, which uses the vm object * as backing store. The handle * cannot be reused for linking, * because the vnode can be * reclaimed and recreated, making * the handle changed and hash-chain * invalid. * * swp_blks - pc-trie of the allocated swap blocks. * */ struct { void *swp_tmpfs; struct pctrie swp_blks; vm_ooffset_t writemappings; } swp; } un_pager; struct ucred *cred; vm_ooffset_t charge; void *umtx_data; }; /* * Flags */ #define OBJ_FICTITIOUS 0x0001 /* (c) contains fictitious pages */ #define OBJ_UNMANAGED 0x0002 /* (c) contains unmanaged pages */ #define OBJ_POPULATE 0x0004 /* pager implements populate() */ #define OBJ_DEAD 0x0008 /* dead objects (during rundown) */ #define OBJ_ANON 0x0010 /* (c) contains anonymous memory */ #define OBJ_UMTXDEAD 0x0020 /* umtx pshared was terminated */ #define OBJ_SIZEVNLOCK 0x0040 /* lock vnode to check obj size */ #define OBJ_PG_DTOR 0x0080 /* dont reset object, leave that for dtor */ #define OBJ_TMPFS_NODE 0x0200 /* object belongs to tmpfs VREG node */ +#define OBJ_SPLIT 0x0400 /* object is being split */ +#define OBJ_COLLAPSING 0x0800 /* Parent of collapse. */ #define OBJ_COLORED 0x1000 /* pg_color is defined */ #define OBJ_ONEMAPPING 0x2000 /* One USE (a single, non-forked) mapping flag */ #define OBJ_SHADOWLIST 0x4000 /* Object is on the shadow list. */ #define OBJ_TMPFS 0x8000 /* has tmpfs vnode allocated */ /* * Helpers to perform conversion between vm_object page indexes and offsets. * IDX_TO_OFF() converts an index into an offset. * OFF_TO_IDX() converts an offset into an index. * OBJ_MAX_SIZE specifies the maximum page index corresponding to the * maximum unsigned offset. */ #define IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT) #define OFF_TO_IDX(off) ((vm_pindex_t)(((vm_ooffset_t)(off)) >> PAGE_SHIFT)) #define OBJ_MAX_SIZE (OFF_TO_IDX(UINT64_MAX) + 1) #ifdef _KERNEL #define OBJPC_SYNC 0x1 /* sync I/O */ #define OBJPC_INVAL 0x2 /* invalidate */ #define OBJPC_NOSYNC 0x4 /* skip if PGA_NOSYNC */ /* * The following options are supported by vm_object_page_remove(). */ #define OBJPR_CLEANONLY 0x1 /* Don't remove dirty pages. */ #define OBJPR_NOTMAPPED 0x2 /* Don't unmap pages. */ TAILQ_HEAD(object_q, vm_object); extern struct object_q vm_object_list; /* list of allocated objects */ extern struct mtx vm_object_list_mtx; /* lock for object list and count */ extern struct vm_object kernel_object_store; /* kernel and kmem are aliased for backwards KPI compat. */ #define kernel_object (&kernel_object_store) #define kmem_object (&kernel_object_store) #define VM_OBJECT_ASSERT_LOCKED(object) \ rw_assert(&(object)->lock, RA_LOCKED) #define VM_OBJECT_ASSERT_RLOCKED(object) \ rw_assert(&(object)->lock, RA_RLOCKED) #define VM_OBJECT_ASSERT_WLOCKED(object) \ rw_assert(&(object)->lock, RA_WLOCKED) #define VM_OBJECT_ASSERT_UNLOCKED(object) \ rw_assert(&(object)->lock, RA_UNLOCKED) #define VM_OBJECT_LOCK_DOWNGRADE(object) \ rw_downgrade(&(object)->lock) #define VM_OBJECT_RLOCK(object) \ rw_rlock(&(object)->lock) #define VM_OBJECT_RUNLOCK(object) \ rw_runlock(&(object)->lock) #define VM_OBJECT_SLEEP(object, wchan, pri, wmesg, timo) \ rw_sleep((wchan), &(object)->lock, (pri), (wmesg), (timo)) #define VM_OBJECT_TRYRLOCK(object) \ rw_try_rlock(&(object)->lock) #define VM_OBJECT_TRYWLOCK(object) \ rw_try_wlock(&(object)->lock) #define VM_OBJECT_TRYUPGRADE(object) \ rw_try_upgrade(&(object)->lock) #define VM_OBJECT_WLOCK(object) \ rw_wlock(&(object)->lock) #define VM_OBJECT_WOWNED(object) \ rw_wowned(&(object)->lock) #define VM_OBJECT_WUNLOCK(object) \ rw_wunlock(&(object)->lock) #define VM_OBJECT_DROP(object) \ lock_class_rw.lc_unlock(&(object)->lock.lock_object) #define VM_OBJECT_PICKUP(object, state) \ lock_class_rw.lc_lock(&(object)->lock.lock_object, (state)) + +#define VM_OBJECT_ASSERT_PAGING(object) \ + KASSERT((object)->paging_in_progress != 0, \ + ("vm_object %p is not paging", object)) +#define VM_OBJECT_ASSERT_REFERENCE(object) \ + KASSERT((object)->reference_count != 0, \ + ("vm_object %p is not referenced", object)) struct vnode; /* * The object must be locked or thread private. */ static __inline void vm_object_set_flag(vm_object_t object, u_short bits) { object->flags |= bits; } /* * Conditionally set the object's color, which (1) enables the allocation * of physical memory reservations for anonymous objects and larger-than- * superpage-sized named objects and (2) determines the first page offset * within the object at which a reservation may be allocated. In other * words, the color determines the alignment of the object with respect * to the largest superpage boundary. When mapping named objects, like * files or POSIX shared memory objects, the color should be set to zero * before a virtual address is selected for the mapping. In contrast, * for anonymous objects, the color may be set after the virtual address * is selected. * * The object must be locked. */ static __inline void vm_object_color(vm_object_t object, u_short color) { if ((object->flags & OBJ_COLORED) == 0) { object->pg_color = color; object->flags |= OBJ_COLORED; } } static __inline bool vm_object_reserv(vm_object_t object) { if (object != NULL && (object->flags & (OBJ_COLORED | OBJ_FICTITIOUS)) == OBJ_COLORED) { return (true); } return (false); } static __inline bool vm_object_mightbedirty(vm_object_t object) { return (object->type == OBJT_VNODE && object->generation != object->cleangeneration); } void vm_object_clear_flag(vm_object_t object, u_short bits); void vm_object_pip_add(vm_object_t object, short i); void vm_object_pip_wakeup(vm_object_t object); void vm_object_pip_wakeupn(vm_object_t object, short i); void vm_object_pip_wait(vm_object_t object, char *waitid); void vm_object_pip_wait_unlocked(vm_object_t object, char *waitid); void vm_object_busy(vm_object_t object); void vm_object_unbusy(vm_object_t object); void vm_object_busy_wait(vm_object_t object, const char *wmesg); static inline bool vm_object_busied(vm_object_t object) { return (object->busy != 0); } #define VM_OBJECT_ASSERT_BUSY(object) MPASS(vm_object_busied((object))) void umtx_shm_object_init(vm_object_t object); void umtx_shm_object_terminated(vm_object_t object); extern int umtx_shm_vnobj_persistent; vm_object_t vm_object_allocate (objtype_t, vm_pindex_t); vm_object_t vm_object_allocate_anon(vm_pindex_t, vm_object_t, struct ucred *, vm_size_t); boolean_t vm_object_coalesce(vm_object_t, vm_ooffset_t, vm_size_t, vm_size_t, boolean_t); void vm_object_collapse (vm_object_t); void vm_object_deallocate (vm_object_t); void vm_object_destroy (vm_object_t); void vm_object_terminate (vm_object_t); void vm_object_set_writeable_dirty (vm_object_t); void vm_object_init (void); int vm_object_kvme_type(vm_object_t object, struct vnode **vpp); void vm_object_madvise(vm_object_t, vm_pindex_t, vm_pindex_t, int); boolean_t vm_object_page_clean(vm_object_t object, vm_ooffset_t start, vm_ooffset_t end, int flags); void vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end); void vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int options); boolean_t vm_object_populate(vm_object_t, vm_pindex_t, vm_pindex_t); void vm_object_print(long addr, boolean_t have_addr, long count, char *modif); void vm_object_reference (vm_object_t); void vm_object_reference_locked(vm_object_t); int vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr); void vm_object_shadow(vm_object_t *, vm_ooffset_t *, vm_size_t, struct ucred *, bool); void vm_object_split(vm_map_entry_t); boolean_t vm_object_sync(vm_object_t, vm_ooffset_t, vm_size_t, boolean_t, boolean_t); void vm_object_unwire(vm_object_t object, vm_ooffset_t offset, vm_size_t length, uint8_t queue); struct vnode *vm_object_vnode(vm_object_t object); #endif /* _KERNEL */ #endif /* _VM_OBJECT_ */ Index: projects/clang1000-import/sys/vm/vm_page.c =================================================================== --- projects/clang1000-import/sys/vm/vm_page.c (revision 356919) +++ projects/clang1000-import/sys/vm/vm_page.c (revision 356920) @@ -1,5194 +1,5198 @@ /*- * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU) * * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1998 Matthew Dillon. All Rights Reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 */ /*- * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Resident memory management module. */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct vm_domain vm_dom[MAXMEMDOM]; DPCPU_DEFINE_STATIC(struct vm_batchqueue, pqbatch[MAXMEMDOM][PQ_COUNT]); struct mtx_padalign __exclusive_cache_line pa_lock[PA_LOCK_COUNT]; struct mtx_padalign __exclusive_cache_line vm_domainset_lock; /* The following fields are protected by the domainset lock. */ domainset_t __exclusive_cache_line vm_min_domains; domainset_t __exclusive_cache_line vm_severe_domains; static int vm_min_waiters; static int vm_severe_waiters; static int vm_pageproc_waiters; static SYSCTL_NODE(_vm_stats, OID_AUTO, page, CTLFLAG_RD, 0, "VM page statistics"); static counter_u64_t pqstate_commit_retries = EARLY_COUNTER; SYSCTL_COUNTER_U64(_vm_stats_page, OID_AUTO, pqstate_commit_retries, CTLFLAG_RD, &pqstate_commit_retries, "Number of failed per-page atomic queue state updates"); static counter_u64_t queue_ops = EARLY_COUNTER; SYSCTL_COUNTER_U64(_vm_stats_page, OID_AUTO, queue_ops, CTLFLAG_RD, &queue_ops, "Number of batched queue operations"); static counter_u64_t queue_nops = EARLY_COUNTER; SYSCTL_COUNTER_U64(_vm_stats_page, OID_AUTO, queue_nops, CTLFLAG_RD, &queue_nops, "Number of batched queue operations with no effects"); static void counter_startup(void) { pqstate_commit_retries = counter_u64_alloc(M_WAITOK); queue_ops = counter_u64_alloc(M_WAITOK); queue_nops = counter_u64_alloc(M_WAITOK); } SYSINIT(page_counters, SI_SUB_CPU, SI_ORDER_ANY, counter_startup, NULL); /* * bogus page -- for I/O to/from partially complete buffers, * or for paging into sparsely invalid regions. */ vm_page_t bogus_page; vm_page_t vm_page_array; long vm_page_array_size; long first_page; static TAILQ_HEAD(, vm_page) blacklist_head; static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_page_blacklist, "A", "Blacklist pages"); static uma_zone_t fakepg_zone; static void vm_page_alloc_check(vm_page_t m); static bool _vm_page_busy_sleep(vm_object_t obj, vm_page_t m, const char *wmesg, bool nonshared, bool locked); static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); static void vm_page_enqueue(vm_page_t m, uint8_t queue); static bool vm_page_free_prep(vm_page_t m); static void vm_page_free_toq(vm_page_t m); static void vm_page_init(void *dummy); static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred); static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred); static void vm_page_mvqueue(vm_page_t m, const uint8_t queue, const uint16_t nflag); static int vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run, vm_paddr_t high); static void vm_page_release_toq(vm_page_t m, uint8_t nqueue, bool noreuse); static int vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, int req); static int vm_page_zone_import(void *arg, void **store, int cnt, int domain, int flags); static void vm_page_zone_release(void *arg, void **store, int cnt); SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init, NULL); static void vm_page_init(void *dummy) { fakepg_zone = uma_zcreate("fakepg", sizeof(struct vm_page), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM); bogus_page = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED); } /* * The cache page zone is initialized later since we need to be able to allocate * pages before UMA is fully initialized. */ static void vm_page_init_cache_zones(void *dummy __unused) { struct vm_domain *vmd; struct vm_pgcache *pgcache; int cache, domain, maxcache, pool; maxcache = 0; TUNABLE_INT_FETCH("vm.pgcache_zone_max_pcpu", &maxcache); maxcache *= mp_ncpus; for (domain = 0; domain < vm_ndomains; domain++) { vmd = VM_DOMAIN(domain); for (pool = 0; pool < VM_NFREEPOOL; pool++) { pgcache = &vmd->vmd_pgcache[pool]; pgcache->domain = domain; pgcache->pool = pool; pgcache->zone = uma_zcache_create("vm pgcache", PAGE_SIZE, NULL, NULL, NULL, NULL, vm_page_zone_import, vm_page_zone_release, pgcache, UMA_ZONE_VM); /* * Limit each pool's zone to 0.1% of the pages in the * domain. */ cache = maxcache != 0 ? maxcache : vmd->vmd_page_count / 1000; uma_zone_set_maxcache(pgcache->zone, cache); } } } SYSINIT(vm_page2, SI_SUB_VM_CONF, SI_ORDER_ANY, vm_page_init_cache_zones, NULL); /* Make sure that u_long is at least 64 bits when PAGE_SIZE is 32K. */ #if PAGE_SIZE == 32768 #ifdef CTASSERT CTASSERT(sizeof(u_long) >= 8); #endif #endif /* * vm_set_page_size: * * Sets the page size, perhaps based upon the memory * size. Must be called before any use of page-size * dependent functions. */ void vm_set_page_size(void) { if (vm_cnt.v_page_size == 0) vm_cnt.v_page_size = PAGE_SIZE; if (((vm_cnt.v_page_size - 1) & vm_cnt.v_page_size) != 0) panic("vm_set_page_size: page size not a power of two"); } /* * vm_page_blacklist_next: * * Find the next entry in the provided string of blacklist * addresses. Entries are separated by space, comma, or newline. * If an invalid integer is encountered then the rest of the * string is skipped. Updates the list pointer to the next * character, or NULL if the string is exhausted or invalid. */ static vm_paddr_t vm_page_blacklist_next(char **list, char *end) { vm_paddr_t bad; char *cp, *pos; if (list == NULL || *list == NULL) return (0); if (**list =='\0') { *list = NULL; return (0); } /* * If there's no end pointer then the buffer is coming from * the kenv and we know it's null-terminated. */ if (end == NULL) end = *list + strlen(*list); /* Ensure that strtoq() won't walk off the end */ if (*end != '\0') { if (*end == '\n' || *end == ' ' || *end == ',') *end = '\0'; else { printf("Blacklist not terminated, skipping\n"); *list = NULL; return (0); } } for (pos = *list; *pos != '\0'; pos = cp) { bad = strtoq(pos, &cp, 0); if (*cp == '\0' || *cp == ' ' || *cp == ',' || *cp == '\n') { if (bad == 0) { if (++cp < end) continue; else break; } } else break; if (*cp == '\0' || ++cp >= end) *list = NULL; else *list = cp; return (trunc_page(bad)); } printf("Garbage in RAM blacklist, skipping\n"); *list = NULL; return (0); } bool vm_page_blacklist_add(vm_paddr_t pa, bool verbose) { struct vm_domain *vmd; vm_page_t m; int ret; m = vm_phys_paddr_to_vm_page(pa); if (m == NULL) return (true); /* page does not exist, no failure */ vmd = vm_pagequeue_domain(m); vm_domain_free_lock(vmd); ret = vm_phys_unfree_page(m); vm_domain_free_unlock(vmd); if (ret != 0) { vm_domain_freecnt_inc(vmd, -1); TAILQ_INSERT_TAIL(&blacklist_head, m, listq); if (verbose) printf("Skipping page with pa 0x%jx\n", (uintmax_t)pa); } return (ret); } /* * vm_page_blacklist_check: * * Iterate through the provided string of blacklist addresses, pulling * each entry out of the physical allocator free list and putting it * onto a list for reporting via the vm.page_blacklist sysctl. */ static void vm_page_blacklist_check(char *list, char *end) { vm_paddr_t pa; char *next; next = list; while (next != NULL) { if ((pa = vm_page_blacklist_next(&next, end)) == 0) continue; vm_page_blacklist_add(pa, bootverbose); } } /* * vm_page_blacklist_load: * * Search for a special module named "ram_blacklist". It'll be a * plain text file provided by the user via the loader directive * of the same name. */ static void vm_page_blacklist_load(char **list, char **end) { void *mod; u_char *ptr; u_int len; mod = NULL; ptr = NULL; mod = preload_search_by_type("ram_blacklist"); if (mod != NULL) { ptr = preload_fetch_addr(mod); len = preload_fetch_size(mod); } *list = ptr; if (ptr != NULL) *end = ptr + len; else *end = NULL; return; } static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS) { vm_page_t m; struct sbuf sbuf; int error, first; first = 1; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf_new_for_sysctl(&sbuf, NULL, 128, req); TAILQ_FOREACH(m, &blacklist_head, listq) { sbuf_printf(&sbuf, "%s%#jx", first ? "" : ",", (uintmax_t)m->phys_addr); first = 0; } error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); } /* * Initialize a dummy page for use in scans of the specified paging queue. * In principle, this function only needs to set the flag PG_MARKER. * Nonetheless, it write busies the page as a safety precaution. */ static void vm_page_init_marker(vm_page_t marker, int queue, uint16_t aflags) { bzero(marker, sizeof(*marker)); marker->flags = PG_MARKER; marker->a.flags = aflags; marker->busy_lock = VPB_CURTHREAD_EXCLUSIVE; marker->a.queue = queue; } static void vm_page_domain_init(int domain) { struct vm_domain *vmd; struct vm_pagequeue *pq; int i; vmd = VM_DOMAIN(domain); bzero(vmd, sizeof(*vmd)); *__DECONST(char **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_name) = "vm inactive pagequeue"; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_name) = "vm active pagequeue"; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_name) = "vm laundry pagequeue"; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_name) = "vm unswappable pagequeue"; vmd->vmd_domain = domain; vmd->vmd_page_count = 0; vmd->vmd_free_count = 0; vmd->vmd_segs = 0; vmd->vmd_oom = FALSE; for (i = 0; i < PQ_COUNT; i++) { pq = &vmd->vmd_pagequeues[i]; TAILQ_INIT(&pq->pq_pl); mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue", MTX_DEF | MTX_DUPOK); pq->pq_pdpages = 0; vm_page_init_marker(&vmd->vmd_markers[i], i, 0); } mtx_init(&vmd->vmd_free_mtx, "vm page free queue", NULL, MTX_DEF); mtx_init(&vmd->vmd_pageout_mtx, "vm pageout lock", NULL, MTX_DEF); snprintf(vmd->vmd_name, sizeof(vmd->vmd_name), "%d", domain); /* * inacthead is used to provide FIFO ordering for LRU-bypassing * insertions. */ vm_page_init_marker(&vmd->vmd_inacthead, PQ_INACTIVE, PGA_ENQUEUED); TAILQ_INSERT_HEAD(&vmd->vmd_pagequeues[PQ_INACTIVE].pq_pl, &vmd->vmd_inacthead, plinks.q); /* * The clock pages are used to implement active queue scanning without * requeues. Scans start at clock[0], which is advanced after the scan * ends. When the two clock hands meet, they are reset and scanning * resumes from the head of the queue. */ vm_page_init_marker(&vmd->vmd_clock[0], PQ_ACTIVE, PGA_ENQUEUED); vm_page_init_marker(&vmd->vmd_clock[1], PQ_ACTIVE, PGA_ENQUEUED); TAILQ_INSERT_HEAD(&vmd->vmd_pagequeues[PQ_ACTIVE].pq_pl, &vmd->vmd_clock[0], plinks.q); TAILQ_INSERT_TAIL(&vmd->vmd_pagequeues[PQ_ACTIVE].pq_pl, &vmd->vmd_clock[1], plinks.q); } /* * Initialize a physical page in preparation for adding it to the free * lists. */ static void vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segind) { m->object = NULL; m->ref_count = 0; m->busy_lock = VPB_UNBUSIED; m->flags = m->a.flags = 0; m->phys_addr = pa; m->a.queue = PQ_NONE; m->psind = 0; m->segind = segind; m->order = VM_NFREEORDER; m->pool = VM_FREEPOOL_DEFAULT; m->valid = m->dirty = 0; pmap_page_init(m); } #ifndef PMAP_HAS_PAGE_ARRAY static vm_paddr_t vm_page_array_alloc(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t page_range) { vm_paddr_t new_end; /* * Reserve an unmapped guard page to trap access to vm_page_array[-1]. * However, because this page is allocated from KVM, out-of-bounds * accesses using the direct map will not be trapped. */ *vaddr += PAGE_SIZE; /* * Allocate physical memory for the page structures, and map it. */ new_end = trunc_page(end - page_range * sizeof(struct vm_page)); vm_page_array = (vm_page_t)pmap_map(vaddr, new_end, end, VM_PROT_READ | VM_PROT_WRITE); vm_page_array_size = page_range; return (new_end); } #endif /* * vm_page_startup: * * Initializes the resident memory module. Allocates physical memory for * bootstrapping UMA and some data structures that are used to manage * physical pages. Initializes these structures, and populates the free * page queues. */ vm_offset_t vm_page_startup(vm_offset_t vaddr) { struct vm_phys_seg *seg; vm_page_t m; char *list, *listend; vm_paddr_t end, high_avail, low_avail, new_end, size; vm_paddr_t page_range __unused; vm_paddr_t last_pa, pa; u_long pagecount; int biggestone, i, segind; #ifdef WITNESS vm_offset_t mapped; int witness_size; #endif #if defined(__i386__) && defined(VM_PHYSSEG_DENSE) long ii; #endif vaddr = round_page(vaddr); vm_phys_early_startup(); biggestone = vm_phys_avail_largest(); end = phys_avail[biggestone+1]; /* * Initialize the page and queue locks. */ mtx_init(&vm_domainset_lock, "vm domainset lock", NULL, MTX_DEF); for (i = 0; i < PA_LOCK_COUNT; i++) mtx_init(&pa_lock[i], "vm page", NULL, MTX_DEF); for (i = 0; i < vm_ndomains; i++) vm_page_domain_init(i); new_end = end; #ifdef WITNESS witness_size = round_page(witness_startup_count()); new_end -= witness_size; mapped = pmap_map(&vaddr, new_end, new_end + witness_size, VM_PROT_READ | VM_PROT_WRITE); bzero((void *)mapped, witness_size); witness_startup((void *)mapped); #endif #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \ defined(__i386__) || defined(__mips__) || defined(__riscv) || \ defined(__powerpc64__) /* * Allocate a bitmap to indicate that a random physical page * needs to be included in a minidump. * * The amd64 port needs this to indicate which direct map pages * need to be dumped, via calls to dump_add_page()/dump_drop_page(). * * However, i386 still needs this workspace internally within the * minidump code. In theory, they are not needed on i386, but are * included should the sf_buf code decide to use them. */ last_pa = 0; for (i = 0; dump_avail[i + 1] != 0; i += 2) if (dump_avail[i + 1] > last_pa) last_pa = dump_avail[i + 1]; page_range = last_pa / PAGE_SIZE; vm_page_dump_size = round_page(roundup2(page_range, NBBY) / NBBY); new_end -= vm_page_dump_size; vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end, new_end + vm_page_dump_size, VM_PROT_READ | VM_PROT_WRITE); bzero((void *)vm_page_dump, vm_page_dump_size); #else (void)last_pa; #endif #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \ defined(__riscv) || defined(__powerpc64__) /* * Include the UMA bootstrap pages, witness pages and vm_page_dump * in a crash dump. When pmap_map() uses the direct map, they are * not automatically included. */ for (pa = new_end; pa < end; pa += PAGE_SIZE) dump_add_page(pa); #endif phys_avail[biggestone + 1] = new_end; #ifdef __amd64__ /* * Request that the physical pages underlying the message buffer be * included in a crash dump. Since the message buffer is accessed * through the direct map, they are not automatically included. */ pa = DMAP_TO_PHYS((vm_offset_t)msgbufp->msg_ptr); last_pa = pa + round_page(msgbufsize); while (pa < last_pa) { dump_add_page(pa); pa += PAGE_SIZE; } #endif /* * Compute the number of pages of memory that will be available for * use, taking into account the overhead of a page structure per page. * In other words, solve * "available physical memory" - round_page(page_range * * sizeof(struct vm_page)) = page_range * PAGE_SIZE * for page_range. */ low_avail = phys_avail[0]; high_avail = phys_avail[1]; for (i = 0; i < vm_phys_nsegs; i++) { if (vm_phys_segs[i].start < low_avail) low_avail = vm_phys_segs[i].start; if (vm_phys_segs[i].end > high_avail) high_avail = vm_phys_segs[i].end; } /* Skip the first chunk. It is already accounted for. */ for (i = 2; phys_avail[i + 1] != 0; i += 2) { if (phys_avail[i] < low_avail) low_avail = phys_avail[i]; if (phys_avail[i + 1] > high_avail) high_avail = phys_avail[i + 1]; } first_page = low_avail / PAGE_SIZE; #ifdef VM_PHYSSEG_SPARSE size = 0; for (i = 0; i < vm_phys_nsegs; i++) size += vm_phys_segs[i].end - vm_phys_segs[i].start; for (i = 0; phys_avail[i + 1] != 0; i += 2) size += phys_avail[i + 1] - phys_avail[i]; #elif defined(VM_PHYSSEG_DENSE) size = high_avail - low_avail; #else #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined." #endif #ifdef PMAP_HAS_PAGE_ARRAY pmap_page_array_startup(size / PAGE_SIZE); biggestone = vm_phys_avail_largest(); end = new_end = phys_avail[biggestone + 1]; #else #ifdef VM_PHYSSEG_DENSE /* * In the VM_PHYSSEG_DENSE case, the number of pages can account for * the overhead of a page structure per page only if vm_page_array is * allocated from the last physical memory chunk. Otherwise, we must * allocate page structures representing the physical memory * underlying vm_page_array, even though they will not be used. */ if (new_end != high_avail) page_range = size / PAGE_SIZE; else #endif { page_range = size / (PAGE_SIZE + sizeof(struct vm_page)); /* * If the partial bytes remaining are large enough for * a page (PAGE_SIZE) without a corresponding * 'struct vm_page', then new_end will contain an * extra page after subtracting the length of the VM * page array. Compensate by subtracting an extra * page from new_end. */ if (size % (PAGE_SIZE + sizeof(struct vm_page)) >= PAGE_SIZE) { if (new_end == high_avail) high_avail -= PAGE_SIZE; new_end -= PAGE_SIZE; } } end = new_end; new_end = vm_page_array_alloc(&vaddr, end, page_range); #endif #if VM_NRESERVLEVEL > 0 /* * Allocate physical memory for the reservation management system's * data structures, and map it. */ new_end = vm_reserv_startup(&vaddr, new_end); #endif #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \ defined(__riscv) || defined(__powerpc64__) /* * Include vm_page_array and vm_reserv_array in a crash dump. */ for (pa = new_end; pa < end; pa += PAGE_SIZE) dump_add_page(pa); #endif phys_avail[biggestone + 1] = new_end; /* * Add physical memory segments corresponding to the available * physical pages. */ for (i = 0; phys_avail[i + 1] != 0; i += 2) if (vm_phys_avail_size(i) != 0) vm_phys_add_seg(phys_avail[i], phys_avail[i + 1]); /* * Initialize the physical memory allocator. */ vm_phys_init(); /* * Initialize the page structures and add every available page to the * physical memory allocator's free lists. */ #if defined(__i386__) && defined(VM_PHYSSEG_DENSE) for (ii = 0; ii < vm_page_array_size; ii++) { m = &vm_page_array[ii]; vm_page_init_page(m, (first_page + ii) << PAGE_SHIFT, 0); m->flags = PG_FICTITIOUS; } #endif vm_cnt.v_page_count = 0; for (segind = 0; segind < vm_phys_nsegs; segind++) { seg = &vm_phys_segs[segind]; for (m = seg->first_page, pa = seg->start; pa < seg->end; m++, pa += PAGE_SIZE) vm_page_init_page(m, pa, segind); /* * Add the segment to the free lists only if it is covered by * one of the ranges in phys_avail. Because we've added the * ranges to the vm_phys_segs array, we can assume that each * segment is either entirely contained in one of the ranges, * or doesn't overlap any of them. */ for (i = 0; phys_avail[i + 1] != 0; i += 2) { struct vm_domain *vmd; if (seg->start < phys_avail[i] || seg->end > phys_avail[i + 1]) continue; m = seg->first_page; pagecount = (u_long)atop(seg->end - seg->start); vmd = VM_DOMAIN(seg->domain); vm_domain_free_lock(vmd); vm_phys_enqueue_contig(m, pagecount); vm_domain_free_unlock(vmd); vm_domain_freecnt_inc(vmd, pagecount); vm_cnt.v_page_count += (u_int)pagecount; vmd = VM_DOMAIN(seg->domain); vmd->vmd_page_count += (u_int)pagecount; vmd->vmd_segs |= 1UL << m->segind; break; } } /* * Remove blacklisted pages from the physical memory allocator. */ TAILQ_INIT(&blacklist_head); vm_page_blacklist_load(&list, &listend); vm_page_blacklist_check(list, listend); list = kern_getenv("vm.blacklist"); vm_page_blacklist_check(list, NULL); freeenv(list); #if VM_NRESERVLEVEL > 0 /* * Initialize the reservation management system. */ vm_reserv_init(); #endif return (vaddr); } void vm_page_reference(vm_page_t m) { vm_page_aflag_set(m, PGA_REFERENCED); } static bool vm_page_acquire_flags(vm_page_t m, int allocflags) { bool locked; if ((allocflags & (VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)) != 0) locked = vm_page_trysbusy(m); else locked = vm_page_tryxbusy(m); if (locked && (allocflags & VM_ALLOC_WIRED) != 0) vm_page_wire(m); return (locked); } /* * vm_page_busy_sleep_flags * * Sleep for busy according to VM_ALLOC_ parameters. */ static bool vm_page_busy_sleep_flags(vm_object_t object, vm_page_t m, const char *wmesg, int allocflags) { if ((allocflags & VM_ALLOC_NOWAIT) != 0) return (false); /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. */ if ((allocflags & VM_ALLOC_NOCREAT) == 0) vm_page_aflag_set(m, PGA_REFERENCED); if (_vm_page_busy_sleep(object, m, wmesg, (allocflags & VM_ALLOC_IGN_SBUSY) != 0, true)) VM_OBJECT_WLOCK(object); if ((allocflags & VM_ALLOC_WAITFAIL) != 0) return (false); return (true); } /* * vm_page_busy_acquire: * * Acquire the busy lock as described by VM_ALLOC_* flags. Will loop * and drop the object lock if necessary. */ bool vm_page_busy_acquire(vm_page_t m, int allocflags) { vm_object_t obj; bool locked; /* * The page-specific object must be cached because page * identity can change during the sleep, causing the * re-lock of a different object. * It is assumed that a reference to the object is already * held by the callers. */ obj = m->object; for (;;) { if (vm_page_acquire_flags(m, allocflags)) return (true); if ((allocflags & VM_ALLOC_NOWAIT) != 0) return (false); if (obj != NULL) locked = VM_OBJECT_WOWNED(obj); else locked = false; MPASS(locked || vm_page_wired(m)); if (_vm_page_busy_sleep(obj, m, "vmpba", (allocflags & VM_ALLOC_SBUSY) != 0, locked)) VM_OBJECT_WLOCK(obj); if ((allocflags & VM_ALLOC_WAITFAIL) != 0) return (false); KASSERT(m->object == obj || m->object == NULL, ("vm_page_busy_acquire: page %p does not belong to %p", m, obj)); } } /* * vm_page_busy_downgrade: * * Downgrade an exclusive busy page into a single shared busy page. */ void vm_page_busy_downgrade(vm_page_t m) { u_int x; vm_page_assert_xbusied(m); x = m->busy_lock; for (;;) { if (atomic_fcmpset_rel_int(&m->busy_lock, &x, VPB_SHARERS_WORD(1))) break; } if ((x & VPB_BIT_WAITERS) != 0) wakeup(m); } /* * * vm_page_busy_tryupgrade: * * Attempt to upgrade a single shared busy into an exclusive busy. */ int vm_page_busy_tryupgrade(vm_page_t m) { u_int ce, x; vm_page_assert_sbusied(m); x = m->busy_lock; ce = VPB_CURTHREAD_EXCLUSIVE; for (;;) { if (VPB_SHARERS(x) > 1) return (0); KASSERT((x & ~VPB_BIT_WAITERS) == VPB_SHARERS_WORD(1), ("vm_page_busy_tryupgrade: invalid lock state")); if (!atomic_fcmpset_acq_int(&m->busy_lock, &x, ce | (x & VPB_BIT_WAITERS))) continue; return (1); } } /* * vm_page_sbusied: * * Return a positive value if the page is shared busied, 0 otherwise. */ int vm_page_sbusied(vm_page_t m) { u_int x; x = m->busy_lock; return ((x & VPB_BIT_SHARED) != 0 && x != VPB_UNBUSIED); } /* * vm_page_sunbusy: * * Shared unbusy a page. */ void vm_page_sunbusy(vm_page_t m) { u_int x; vm_page_assert_sbusied(m); x = m->busy_lock; for (;;) { if (VPB_SHARERS(x) > 1) { if (atomic_fcmpset_int(&m->busy_lock, &x, x - VPB_ONE_SHARER)) break; continue; } KASSERT((x & ~VPB_BIT_WAITERS) == VPB_SHARERS_WORD(1), ("vm_page_sunbusy: invalid lock state")); if (!atomic_fcmpset_rel_int(&m->busy_lock, &x, VPB_UNBUSIED)) continue; if ((x & VPB_BIT_WAITERS) == 0) break; wakeup(m); break; } } /* * vm_page_busy_sleep: * * Sleep if the page is busy, using the page pointer as wchan. * This is used to implement the hard-path of busying mechanism. * * If nonshared is true, sleep only if the page is xbusy. * * The object lock must be held on entry and will be released on exit. */ void vm_page_busy_sleep(vm_page_t m, const char *wmesg, bool nonshared) { vm_object_t obj; obj = m->object; VM_OBJECT_ASSERT_LOCKED(obj); vm_page_lock_assert(m, MA_NOTOWNED); if (!_vm_page_busy_sleep(obj, m, wmesg, nonshared, true)) VM_OBJECT_DROP(obj); } /* * _vm_page_busy_sleep: * * Internal busy sleep function. */ static bool _vm_page_busy_sleep(vm_object_t obj, vm_page_t m, const char *wmesg, bool nonshared, bool locked) { u_int x; /* * If the object is busy we must wait for that to drain to zero * before trying the page again. */ if (obj != NULL && vm_object_busied(obj)) { if (locked) VM_OBJECT_DROP(obj); vm_object_busy_wait(obj, wmesg); return (locked); } sleepq_lock(m); x = m->busy_lock; if (x == VPB_UNBUSIED || (nonshared && (x & VPB_BIT_SHARED) != 0) || ((x & VPB_BIT_WAITERS) == 0 && !atomic_cmpset_int(&m->busy_lock, x, x | VPB_BIT_WAITERS))) { sleepq_release(m); return (false); } if (locked) VM_OBJECT_DROP(obj); DROP_GIANT(); sleepq_add(m, NULL, wmesg, 0, 0); sleepq_wait(m, PVM); PICKUP_GIANT(); return (locked); } /* * vm_page_trysbusy: * * Try to shared busy a page. * If the operation succeeds 1 is returned otherwise 0. * The operation never sleeps. */ int vm_page_trysbusy(vm_page_t m) { vm_object_t obj; u_int x; obj = m->object; x = m->busy_lock; for (;;) { if ((x & VPB_BIT_SHARED) == 0) return (0); /* * Reduce the window for transient busies that will trigger * false negatives in vm_page_ps_test(). */ if (obj != NULL && vm_object_busied(obj)) return (0); if (atomic_fcmpset_acq_int(&m->busy_lock, &x, x + VPB_ONE_SHARER)) break; } /* Refetch the object now that we're guaranteed that it is stable. */ obj = m->object; if (obj != NULL && vm_object_busied(obj)) { vm_page_sunbusy(m); return (0); } return (1); } /* * vm_page_tryxbusy: * * Try to exclusive busy a page. * If the operation succeeds 1 is returned otherwise 0. * The operation never sleeps. */ int vm_page_tryxbusy(vm_page_t m) { vm_object_t obj; if (atomic_cmpset_acq_int(&(m)->busy_lock, VPB_UNBUSIED, VPB_CURTHREAD_EXCLUSIVE) == 0) return (0); obj = m->object; if (obj != NULL && vm_object_busied(obj)) { vm_page_xunbusy(m); return (0); } return (1); } static void vm_page_xunbusy_hard_tail(vm_page_t m) { atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED); /* Wake the waiter. */ wakeup(m); } /* * vm_page_xunbusy_hard: * * Called when unbusy has failed because there is a waiter. */ void vm_page_xunbusy_hard(vm_page_t m) { vm_page_assert_xbusied(m); vm_page_xunbusy_hard_tail(m); } void vm_page_xunbusy_hard_unchecked(vm_page_t m) { vm_page_assert_xbusied_unchecked(m); vm_page_xunbusy_hard_tail(m); } /* * Avoid releasing and reacquiring the same page lock. */ void vm_page_change_lock(vm_page_t m, struct mtx **mtx) { struct mtx *mtx1; mtx1 = vm_page_lockptr(m); if (*mtx == mtx1) return; if (*mtx != NULL) mtx_unlock(*mtx); *mtx = mtx1; mtx_lock(mtx1); } /* * vm_page_unhold_pages: * * Unhold each of the pages that is referenced by the given array. */ void vm_page_unhold_pages(vm_page_t *ma, int count) { for (; count != 0; count--) { vm_page_unwire(*ma, PQ_ACTIVE); ma++; } } vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa) { vm_page_t m; #ifdef VM_PHYSSEG_SPARSE m = vm_phys_paddr_to_vm_page(pa); if (m == NULL) m = vm_phys_fictitious_to_vm_page(pa); return (m); #elif defined(VM_PHYSSEG_DENSE) long pi; pi = atop(pa); if (pi >= first_page && (pi - first_page) < vm_page_array_size) { m = &vm_page_array[pi - first_page]; return (m); } return (vm_phys_fictitious_to_vm_page(pa)); #else #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined." #endif } /* * vm_page_getfake: * * Create a fictitious page with the specified physical address and * memory attribute. The memory attribute is the only the machine- * dependent aspect of a fictitious page that must be initialized. */ vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr) { vm_page_t m; m = uma_zalloc(fakepg_zone, M_WAITOK | M_ZERO); vm_page_initfake(m, paddr, memattr); return (m); } void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr) { if ((m->flags & PG_FICTITIOUS) != 0) { /* * The page's memattr might have changed since the * previous initialization. Update the pmap to the * new memattr. */ goto memattr; } m->phys_addr = paddr; m->a.queue = PQ_NONE; /* Fictitious pages don't use "segind". */ m->flags = PG_FICTITIOUS; /* Fictitious pages don't use "order" or "pool". */ m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_CURTHREAD_EXCLUSIVE; /* Fictitious pages are unevictable. */ m->ref_count = 1; pmap_page_init(m); memattr: pmap_page_set_memattr(m, memattr); } /* * vm_page_putfake: * * Release a fictitious page. */ void vm_page_putfake(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) != 0, ("managed %p", m)); KASSERT((m->flags & PG_FICTITIOUS) != 0, ("vm_page_putfake: bad page %p", m)); vm_page_xunbusy(m); uma_zfree(fakepg_zone, m); } /* * vm_page_updatefake: * * Update the given fictitious page to the specified physical address and * memory attribute. */ void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr) { KASSERT((m->flags & PG_FICTITIOUS) != 0, ("vm_page_updatefake: bad page %p", m)); m->phys_addr = paddr; pmap_page_set_memattr(m, memattr); } /* * vm_page_free: * * Free a page. */ void vm_page_free(vm_page_t m) { m->flags &= ~PG_ZERO; vm_page_free_toq(m); } /* * vm_page_free_zero: * * Free a page to the zerod-pages queue */ void vm_page_free_zero(vm_page_t m) { m->flags |= PG_ZERO; vm_page_free_toq(m); } /* * Unbusy and handle the page queueing for a page from a getpages request that * was optionally read ahead or behind. */ void vm_page_readahead_finish(vm_page_t m) { /* We shouldn't put invalid pages on queues. */ KASSERT(!vm_page_none_valid(m), ("%s: %p is invalid", __func__, m)); /* * Since the page is not the actually needed one, whether it should * be activated or deactivated is not obvious. Empirical results * have shown that deactivating the page is usually the best choice, * unless the page is wanted by another thread. */ if ((m->busy_lock & VPB_BIT_WAITERS) != 0) vm_page_activate(m); else vm_page_deactivate(m); vm_page_xunbusy_unchecked(m); } /* * vm_page_sleep_if_busy: * * Sleep and release the object lock if the page is busied. * Returns TRUE if the thread slept. * * The given page must be unlocked and object containing it must * be locked. */ int vm_page_sleep_if_busy(vm_page_t m, const char *msg) { vm_object_t obj; vm_page_lock_assert(m, MA_NOTOWNED); VM_OBJECT_ASSERT_WLOCKED(m->object); /* * The page-specific object must be cached because page * identity can change during the sleep, causing the * re-lock of a different object. * It is assumed that a reference to the object is already * held by the callers. */ obj = m->object; if (vm_page_busied(m) || (obj != NULL && obj->busy)) { vm_page_busy_sleep(m, msg, false); VM_OBJECT_WLOCK(obj); return (TRUE); } return (FALSE); } /* * vm_page_sleep_if_xbusy: * * Sleep and release the object lock if the page is xbusied. * Returns TRUE if the thread slept. * * The given page must be unlocked and object containing it must * be locked. */ int vm_page_sleep_if_xbusy(vm_page_t m, const char *msg) { vm_object_t obj; vm_page_lock_assert(m, MA_NOTOWNED); VM_OBJECT_ASSERT_WLOCKED(m->object); /* * The page-specific object must be cached because page * identity can change during the sleep, causing the * re-lock of a different object. * It is assumed that a reference to the object is already * held by the callers. */ obj = m->object; if (vm_page_xbusied(m) || (obj != NULL && obj->busy)) { vm_page_busy_sleep(m, msg, true); VM_OBJECT_WLOCK(obj); return (TRUE); } return (FALSE); } /* * vm_page_dirty_KBI: [ internal use only ] * * Set all bits in the page's dirty field. * * The object containing the specified page must be locked if the * call is made from the machine-independent layer. * * See vm_page_clear_dirty_mask(). * * This function should only be called by vm_page_dirty(). */ void vm_page_dirty_KBI(vm_page_t m) { /* Refer to this operation by its public name. */ KASSERT(vm_page_all_valid(m), ("vm_page_dirty: page is invalid!")); m->dirty = VM_PAGE_BITS_ALL; } /* * vm_page_insert: [ internal use only ] * * Inserts the given mem entry into the object and object list. * * The object must be locked. */ int vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) { vm_page_t mpred; VM_OBJECT_ASSERT_WLOCKED(object); mpred = vm_radix_lookup_le(&object->rtree, pindex); return (vm_page_insert_after(m, object, pindex, mpred)); } /* * vm_page_insert_after: * * Inserts the page "m" into the specified object at offset "pindex". * * The page "mpred" must immediately precede the offset "pindex" within * the specified object. * * The object must be locked. */ static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred) { vm_page_t msucc; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(m->object == NULL, ("vm_page_insert_after: page already inserted")); if (mpred != NULL) { KASSERT(mpred->object == object, ("vm_page_insert_after: object doesn't contain mpred")); KASSERT(mpred->pindex < pindex, ("vm_page_insert_after: mpred doesn't precede pindex")); msucc = TAILQ_NEXT(mpred, listq); } else msucc = TAILQ_FIRST(&object->memq); if (msucc != NULL) KASSERT(msucc->pindex > pindex, ("vm_page_insert_after: msucc doesn't succeed pindex")); /* * Record the object/offset pair in this page. */ m->object = object; m->pindex = pindex; m->ref_count |= VPRC_OBJREF; /* * Now link into the object's ordered list of backed pages. */ if (vm_radix_insert(&object->rtree, m)) { m->object = NULL; m->pindex = 0; m->ref_count &= ~VPRC_OBJREF; return (1); } vm_page_insert_radixdone(m, object, mpred); return (0); } /* * vm_page_insert_radixdone: * * Complete page "m" insertion into the specified object after the * radix trie hooking. * * The page "mpred" must precede the offset "m->pindex" within the * specified object. * * The object must be locked. */ static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object != NULL && m->object == object, ("vm_page_insert_radixdone: page %p has inconsistent object", m)); KASSERT((m->ref_count & VPRC_OBJREF) != 0, ("vm_page_insert_radixdone: page %p is missing object ref", m)); if (mpred != NULL) { KASSERT(mpred->object == object, ("vm_page_insert_radixdone: object doesn't contain mpred")); KASSERT(mpred->pindex < m->pindex, ("vm_page_insert_radixdone: mpred doesn't precede pindex")); } if (mpred != NULL) TAILQ_INSERT_AFTER(&object->memq, mpred, m, listq); else TAILQ_INSERT_HEAD(&object->memq, m, listq); /* * Show that the object has one more resident page. */ object->resident_page_count++; /* * Hold the vnode until the last page is released. */ if (object->resident_page_count == 1 && object->type == OBJT_VNODE) vhold(object->handle); /* * Since we are inserting a new and possibly dirty page, * update the object's generation count. */ if (pmap_page_is_write_mapped(m)) vm_object_set_writeable_dirty(object); } /* * Do the work to remove a page from its object. The caller is responsible for * updating the page's fields to reflect this removal. */ static void vm_page_object_remove(vm_page_t m) { vm_object_t object; vm_page_t mrem; vm_page_assert_xbusied(m); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((m->ref_count & VPRC_OBJREF) != 0, ("page %p is missing its object ref", m)); /* Deferred free of swap space. */ if ((m->a.flags & PGA_SWAP_FREE) != 0) vm_pager_page_unswapped(m); mrem = vm_radix_remove(&object->rtree, m->pindex); KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m)); /* * Now remove from the object's list of backed pages. */ TAILQ_REMOVE(&object->memq, m, listq); /* * And show that the object has one fewer resident page. */ object->resident_page_count--; /* * The vnode may now be recycled. */ if (object->resident_page_count == 0 && object->type == OBJT_VNODE) vdrop(object->handle); } /* * vm_page_remove: * * Removes the specified page from its containing object, but does not * invalidate any backing storage. Returns true if the object's reference * was the last reference to the page, and false otherwise. * * The object must be locked and the page must be exclusively busied. * The exclusive busy will be released on return. If this is not the * final ref and the caller does not hold a wire reference it may not * continue to access the page. */ bool vm_page_remove(vm_page_t m) { bool dropped; dropped = vm_page_remove_xbusy(m); vm_page_xunbusy(m); return (dropped); } /* * vm_page_remove_xbusy * * Removes the page but leaves the xbusy held. Returns true if this * removed the final ref and false otherwise. */ bool vm_page_remove_xbusy(vm_page_t m) { vm_page_object_remove(m); m->object = NULL; return (vm_page_drop(m, VPRC_OBJREF) == VPRC_OBJREF); } /* * vm_page_lookup: * * Returns the page associated with the object/offset * pair specified; if none is found, NULL is returned. * * The object must be locked. */ vm_page_t vm_page_lookup(vm_object_t object, vm_pindex_t pindex) { VM_OBJECT_ASSERT_LOCKED(object); return (vm_radix_lookup(&object->rtree, pindex)); } /* * vm_page_find_least: * * Returns the page associated with the object with least pindex * greater than or equal to the parameter pindex, or NULL. * * The object must be locked. */ vm_page_t vm_page_find_least(vm_object_t object, vm_pindex_t pindex) { vm_page_t m; VM_OBJECT_ASSERT_LOCKED(object); if ((m = TAILQ_FIRST(&object->memq)) != NULL && m->pindex < pindex) m = vm_radix_lookup_ge(&object->rtree, pindex); return (m); } /* * Returns the given page's successor (by pindex) within the object if it is * resident; if none is found, NULL is returned. * * The object must be locked. */ vm_page_t vm_page_next(vm_page_t m) { vm_page_t next; VM_OBJECT_ASSERT_LOCKED(m->object); if ((next = TAILQ_NEXT(m, listq)) != NULL) { MPASS(next->object == m->object); if (next->pindex != m->pindex + 1) next = NULL; } return (next); } /* * Returns the given page's predecessor (by pindex) within the object if it is * resident; if none is found, NULL is returned. * * The object must be locked. */ vm_page_t vm_page_prev(vm_page_t m) { vm_page_t prev; VM_OBJECT_ASSERT_LOCKED(m->object); if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL) { MPASS(prev->object == m->object); if (prev->pindex != m->pindex - 1) prev = NULL; } return (prev); } /* * Uses the page mnew as a replacement for an existing page at index * pindex which must be already present in the object. * * Both pages must be exclusively busied on enter. The old page is * unbusied on exit. * * A return value of true means mold is now free. If this is not the * final ref and the caller does not hold a wire reference it may not * continue to access the page. */ static bool vm_page_replace_hold(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex, vm_page_t mold) { vm_page_t mret; bool dropped; VM_OBJECT_ASSERT_WLOCKED(object); vm_page_assert_xbusied(mold); KASSERT(mnew->object == NULL && (mnew->ref_count & VPRC_OBJREF) == 0, ("vm_page_replace: page %p already in object", mnew)); /* * This function mostly follows vm_page_insert() and * vm_page_remove() without the radix, object count and vnode * dance. Double check such functions for more comments. */ mnew->object = object; mnew->pindex = pindex; atomic_set_int(&mnew->ref_count, VPRC_OBJREF); mret = vm_radix_replace(&object->rtree, mnew); KASSERT(mret == mold, ("invalid page replacement, mold=%p, mret=%p", mold, mret)); KASSERT((mold->oflags & VPO_UNMANAGED) == (mnew->oflags & VPO_UNMANAGED), ("vm_page_replace: mismatched VPO_UNMANAGED")); /* Keep the resident page list in sorted order. */ TAILQ_INSERT_AFTER(&object->memq, mold, mnew, listq); TAILQ_REMOVE(&object->memq, mold, listq); mold->object = NULL; /* * The object's resident_page_count does not change because we have * swapped one page for another, but the generation count should * change if the page is dirty. */ if (pmap_page_is_write_mapped(mnew)) vm_object_set_writeable_dirty(object); dropped = vm_page_drop(mold, VPRC_OBJREF) == VPRC_OBJREF; vm_page_xunbusy(mold); return (dropped); } void vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex, vm_page_t mold) { vm_page_assert_xbusied(mnew); if (vm_page_replace_hold(mnew, object, pindex, mold)) vm_page_free(mold); } /* * vm_page_rename: * * Move the given memory entry from its * current object to the specified target object/offset. * * Note: swap associated with the page must be invalidated by the move. We * have to do this for several reasons: (1) we aren't freeing the * page, (2) we are dirtying the page, (3) the VM system is probably * moving the page from object A to B, and will then later move * the backing store from A to B and we can't have a conflict. * * Note: we *always* dirty the page. It is necessary both for the * fact that we moved it, and because we may be invalidating * swap. * * The objects must be locked. */ int vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) { vm_page_t mpred; vm_pindex_t opidx; VM_OBJECT_ASSERT_WLOCKED(new_object); KASSERT(m->ref_count != 0, ("vm_page_rename: page %p has no refs", m)); mpred = vm_radix_lookup_le(&new_object->rtree, new_pindex); KASSERT(mpred == NULL || mpred->pindex != new_pindex, ("vm_page_rename: pindex already renamed")); /* * Create a custom version of vm_page_insert() which does not depend * by m_prev and can cheat on the implementation aspects of the * function. */ opidx = m->pindex; m->pindex = new_pindex; if (vm_radix_insert(&new_object->rtree, m)) { m->pindex = opidx; return (1); } /* * The operation cannot fail anymore. The removal must happen before * the listq iterator is tainted. */ m->pindex = opidx; vm_page_object_remove(m); /* Return back to the new pindex to complete vm_page_insert(). */ m->pindex = new_pindex; m->object = new_object; vm_page_insert_radixdone(m, new_object, mpred); vm_page_dirty(m); return (0); } /* * vm_page_alloc: * * Allocate and return a page that is associated with the specified * object and offset pair. By default, this page is exclusive busied. * * The caller must always specify an allocation class. * * allocation classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs a page * VM_ALLOC_INTERRUPT interrupt time request * * optional allocation flags: * VM_ALLOC_COUNT(number) the number of additional pages that the caller * intends to allocate * VM_ALLOC_NOBUSY do not exclusive busy the page * VM_ALLOC_NODUMP do not include the page in a kernel core dump * VM_ALLOC_NOOBJ page is not associated with an object and * should not be exclusive busy * VM_ALLOC_SBUSY shared busy the allocated page * VM_ALLOC_WIRED wire the allocated page * VM_ALLOC_ZERO prefer a zeroed page */ vm_page_t vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) { return (vm_page_alloc_after(object, pindex, req, object != NULL ? vm_radix_lookup_le(&object->rtree, pindex) : NULL)); } vm_page_t vm_page_alloc_domain(vm_object_t object, vm_pindex_t pindex, int domain, int req) { return (vm_page_alloc_domain_after(object, pindex, domain, req, object != NULL ? vm_radix_lookup_le(&object->rtree, pindex) : NULL)); } /* * Allocate a page in the specified object with the given page index. To * optimize insertion of the page into the object, the caller must also specifiy * the resident page in the object with largest index smaller than the given * page index, or NULL if no such page exists. */ vm_page_t vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex, int req, vm_page_t mpred) { struct vm_domainset_iter di; vm_page_t m; int domain; vm_domainset_iter_page_init(&di, object, pindex, &domain, &req); do { m = vm_page_alloc_domain_after(object, pindex, domain, req, mpred); if (m != NULL) break; } while (vm_domainset_iter_page(&di, object, &domain) == 0); return (m); } /* * Returns true if the number of free pages exceeds the minimum * for the request class and false otherwise. */ static int _vm_domain_allocate(struct vm_domain *vmd, int req_class, int npages) { u_int limit, old, new; if (req_class == VM_ALLOC_INTERRUPT) limit = 0; else if (req_class == VM_ALLOC_SYSTEM) limit = vmd->vmd_interrupt_free_min; else limit = vmd->vmd_free_reserved; /* * Attempt to reserve the pages. Fail if we're below the limit. */ limit += npages; old = vmd->vmd_free_count; do { if (old < limit) return (0); new = old - npages; } while (atomic_fcmpset_int(&vmd->vmd_free_count, &old, new) == 0); /* Wake the page daemon if we've crossed the threshold. */ if (vm_paging_needed(vmd, new) && !vm_paging_needed(vmd, old)) pagedaemon_wakeup(vmd->vmd_domain); /* Only update bitsets on transitions. */ if ((old >= vmd->vmd_free_min && new < vmd->vmd_free_min) || (old >= vmd->vmd_free_severe && new < vmd->vmd_free_severe)) vm_domain_set(vmd); return (1); } int vm_domain_allocate(struct vm_domain *vmd, int req, int npages) { int req_class; /* * The page daemon is allowed to dig deeper into the free page list. */ req_class = req & VM_ALLOC_CLASS_MASK; if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) req_class = VM_ALLOC_SYSTEM; return (_vm_domain_allocate(vmd, req_class, npages)); } vm_page_t vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain, int req, vm_page_t mpred) { struct vm_domain *vmd; vm_page_t m; int flags, pool; KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && (object != NULL || (req & VM_ALLOC_SBUSY) == 0) && ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) != (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)), ("inconsistent object(%p)/req(%x)", object, req)); KASSERT(object == NULL || (req & VM_ALLOC_WAITOK) == 0, ("Can't sleep and retry object insertion.")); KASSERT(mpred == NULL || mpred->pindex < pindex, ("mpred %p doesn't precede pindex 0x%jx", mpred, (uintmax_t)pindex)); if (object != NULL) VM_OBJECT_ASSERT_WLOCKED(object); flags = 0; m = NULL; pool = object != NULL ? VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT; again: #if VM_NRESERVLEVEL > 0 /* * Can we allocate the page from a reservation? */ if (vm_object_reserv(object) && (m = vm_reserv_alloc_page(object, pindex, domain, req, mpred)) != NULL) { domain = vm_phys_domain(m); vmd = VM_DOMAIN(domain); goto found; } #endif vmd = VM_DOMAIN(domain); if (vmd->vmd_pgcache[pool].zone != NULL) { m = uma_zalloc(vmd->vmd_pgcache[pool].zone, M_NOWAIT); if (m != NULL) { flags |= PG_PCPU_CACHE; goto found; } } if (vm_domain_allocate(vmd, req, 1)) { /* * If not, allocate it from the free page queues. */ vm_domain_free_lock(vmd); m = vm_phys_alloc_pages(domain, pool, 0); vm_domain_free_unlock(vmd); if (m == NULL) { vm_domain_freecnt_inc(vmd, 1); #if VM_NRESERVLEVEL > 0 if (vm_reserv_reclaim_inactive(domain)) goto again; #endif } } if (m == NULL) { /* * Not allocatable, give up. */ if (vm_domain_alloc_fail(vmd, object, req)) goto again; return (NULL); } /* * At this point we had better have found a good page. */ found: vm_page_dequeue(m); vm_page_alloc_check(m); /* * Initialize the page. Only the PG_ZERO flag is inherited. */ if ((req & VM_ALLOC_ZERO) != 0) flags |= (m->flags & PG_ZERO); if ((req & VM_ALLOC_NODUMP) != 0) flags |= PG_NODUMP; m->flags = flags; m->a.flags = 0; m->oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ? VPO_UNMANAGED : 0; m->busy_lock = VPB_UNBUSIED; if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0) m->busy_lock = VPB_CURTHREAD_EXCLUSIVE; if ((req & VM_ALLOC_SBUSY) != 0) m->busy_lock = VPB_SHARERS_WORD(1); if (req & VM_ALLOC_WIRED) { vm_wire_add(1); m->ref_count = 1; } m->a.act_count = 0; if (object != NULL) { if (vm_page_insert_after(m, object, pindex, mpred)) { if (req & VM_ALLOC_WIRED) { vm_wire_sub(1); m->ref_count = 0; } KASSERT(m->object == NULL, ("page %p has object", m)); m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_UNBUSIED; /* Don't change PG_ZERO. */ vm_page_free_toq(m); if (req & VM_ALLOC_WAITFAIL) { VM_OBJECT_WUNLOCK(object); vm_radix_wait(); VM_OBJECT_WLOCK(object); } return (NULL); } /* Ignore device objects; the pager sets "memattr" for them. */ if (object->memattr != VM_MEMATTR_DEFAULT && (object->flags & OBJ_FICTITIOUS) == 0) pmap_page_set_memattr(m, object->memattr); } else m->pindex = pindex; return (m); } /* * vm_page_alloc_contig: * * Allocate a contiguous set of physical pages of the given size "npages" * from the free lists. All of the physical pages must be at or above * the given physical address "low" and below the given physical address * "high". The given value "alignment" determines the alignment of the * first physical page in the set. If the given value "boundary" is * non-zero, then the set of physical pages cannot cross any physical * address boundary that is a multiple of that value. Both "alignment" * and "boundary" must be a power of two. * * If the specified memory attribute, "memattr", is VM_MEMATTR_DEFAULT, * then the memory attribute setting for the physical pages is configured * to the object's memory attribute setting. Otherwise, the memory * attribute setting for the physical pages is configured to "memattr", * overriding the object's memory attribute setting. However, if the * object's memory attribute setting is not VM_MEMATTR_DEFAULT, then the * memory attribute setting for the physical pages cannot be configured * to VM_MEMATTR_DEFAULT. * * The specified object may not contain fictitious pages. * * The caller must always specify an allocation class. * * allocation classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs a page * VM_ALLOC_INTERRUPT interrupt time request * * optional allocation flags: * VM_ALLOC_NOBUSY do not exclusive busy the page * VM_ALLOC_NODUMP do not include the page in a kernel core dump * VM_ALLOC_NOOBJ page is not associated with an object and * should not be exclusive busy * VM_ALLOC_SBUSY shared busy the allocated page * VM_ALLOC_WIRED wire the allocated page * VM_ALLOC_ZERO prefer a zeroed page */ vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr) { struct vm_domainset_iter di; vm_page_t m; int domain; vm_domainset_iter_page_init(&di, object, pindex, &domain, &req); do { m = vm_page_alloc_contig_domain(object, pindex, domain, req, npages, low, high, alignment, boundary, memattr); if (m != NULL) break; } while (vm_domainset_iter_page(&di, object, &domain) == 0); return (m); } vm_page_t vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain, int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr) { struct vm_domain *vmd; vm_page_t m, m_ret, mpred; u_int busy_lock, flags, oflags; mpred = NULL; /* XXX: pacify gcc */ KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && (object != NULL || (req & VM_ALLOC_SBUSY) == 0) && ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) != (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)), ("vm_page_alloc_contig: inconsistent object(%p)/req(%x)", object, req)); KASSERT(object == NULL || (req & VM_ALLOC_WAITOK) == 0, ("Can't sleep and retry object insertion.")); if (object != NULL) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_FICTITIOUS) == 0, ("vm_page_alloc_contig: object %p has fictitious pages", object)); } KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero")); if (object != NULL) { mpred = vm_radix_lookup_le(&object->rtree, pindex); KASSERT(mpred == NULL || mpred->pindex != pindex, ("vm_page_alloc_contig: pindex already allocated")); } /* * Can we allocate the pages without the number of free pages falling * below the lower bound for the allocation class? */ m_ret = NULL; again: #if VM_NRESERVLEVEL > 0 /* * Can we allocate the pages from a reservation? */ if (vm_object_reserv(object) && (m_ret = vm_reserv_alloc_contig(object, pindex, domain, req, mpred, npages, low, high, alignment, boundary)) != NULL) { domain = vm_phys_domain(m_ret); vmd = VM_DOMAIN(domain); goto found; } #endif vmd = VM_DOMAIN(domain); if (vm_domain_allocate(vmd, req, npages)) { /* * allocate them from the free page queues. */ vm_domain_free_lock(vmd); m_ret = vm_phys_alloc_contig(domain, npages, low, high, alignment, boundary); vm_domain_free_unlock(vmd); if (m_ret == NULL) { vm_domain_freecnt_inc(vmd, npages); #if VM_NRESERVLEVEL > 0 if (vm_reserv_reclaim_contig(domain, npages, low, high, alignment, boundary)) goto again; #endif } } if (m_ret == NULL) { if (vm_domain_alloc_fail(vmd, object, req)) goto again; return (NULL); } #if VM_NRESERVLEVEL > 0 found: #endif for (m = m_ret; m < &m_ret[npages]; m++) { vm_page_dequeue(m); vm_page_alloc_check(m); } /* * Initialize the pages. Only the PG_ZERO flag is inherited. */ flags = 0; if ((req & VM_ALLOC_ZERO) != 0) flags = PG_ZERO; if ((req & VM_ALLOC_NODUMP) != 0) flags |= PG_NODUMP; oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ? VPO_UNMANAGED : 0; busy_lock = VPB_UNBUSIED; if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0) busy_lock = VPB_CURTHREAD_EXCLUSIVE; if ((req & VM_ALLOC_SBUSY) != 0) busy_lock = VPB_SHARERS_WORD(1); if ((req & VM_ALLOC_WIRED) != 0) vm_wire_add(npages); if (object != NULL) { if (object->memattr != VM_MEMATTR_DEFAULT && memattr == VM_MEMATTR_DEFAULT) memattr = object->memattr; } for (m = m_ret; m < &m_ret[npages]; m++) { m->a.flags = 0; m->flags = (m->flags | PG_NODUMP) & flags; m->busy_lock = busy_lock; if ((req & VM_ALLOC_WIRED) != 0) m->ref_count = 1; m->a.act_count = 0; m->oflags = oflags; if (object != NULL) { if (vm_page_insert_after(m, object, pindex, mpred)) { if ((req & VM_ALLOC_WIRED) != 0) vm_wire_sub(npages); KASSERT(m->object == NULL, ("page %p has object", m)); mpred = m; for (m = m_ret; m < &m_ret[npages]; m++) { if (m <= mpred && (req & VM_ALLOC_WIRED) != 0) m->ref_count = 0; m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_UNBUSIED; /* Don't change PG_ZERO. */ vm_page_free_toq(m); } if (req & VM_ALLOC_WAITFAIL) { VM_OBJECT_WUNLOCK(object); vm_radix_wait(); VM_OBJECT_WLOCK(object); } return (NULL); } mpred = m; } else m->pindex = pindex; if (memattr != VM_MEMATTR_DEFAULT) pmap_page_set_memattr(m, memattr); pindex++; } return (m_ret); } /* * Check a page that has been freshly dequeued from a freelist. */ static void vm_page_alloc_check(vm_page_t m) { KASSERT(m->object == NULL, ("page %p has object", m)); KASSERT(m->a.queue == PQ_NONE && (m->a.flags & PGA_QUEUE_STATE_MASK) == 0, ("page %p has unexpected queue %d, flags %#x", m, m->a.queue, (m->a.flags & PGA_QUEUE_STATE_MASK))); KASSERT(m->ref_count == 0, ("page %p has references", m)); KASSERT(!vm_page_busied(m), ("page %p is busy", m)); KASSERT(m->dirty == 0, ("page %p is dirty", m)); KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("page %p has unexpected memattr %d", m, pmap_page_get_memattr(m))); KASSERT(m->valid == 0, ("free page %p is valid", m)); } /* * vm_page_alloc_freelist: * * Allocate a physical page from the specified free page list. * * The caller must always specify an allocation class. * * allocation classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs a page * VM_ALLOC_INTERRUPT interrupt time request * * optional allocation flags: * VM_ALLOC_COUNT(number) the number of additional pages that the caller * intends to allocate * VM_ALLOC_WIRED wire the allocated page * VM_ALLOC_ZERO prefer a zeroed page */ vm_page_t vm_page_alloc_freelist(int freelist, int req) { struct vm_domainset_iter di; vm_page_t m; int domain; vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req); do { m = vm_page_alloc_freelist_domain(domain, freelist, req); if (m != NULL) break; } while (vm_domainset_iter_page(&di, NULL, &domain) == 0); return (m); } vm_page_t vm_page_alloc_freelist_domain(int domain, int freelist, int req) { struct vm_domain *vmd; vm_page_t m; u_int flags; m = NULL; vmd = VM_DOMAIN(domain); again: if (vm_domain_allocate(vmd, req, 1)) { vm_domain_free_lock(vmd); m = vm_phys_alloc_freelist_pages(domain, freelist, VM_FREEPOOL_DIRECT, 0); vm_domain_free_unlock(vmd); if (m == NULL) vm_domain_freecnt_inc(vmd, 1); } if (m == NULL) { if (vm_domain_alloc_fail(vmd, NULL, req)) goto again; return (NULL); } vm_page_dequeue(m); vm_page_alloc_check(m); /* * Initialize the page. Only the PG_ZERO flag is inherited. */ m->a.flags = 0; flags = 0; if ((req & VM_ALLOC_ZERO) != 0) flags = PG_ZERO; m->flags &= flags; if ((req & VM_ALLOC_WIRED) != 0) { vm_wire_add(1); m->ref_count = 1; } /* Unmanaged pages don't use "act_count". */ m->oflags = VPO_UNMANAGED; return (m); } static int vm_page_zone_import(void *arg, void **store, int cnt, int domain, int flags) { struct vm_domain *vmd; struct vm_pgcache *pgcache; int i; pgcache = arg; vmd = VM_DOMAIN(pgcache->domain); /* * The page daemon should avoid creating extra memory pressure since its * main purpose is to replenish the store of free pages. */ if (vmd->vmd_severeset || curproc == pageproc || !_vm_domain_allocate(vmd, VM_ALLOC_NORMAL, cnt)) return (0); domain = vmd->vmd_domain; vm_domain_free_lock(vmd); i = vm_phys_alloc_npages(domain, pgcache->pool, cnt, (vm_page_t *)store); vm_domain_free_unlock(vmd); if (cnt != i) vm_domain_freecnt_inc(vmd, cnt - i); return (i); } static void vm_page_zone_release(void *arg, void **store, int cnt) { struct vm_domain *vmd; struct vm_pgcache *pgcache; vm_page_t m; int i; pgcache = arg; vmd = VM_DOMAIN(pgcache->domain); vm_domain_free_lock(vmd); for (i = 0; i < cnt; i++) { m = (vm_page_t)store[i]; vm_phys_free_pages(m, 0); } vm_domain_free_unlock(vmd); vm_domain_freecnt_inc(vmd, cnt); } #define VPSC_ANY 0 /* No restrictions. */ #define VPSC_NORESERV 1 /* Skip reservations; implies VPSC_NOSUPER. */ #define VPSC_NOSUPER 2 /* Skip superpages. */ /* * vm_page_scan_contig: * * Scan vm_page_array[] between the specified entries "m_start" and * "m_end" for a run of contiguous physical pages that satisfy the * specified conditions, and return the lowest page in the run. The * specified "alignment" determines the alignment of the lowest physical * page in the run. If the specified "boundary" is non-zero, then the * run of physical pages cannot span a physical address that is a * multiple of "boundary". * * "m_end" is never dereferenced, so it need not point to a vm_page * structure within vm_page_array[]. * * "npages" must be greater than zero. "m_start" and "m_end" must not * span a hole (or discontiguity) in the physical address space. Both * "alignment" and "boundary" must be a power of two. */ vm_page_t vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end, u_long alignment, vm_paddr_t boundary, int options) { struct mtx *m_mtx; vm_object_t object; vm_paddr_t pa; vm_page_t m, m_run; #if VM_NRESERVLEVEL > 0 int level; #endif int m_inc, order, run_ext, run_len; KASSERT(npages > 0, ("npages is 0")); KASSERT(powerof2(alignment), ("alignment is not a power of 2")); KASSERT(powerof2(boundary), ("boundary is not a power of 2")); m_run = NULL; run_len = 0; m_mtx = NULL; for (m = m_start; m < m_end && run_len < npages; m += m_inc) { KASSERT((m->flags & PG_MARKER) == 0, ("page %p is PG_MARKER", m)); KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->ref_count >= 1, ("fictitious page %p has invalid ref count", m)); /* * If the current page would be the start of a run, check its * physical address against the end, alignment, and boundary * conditions. If it doesn't satisfy these conditions, either * terminate the scan or advance to the next page that * satisfies the failed condition. */ if (run_len == 0) { KASSERT(m_run == NULL, ("m_run != NULL")); if (m + npages > m_end) break; pa = VM_PAGE_TO_PHYS(m); if ((pa & (alignment - 1)) != 0) { m_inc = atop(roundup2(pa, alignment) - pa); continue; } if (rounddown2(pa ^ (pa + ptoa(npages) - 1), boundary) != 0) { m_inc = atop(roundup2(pa, boundary) - pa); continue; } } else KASSERT(m_run != NULL, ("m_run == NULL")); vm_page_change_lock(m, &m_mtx); m_inc = 1; retry: if (vm_page_wired(m)) run_ext = 0; #if VM_NRESERVLEVEL > 0 else if ((level = vm_reserv_level(m)) >= 0 && (options & VPSC_NORESERV) != 0) { run_ext = 0; /* Advance to the end of the reservation. */ pa = VM_PAGE_TO_PHYS(m); m_inc = atop(roundup2(pa + 1, vm_reserv_size(level)) - pa); } #endif else if ((object = m->object) != NULL) { /* * The page is considered eligible for relocation if * and only if it could be laundered or reclaimed by * the page daemon. */ if (!VM_OBJECT_TRYRLOCK(object)) { mtx_unlock(m_mtx); VM_OBJECT_RLOCK(object); mtx_lock(m_mtx); if (m->object != object) { /* * The page may have been freed. */ VM_OBJECT_RUNLOCK(object); goto retry; } } /* Don't care: PG_NODUMP, PG_ZERO. */ if (object->type != OBJT_DEFAULT && object->type != OBJT_SWAP && object->type != OBJT_VNODE) { run_ext = 0; #if VM_NRESERVLEVEL > 0 } else if ((options & VPSC_NOSUPER) != 0 && (level = vm_reserv_level_iffullpop(m)) >= 0) { run_ext = 0; /* Advance to the end of the superpage. */ pa = VM_PAGE_TO_PHYS(m); m_inc = atop(roundup2(pa + 1, vm_reserv_size(level)) - pa); #endif } else if (object->memattr == VM_MEMATTR_DEFAULT && vm_page_queue(m) != PQ_NONE && !vm_page_busied(m) && !vm_page_wired(m)) { /* * The page is allocated but eligible for * relocation. Extend the current run by one * page. */ KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("page %p has an unexpected memattr", m)); KASSERT((m->oflags & (VPO_SWAPINPROG | VPO_SWAPSLEEP | VPO_UNMANAGED)) == 0, ("page %p has unexpected oflags", m)); /* Don't care: PGA_NOSYNC. */ run_ext = 1; } else run_ext = 0; VM_OBJECT_RUNLOCK(object); #if VM_NRESERVLEVEL > 0 } else if (level >= 0) { /* * The page is reserved but not yet allocated. In * other words, it is still free. Extend the current * run by one page. */ run_ext = 1; #endif } else if ((order = m->order) < VM_NFREEORDER) { /* * The page is enqueued in the physical memory * allocator's free page queues. Moreover, it is the * first page in a power-of-two-sized run of * contiguous free pages. Add these pages to the end * of the current run, and jump ahead. */ run_ext = 1 << order; m_inc = 1 << order; } else { /* * Skip the page for one of the following reasons: (1) * It is enqueued in the physical memory allocator's * free page queues. However, it is not the first * page in a run of contiguous free pages. (This case * rarely occurs because the scan is performed in * ascending order.) (2) It is not reserved, and it is * transitioning from free to allocated. (Conversely, * the transition from allocated to free for managed * pages is blocked by the page lock.) (3) It is * allocated but not contained by an object and not * wired, e.g., allocated by Xen's balloon driver. */ run_ext = 0; } /* * Extend or reset the current run of pages. */ if (run_ext > 0) { if (run_len == 0) m_run = m; run_len += run_ext; } else { if (run_len > 0) { m_run = NULL; run_len = 0; } } } if (m_mtx != NULL) mtx_unlock(m_mtx); if (run_len >= npages) return (m_run); return (NULL); } /* * vm_page_reclaim_run: * * Try to relocate each of the allocated virtual pages within the * specified run of physical pages to a new physical address. Free the * physical pages underlying the relocated virtual pages. A virtual page * is relocatable if and only if it could be laundered or reclaimed by * the page daemon. Whenever possible, a virtual page is relocated to a * physical address above "high". * * Returns 0 if every physical page within the run was already free or * just freed by a successful relocation. Otherwise, returns a non-zero * value indicating why the last attempt to relocate a virtual page was * unsuccessful. * * "req_class" must be an allocation class. */ static int vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run, vm_paddr_t high) { struct vm_domain *vmd; struct mtx *m_mtx; struct spglist free; vm_object_t object; vm_paddr_t pa; vm_page_t m, m_end, m_new; int error, order, req; KASSERT((req_class & VM_ALLOC_CLASS_MASK) == req_class, ("req_class is not an allocation class")); SLIST_INIT(&free); error = 0; m = m_run; m_end = m_run + npages; m_mtx = NULL; for (; error == 0 && m < m_end; m++) { KASSERT((m->flags & (PG_FICTITIOUS | PG_MARKER)) == 0, ("page %p is PG_FICTITIOUS or PG_MARKER", m)); /* * Avoid releasing and reacquiring the same page lock. */ vm_page_change_lock(m, &m_mtx); retry: /* * Racily check for wirings. Races are handled below. */ if (vm_page_wired(m)) error = EBUSY; else if ((object = m->object) != NULL) { /* * The page is relocated if and only if it could be * laundered or reclaimed by the page daemon. */ if (!VM_OBJECT_TRYWLOCK(object)) { mtx_unlock(m_mtx); VM_OBJECT_WLOCK(object); mtx_lock(m_mtx); if (m->object != object) { /* * The page may have been freed. */ VM_OBJECT_WUNLOCK(object); goto retry; } } /* Don't care: PG_NODUMP, PG_ZERO. */ if (object->type != OBJT_DEFAULT && object->type != OBJT_SWAP && object->type != OBJT_VNODE) error = EINVAL; else if (object->memattr != VM_MEMATTR_DEFAULT) error = EINVAL; else if (vm_page_queue(m) != PQ_NONE && vm_page_tryxbusy(m) != 0) { if (vm_page_wired(m)) { vm_page_xunbusy(m); error = EBUSY; goto unlock; } KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("page %p has an unexpected memattr", m)); KASSERT(m->oflags == 0, ("page %p has unexpected oflags", m)); /* Don't care: PGA_NOSYNC. */ if (!vm_page_none_valid(m)) { /* * First, try to allocate a new page * that is above "high". Failing * that, try to allocate a new page * that is below "m_run". Allocate * the new page between the end of * "m_run" and "high" only as a last * resort. */ req = req_class | VM_ALLOC_NOOBJ; if ((m->flags & PG_NODUMP) != 0) req |= VM_ALLOC_NODUMP; if (trunc_page(high) != ~(vm_paddr_t)PAGE_MASK) { m_new = vm_page_alloc_contig( NULL, 0, req, 1, round_page(high), ~(vm_paddr_t)0, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); } else m_new = NULL; if (m_new == NULL) { pa = VM_PAGE_TO_PHYS(m_run); m_new = vm_page_alloc_contig( NULL, 0, req, 1, 0, pa - 1, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); } if (m_new == NULL) { pa += ptoa(npages); m_new = vm_page_alloc_contig( NULL, 0, req, 1, pa, high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); } if (m_new == NULL) { vm_page_xunbusy(m); error = ENOMEM; goto unlock; } /* * Unmap the page and check for new * wirings that may have been acquired * through a pmap lookup. */ if (object->ref_count != 0 && !vm_page_try_remove_all(m)) { vm_page_xunbusy(m); vm_page_free(m_new); error = EBUSY; goto unlock; } /* * Replace "m" with the new page. For * vm_page_replace(), "m" must be busy * and dequeued. Finally, change "m" * as if vm_page_free() was called. */ m_new->a.flags = m->a.flags & ~PGA_QUEUE_STATE_MASK; KASSERT(m_new->oflags == VPO_UNMANAGED, ("page %p is managed", m_new)); m_new->oflags = 0; pmap_copy_page(m, m_new); m_new->valid = m->valid; m_new->dirty = m->dirty; m->flags &= ~PG_ZERO; vm_page_dequeue(m); if (vm_page_replace_hold(m_new, object, m->pindex, m) && vm_page_free_prep(m)) SLIST_INSERT_HEAD(&free, m, plinks.s.ss); /* * The new page must be deactivated * before the object is unlocked. */ vm_page_change_lock(m_new, &m_mtx); vm_page_deactivate(m_new); } else { m->flags &= ~PG_ZERO; vm_page_dequeue(m); if (vm_page_free_prep(m)) SLIST_INSERT_HEAD(&free, m, plinks.s.ss); KASSERT(m->dirty == 0, ("page %p is dirty", m)); } } else error = EBUSY; unlock: VM_OBJECT_WUNLOCK(object); } else { MPASS(vm_phys_domain(m) == domain); vmd = VM_DOMAIN(domain); vm_domain_free_lock(vmd); order = m->order; if (order < VM_NFREEORDER) { /* * The page is enqueued in the physical memory * allocator's free page queues. Moreover, it * is the first page in a power-of-two-sized * run of contiguous free pages. Jump ahead * to the last page within that run, and * continue from there. */ m += (1 << order) - 1; } #if VM_NRESERVLEVEL > 0 else if (vm_reserv_is_page_free(m)) order = 0; #endif vm_domain_free_unlock(vmd); if (order == VM_NFREEORDER) error = EINVAL; } } if (m_mtx != NULL) mtx_unlock(m_mtx); if ((m = SLIST_FIRST(&free)) != NULL) { int cnt; vmd = VM_DOMAIN(domain); cnt = 0; vm_domain_free_lock(vmd); do { MPASS(vm_phys_domain(m) == domain); SLIST_REMOVE_HEAD(&free, plinks.s.ss); vm_phys_free_pages(m, 0); cnt++; } while ((m = SLIST_FIRST(&free)) != NULL); vm_domain_free_unlock(vmd); vm_domain_freecnt_inc(vmd, cnt); } return (error); } #define NRUNS 16 CTASSERT(powerof2(NRUNS)); #define RUN_INDEX(count) ((count) & (NRUNS - 1)) #define MIN_RECLAIM 8 /* * vm_page_reclaim_contig: * * Reclaim allocated, contiguous physical memory satisfying the specified * conditions by relocating the virtual pages using that physical memory. * Returns true if reclamation is successful and false otherwise. Since * relocation requires the allocation of physical pages, reclamation may * fail due to a shortage of free pages. When reclamation fails, callers * are expected to perform vm_wait() before retrying a failed allocation * operation, e.g., vm_page_alloc_contig(). * * The caller must always specify an allocation class through "req". * * allocation classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs a page * VM_ALLOC_INTERRUPT interrupt time request * * The optional allocation flags are ignored. * * "npages" must be greater than zero. Both "alignment" and "boundary" * must be a power of two. */ bool vm_page_reclaim_contig_domain(int domain, int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary) { struct vm_domain *vmd; vm_paddr_t curr_low; vm_page_t m_run, m_runs[NRUNS]; u_long count, reclaimed; int error, i, options, req_class; KASSERT(npages > 0, ("npages is 0")); KASSERT(powerof2(alignment), ("alignment is not a power of 2")); KASSERT(powerof2(boundary), ("boundary is not a power of 2")); req_class = req & VM_ALLOC_CLASS_MASK; /* * The page daemon is allowed to dig deeper into the free page list. */ if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) req_class = VM_ALLOC_SYSTEM; /* * Return if the number of free pages cannot satisfy the requested * allocation. */ vmd = VM_DOMAIN(domain); count = vmd->vmd_free_count; if (count < npages + vmd->vmd_free_reserved || (count < npages + vmd->vmd_interrupt_free_min && req_class == VM_ALLOC_SYSTEM) || (count < npages && req_class == VM_ALLOC_INTERRUPT)) return (false); /* * Scan up to three times, relaxing the restrictions ("options") on * the reclamation of reservations and superpages each time. */ for (options = VPSC_NORESERV;;) { /* * Find the highest runs that satisfy the given constraints * and restrictions, and record them in "m_runs". */ curr_low = low; count = 0; for (;;) { m_run = vm_phys_scan_contig(domain, npages, curr_low, high, alignment, boundary, options); if (m_run == NULL) break; curr_low = VM_PAGE_TO_PHYS(m_run) + ptoa(npages); m_runs[RUN_INDEX(count)] = m_run; count++; } /* * Reclaim the highest runs in LIFO (descending) order until * the number of reclaimed pages, "reclaimed", is at least * MIN_RECLAIM. Reset "reclaimed" each time because each * reclamation is idempotent, and runs will (likely) recur * from one scan to the next as restrictions are relaxed. */ reclaimed = 0; for (i = 0; count > 0 && i < NRUNS; i++) { count--; m_run = m_runs[RUN_INDEX(count)]; error = vm_page_reclaim_run(req_class, domain, npages, m_run, high); if (error == 0) { reclaimed += npages; if (reclaimed >= MIN_RECLAIM) return (true); } } /* * Either relax the restrictions on the next scan or return if * the last scan had no restrictions. */ if (options == VPSC_NORESERV) options = VPSC_NOSUPER; else if (options == VPSC_NOSUPER) options = VPSC_ANY; else if (options == VPSC_ANY) return (reclaimed != 0); } } bool vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary) { struct vm_domainset_iter di; int domain; bool ret; vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req); do { ret = vm_page_reclaim_contig_domain(domain, req, npages, low, high, alignment, boundary); if (ret) break; } while (vm_domainset_iter_page(&di, NULL, &domain) == 0); return (ret); } /* * Set the domain in the appropriate page level domainset. */ void vm_domain_set(struct vm_domain *vmd) { mtx_lock(&vm_domainset_lock); if (!vmd->vmd_minset && vm_paging_min(vmd)) { vmd->vmd_minset = 1; DOMAINSET_SET(vmd->vmd_domain, &vm_min_domains); } if (!vmd->vmd_severeset && vm_paging_severe(vmd)) { vmd->vmd_severeset = 1; DOMAINSET_SET(vmd->vmd_domain, &vm_severe_domains); } mtx_unlock(&vm_domainset_lock); } /* * Clear the domain from the appropriate page level domainset. */ void vm_domain_clear(struct vm_domain *vmd) { mtx_lock(&vm_domainset_lock); if (vmd->vmd_minset && !vm_paging_min(vmd)) { vmd->vmd_minset = 0; DOMAINSET_CLR(vmd->vmd_domain, &vm_min_domains); if (vm_min_waiters != 0) { vm_min_waiters = 0; wakeup(&vm_min_domains); } } if (vmd->vmd_severeset && !vm_paging_severe(vmd)) { vmd->vmd_severeset = 0; DOMAINSET_CLR(vmd->vmd_domain, &vm_severe_domains); if (vm_severe_waiters != 0) { vm_severe_waiters = 0; wakeup(&vm_severe_domains); } } /* * If pageout daemon needs pages, then tell it that there are * some free. */ if (vmd->vmd_pageout_pages_needed && vmd->vmd_free_count >= vmd->vmd_pageout_free_min) { wakeup(&vmd->vmd_pageout_pages_needed); vmd->vmd_pageout_pages_needed = 0; } /* See comments in vm_wait_doms(). */ if (vm_pageproc_waiters) { vm_pageproc_waiters = 0; wakeup(&vm_pageproc_waiters); } mtx_unlock(&vm_domainset_lock); } /* * Wait for free pages to exceed the min threshold globally. */ void vm_wait_min(void) { mtx_lock(&vm_domainset_lock); while (vm_page_count_min()) { vm_min_waiters++; msleep(&vm_min_domains, &vm_domainset_lock, PVM, "vmwait", 0); } mtx_unlock(&vm_domainset_lock); } /* * Wait for free pages to exceed the severe threshold globally. */ void vm_wait_severe(void) { mtx_lock(&vm_domainset_lock); while (vm_page_count_severe()) { vm_severe_waiters++; msleep(&vm_severe_domains, &vm_domainset_lock, PVM, "vmwait", 0); } mtx_unlock(&vm_domainset_lock); } u_int vm_wait_count(void) { return (vm_severe_waiters + vm_min_waiters + vm_pageproc_waiters); } void vm_wait_doms(const domainset_t *wdoms) { /* * We use racey wakeup synchronization to avoid expensive global * locking for the pageproc when sleeping with a non-specific vm_wait. * To handle this, we only sleep for one tick in this instance. It * is expected that most allocations for the pageproc will come from * kmem or vm_page_grab* which will use the more specific and * race-free vm_wait_domain(). */ if (curproc == pageproc) { mtx_lock(&vm_domainset_lock); vm_pageproc_waiters++; msleep(&vm_pageproc_waiters, &vm_domainset_lock, PVM | PDROP, "pageprocwait", 1); } else { /* * XXX Ideally we would wait only until the allocation could * be satisfied. This condition can cause new allocators to * consume all freed pages while old allocators wait. */ mtx_lock(&vm_domainset_lock); if (vm_page_count_min_set(wdoms)) { vm_min_waiters++; msleep(&vm_min_domains, &vm_domainset_lock, PVM | PDROP, "vmwait", 0); } else mtx_unlock(&vm_domainset_lock); } } /* * vm_wait_domain: * * Sleep until free pages are available for allocation. * - Called in various places after failed memory allocations. */ void vm_wait_domain(int domain) { struct vm_domain *vmd; domainset_t wdom; vmd = VM_DOMAIN(domain); vm_domain_free_assert_unlocked(vmd); if (curproc == pageproc) { mtx_lock(&vm_domainset_lock); if (vmd->vmd_free_count < vmd->vmd_pageout_free_min) { vmd->vmd_pageout_pages_needed = 1; msleep(&vmd->vmd_pageout_pages_needed, &vm_domainset_lock, PDROP | PSWP, "VMWait", 0); } else mtx_unlock(&vm_domainset_lock); } else { if (pageproc == NULL) panic("vm_wait in early boot"); DOMAINSET_ZERO(&wdom); DOMAINSET_SET(vmd->vmd_domain, &wdom); vm_wait_doms(&wdom); } } /* * vm_wait: * * Sleep until free pages are available for allocation in the * affinity domains of the obj. If obj is NULL, the domain set * for the calling thread is used. * Called in various places after failed memory allocations. */ void vm_wait(vm_object_t obj) { struct domainset *d; d = NULL; /* * Carefully fetch pointers only once: the struct domainset * itself is ummutable but the pointer might change. */ if (obj != NULL) d = obj->domain.dr_policy; if (d == NULL) d = curthread->td_domain.dr_policy; vm_wait_doms(&d->ds_mask); } /* * vm_domain_alloc_fail: * * Called when a page allocation function fails. Informs the * pagedaemon and performs the requested wait. Requires the * domain_free and object lock on entry. Returns with the * object lock held and free lock released. Returns an error when * retry is necessary. * */ static int vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, int req) { vm_domain_free_assert_unlocked(vmd); atomic_add_int(&vmd->vmd_pageout_deficit, max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1)); if (req & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) { if (object != NULL) VM_OBJECT_WUNLOCK(object); vm_wait_domain(vmd->vmd_domain); if (object != NULL) VM_OBJECT_WLOCK(object); if (req & VM_ALLOC_WAITOK) return (EAGAIN); } return (0); } /* * vm_waitpfault: * * Sleep until free pages are available for allocation. * - Called only in vm_fault so that processes page faulting * can be easily tracked. * - Sleeps at a lower priority than vm_wait() so that vm_wait()ing * processes will be able to grab memory first. Do not change * this balance without careful testing first. */ void vm_waitpfault(struct domainset *dset, int timo) { /* * XXX Ideally we would wait only until the allocation could * be satisfied. This condition can cause new allocators to * consume all freed pages while old allocators wait. */ mtx_lock(&vm_domainset_lock); if (vm_page_count_min_set(&dset->ds_mask)) { vm_min_waiters++; msleep(&vm_min_domains, &vm_domainset_lock, PUSER | PDROP, "pfault", timo); } else mtx_unlock(&vm_domainset_lock); } static struct vm_pagequeue * _vm_page_pagequeue(vm_page_t m, uint8_t queue) { return (&vm_pagequeue_domain(m)->vmd_pagequeues[queue]); } #ifdef INVARIANTS static struct vm_pagequeue * vm_page_pagequeue(vm_page_t m) { return (_vm_page_pagequeue(m, vm_page_astate_load(m).queue)); } #endif static __always_inline bool vm_page_pqstate_fcmpset(vm_page_t m, vm_page_astate_t *old, vm_page_astate_t new) { vm_page_astate_t tmp; tmp = *old; do { if (__predict_true(vm_page_astate_fcmpset(m, old, new))) return (true); counter_u64_add(pqstate_commit_retries, 1); } while (old->_bits == tmp._bits); return (false); } /* * Do the work of committing a queue state update that moves the page out of * its current queue. */ static bool _vm_page_pqstate_commit_dequeue(struct vm_pagequeue *pq, vm_page_t m, vm_page_astate_t *old, vm_page_astate_t new) { vm_page_t next; vm_pagequeue_assert_locked(pq); KASSERT(vm_page_pagequeue(m) == pq, ("%s: queue %p does not match page %p", __func__, pq, m)); KASSERT(old->queue != PQ_NONE && new.queue != old->queue, ("%s: invalid queue indices %d %d", __func__, old->queue, new.queue)); /* * Once the queue index of the page changes there is nothing * synchronizing with further updates to the page's physical * queue state. Therefore we must speculatively remove the page * from the queue now and be prepared to roll back if the queue * state update fails. If the page is not physically enqueued then * we just update its queue index. */ if ((old->flags & PGA_ENQUEUED) != 0) { new.flags &= ~PGA_ENQUEUED; next = TAILQ_NEXT(m, plinks.q); TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_dec(pq); if (!vm_page_pqstate_fcmpset(m, old, new)) { if (next == NULL) TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); else TAILQ_INSERT_BEFORE(next, m, plinks.q); vm_pagequeue_cnt_inc(pq); return (false); } else { return (true); } } else { return (vm_page_pqstate_fcmpset(m, old, new)); } } static bool vm_page_pqstate_commit_dequeue(vm_page_t m, vm_page_astate_t *old, vm_page_astate_t new) { struct vm_pagequeue *pq; vm_page_astate_t as; bool ret; pq = _vm_page_pagequeue(m, old->queue); /* * The queue field and PGA_ENQUEUED flag are stable only so long as the * corresponding page queue lock is held. */ vm_pagequeue_lock(pq); as = vm_page_astate_load(m); if (__predict_false(as._bits != old->_bits)) { *old = as; ret = false; } else { ret = _vm_page_pqstate_commit_dequeue(pq, m, old, new); } vm_pagequeue_unlock(pq); return (ret); } /* * Commit a queue state update that enqueues or requeues a page. */ static bool _vm_page_pqstate_commit_requeue(struct vm_pagequeue *pq, vm_page_t m, vm_page_astate_t *old, vm_page_astate_t new) { struct vm_domain *vmd; vm_pagequeue_assert_locked(pq); KASSERT(old->queue != PQ_NONE && new.queue == old->queue, ("%s: invalid queue indices %d %d", __func__, old->queue, new.queue)); new.flags |= PGA_ENQUEUED; if (!vm_page_pqstate_fcmpset(m, old, new)) return (false); if ((old->flags & PGA_ENQUEUED) != 0) TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); else vm_pagequeue_cnt_inc(pq); /* * Give PGA_REQUEUE_HEAD precedence over PGA_REQUEUE. In particular, if * both flags are set in close succession, only PGA_REQUEUE_HEAD will be * applied, even if it was set first. */ if ((old->flags & PGA_REQUEUE_HEAD) != 0) { vmd = vm_pagequeue_domain(m); KASSERT(pq == &vmd->vmd_pagequeues[PQ_INACTIVE], ("%s: invalid page queue for page %p", __func__, m)); TAILQ_INSERT_BEFORE(&vmd->vmd_inacthead, m, plinks.q); } else { TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); } return (true); } /* * Commit a queue state update that encodes a request for a deferred queue * operation. */ static bool vm_page_pqstate_commit_request(vm_page_t m, vm_page_astate_t *old, vm_page_astate_t new) { KASSERT(old->queue == new.queue || new.queue != PQ_NONE, ("%s: invalid state, queue %d flags %x", __func__, new.queue, new.flags)); if (old->_bits != new._bits && !vm_page_pqstate_fcmpset(m, old, new)) return (false); vm_page_pqbatch_submit(m, new.queue); return (true); } /* * A generic queue state update function. This handles more cases than the * specialized functions above. */ bool vm_page_pqstate_commit(vm_page_t m, vm_page_astate_t *old, vm_page_astate_t new) { if (old->_bits == new._bits) return (true); if (old->queue != PQ_NONE && new.queue != old->queue) { if (!vm_page_pqstate_commit_dequeue(m, old, new)) return (false); if (new.queue != PQ_NONE) vm_page_pqbatch_submit(m, new.queue); } else { if (!vm_page_pqstate_fcmpset(m, old, new)) return (false); if (new.queue != PQ_NONE && ((new.flags & ~old->flags) & PGA_QUEUE_OP_MASK) != 0) vm_page_pqbatch_submit(m, new.queue); } return (true); } /* * Apply deferred queue state updates to a page. */ static inline void vm_pqbatch_process_page(struct vm_pagequeue *pq, vm_page_t m, uint8_t queue) { vm_page_astate_t new, old; CRITICAL_ASSERT(curthread); vm_pagequeue_assert_locked(pq); KASSERT(queue < PQ_COUNT, ("%s: invalid queue index %d", __func__, queue)); KASSERT(pq == _vm_page_pagequeue(m, queue), ("%s: page %p does not belong to queue %p", __func__, m, pq)); for (old = vm_page_astate_load(m);;) { if (__predict_false(old.queue != queue || (old.flags & PGA_QUEUE_OP_MASK) == 0)) { counter_u64_add(queue_nops, 1); break; } KASSERT(old.queue != PQ_NONE || (old.flags & PGA_QUEUE_STATE_MASK) == 0, ("%s: page %p has unexpected queue state", __func__, m)); new = old; if ((old.flags & PGA_DEQUEUE) != 0) { new.flags &= ~PGA_QUEUE_OP_MASK; new.queue = PQ_NONE; if (__predict_true(_vm_page_pqstate_commit_dequeue(pq, m, &old, new))) { counter_u64_add(queue_ops, 1); break; } } else { new.flags &= ~(PGA_REQUEUE | PGA_REQUEUE_HEAD); if (__predict_true(_vm_page_pqstate_commit_requeue(pq, m, &old, new))) { counter_u64_add(queue_ops, 1); break; } } } } static void vm_pqbatch_process(struct vm_pagequeue *pq, struct vm_batchqueue *bq, uint8_t queue) { int i; for (i = 0; i < bq->bq_cnt; i++) vm_pqbatch_process_page(pq, bq->bq_pa[i], queue); vm_batchqueue_init(bq); } /* * vm_page_pqbatch_submit: [ internal use only ] * * Enqueue a page in the specified page queue's batched work queue. * The caller must have encoded the requested operation in the page * structure's a.flags field. */ void vm_page_pqbatch_submit(vm_page_t m, uint8_t queue) { struct vm_batchqueue *bq; struct vm_pagequeue *pq; int domain; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("page %p is unmanaged", m)); KASSERT(queue < PQ_COUNT, ("invalid queue %d", queue)); domain = vm_phys_domain(m); pq = &vm_pagequeue_domain(m)->vmd_pagequeues[queue]; critical_enter(); bq = DPCPU_PTR(pqbatch[domain][queue]); if (vm_batchqueue_insert(bq, m)) { critical_exit(); return; } critical_exit(); vm_pagequeue_lock(pq); critical_enter(); bq = DPCPU_PTR(pqbatch[domain][queue]); vm_pqbatch_process(pq, bq, queue); vm_pqbatch_process_page(pq, m, queue); vm_pagequeue_unlock(pq); critical_exit(); } /* * vm_page_pqbatch_drain: [ internal use only ] * * Force all per-CPU page queue batch queues to be drained. This is * intended for use in severe memory shortages, to ensure that pages * do not remain stuck in the batch queues. */ void vm_page_pqbatch_drain(void) { struct thread *td; struct vm_domain *vmd; struct vm_pagequeue *pq; int cpu, domain, queue; td = curthread; CPU_FOREACH(cpu) { thread_lock(td); sched_bind(td, cpu); thread_unlock(td); for (domain = 0; domain < vm_ndomains; domain++) { vmd = VM_DOMAIN(domain); for (queue = 0; queue < PQ_COUNT; queue++) { pq = &vmd->vmd_pagequeues[queue]; vm_pagequeue_lock(pq); critical_enter(); vm_pqbatch_process(pq, DPCPU_PTR(pqbatch[domain][queue]), queue); critical_exit(); vm_pagequeue_unlock(pq); } } } thread_lock(td); sched_unbind(td); thread_unlock(td); } /* * vm_page_dequeue_deferred: [ internal use only ] * * Request removal of the given page from its current page * queue. Physical removal from the queue may be deferred * indefinitely. * * The page must be locked. */ void vm_page_dequeue_deferred(vm_page_t m) { vm_page_astate_t new, old; old = vm_page_astate_load(m); do { if (old.queue == PQ_NONE) { KASSERT((old.flags & PGA_QUEUE_STATE_MASK) == 0, ("%s: page %p has unexpected queue state", __func__, m)); break; } new = old; new.flags |= PGA_DEQUEUE; } while (!vm_page_pqstate_commit_request(m, &old, new)); } /* * vm_page_dequeue: * * Remove the page from whichever page queue it's in, if any, before * returning. */ void vm_page_dequeue(vm_page_t m) { vm_page_astate_t new, old; old = vm_page_astate_load(m); do { if (old.queue == PQ_NONE) { KASSERT((old.flags & PGA_QUEUE_STATE_MASK) == 0, ("%s: page %p has unexpected queue state", __func__, m)); break; } new = old; new.flags &= ~PGA_QUEUE_OP_MASK; new.queue = PQ_NONE; } while (!vm_page_pqstate_commit_dequeue(m, &old, new)); } /* * Schedule the given page for insertion into the specified page queue. * Physical insertion of the page may be deferred indefinitely. */ static void vm_page_enqueue(vm_page_t m, uint8_t queue) { KASSERT(m->a.queue == PQ_NONE && (m->a.flags & PGA_QUEUE_STATE_MASK) == 0, ("%s: page %p is already enqueued", __func__, m)); KASSERT(m->ref_count > 0, ("%s: page %p does not carry any references", __func__, m)); m->a.queue = queue; if ((m->a.flags & PGA_REQUEUE) == 0) vm_page_aflag_set(m, PGA_REQUEUE); vm_page_pqbatch_submit(m, queue); } /* * vm_page_free_prep: * * Prepares the given page to be put on the free list, * disassociating it from any VM object. The caller may return * the page to the free list only if this function returns true. * * The object must be locked. The page must be locked if it is * managed. */ static bool vm_page_free_prep(vm_page_t m) { /* * Synchronize with threads that have dropped a reference to this * page. */ atomic_thread_fence_acq(); if (vm_page_sbusied(m)) panic("vm_page_free_prep: freeing shared busy page %p", m); #if defined(DIAGNOSTIC) && defined(PHYS_TO_DMAP) if (PMAP_HAS_DMAP && (m->flags & PG_ZERO) != 0) { uint64_t *p; int i; p = (uint64_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); for (i = 0; i < PAGE_SIZE / sizeof(uint64_t); i++, p++) KASSERT(*p == 0, ("vm_page_free_prep %p PG_ZERO %d %jx", m, i, (uintmax_t)*p)); } #endif if ((m->oflags & VPO_UNMANAGED) == 0) { KASSERT(!pmap_page_is_mapped(m), ("vm_page_free_prep: freeing mapped page %p", m)); KASSERT((m->a.flags & (PGA_EXECUTABLE | PGA_WRITEABLE)) == 0, ("vm_page_free_prep: mapping flags set in page %p", m)); } else { KASSERT(m->a.queue == PQ_NONE, ("vm_page_free_prep: unmanaged page %p is queued", m)); } VM_CNT_INC(v_tfree); if (m->object != NULL) { KASSERT(((m->oflags & VPO_UNMANAGED) != 0) == ((m->object->flags & OBJ_UNMANAGED) != 0), ("vm_page_free_prep: managed flag mismatch for page %p", m)); vm_page_object_remove(m); /* * The object reference can be released without an atomic * operation. */ KASSERT((m->flags & PG_FICTITIOUS) != 0 || m->ref_count == VPRC_OBJREF, ("vm_page_free_prep: page %p has unexpected ref_count %u", m, m->ref_count)); m->object = NULL; m->ref_count -= VPRC_OBJREF; vm_page_xunbusy(m); } if (vm_page_xbusied(m)) panic("vm_page_free_prep: freeing exclusive busy page %p", m); /* * If fictitious remove object association and * return. */ if ((m->flags & PG_FICTITIOUS) != 0) { KASSERT(m->ref_count == 1, ("fictitious page %p is referenced", m)); KASSERT(m->a.queue == PQ_NONE, ("fictitious page %p is queued", m)); return (false); } /* * Pages need not be dequeued before they are returned to the physical * memory allocator, but they must at least be marked for a deferred * dequeue. */ if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_dequeue_deferred(m); m->valid = 0; vm_page_undirty(m); if (m->ref_count != 0) panic("vm_page_free_prep: page %p has references", m); /* * Restore the default memory attribute to the page. */ if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT) pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT); #if VM_NRESERVLEVEL > 0 /* * Determine whether the page belongs to a reservation. If the page was * allocated from a per-CPU cache, it cannot belong to a reservation, so * as an optimization, we avoid the check in that case. */ if ((m->flags & PG_PCPU_CACHE) == 0 && vm_reserv_free_page(m)) return (false); #endif return (true); } /* * vm_page_free_toq: * * Returns the given page to the free list, disassociating it * from any VM object. * * The object must be locked. The page must be locked if it is * managed. */ static void vm_page_free_toq(vm_page_t m) { struct vm_domain *vmd; uma_zone_t zone; if (!vm_page_free_prep(m)) return; vmd = vm_pagequeue_domain(m); zone = vmd->vmd_pgcache[m->pool].zone; if ((m->flags & PG_PCPU_CACHE) != 0 && zone != NULL) { uma_zfree(zone, m); return; } vm_domain_free_lock(vmd); vm_phys_free_pages(m, 0); vm_domain_free_unlock(vmd); vm_domain_freecnt_inc(vmd, 1); } /* * vm_page_free_pages_toq: * * Returns a list of pages to the free list, disassociating it * from any VM object. In other words, this is equivalent to * calling vm_page_free_toq() for each page of a list of VM objects. * * The objects must be locked. The pages must be locked if it is * managed. */ void vm_page_free_pages_toq(struct spglist *free, bool update_wire_count) { vm_page_t m; int count; if (SLIST_EMPTY(free)) return; count = 0; while ((m = SLIST_FIRST(free)) != NULL) { count++; SLIST_REMOVE_HEAD(free, plinks.s.ss); vm_page_free_toq(m); } if (update_wire_count) vm_wire_sub(count); } /* * Mark this page as wired down, preventing reclamation by the page daemon * or when the containing object is destroyed. */ void vm_page_wire(vm_page_t m) { u_int old; KASSERT(m->object != NULL, ("vm_page_wire: page %p does not belong to an object", m)); if (!vm_page_busied(m) && !vm_object_busied(m->object)) VM_OBJECT_ASSERT_LOCKED(m->object); KASSERT((m->flags & PG_FICTITIOUS) == 0 || VPRC_WIRE_COUNT(m->ref_count) >= 1, ("vm_page_wire: fictitious page %p has zero wirings", m)); old = atomic_fetchadd_int(&m->ref_count, 1); KASSERT(VPRC_WIRE_COUNT(old) != VPRC_WIRE_COUNT_MAX, ("vm_page_wire: counter overflow for page %p", m)); if (VPRC_WIRE_COUNT(old) == 0) { if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(m, PGA_DEQUEUE); vm_wire_add(1); } } /* * Attempt to wire a mapped page following a pmap lookup of that page. * This may fail if a thread is concurrently tearing down mappings of the page. * The transient failure is acceptable because it translates to the * failure of the caller pmap_extract_and_hold(), which should be then * followed by the vm_fault() fallback, see e.g. vm_fault_quick_hold_pages(). */ bool vm_page_wire_mapped(vm_page_t m) { u_int old; old = m->ref_count; do { KASSERT(old > 0, ("vm_page_wire_mapped: wiring unreferenced page %p", m)); if ((old & VPRC_BLOCKED) != 0) return (false); } while (!atomic_fcmpset_int(&m->ref_count, &old, old + 1)); if (VPRC_WIRE_COUNT(old) == 0) { if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(m, PGA_DEQUEUE); vm_wire_add(1); } return (true); } /* * Release a wiring reference to a managed page. If the page still belongs to * an object, update its position in the page queues to reflect the reference. * If the wiring was the last reference to the page, free the page. */ static void vm_page_unwire_managed(vm_page_t m, uint8_t nqueue, bool noreuse) { u_int old; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("%s: page %p is unmanaged", __func__, m)); /* * Update LRU state before releasing the wiring reference. * Use a release store when updating the reference count to * synchronize with vm_page_free_prep(). */ old = m->ref_count; do { KASSERT(VPRC_WIRE_COUNT(old) > 0, ("vm_page_unwire: wire count underflow for page %p", m)); if (old > VPRC_OBJREF + 1) { /* * The page has at least one other wiring reference. An * earlier iteration of this loop may have called * vm_page_release_toq() and cleared PGA_DEQUEUE, so * re-set it if necessary. */ if ((vm_page_astate_load(m).flags & PGA_DEQUEUE) == 0) vm_page_aflag_set(m, PGA_DEQUEUE); } else if (old == VPRC_OBJREF + 1) { /* * This is the last wiring. Clear PGA_DEQUEUE and * update the page's queue state to reflect the * reference. If the page does not belong to an object * (i.e., the VPRC_OBJREF bit is clear), we only need to * clear leftover queue state. */ vm_page_release_toq(m, nqueue, false); } else if (old == 1) { vm_page_aflag_clear(m, PGA_DEQUEUE); } } while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1)); if (VPRC_WIRE_COUNT(old) == 1) { vm_wire_sub(1); if (old == 1) vm_page_free(m); } } /* * Release one wiring of the specified page, potentially allowing it to be * paged out. * * Only managed pages belonging to an object can be paged out. If the number * of wirings transitions to zero and the page is eligible for page out, then * the page is added to the specified paging queue. If the released wiring * represented the last reference to the page, the page is freed. * * A managed page must be locked. */ void vm_page_unwire(vm_page_t m, uint8_t nqueue) { KASSERT(nqueue < PQ_COUNT, ("vm_page_unwire: invalid queue %u request for page %p", nqueue, m)); if ((m->oflags & VPO_UNMANAGED) != 0) { if (vm_page_unwire_noq(m) && m->ref_count == 0) vm_page_free(m); return; } vm_page_unwire_managed(m, nqueue, false); } /* * Unwire a page without (re-)inserting it into a page queue. It is up * to the caller to enqueue, requeue, or free the page as appropriate. * In most cases involving managed pages, vm_page_unwire() should be used * instead. */ bool vm_page_unwire_noq(vm_page_t m) { u_int old; old = vm_page_drop(m, 1); KASSERT(VPRC_WIRE_COUNT(old) != 0, ("vm_page_unref: counter underflow for page %p", m)); KASSERT((m->flags & PG_FICTITIOUS) == 0 || VPRC_WIRE_COUNT(old) > 1, ("vm_page_unref: missing ref on fictitious page %p", m)); if (VPRC_WIRE_COUNT(old) > 1) return (false); if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_clear(m, PGA_DEQUEUE); vm_wire_sub(1); return (true); } /* * Ensure that the page ends up in the specified page queue. If the page is * active or being moved to the active queue, ensure that its act_count is * at least ACT_INIT but do not otherwise mess with it. * * A managed page must be locked. */ static __always_inline void vm_page_mvqueue(vm_page_t m, const uint8_t nqueue, const uint16_t nflag) { vm_page_astate_t old, new; KASSERT(m->ref_count > 0, ("%s: page %p does not carry any references", __func__, m)); KASSERT(nflag == PGA_REQUEUE || nflag == PGA_REQUEUE_HEAD, ("%s: invalid flags %x", __func__, nflag)); if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m)) return; old = vm_page_astate_load(m); do { if ((old.flags & PGA_DEQUEUE) != 0) break; new = old; new.flags &= ~PGA_QUEUE_OP_MASK; if (nqueue == PQ_ACTIVE) new.act_count = max(old.act_count, ACT_INIT); if (old.queue == nqueue) { if (nqueue != PQ_ACTIVE) new.flags |= nflag; } else { new.flags |= nflag; new.queue = nqueue; } } while (!vm_page_pqstate_commit(m, &old, new)); } /* * Put the specified page on the active list (if appropriate). */ void vm_page_activate(vm_page_t m) { vm_page_mvqueue(m, PQ_ACTIVE, PGA_REQUEUE); } /* * Move the specified page to the tail of the inactive queue, or requeue * the page if it is already in the inactive queue. */ void vm_page_deactivate(vm_page_t m) { vm_page_mvqueue(m, PQ_INACTIVE, PGA_REQUEUE); } void vm_page_deactivate_noreuse(vm_page_t m) { vm_page_mvqueue(m, PQ_INACTIVE, PGA_REQUEUE_HEAD); } /* * Put a page in the laundry, or requeue it if it is already there. */ void vm_page_launder(vm_page_t m) { vm_page_mvqueue(m, PQ_LAUNDRY, PGA_REQUEUE); } /* * Put a page in the PQ_UNSWAPPABLE holding queue. */ void vm_page_unswappable(vm_page_t m) { KASSERT(!vm_page_wired(m) && (m->oflags & VPO_UNMANAGED) == 0, ("page %p already unswappable", m)); vm_page_dequeue(m); vm_page_enqueue(m, PQ_UNSWAPPABLE); } /* * Release a page back to the page queues in preparation for unwiring. */ static void vm_page_release_toq(vm_page_t m, uint8_t nqueue, const bool noreuse) { vm_page_astate_t old, new; uint16_t nflag; /* * Use a check of the valid bits to determine whether we should * accelerate reclamation of the page. The object lock might not be * held here, in which case the check is racy. At worst we will either * accelerate reclamation of a valid page and violate LRU, or * unnecessarily defer reclamation of an invalid page. * * If we were asked to not cache the page, place it near the head of the * inactive queue so that is reclaimed sooner. */ if (noreuse || m->valid == 0) { nqueue = PQ_INACTIVE; nflag = PGA_REQUEUE_HEAD; } else { nflag = PGA_REQUEUE; } old = vm_page_astate_load(m); do { new = old; /* * If the page is already in the active queue and we are not * trying to accelerate reclamation, simply mark it as * referenced and avoid any queue operations. */ new.flags &= ~PGA_QUEUE_OP_MASK; if (nflag != PGA_REQUEUE_HEAD && old.queue == PQ_ACTIVE) new.flags |= PGA_REFERENCED; else { new.flags |= nflag; new.queue = nqueue; } } while (!vm_page_pqstate_commit(m, &old, new)); } /* * Unwire a page and either attempt to free it or re-add it to the page queues. */ void vm_page_release(vm_page_t m, int flags) { vm_object_t object; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("vm_page_release: page %p is unmanaged", m)); if ((flags & VPR_TRYFREE) != 0) { for (;;) { object = (vm_object_t)atomic_load_ptr(&m->object); if (object == NULL) break; /* Depends on type-stability. */ if (vm_page_busied(m) || !VM_OBJECT_TRYWLOCK(object)) break; if (object == m->object) { vm_page_release_locked(m, flags); VM_OBJECT_WUNLOCK(object); return; } VM_OBJECT_WUNLOCK(object); } } vm_page_unwire_managed(m, PQ_INACTIVE, flags != 0); } /* See vm_page_release(). */ void vm_page_release_locked(vm_page_t m, int flags) { VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("vm_page_release_locked: page %p is unmanaged", m)); if (vm_page_unwire_noq(m)) { if ((flags & VPR_TRYFREE) != 0 && (m->object->ref_count == 0 || !pmap_page_is_mapped(m)) && m->dirty == 0 && vm_page_tryxbusy(m)) { vm_page_free(m); } else { vm_page_release_toq(m, PQ_INACTIVE, flags != 0); } } } static bool vm_page_try_blocked_op(vm_page_t m, void (*op)(vm_page_t)) { u_int old; KASSERT(m->object != NULL && (m->oflags & VPO_UNMANAGED) == 0, ("vm_page_try_blocked_op: page %p has no object", m)); KASSERT(vm_page_busied(m), ("vm_page_try_blocked_op: page %p is not busy", m)); VM_OBJECT_ASSERT_LOCKED(m->object); old = m->ref_count; do { KASSERT(old != 0, ("vm_page_try_blocked_op: page %p has no references", m)); if (VPRC_WIRE_COUNT(old) != 0) return (false); } while (!atomic_fcmpset_int(&m->ref_count, &old, old | VPRC_BLOCKED)); (op)(m); /* * If the object is read-locked, new wirings may be created via an * object lookup. */ old = vm_page_drop(m, VPRC_BLOCKED); KASSERT(!VM_OBJECT_WOWNED(m->object) || old == (VPRC_BLOCKED | VPRC_OBJREF), ("vm_page_try_blocked_op: unexpected refcount value %u for %p", old, m)); return (true); } /* * Atomically check for wirings and remove all mappings of the page. */ bool vm_page_try_remove_all(vm_page_t m) { return (vm_page_try_blocked_op(m, pmap_remove_all)); } /* * Atomically check for wirings and remove all writeable mappings of the page. */ bool vm_page_try_remove_write(vm_page_t m) { return (vm_page_try_blocked_op(m, pmap_remove_write)); } /* * vm_page_advise * * Apply the specified advice to the given page. * * The object and page must be locked. */ void vm_page_advise(vm_page_t m, int advice) { VM_OBJECT_ASSERT_WLOCKED(m->object); if (advice == MADV_FREE) /* * Mark the page clean. This will allow the page to be freed * without first paging it out. MADV_FREE pages are often * quickly reused by malloc(3), so we do not do anything that * would result in a page fault on a later access. */ vm_page_undirty(m); else if (advice != MADV_DONTNEED) { if (advice == MADV_WILLNEED) vm_page_activate(m); return; } if (advice != MADV_FREE && m->dirty == 0 && pmap_is_modified(m)) vm_page_dirty(m); /* * Clear any references to the page. Otherwise, the page daemon will * immediately reactivate the page. */ vm_page_aflag_clear(m, PGA_REFERENCED); /* * Place clean pages near the head of the inactive queue rather than * the tail, thus defeating the queue's LRU operation and ensuring that * the page will be reused quickly. Dirty pages not already in the * laundry are moved there. */ if (m->dirty == 0) vm_page_deactivate_noreuse(m); else if (!vm_page_in_laundry(m)) vm_page_launder(m); } static inline int vm_page_grab_pflags(int allocflags) { int pflags; KASSERT((allocflags & VM_ALLOC_NOBUSY) == 0 || (allocflags & VM_ALLOC_WIRED) != 0, ("vm_page_grab_pflags: the pages must be busied or wired")); KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 || (allocflags & VM_ALLOC_IGN_SBUSY) != 0, ("vm_page_grab_pflags: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY " "mismatch")); pflags = allocflags & ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL | VM_ALLOC_NOBUSY); if ((allocflags & VM_ALLOC_NOWAIT) == 0) pflags |= VM_ALLOC_WAITFAIL; if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0) pflags |= VM_ALLOC_SBUSY; return (pflags); } /* * Grab a page, waiting until we are waken up due to the page * changing state. We keep on waiting, if the page continues * to be in the object. If the page doesn't exist, first allocate it * and then conditionally zero it. * * This routine may sleep. * * The object must be locked on entry. The lock will, however, be released * and reacquired if the routine sleeps. */ vm_page_t vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags) { vm_page_t m; int pflags; VM_OBJECT_ASSERT_WLOCKED(object); pflags = vm_page_grab_pflags(allocflags); retrylookup: if ((m = vm_page_lookup(object, pindex)) != NULL) { if (!vm_page_acquire_flags(m, allocflags)) { if (vm_page_busy_sleep_flags(object, m, "pgrbwt", allocflags)) goto retrylookup; return (NULL); } goto out; } if ((allocflags & VM_ALLOC_NOCREAT) != 0) return (NULL); m = vm_page_alloc(object, pindex, pflags); if (m == NULL) { if ((allocflags & VM_ALLOC_NOWAIT) != 0) return (NULL); goto retrylookup; } if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0) pmap_zero_page(m); out: if ((allocflags & VM_ALLOC_NOBUSY) != 0) { if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0) vm_page_sunbusy(m); else vm_page_xunbusy(m); } return (m); } /* * Grab a page and make it valid, paging in if necessary. Pages missing from * their pager are zero filled and validated. If a VM_ALLOC_COUNT is supplied * and the page is not valid as many as VM_INITIAL_PAGEIN pages can be brought * in simultaneously. Additional pages will be left on a paging queue but * will neither be wired nor busy regardless of allocflags. */ int vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex, int allocflags) { vm_page_t m; vm_page_t ma[VM_INITIAL_PAGEIN]; bool sleep, xbusy; int after, i, pflags, rv; KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 || (allocflags & VM_ALLOC_IGN_SBUSY) != 0, ("vm_page_grab_valid: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch")); KASSERT((allocflags & (VM_ALLOC_NOWAIT | VM_ALLOC_WAITFAIL | VM_ALLOC_ZERO)) == 0, ("vm_page_grab_valid: Invalid flags 0x%X", allocflags)); VM_OBJECT_ASSERT_WLOCKED(object); pflags = allocflags & ~(VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY); pflags |= VM_ALLOC_WAITFAIL; retrylookup: xbusy = false; if ((m = vm_page_lookup(object, pindex)) != NULL) { /* * If the page is fully valid it can only become invalid * with the object lock held. If it is not valid it can * become valid with the busy lock held. Therefore, we * may unnecessarily lock the exclusive busy here if we * race with I/O completion not using the object lock. * However, we will not end up with an invalid page and a * shared lock. */ if (!vm_page_all_valid(m) || (allocflags & (VM_ALLOC_IGN_SBUSY | VM_ALLOC_SBUSY)) == 0) { sleep = !vm_page_tryxbusy(m); xbusy = true; } else sleep = !vm_page_trysbusy(m); if (sleep) { (void)vm_page_busy_sleep_flags(object, m, "pgrbwt", allocflags); goto retrylookup; } if ((allocflags & VM_ALLOC_NOCREAT) != 0 && !vm_page_all_valid(m)) { if (xbusy) vm_page_xunbusy(m); else vm_page_sunbusy(m); *mp = NULL; return (VM_PAGER_FAIL); } if ((allocflags & VM_ALLOC_WIRED) != 0) vm_page_wire(m); if (vm_page_all_valid(m)) goto out; } else if ((allocflags & VM_ALLOC_NOCREAT) != 0) { *mp = NULL; return (VM_PAGER_FAIL); } else if ((m = vm_page_alloc(object, pindex, pflags)) != NULL) { xbusy = true; } else { goto retrylookup; } vm_page_assert_xbusied(m); MPASS(xbusy); if (vm_pager_has_page(object, pindex, NULL, &after)) { after = MIN(after, VM_INITIAL_PAGEIN); after = MIN(after, allocflags >> VM_ALLOC_COUNT_SHIFT); after = MAX(after, 1); ma[0] = m; for (i = 1; i < after; i++) { if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) { if (ma[i]->valid || !vm_page_tryxbusy(ma[i])) break; } else { ma[i] = vm_page_alloc(object, m->pindex + i, VM_ALLOC_NORMAL); if (ma[i] == NULL) break; } } after = i; + vm_object_pip_add(object, after); + VM_OBJECT_WUNLOCK(object); rv = vm_pager_get_pages(object, ma, after, NULL, NULL); + VM_OBJECT_WLOCK(object); + vm_object_pip_wakeupn(object, after); /* Pager may have replaced a page. */ m = ma[0]; if (rv != VM_PAGER_OK) { if ((allocflags & VM_ALLOC_WIRED) != 0) vm_page_unwire_noq(m); for (i = 0; i < after; i++) { if (!vm_page_wired(ma[i])) vm_page_free(ma[i]); else vm_page_xunbusy(ma[i]); } *mp = NULL; return (rv); } for (i = 1; i < after; i++) vm_page_readahead_finish(ma[i]); MPASS(vm_page_all_valid(m)); } else { vm_page_zero_invalid(m, TRUE); } out: if ((allocflags & VM_ALLOC_NOBUSY) != 0) { if (xbusy) vm_page_xunbusy(m); else vm_page_sunbusy(m); } if ((allocflags & VM_ALLOC_SBUSY) != 0 && xbusy) vm_page_busy_downgrade(m); *mp = m; return (VM_PAGER_OK); } /* * Return the specified range of pages from the given object. For each * page offset within the range, if a page already exists within the object * at that offset and it is busy, then wait for it to change state. If, * instead, the page doesn't exist, then allocate it. * * The caller must always specify an allocation class. * * allocation classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs the pages * * The caller must always specify that the pages are to be busied and/or * wired. * * optional allocation flags: * VM_ALLOC_IGN_SBUSY do not sleep on soft busy pages * VM_ALLOC_NOBUSY do not exclusive busy the page * VM_ALLOC_NOWAIT do not sleep * VM_ALLOC_SBUSY set page to sbusy state * VM_ALLOC_WIRED wire the pages * VM_ALLOC_ZERO zero and validate any invalid pages * * If VM_ALLOC_NOWAIT is not specified, this routine may sleep. Otherwise, it * may return a partial prefix of the requested range. */ int vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags, vm_page_t *ma, int count) { vm_page_t m, mpred; int pflags; int i; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(((u_int)allocflags >> VM_ALLOC_COUNT_SHIFT) == 0, ("vm_page_grap_pages: VM_ALLOC_COUNT() is not allowed")); pflags = vm_page_grab_pflags(allocflags); if (count == 0) return (0); i = 0; retrylookup: m = vm_radix_lookup_le(&object->rtree, pindex + i); if (m == NULL || m->pindex != pindex + i) { mpred = m; m = NULL; } else mpred = TAILQ_PREV(m, pglist, listq); for (; i < count; i++) { if (m != NULL) { if (!vm_page_acquire_flags(m, allocflags)) { if (vm_page_busy_sleep_flags(object, m, "grbmaw", allocflags)) goto retrylookup; break; } } else { if ((allocflags & VM_ALLOC_NOCREAT) != 0) break; m = vm_page_alloc_after(object, pindex + i, pflags | VM_ALLOC_COUNT(count - i), mpred); if (m == NULL) { if ((allocflags & VM_ALLOC_NOWAIT) != 0) break; goto retrylookup; } } if (vm_page_none_valid(m) && (allocflags & VM_ALLOC_ZERO) != 0) { if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); vm_page_valid(m); } if ((allocflags & VM_ALLOC_NOBUSY) != 0) { if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0) vm_page_sunbusy(m); else vm_page_xunbusy(m); } ma[i] = mpred = m; m = vm_page_next(m); } return (i); } /* * Mapping function for valid or dirty bits in a page. * * Inputs are required to range within a page. */ vm_page_bits_t vm_page_bits(int base, int size) { int first_bit; int last_bit; KASSERT( base + size <= PAGE_SIZE, ("vm_page_bits: illegal base/size %d/%d", base, size) ); if (size == 0) /* handle degenerate case */ return (0); first_bit = base >> DEV_BSHIFT; last_bit = (base + size - 1) >> DEV_BSHIFT; return (((vm_page_bits_t)2 << last_bit) - ((vm_page_bits_t)1 << first_bit)); } void vm_page_bits_set(vm_page_t m, vm_page_bits_t *bits, vm_page_bits_t set) { #if PAGE_SIZE == 32768 atomic_set_64((uint64_t *)bits, set); #elif PAGE_SIZE == 16384 atomic_set_32((uint32_t *)bits, set); #elif (PAGE_SIZE == 8192) && defined(atomic_set_16) atomic_set_16((uint16_t *)bits, set); #elif (PAGE_SIZE == 4096) && defined(atomic_set_8) atomic_set_8((uint8_t *)bits, set); #else /* PAGE_SIZE <= 8192 */ uintptr_t addr; int shift; addr = (uintptr_t)bits; /* * Use a trick to perform a 32-bit atomic on the * containing aligned word, to not depend on the existence * of atomic_{set, clear}_{8, 16}. */ shift = addr & (sizeof(uint32_t) - 1); #if BYTE_ORDER == BIG_ENDIAN shift = (sizeof(uint32_t) - sizeof(vm_page_bits_t) - shift) * NBBY; #else shift *= NBBY; #endif addr &= ~(sizeof(uint32_t) - 1); atomic_set_32((uint32_t *)addr, set << shift); #endif /* PAGE_SIZE */ } static inline void vm_page_bits_clear(vm_page_t m, vm_page_bits_t *bits, vm_page_bits_t clear) { #if PAGE_SIZE == 32768 atomic_clear_64((uint64_t *)bits, clear); #elif PAGE_SIZE == 16384 atomic_clear_32((uint32_t *)bits, clear); #elif (PAGE_SIZE == 8192) && defined(atomic_clear_16) atomic_clear_16((uint16_t *)bits, clear); #elif (PAGE_SIZE == 4096) && defined(atomic_clear_8) atomic_clear_8((uint8_t *)bits, clear); #else /* PAGE_SIZE <= 8192 */ uintptr_t addr; int shift; addr = (uintptr_t)bits; /* * Use a trick to perform a 32-bit atomic on the * containing aligned word, to not depend on the existence * of atomic_{set, clear}_{8, 16}. */ shift = addr & (sizeof(uint32_t) - 1); #if BYTE_ORDER == BIG_ENDIAN shift = (sizeof(uint32_t) - sizeof(vm_page_bits_t) - shift) * NBBY; #else shift *= NBBY; #endif addr &= ~(sizeof(uint32_t) - 1); atomic_clear_32((uint32_t *)addr, clear << shift); #endif /* PAGE_SIZE */ } static inline vm_page_bits_t vm_page_bits_swap(vm_page_t m, vm_page_bits_t *bits, vm_page_bits_t newbits) { #if PAGE_SIZE == 32768 uint64_t old; old = *bits; while (atomic_fcmpset_64(bits, &old, newbits) == 0); return (old); #elif PAGE_SIZE == 16384 uint32_t old; old = *bits; while (atomic_fcmpset_32(bits, &old, newbits) == 0); return (old); #elif (PAGE_SIZE == 8192) && defined(atomic_fcmpset_16) uint16_t old; old = *bits; while (atomic_fcmpset_16(bits, &old, newbits) == 0); return (old); #elif (PAGE_SIZE == 4096) && defined(atomic_fcmpset_8) uint8_t old; old = *bits; while (atomic_fcmpset_8(bits, &old, newbits) == 0); return (old); #else /* PAGE_SIZE <= 4096*/ uintptr_t addr; uint32_t old, new, mask; int shift; addr = (uintptr_t)bits; /* * Use a trick to perform a 32-bit atomic on the * containing aligned word, to not depend on the existence * of atomic_{set, swap, clear}_{8, 16}. */ shift = addr & (sizeof(uint32_t) - 1); #if BYTE_ORDER == BIG_ENDIAN shift = (sizeof(uint32_t) - sizeof(vm_page_bits_t) - shift) * NBBY; #else shift *= NBBY; #endif addr &= ~(sizeof(uint32_t) - 1); mask = VM_PAGE_BITS_ALL << shift; old = *bits; do { new = old & ~mask; new |= newbits << shift; } while (atomic_fcmpset_32((uint32_t *)addr, &old, new) == 0); return (old >> shift); #endif /* PAGE_SIZE */ } /* * vm_page_set_valid_range: * * Sets portions of a page valid. The arguments are expected * to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive * of any partial chunks touched by the range. The invalid portion of * such chunks will be zeroed. * * (base + size) must be less then or equal to PAGE_SIZE. */ void vm_page_set_valid_range(vm_page_t m, int base, int size) { int endoff, frag; vm_page_bits_t pagebits; vm_page_assert_busied(m); if (size == 0) /* handle degenerate case */ return; /* * If the base is not DEV_BSIZE aligned and the valid * bit is clear, we have to zero out a portion of the * first block. */ if ((frag = rounddown2(base, DEV_BSIZE)) != base && (m->valid & (1 << (base >> DEV_BSHIFT))) == 0) pmap_zero_page_area(m, frag, base - frag); /* * If the ending offset is not DEV_BSIZE aligned and the * valid bit is clear, we have to zero out a portion of * the last block. */ endoff = base + size; if ((frag = rounddown2(endoff, DEV_BSIZE)) != endoff && (m->valid & (1 << (endoff >> DEV_BSHIFT))) == 0) pmap_zero_page_area(m, endoff, DEV_BSIZE - (endoff & (DEV_BSIZE - 1))); /* * Assert that no previously invalid block that is now being validated * is already dirty. */ KASSERT((~m->valid & vm_page_bits(base, size) & m->dirty) == 0, ("vm_page_set_valid_range: page %p is dirty", m)); /* * Set valid bits inclusive of any overlap. */ pagebits = vm_page_bits(base, size); if (vm_page_xbusied(m)) m->valid |= pagebits; else vm_page_bits_set(m, &m->valid, pagebits); } /* * Set the page dirty bits and free the invalid swap space if * present. Returns the previous dirty bits. */ vm_page_bits_t vm_page_set_dirty(vm_page_t m) { vm_page_bits_t old; VM_PAGE_OBJECT_BUSY_ASSERT(m); if (vm_page_xbusied(m) && !pmap_page_is_write_mapped(m)) { old = m->dirty; m->dirty = VM_PAGE_BITS_ALL; } else old = vm_page_bits_swap(m, &m->dirty, VM_PAGE_BITS_ALL); if (old == 0 && (m->a.flags & PGA_SWAP_SPACE) != 0) vm_pager_page_unswapped(m); return (old); } /* * Clear the given bits from the specified page's dirty field. */ static __inline void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits) { vm_page_assert_busied(m); /* * If the page is xbusied and not write mapped we are the * only thread that can modify dirty bits. Otherwise, The pmap * layer can call vm_page_dirty() without holding a distinguished * lock. The combination of page busy and atomic operations * suffice to guarantee consistency of the page dirty field. */ if (vm_page_xbusied(m) && !pmap_page_is_write_mapped(m)) m->dirty &= ~pagebits; else vm_page_bits_clear(m, &m->dirty, pagebits); } /* * vm_page_set_validclean: * * Sets portions of a page valid and clean. The arguments are expected * to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive * of any partial chunks touched by the range. The invalid portion of * such chunks will be zero'd. * * (base + size) must be less then or equal to PAGE_SIZE. */ void vm_page_set_validclean(vm_page_t m, int base, int size) { vm_page_bits_t oldvalid, pagebits; int endoff, frag; vm_page_assert_busied(m); if (size == 0) /* handle degenerate case */ return; /* * If the base is not DEV_BSIZE aligned and the valid * bit is clear, we have to zero out a portion of the * first block. */ if ((frag = rounddown2(base, DEV_BSIZE)) != base && (m->valid & ((vm_page_bits_t)1 << (base >> DEV_BSHIFT))) == 0) pmap_zero_page_area(m, frag, base - frag); /* * If the ending offset is not DEV_BSIZE aligned and the * valid bit is clear, we have to zero out a portion of * the last block. */ endoff = base + size; if ((frag = rounddown2(endoff, DEV_BSIZE)) != endoff && (m->valid & ((vm_page_bits_t)1 << (endoff >> DEV_BSHIFT))) == 0) pmap_zero_page_area(m, endoff, DEV_BSIZE - (endoff & (DEV_BSIZE - 1))); /* * Set valid, clear dirty bits. If validating the entire * page we can safely clear the pmap modify bit. We also * use this opportunity to clear the PGA_NOSYNC flag. If a process * takes a write fault on a MAP_NOSYNC memory area the flag will * be set again. * * We set valid bits inclusive of any overlap, but we can only * clear dirty bits for DEV_BSIZE chunks that are fully within * the range. */ oldvalid = m->valid; pagebits = vm_page_bits(base, size); if (vm_page_xbusied(m)) m->valid |= pagebits; else vm_page_bits_set(m, &m->valid, pagebits); #if 0 /* NOT YET */ if ((frag = base & (DEV_BSIZE - 1)) != 0) { frag = DEV_BSIZE - frag; base += frag; size -= frag; if (size < 0) size = 0; } pagebits = vm_page_bits(base, size & (DEV_BSIZE - 1)); #endif if (base == 0 && size == PAGE_SIZE) { /* * The page can only be modified within the pmap if it is * mapped, and it can only be mapped if it was previously * fully valid. */ if (oldvalid == VM_PAGE_BITS_ALL) /* * Perform the pmap_clear_modify() first. Otherwise, * a concurrent pmap operation, such as * pmap_protect(), could clear a modification in the * pmap and set the dirty field on the page before * pmap_clear_modify() had begun and after the dirty * field was cleared here. */ pmap_clear_modify(m); m->dirty = 0; vm_page_aflag_clear(m, PGA_NOSYNC); } else if (oldvalid != VM_PAGE_BITS_ALL && vm_page_xbusied(m)) m->dirty &= ~pagebits; else vm_page_clear_dirty_mask(m, pagebits); } void vm_page_clear_dirty(vm_page_t m, int base, int size) { vm_page_clear_dirty_mask(m, vm_page_bits(base, size)); } /* * vm_page_set_invalid: * * Invalidates DEV_BSIZE'd chunks within a page. Both the * valid and dirty bits for the effected areas are cleared. */ void vm_page_set_invalid(vm_page_t m, int base, int size) { vm_page_bits_t bits; vm_object_t object; /* * The object lock is required so that pages can't be mapped * read-only while we're in the process of invalidating them. */ object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); vm_page_assert_busied(m); if (object->type == OBJT_VNODE && base == 0 && IDX_TO_OFF(m->pindex) + size >= object->un_pager.vnp.vnp_size) bits = VM_PAGE_BITS_ALL; else bits = vm_page_bits(base, size); if (object->ref_count != 0 && vm_page_all_valid(m) && bits != 0) pmap_remove_all(m); KASSERT((bits == 0 && vm_page_all_valid(m)) || !pmap_page_is_mapped(m), ("vm_page_set_invalid: page %p is mapped", m)); if (vm_page_xbusied(m)) { m->valid &= ~bits; m->dirty &= ~bits; } else { vm_page_bits_clear(m, &m->valid, bits); vm_page_bits_clear(m, &m->dirty, bits); } } /* * vm_page_invalid: * * Invalidates the entire page. The page must be busy, unmapped, and * the enclosing object must be locked. The object locks protects * against concurrent read-only pmap enter which is done without * busy. */ void vm_page_invalid(vm_page_t m) { vm_page_assert_busied(m); VM_OBJECT_ASSERT_LOCKED(m->object); MPASS(!pmap_page_is_mapped(m)); if (vm_page_xbusied(m)) m->valid = 0; else vm_page_bits_clear(m, &m->valid, VM_PAGE_BITS_ALL); } /* * vm_page_zero_invalid() * * The kernel assumes that the invalid portions of a page contain * garbage, but such pages can be mapped into memory by user code. * When this occurs, we must zero out the non-valid portions of the * page so user code sees what it expects. * * Pages are most often semi-valid when the end of a file is mapped * into memory and the file's size is not page aligned. */ void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid) { int b; int i; /* * Scan the valid bits looking for invalid sections that * must be zeroed. Invalid sub-DEV_BSIZE'd areas ( where the * valid bit may be set ) have already been zeroed by * vm_page_set_validclean(). */ for (b = i = 0; i <= PAGE_SIZE / DEV_BSIZE; ++i) { if (i == (PAGE_SIZE / DEV_BSIZE) || (m->valid & ((vm_page_bits_t)1 << i))) { if (i > b) { pmap_zero_page_area(m, b << DEV_BSHIFT, (i - b) << DEV_BSHIFT); } b = i + 1; } } /* * setvalid is TRUE when we can safely set the zero'd areas * as being valid. We can do this if there are no cache consistancy * issues. e.g. it is ok to do with UFS, but not ok to do with NFS. */ if (setvalid) vm_page_valid(m); } /* * vm_page_is_valid: * * Is (partial) page valid? Note that the case where size == 0 * will return FALSE in the degenerate case where the page is * entirely invalid, and TRUE otherwise. * * Some callers envoke this routine without the busy lock held and * handle races via higher level locks. Typical callers should * hold a busy lock to prevent invalidation. */ int vm_page_is_valid(vm_page_t m, int base, int size) { vm_page_bits_t bits; bits = vm_page_bits(base, size); return (m->valid != 0 && (m->valid & bits) == bits); } /* * Returns true if all of the specified predicates are true for the entire * (super)page and false otherwise. */ bool vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m) { vm_object_t object; int i, npages; object = m->object; if (skip_m != NULL && skip_m->object != object) return (false); VM_OBJECT_ASSERT_LOCKED(object); npages = atop(pagesizes[m->psind]); /* * The physically contiguous pages that make up a superpage, i.e., a * page with a page size index ("psind") greater than zero, will * occupy adjacent entries in vm_page_array[]. */ for (i = 0; i < npages; i++) { /* Always test object consistency, including "skip_m". */ if (m[i].object != object) return (false); if (&m[i] == skip_m) continue; if ((flags & PS_NONE_BUSY) != 0 && vm_page_busied(&m[i])) return (false); if ((flags & PS_ALL_DIRTY) != 0) { /* * Calling vm_page_test_dirty() or pmap_is_modified() * might stop this case from spuriously returning * "false". However, that would require a write lock * on the object containing "m[i]". */ if (m[i].dirty != VM_PAGE_BITS_ALL) return (false); } if ((flags & PS_ALL_VALID) != 0 && m[i].valid != VM_PAGE_BITS_ALL) return (false); } return (true); } /* * Set the page's dirty bits if the page is modified. */ void vm_page_test_dirty(vm_page_t m) { vm_page_assert_busied(m); if (m->dirty != VM_PAGE_BITS_ALL && pmap_is_modified(m)) vm_page_dirty(m); } void vm_page_valid(vm_page_t m) { vm_page_assert_busied(m); if (vm_page_xbusied(m)) m->valid = VM_PAGE_BITS_ALL; else vm_page_bits_set(m, &m->valid, VM_PAGE_BITS_ALL); } void vm_page_lock_KBI(vm_page_t m, const char *file, int line) { mtx_lock_flags_(vm_page_lockptr(m), 0, file, line); } void vm_page_unlock_KBI(vm_page_t m, const char *file, int line) { mtx_unlock_flags_(vm_page_lockptr(m), 0, file, line); } int vm_page_trylock_KBI(vm_page_t m, const char *file, int line) { return (mtx_trylock_flags_(vm_page_lockptr(m), 0, file, line)); } #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) void vm_page_assert_locked_KBI(vm_page_t m, const char *file, int line) { vm_page_lock_assert_KBI(m, MA_OWNED, file, line); } void vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line) { mtx_assert_(vm_page_lockptr(m), a, file, line); } #endif #ifdef INVARIANTS void vm_page_object_busy_assert(vm_page_t m) { /* * Certain of the page's fields may only be modified by the * holder of a page or object busy. */ if (m->object != NULL && !vm_page_busied(m)) VM_OBJECT_ASSERT_BUSY(m->object); } void vm_page_assert_pga_writeable(vm_page_t m, uint16_t bits) { if ((bits & PGA_WRITEABLE) == 0) return; /* * The PGA_WRITEABLE flag can only be set if the page is * managed, is exclusively busied or the object is locked. * Currently, this flag is only set by pmap_enter(). */ KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("PGA_WRITEABLE on unmanaged page")); if (!vm_page_xbusied(m)) VM_OBJECT_ASSERT_BUSY(m->object); } #endif #include "opt_ddb.h" #ifdef DDB #include #include DB_SHOW_COMMAND(page, vm_page_print_page_info) { db_printf("vm_cnt.v_free_count: %d\n", vm_free_count()); db_printf("vm_cnt.v_inactive_count: %d\n", vm_inactive_count()); db_printf("vm_cnt.v_active_count: %d\n", vm_active_count()); db_printf("vm_cnt.v_laundry_count: %d\n", vm_laundry_count()); db_printf("vm_cnt.v_wire_count: %d\n", vm_wire_count()); db_printf("vm_cnt.v_free_reserved: %d\n", vm_cnt.v_free_reserved); db_printf("vm_cnt.v_free_min: %d\n", vm_cnt.v_free_min); db_printf("vm_cnt.v_free_target: %d\n", vm_cnt.v_free_target); db_printf("vm_cnt.v_inactive_target: %d\n", vm_cnt.v_inactive_target); } DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info) { int dom; db_printf("pq_free %d\n", vm_free_count()); for (dom = 0; dom < vm_ndomains; dom++) { db_printf( "dom %d page_cnt %d free %d pq_act %d pq_inact %d pq_laund %d pq_unsw %d\n", dom, vm_dom[dom].vmd_page_count, vm_dom[dom].vmd_free_count, vm_dom[dom].vmd_pagequeues[PQ_ACTIVE].pq_cnt, vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt, vm_dom[dom].vmd_pagequeues[PQ_LAUNDRY].pq_cnt, vm_dom[dom].vmd_pagequeues[PQ_UNSWAPPABLE].pq_cnt); } } DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo) { vm_page_t m; boolean_t phys, virt; if (!have_addr) { db_printf("show pginfo addr\n"); return; } phys = strchr(modif, 'p') != NULL; virt = strchr(modif, 'v') != NULL; if (virt) m = PHYS_TO_VM_PAGE(pmap_kextract(addr)); else if (phys) m = PHYS_TO_VM_PAGE(addr); else m = (vm_page_t)addr; db_printf( "page %p obj %p pidx 0x%jx phys 0x%jx q %d ref %u\n" " af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n", m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr, m->a.queue, m->ref_count, m->a.flags, m->oflags, m->flags, m->a.act_count, m->busy_lock, m->valid, m->dirty); } #endif /* DDB */ Index: projects/clang1000-import/sys/vm/vm_pager.c =================================================================== --- projects/clang1000-import/sys/vm/vm_pager.c (revision 356919) +++ projects/clang1000-import/sys/vm/vm_pager.c (revision 356920) @@ -1,493 +1,498 @@ /*- * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU) * * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_pager.c 8.6 (Berkeley) 1/12/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Paging space routine stubs. Emulates a matchmaker-like interface * for builtin pagers. */ #include __FBSDID("$FreeBSD$"); #include "opt_param.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include uma_zone_t pbuf_zone; static int pbuf_init(void *, int, int); static int pbuf_ctor(void *, int, void *, int); static void pbuf_dtor(void *, int, void *); static int dead_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); static vm_object_t dead_pager_alloc(void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *); static void dead_pager_putpages(vm_object_t, vm_page_t *, int, int, int *); static boolean_t dead_pager_haspage(vm_object_t, vm_pindex_t, int *, int *); static void dead_pager_dealloc(vm_object_t); static int dead_pager_getpages(vm_object_t obj, vm_page_t *ma, int count, int *rbehind, int *rahead) { return (VM_PAGER_FAIL); } static vm_object_t dead_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t off, struct ucred *cred) { return (NULL); } static void dead_pager_putpages(vm_object_t object, vm_page_t *m, int count, int flags, int *rtvals) { int i; for (i = 0; i < count; i++) rtvals[i] = VM_PAGER_AGAIN; } static int dead_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *prev, int *next) { if (prev != NULL) *prev = 0; if (next != NULL) *next = 0; return (FALSE); } static void dead_pager_dealloc(vm_object_t object) { } static struct pagerops deadpagerops = { .pgo_alloc = dead_pager_alloc, .pgo_dealloc = dead_pager_dealloc, .pgo_getpages = dead_pager_getpages, .pgo_putpages = dead_pager_putpages, .pgo_haspage = dead_pager_haspage, }; struct pagerops *pagertab[] = { &defaultpagerops, /* OBJT_DEFAULT */ &swappagerops, /* OBJT_SWAP */ &vnodepagerops, /* OBJT_VNODE */ &devicepagerops, /* OBJT_DEVICE */ &physpagerops, /* OBJT_PHYS */ &deadpagerops, /* OBJT_DEAD */ &sgpagerops, /* OBJT_SG */ &mgtdevicepagerops, /* OBJT_MGTDEVICE */ }; void vm_pager_init(void) { struct pagerops **pgops; /* * Initialize known pagers */ for (pgops = pagertab; pgops < &pagertab[nitems(pagertab)]; pgops++) if ((*pgops)->pgo_init != NULL) (*(*pgops)->pgo_init)(); } static int nswbuf_max; void vm_pager_bufferinit(void) { /* Main zone for paging bufs. */ pbuf_zone = uma_zcreate("pbuf", sizeof(struct buf), pbuf_ctor, pbuf_dtor, pbuf_init, NULL, UMA_ALIGN_CACHE, UMA_ZONE_VM | UMA_ZONE_NOFREE); /* Few systems may still use this zone directly, so it needs a limit. */ nswbuf_max += uma_zone_set_max(pbuf_zone, NSWBUF_MIN); } uma_zone_t pbuf_zsecond_create(char *name, int max) { uma_zone_t zone; zone = uma_zsecond_create(name, pbuf_ctor, pbuf_dtor, NULL, NULL, pbuf_zone); /* * uma_prealloc() rounds up to items per slab. If we would prealloc * immediately on every pbuf_zsecond_create(), we may accumulate too * much of difference between hard limit and prealloced items, which * means wasted memory. */ if (nswbuf_max > 0) nswbuf_max += uma_zone_set_max(zone, max); else uma_prealloc(pbuf_zone, uma_zone_set_max(zone, max)); return (zone); } static void pbuf_prealloc(void *arg __unused) { uma_prealloc(pbuf_zone, nswbuf_max); nswbuf_max = -1; } SYSINIT(pbuf, SI_SUB_KTHREAD_BUF, SI_ORDER_ANY, pbuf_prealloc, NULL); /* * Allocate an instance of a pager of the given type. * Size, protection and offset parameters are passed in for pagers that * need to perform page-level validation (e.g. the device pager). */ vm_object_t vm_pager_allocate(objtype_t type, void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t off, struct ucred *cred) { vm_object_t ret; struct pagerops *ops; ops = pagertab[type]; if (ops) ret = (*ops->pgo_alloc)(handle, size, prot, off, cred); else ret = NULL; return (ret); } /* * The object must be locked. */ void vm_pager_deallocate(vm_object_t object) { VM_OBJECT_ASSERT_WLOCKED(object); (*pagertab[object->type]->pgo_dealloc) (object); } static void vm_pager_assert_in(vm_object_t object, vm_page_t *m, int count) { #ifdef INVARIANTS /* * All pages must be consecutive, busied, not mapped, not fully valid, * not dirty and belong to the proper object. Some pages may be the * bogus page, but the first and last pages must be a real ones. */ - VM_OBJECT_ASSERT_WLOCKED(object); + VM_OBJECT_ASSERT_UNLOCKED(object); + VM_OBJECT_ASSERT_PAGING(object); KASSERT(count > 0, ("%s: 0 count", __func__)); for (int i = 0 ; i < count; i++) { if (m[i] == bogus_page) { KASSERT(i != 0 && i != count - 1, ("%s: page %d is the bogus page", __func__, i)); continue; } vm_page_assert_xbusied(m[i]); KASSERT(!pmap_page_is_mapped(m[i]), ("%s: page %p is mapped", __func__, m[i])); KASSERT(m[i]->valid != VM_PAGE_BITS_ALL, ("%s: request for a valid page %p", __func__, m[i])); KASSERT(m[i]->dirty == 0, ("%s: page %p is dirty", __func__, m[i])); KASSERT(m[i]->object == object, ("%s: wrong object %p/%p", __func__, object, m[i]->object)); KASSERT(m[i]->pindex == m[0]->pindex + i, ("%s: page %p isn't consecutive", __func__, m[i])); } #endif } /* * Page in the pages for the object using its associated pager. * The requested page must be fully valid on successful return. */ int vm_pager_get_pages(vm_object_t object, vm_page_t *m, int count, int *rbehind, int *rahead) { #ifdef INVARIANTS vm_pindex_t pindex = m[0]->pindex; #endif int r; vm_pager_assert_in(object, m, count); r = (*pagertab[object->type]->pgo_getpages)(object, m, count, rbehind, rahead); if (r != VM_PAGER_OK) return (r); for (int i = 0; i < count; i++) { /* * If pager has replaced a page, assert that it had * updated the array. */ +#ifdef INVARIANTS + VM_OBJECT_RLOCK(object); KASSERT(m[i] == vm_page_lookup(object, pindex++), ("%s: mismatch page %p pindex %ju", __func__, m[i], (uintmax_t )pindex - 1)); + VM_OBJECT_RUNLOCK(object); +#endif /* * Zero out partially filled data. */ if (m[i]->valid != VM_PAGE_BITS_ALL) vm_page_zero_invalid(m[i], TRUE); } return (VM_PAGER_OK); } int vm_pager_get_pages_async(vm_object_t object, vm_page_t *m, int count, int *rbehind, int *rahead, pgo_getpages_iodone_t iodone, void *arg) { vm_pager_assert_in(object, m, count); return ((*pagertab[object->type]->pgo_getpages_async)(object, m, count, rbehind, rahead, iodone, arg)); } /* * vm_pager_put_pages() - inline, see vm/vm_pager.h * vm_pager_has_page() - inline, see vm/vm_pager.h */ /* * Search the specified pager object list for an object with the * specified handle. If an object with the specified handle is found, * increase its reference count and return it. Otherwise, return NULL. * * The pager object list must be locked. */ vm_object_t vm_pager_object_lookup(struct pagerlst *pg_list, void *handle) { vm_object_t object; TAILQ_FOREACH(object, pg_list, pager_object_list) { if (object->handle == handle) { VM_OBJECT_WLOCK(object); if ((object->flags & OBJ_DEAD) == 0) { vm_object_reference_locked(object); VM_OBJECT_WUNLOCK(object); break; } VM_OBJECT_WUNLOCK(object); } } return (object); } static int pbuf_ctor(void *mem, int size, void *arg, int flags) { struct buf *bp = mem; bp->b_vp = NULL; bp->b_bufobj = NULL; /* copied from initpbuf() */ bp->b_rcred = NOCRED; bp->b_wcred = NOCRED; bp->b_qindex = 0; /* On no queue (QUEUE_NONE) */ bp->b_data = bp->b_kvabase; bp->b_xflags = 0; bp->b_flags = 0; bp->b_ioflags = 0; bp->b_iodone = NULL; bp->b_error = 0; BUF_LOCK(bp, LK_EXCLUSIVE, NULL); return (0); } static void pbuf_dtor(void *mem, int size, void *arg) { struct buf *bp = mem; if (bp->b_rcred != NOCRED) { crfree(bp->b_rcred); bp->b_rcred = NOCRED; } if (bp->b_wcred != NOCRED) { crfree(bp->b_wcred); bp->b_wcred = NOCRED; } BUF_UNLOCK(bp); } static int pbuf_init(void *mem, int size, int flags) { struct buf *bp = mem; bp->b_kvabase = (void *)kva_alloc(MAXPHYS); if (bp->b_kvabase == NULL) return (ENOMEM); bp->b_kvasize = MAXPHYS; BUF_LOCKINIT(bp); LIST_INIT(&bp->b_dep); bp->b_rcred = bp->b_wcred = NOCRED; bp->b_xflags = 0; return (0); } /* * Associate a p-buffer with a vnode. * * Also sets B_PAGING flag to indicate that vnode is not fully associated * with the buffer. i.e. the bp has not been linked into the vnode or * ref-counted. */ void pbgetvp(struct vnode *vp, struct buf *bp) { KASSERT(bp->b_vp == NULL, ("pbgetvp: not free")); KASSERT(bp->b_bufobj == NULL, ("pbgetvp: not free (bufobj)")); bp->b_vp = vp; bp->b_flags |= B_PAGING; bp->b_bufobj = &vp->v_bufobj; } /* * Associate a p-buffer with a vnode. * * Also sets B_PAGING flag to indicate that vnode is not fully associated * with the buffer. i.e. the bp has not been linked into the vnode or * ref-counted. */ void pbgetbo(struct bufobj *bo, struct buf *bp) { KASSERT(bp->b_vp == NULL, ("pbgetbo: not free (vnode)")); KASSERT(bp->b_bufobj == NULL, ("pbgetbo: not free (bufobj)")); bp->b_flags |= B_PAGING; bp->b_bufobj = bo; } /* * Disassociate a p-buffer from a vnode. */ void pbrelvp(struct buf *bp) { KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL")); KASSERT(bp->b_bufobj != NULL, ("pbrelvp: NULL bufobj")); KASSERT((bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) == 0, ("pbrelvp: pager buf on vnode list.")); bp->b_vp = NULL; bp->b_bufobj = NULL; bp->b_flags &= ~B_PAGING; } /* * Disassociate a p-buffer from a bufobj. */ void pbrelbo(struct buf *bp) { KASSERT(bp->b_vp == NULL, ("pbrelbo: vnode")); KASSERT(bp->b_bufobj != NULL, ("pbrelbo: NULL bufobj")); KASSERT((bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) == 0, ("pbrelbo: pager buf on vnode list.")); bp->b_bufobj = NULL; bp->b_flags &= ~B_PAGING; } Index: projects/clang1000-import/sys/vm/vm_swapout.c =================================================================== --- projects/clang1000-import/sys/vm/vm_swapout.c (revision 356919) +++ projects/clang1000-import/sys/vm/vm_swapout.c (revision 356920) @@ -1,947 +1,949 @@ /*- * SPDX-License-Identifier: (BSD-4-Clause AND MIT-CMU) * * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2005 Yahoo! Technologies Norway AS * All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_pageout.c 7.4 (Berkeley) 5/7/91 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ #include __FBSDID("$FreeBSD$"); #include "opt_kstack_pages.h" #include "opt_kstack_max_pages.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* the kernel process "vm_daemon" */ static void vm_daemon(void); static struct proc *vmproc; static struct kproc_desc vm_kp = { "vmdaemon", vm_daemon, &vmproc }; SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp); static int vm_swap_enabled = 1; static int vm_swap_idle_enabled = 0; SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, CTLFLAG_RW, &vm_swap_enabled, 0, "Enable entire process swapout"); SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); /* * Swap_idle_threshold1 is the guaranteed swapped in time for a process */ static int swap_idle_threshold1 = 2; SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold1, CTLFLAG_RW, &swap_idle_threshold1, 0, "Guaranteed swapped in time for a process"); /* * Swap_idle_threshold2 is the time that a process can be idle before * it will be swapped out, if idle swapping is enabled. */ static int swap_idle_threshold2 = 10; SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2, CTLFLAG_RW, &swap_idle_threshold2, 0, "Time before a process will be swapped out"); static int vm_pageout_req_swapout; /* XXX */ static int vm_daemon_needed; static struct mtx vm_daemon_mtx; /* Allow for use by vm_pageout before vm_daemon is initialized. */ MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF); static int swapped_cnt; static int swap_inprogress; /* Pending swap-ins done outside swapper. */ static int last_swapin; static void swapclear(struct proc *); static int swapout(struct proc *); static void vm_swapout_map_deactivate_pages(vm_map_t, long); static void vm_swapout_object_deactivate(pmap_t, vm_object_t, long); static void swapout_procs(int action); static void vm_req_vmdaemon(int req); static void vm_thread_swapout(struct thread *td); static void vm_swapout_object_deactivate_page(pmap_t pmap, vm_page_t m, bool unmap) { /* * Ignore unreclaimable wired pages. Repeat the check after busying * since a busy holder may wire the page. */ if (vm_page_wired(m) || !vm_page_tryxbusy(m)) return; if (vm_page_wired(m) || !pmap_page_exists_quick(pmap, m)) { vm_page_xunbusy(m); return; } if (!pmap_is_referenced(m)) { if (!vm_page_active(m)) (void)vm_page_try_remove_all(m); else if (unmap && vm_page_try_remove_all(m)) vm_page_deactivate(m); } vm_page_xunbusy(m); } /* * vm_swapout_object_deactivate * * Deactivate enough pages to satisfy the inactive target * requirements. * * The object and map must be locked. */ static void vm_swapout_object_deactivate(pmap_t pmap, vm_object_t first_object, long desired) { vm_object_t backing_object, object; vm_page_t m; bool unmap; VM_OBJECT_ASSERT_LOCKED(first_object); if ((first_object->flags & OBJ_FICTITIOUS) != 0) return; for (object = first_object;; object = backing_object) { if (pmap_resident_count(pmap) <= desired) goto unlock_return; VM_OBJECT_ASSERT_LOCKED(object); if ((object->flags & OBJ_UNMANAGED) != 0 || REFCOUNT_COUNT(object->paging_in_progress) > 0) goto unlock_return; unmap = true; if (object->shadow_count > 1) unmap = false; /* * Scan the object's entire memory queue. */ TAILQ_FOREACH(m, &object->memq, listq) { if (pmap_resident_count(pmap) <= desired) goto unlock_return; if (should_yield()) goto unlock_return; vm_swapout_object_deactivate_page(pmap, m, unmap); } if ((backing_object = object->backing_object) == NULL) goto unlock_return; VM_OBJECT_RLOCK(backing_object); if (object != first_object) VM_OBJECT_RUNLOCK(object); } unlock_return: if (object != first_object) VM_OBJECT_RUNLOCK(object); } /* * deactivate some number of pages in a map, try to do it fairly, but * that is really hard to do. */ static void vm_swapout_map_deactivate_pages(vm_map_t map, long desired) { vm_map_entry_t tmpe; vm_object_t obj, bigobj; int nothingwired; if (!vm_map_trylock_read(map)) return; bigobj = NULL; nothingwired = TRUE; /* * first, search out the biggest object, and try to free pages from * that. */ VM_MAP_ENTRY_FOREACH(tmpe, map) { if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { obj = tmpe->object.vm_object; if (obj != NULL && VM_OBJECT_TRYRLOCK(obj)) { if (obj->shadow_count <= 1 && (bigobj == NULL || bigobj->resident_page_count < obj->resident_page_count)) { if (bigobj != NULL) VM_OBJECT_RUNLOCK(bigobj); bigobj = obj; } else VM_OBJECT_RUNLOCK(obj); } } if (tmpe->wired_count > 0) nothingwired = FALSE; } if (bigobj != NULL) { vm_swapout_object_deactivate(map->pmap, bigobj, desired); VM_OBJECT_RUNLOCK(bigobj); } /* * Next, hunt around for other pages to deactivate. We actually * do this search sort of wrong -- .text first is not the best idea. */ VM_MAP_ENTRY_FOREACH(tmpe, map) { if (pmap_resident_count(vm_map_pmap(map)) <= desired) break; if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { obj = tmpe->object.vm_object; if (obj != NULL) { VM_OBJECT_RLOCK(obj); vm_swapout_object_deactivate(map->pmap, obj, desired); VM_OBJECT_RUNLOCK(obj); } } } /* * Remove all mappings if a process is swapped out, this will free page * table pages. */ if (desired == 0 && nothingwired) { pmap_remove(vm_map_pmap(map), vm_map_min(map), vm_map_max(map)); } vm_map_unlock_read(map); } /* * Swap out requests */ #define VM_SWAP_NORMAL 1 #define VM_SWAP_IDLE 2 void vm_swapout_run(void) { if (vm_swap_enabled) vm_req_vmdaemon(VM_SWAP_NORMAL); } /* * Idle process swapout -- run once per second when pagedaemons are * reclaiming pages. */ void vm_swapout_run_idle(void) { static long lsec; if (!vm_swap_idle_enabled || time_second == lsec) return; vm_req_vmdaemon(VM_SWAP_IDLE); lsec = time_second; } static void vm_req_vmdaemon(int req) { static int lastrun = 0; mtx_lock(&vm_daemon_mtx); vm_pageout_req_swapout |= req; if ((ticks > (lastrun + hz)) || (ticks < lastrun)) { wakeup(&vm_daemon_needed); lastrun = ticks; } mtx_unlock(&vm_daemon_mtx); } static void vm_daemon(void) { struct rlimit rsslim; struct proc *p; struct thread *td; struct vmspace *vm; int breakout, swapout_flags, tryagain, attempts; #ifdef RACCT uint64_t rsize, ravailable; #endif while (TRUE) { mtx_lock(&vm_daemon_mtx); msleep(&vm_daemon_needed, &vm_daemon_mtx, PPAUSE, "psleep", #ifdef RACCT racct_enable ? hz : 0 #else 0 #endif ); swapout_flags = vm_pageout_req_swapout; vm_pageout_req_swapout = 0; mtx_unlock(&vm_daemon_mtx); if (swapout_flags != 0) { /* * Drain the per-CPU page queue batches as a deadlock * avoidance measure. */ if ((swapout_flags & VM_SWAP_NORMAL) != 0) vm_page_pqbatch_drain(); swapout_procs(swapout_flags); } /* * scan the processes for exceeding their rlimits or if * process is swapped out -- deactivate pages */ tryagain = 0; attempts = 0; again: attempts++; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { vm_pindex_t limit, size; /* * if this is a system process or if we have already * looked at this process, skip it. */ PROC_LOCK(p); if (p->p_state != PRS_NORMAL || p->p_flag & (P_INEXEC | P_SYSTEM | P_WEXIT)) { PROC_UNLOCK(p); continue; } /* * if the process is in a non-running type state, * don't touch it. */ breakout = 0; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (!TD_ON_RUNQ(td) && !TD_IS_RUNNING(td) && !TD_IS_SLEEPING(td) && !TD_IS_SUSPENDED(td)) { thread_unlock(td); breakout = 1; break; } thread_unlock(td); } if (breakout) { PROC_UNLOCK(p); continue; } /* * get a limit */ lim_rlimit_proc(p, RLIMIT_RSS, &rsslim); limit = OFF_TO_IDX( qmin(rsslim.rlim_cur, rsslim.rlim_max)); /* * let processes that are swapped out really be * swapped out set the limit to nothing (will force a * swap-out.) */ if ((p->p_flag & P_INMEM) == 0) limit = 0; /* XXX */ vm = vmspace_acquire_ref(p); _PHOLD_LITE(p); PROC_UNLOCK(p); if (vm == NULL) { PRELE(p); continue; } sx_sunlock(&allproc_lock); size = vmspace_resident_count(vm); if (size >= limit) { vm_swapout_map_deactivate_pages( &vm->vm_map, limit); size = vmspace_resident_count(vm); } #ifdef RACCT if (racct_enable) { rsize = IDX_TO_OFF(size); PROC_LOCK(p); if (p->p_state == PRS_NORMAL) racct_set(p, RACCT_RSS, rsize); ravailable = racct_get_available(p, RACCT_RSS); PROC_UNLOCK(p); if (rsize > ravailable) { /* * Don't be overly aggressive; this * might be an innocent process, * and the limit could've been exceeded * by some memory hog. Don't try * to deactivate more than 1/4th * of process' resident set size. */ if (attempts <= 8) { if (ravailable < rsize - (rsize / 4)) { ravailable = rsize - (rsize / 4); } } vm_swapout_map_deactivate_pages( &vm->vm_map, OFF_TO_IDX(ravailable)); /* Update RSS usage after paging out. */ size = vmspace_resident_count(vm); rsize = IDX_TO_OFF(size); PROC_LOCK(p); if (p->p_state == PRS_NORMAL) racct_set(p, RACCT_RSS, rsize); PROC_UNLOCK(p); if (rsize > ravailable) tryagain = 1; } } #endif vmspace_free(vm); sx_slock(&allproc_lock); PRELE(p); } sx_sunlock(&allproc_lock); if (tryagain != 0 && attempts <= 10) { maybe_yield(); goto again; } } } /* * Allow a thread's kernel stack to be paged out. */ static void vm_thread_swapout(struct thread *td) { vm_object_t ksobj; vm_page_t m; int i, pages; cpu_thread_swapout(td); pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; pmap_qremove(td->td_kstack, pages); VM_OBJECT_WLOCK(ksobj); for (i = 0; i < pages; i++) { m = vm_page_lookup(ksobj, i); if (m == NULL) panic("vm_thread_swapout: kstack already missing?"); vm_page_dirty(m); vm_page_unwire(m, PQ_LAUNDRY); } VM_OBJECT_WUNLOCK(ksobj); } /* * Bring the kernel stack for a specified thread back in. */ static void vm_thread_swapin(struct thread *td, int oom_alloc) { vm_object_t ksobj; vm_page_t ma[KSTACK_MAX_PAGES]; int a, count, i, j, pages, rv; pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; VM_OBJECT_WLOCK(ksobj); (void)vm_page_grab_pages(ksobj, 0, oom_alloc | VM_ALLOC_WIRED, ma, pages); + VM_OBJECT_WUNLOCK(ksobj); for (i = 0; i < pages;) { vm_page_assert_xbusied(ma[i]); if (vm_page_all_valid(ma[i])) { vm_page_xunbusy(ma[i]); i++; continue; } vm_object_pip_add(ksobj, 1); for (j = i + 1; j < pages; j++) if (vm_page_all_valid(ma[j])) break; + VM_OBJECT_WLOCK(ksobj); rv = vm_pager_has_page(ksobj, ma[i]->pindex, NULL, &a); + VM_OBJECT_WUNLOCK(ksobj); KASSERT(rv == 1, ("%s: missing page %p", __func__, ma[i])); count = min(a + 1, j - i); rv = vm_pager_get_pages(ksobj, ma + i, count, NULL, NULL); KASSERT(rv == VM_PAGER_OK, ("%s: cannot get kstack for proc %d", __func__, td->td_proc->p_pid)); vm_object_pip_wakeup(ksobj); for (j = i; j < i + count; j++) vm_page_xunbusy(ma[j]); i += count; } - VM_OBJECT_WUNLOCK(ksobj); pmap_qenter(td->td_kstack, ma, pages); cpu_thread_swapin(td); } void faultin(struct proc *p) { struct thread *td; int oom_alloc; PROC_LOCK_ASSERT(p, MA_OWNED); /* * If another process is swapping in this process, * just wait until it finishes. */ if (p->p_flag & P_SWAPPINGIN) { while (p->p_flag & P_SWAPPINGIN) msleep(&p->p_flag, &p->p_mtx, PVM, "faultin", 0); return; } if ((p->p_flag & P_INMEM) == 0) { oom_alloc = (p->p_flag & P_WKILLED) != 0 ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL; /* * Don't let another thread swap process p out while we are * busy swapping it in. */ ++p->p_lock; p->p_flag |= P_SWAPPINGIN; PROC_UNLOCK(p); sx_xlock(&allproc_lock); MPASS(swapped_cnt > 0); swapped_cnt--; if (curthread != &thread0) swap_inprogress++; sx_xunlock(&allproc_lock); /* * We hold no lock here because the list of threads * can not change while all threads in the process are * swapped out. */ FOREACH_THREAD_IN_PROC(p, td) vm_thread_swapin(td, oom_alloc); if (curthread != &thread0) { sx_xlock(&allproc_lock); MPASS(swap_inprogress > 0); swap_inprogress--; last_swapin = ticks; sx_xunlock(&allproc_lock); } PROC_LOCK(p); swapclear(p); p->p_swtick = ticks; /* Allow other threads to swap p out now. */ wakeup(&p->p_flag); --p->p_lock; } } /* * This swapin algorithm attempts to swap-in processes only if there * is enough space for them. Of course, if a process waits for a long * time, it will be swapped in anyway. */ static struct proc * swapper_selector(bool wkilled_only) { struct proc *p, *res; struct thread *td; int ppri, pri, slptime, swtime; sx_assert(&allproc_lock, SA_SLOCKED); if (swapped_cnt == 0) return (NULL); res = NULL; ppri = INT_MIN; FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NEW || (p->p_flag & (P_SWAPPINGOUT | P_SWAPPINGIN | P_INMEM)) != 0) { PROC_UNLOCK(p); continue; } if (p->p_state == PRS_NORMAL && (p->p_flag & P_WKILLED) != 0) { /* * A swapped-out process might have mapped a * large portion of the system's pages as * anonymous memory. There is no other way to * release the memory other than to kill the * process, for which we need to swap it in. */ return (p); } if (wkilled_only) { PROC_UNLOCK(p); continue; } swtime = (ticks - p->p_swtick) / hz; FOREACH_THREAD_IN_PROC(p, td) { /* * An otherwise runnable thread of a process * swapped out has only the TDI_SWAPPED bit set. */ thread_lock(td); if (td->td_inhibitors == TDI_SWAPPED) { slptime = (ticks - td->td_slptick) / hz; pri = swtime + slptime; if ((td->td_flags & TDF_SWAPINREQ) == 0) pri -= p->p_nice * 8; /* * if this thread is higher priority * and there is enough space, then select * this process instead of the previous * selection. */ if (pri > ppri) { res = p; ppri = pri; } } thread_unlock(td); } PROC_UNLOCK(p); } if (res != NULL) PROC_LOCK(res); return (res); } #define SWAPIN_INTERVAL (MAXSLP * hz / 2) /* * Limit swapper to swap in one non-WKILLED process in MAXSLP/2 * interval, assuming that there is: * - at least one domain that is not suffering from a shortage of free memory; * - no parallel swap-ins; * - no other swap-ins in the current SWAPIN_INTERVAL. */ static bool swapper_wkilled_only(void) { return (vm_page_count_min_set(&all_domains) || swap_inprogress > 0 || (u_int)(ticks - last_swapin) < SWAPIN_INTERVAL); } void swapper(void) { struct proc *p; for (;;) { sx_slock(&allproc_lock); p = swapper_selector(swapper_wkilled_only()); sx_sunlock(&allproc_lock); if (p == NULL) { tsleep(&proc0, PVM, "swapin", SWAPIN_INTERVAL); } else { PROC_LOCK_ASSERT(p, MA_OWNED); /* * Another process may be bringing or may have * already brought this process in while we * traverse all threads. Or, this process may * have exited or even being swapped out * again. */ if (p->p_state == PRS_NORMAL && (p->p_flag & (P_INMEM | P_SWAPPINGOUT | P_SWAPPINGIN)) == 0) { faultin(p); } PROC_UNLOCK(p); } } } /* * First, if any processes have been sleeping or stopped for at least * "swap_idle_threshold1" seconds, they are swapped out. If, however, * no such processes exist, then the longest-sleeping or stopped * process is swapped out. Finally, and only as a last resort, if * there are no sleeping or stopped processes, the longest-resident * process is swapped out. */ static void swapout_procs(int action) { struct proc *p; struct thread *td; int slptime; bool didswap, doswap; MPASS((action & (VM_SWAP_NORMAL | VM_SWAP_IDLE)) != 0); didswap = false; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { /* * Filter out not yet fully constructed processes. Do * not swap out held processes. Avoid processes which * are system, exiting, execing, traced, already swapped * out or are in the process of being swapped in or out. */ PROC_LOCK(p); if (p->p_state != PRS_NORMAL || p->p_lock != 0 || (p->p_flag & (P_SYSTEM | P_WEXIT | P_INEXEC | P_STOPPED_SINGLE | P_TRACED | P_SWAPPINGOUT | P_SWAPPINGIN | P_INMEM)) != P_INMEM) { PROC_UNLOCK(p); continue; } /* * Further consideration of this process for swap out * requires iterating over its threads. We release * allproc_lock here so that process creation and * destruction are not blocked while we iterate. * * To later reacquire allproc_lock and resume * iteration over the allproc list, we will first have * to release the lock on the process. We place a * hold on the process so that it remains in the * allproc list while it is unlocked. */ _PHOLD_LITE(p); sx_sunlock(&allproc_lock); /* * Do not swapout a realtime process. * Guarantee swap_idle_threshold1 time in memory. * If the system is under memory stress, or if we are * swapping idle processes >= swap_idle_threshold2, * then swap the process out. */ doswap = true; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); slptime = (ticks - td->td_slptick) / hz; if (PRI_IS_REALTIME(td->td_pri_class) || slptime < swap_idle_threshold1 || !thread_safetoswapout(td) || ((action & VM_SWAP_NORMAL) == 0 && slptime < swap_idle_threshold2)) doswap = false; thread_unlock(td); if (!doswap) break; } if (doswap && swapout(p) == 0) didswap = true; PROC_UNLOCK(p); if (didswap) { sx_xlock(&allproc_lock); swapped_cnt++; sx_downgrade(&allproc_lock); } else sx_slock(&allproc_lock); PRELE(p); } sx_sunlock(&allproc_lock); /* * If we swapped something out, and another process needed memory, * then wakeup the sched process. */ if (didswap) wakeup(&proc0); } static void swapclear(struct proc *p) { struct thread *td; PROC_LOCK_ASSERT(p, MA_OWNED); FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); td->td_flags |= TDF_INMEM; td->td_flags &= ~TDF_SWAPINREQ; TD_CLR_SWAPPED(td); if (TD_CAN_RUN(td)) { if (setrunnable(td, 0)) { #ifdef INVARIANTS /* * XXX: We just cleared TDI_SWAPPED * above and set TDF_INMEM, so this * should never happen. */ panic("not waking up swapper"); #endif } } else thread_unlock(td); } p->p_flag &= ~(P_SWAPPINGIN | P_SWAPPINGOUT); p->p_flag |= P_INMEM; } static int swapout(struct proc *p) { struct thread *td; PROC_LOCK_ASSERT(p, MA_OWNED); /* * The states of this process and its threads may have changed * by now. Assuming that there is only one pageout daemon thread, * this process should still be in memory. */ KASSERT((p->p_flag & (P_INMEM | P_SWAPPINGOUT | P_SWAPPINGIN)) == P_INMEM, ("swapout: lost a swapout race?")); /* * Remember the resident count. */ p->p_vmspace->vm_swrss = vmspace_resident_count(p->p_vmspace); /* * Check and mark all threads before we proceed. */ p->p_flag &= ~P_INMEM; p->p_flag |= P_SWAPPINGOUT; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (!thread_safetoswapout(td)) { thread_unlock(td); swapclear(p); return (EBUSY); } td->td_flags &= ~TDF_INMEM; TD_SET_SWAPPED(td); thread_unlock(td); } td = FIRST_THREAD_IN_PROC(p); ++td->td_ru.ru_nswap; PROC_UNLOCK(p); /* * This list is stable because all threads are now prevented from * running. The list is only modified in the context of a running * thread in this process. */ FOREACH_THREAD_IN_PROC(p, td) vm_thread_swapout(td); PROC_LOCK(p); p->p_flag &= ~P_SWAPPINGOUT; p->p_swtick = ticks; return (0); } Index: projects/clang1000-import/sys/vm/vnode_pager.c =================================================================== --- projects/clang1000-import/sys/vm/vnode_pager.c (revision 356919) +++ projects/clang1000-import/sys/vm/vnode_pager.c (revision 356920) @@ -1,1596 +1,1581 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1990 University of Utah. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * Copyright (c) 1993, 1994 John S. Dyson * Copyright (c) 1995, David Greenman * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91 */ /* * Page to/from files (vnodes). */ /* * TODO: * Implement VOP_GETPAGES/PUTPAGES interface for filesystems. Will * greatly re-simplify the vnode_pager. */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int vnode_pager_addr(struct vnode *vp, vm_ooffset_t address, daddr_t *rtaddress, int *run); static int vnode_pager_input_smlfs(vm_object_t object, vm_page_t m); static int vnode_pager_input_old(vm_object_t object, vm_page_t m); static void vnode_pager_dealloc(vm_object_t); static int vnode_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); static int vnode_pager_getpages_async(vm_object_t, vm_page_t *, int, int *, int *, vop_getpages_iodone_t, void *); static void vnode_pager_putpages(vm_object_t, vm_page_t *, int, int, int *); static boolean_t vnode_pager_haspage(vm_object_t, vm_pindex_t, int *, int *); static vm_object_t vnode_pager_alloc(void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *cred); static int vnode_pager_generic_getpages_done(struct buf *); static void vnode_pager_generic_getpages_done_async(struct buf *); static void vnode_pager_update_writecount(vm_object_t, vm_offset_t, vm_offset_t); static void vnode_pager_release_writecount(vm_object_t, vm_offset_t, vm_offset_t); struct pagerops vnodepagerops = { .pgo_alloc = vnode_pager_alloc, .pgo_dealloc = vnode_pager_dealloc, .pgo_getpages = vnode_pager_getpages, .pgo_getpages_async = vnode_pager_getpages_async, .pgo_putpages = vnode_pager_putpages, .pgo_haspage = vnode_pager_haspage, .pgo_update_writecount = vnode_pager_update_writecount, .pgo_release_writecount = vnode_pager_release_writecount, }; static struct domainset *vnode_domainset = NULL; SYSCTL_PROC(_debug, OID_AUTO, vnode_domainset, CTLTYPE_STRING | CTLFLAG_MPSAFE | CTLFLAG_RW, &vnode_domainset, 0, sysctl_handle_domainset, "A", "Default vnode NUMA policy"); static int nvnpbufs; SYSCTL_INT(_vm, OID_AUTO, vnode_pbufs, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &nvnpbufs, 0, "number of physical buffers allocated for vnode pager"); static uma_zone_t vnode_pbuf_zone; static void vnode_pager_init(void *dummy) { #ifdef __LP64__ nvnpbufs = nswbuf * 2; #else nvnpbufs = nswbuf / 2; #endif TUNABLE_INT_FETCH("vm.vnode_pbufs", &nvnpbufs); vnode_pbuf_zone = pbuf_zsecond_create("vnpbuf", nvnpbufs); } SYSINIT(vnode_pager, SI_SUB_CPU, SI_ORDER_ANY, vnode_pager_init, NULL); /* Create the VM system backing object for this vnode */ int vnode_create_vobject(struct vnode *vp, off_t isize, struct thread *td) { vm_object_t object; vm_ooffset_t size = isize; struct vattr va; bool last; if (!vn_isdisk(vp, NULL) && vn_canvmio(vp) == FALSE) return (0); object = vp->v_object; if (object != NULL) return (0); if (size == 0) { if (vn_isdisk(vp, NULL)) { size = IDX_TO_OFF(INT_MAX); } else { if (VOP_GETATTR(vp, &va, td->td_ucred)) return (0); size = va.va_size; } } object = vnode_pager_alloc(vp, size, 0, 0, td->td_ucred); /* * Dereference the reference we just created. This assumes * that the object is associated with the vp. We still have * to serialize with vnode_pager_dealloc() for the last * potential reference. */ VM_OBJECT_RLOCK(object); last = refcount_release(&object->ref_count); VM_OBJECT_RUNLOCK(object); if (last) vrele(vp); KASSERT(vp->v_object != NULL, ("vnode_create_vobject: NULL object")); return (0); } void vnode_destroy_vobject(struct vnode *vp) { struct vm_object *obj; obj = vp->v_object; if (obj == NULL || obj->handle != vp) return; ASSERT_VOP_ELOCKED(vp, "vnode_destroy_vobject"); VM_OBJECT_WLOCK(obj); MPASS(obj->type == OBJT_VNODE); umtx_shm_object_terminated(obj); if (obj->ref_count == 0) { + KASSERT((obj->flags & OBJ_DEAD) == 0, + ("vnode_destroy_vobject: Terminating dead object")); + vm_object_set_flag(obj, OBJ_DEAD); + /* - * don't double-terminate the object + * Clean pages and flush buffers. */ - if ((obj->flags & OBJ_DEAD) == 0) { - vm_object_set_flag(obj, OBJ_DEAD); + vm_object_page_clean(obj, 0, 0, OBJPC_SYNC); + VM_OBJECT_WUNLOCK(obj); - /* - * Clean pages and flush buffers. - */ - vm_object_page_clean(obj, 0, 0, OBJPC_SYNC); - VM_OBJECT_WUNLOCK(obj); + vinvalbuf(vp, V_SAVE, 0, 0); - vinvalbuf(vp, V_SAVE, 0, 0); + BO_LOCK(&vp->v_bufobj); + vp->v_bufobj.bo_flag |= BO_DEAD; + BO_UNLOCK(&vp->v_bufobj); - BO_LOCK(&vp->v_bufobj); - vp->v_bufobj.bo_flag |= BO_DEAD; - BO_UNLOCK(&vp->v_bufobj); - - VM_OBJECT_WLOCK(obj); - vm_object_terminate(obj); - } else { - /* - * Waiters were already handled during object - * termination. The exclusive vnode lock hopefully - * prevented new waiters from referencing the dying - * object. - */ - vp->v_object = NULL; - VM_OBJECT_WUNLOCK(obj); - } + VM_OBJECT_WLOCK(obj); + vm_object_terminate(obj); } else { /* * Woe to the process that tries to page now :-). */ vm_pager_deallocate(obj); VM_OBJECT_WUNLOCK(obj); } KASSERT(vp->v_object == NULL, ("vp %p obj %p", vp, vp->v_object)); } /* * Allocate (or lookup) pager for a vnode. * Handle is a vnode pointer. */ vm_object_t vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t offset, struct ucred *cred) { vm_object_t object; struct vnode *vp; /* * Pageout to vnode, no can do yet. */ if (handle == NULL) return (NULL); vp = (struct vnode *)handle; ASSERT_VOP_LOCKED(vp, "vnode_pager_alloc"); KASSERT(vp->v_usecount != 0, ("vnode_pager_alloc: no vnode reference")); retry: object = vp->v_object; if (object == NULL) { /* * Add an object of the appropriate size */ object = vm_object_allocate(OBJT_VNODE, OFF_TO_IDX(round_page(size))); object->un_pager.vnp.vnp_size = size; object->un_pager.vnp.writemappings = 0; object->domain.dr_policy = vnode_domainset; object->handle = handle; if ((vp->v_vflag & VV_VMSIZEVNLOCK) != 0) { VM_OBJECT_WLOCK(object); vm_object_set_flag(object, OBJ_SIZEVNLOCK); VM_OBJECT_WUNLOCK(object); } VI_LOCK(vp); if (vp->v_object != NULL) { /* * Object has been created while we were allocating. */ VI_UNLOCK(vp); VM_OBJECT_WLOCK(object); KASSERT(object->ref_count == 1, ("leaked ref %p %d", object, object->ref_count)); object->type = OBJT_DEAD; refcount_init(&object->ref_count, 0); VM_OBJECT_WUNLOCK(object); vm_object_destroy(object); goto retry; } vp->v_object = object; VI_UNLOCK(vp); vrefact(vp); } else { vm_object_reference(object); #if VM_NRESERVLEVEL > 0 if ((object->flags & OBJ_COLORED) == 0) { VM_OBJECT_WLOCK(object); vm_object_color(object, 0); VM_OBJECT_WUNLOCK(object); } #endif } return (object); } /* * The object must be locked. */ static void vnode_pager_dealloc(vm_object_t object) { struct vnode *vp; int refs; vp = object->handle; if (vp == NULL) panic("vnode_pager_dealloc: pager already dealloced"); VM_OBJECT_ASSERT_WLOCKED(object); vm_object_pip_wait(object, "vnpdea"); refs = object->ref_count; object->handle = NULL; object->type = OBJT_DEAD; ASSERT_VOP_ELOCKED(vp, "vnode_pager_dealloc"); if (object->un_pager.vnp.writemappings > 0) { object->un_pager.vnp.writemappings = 0; VOP_ADD_WRITECOUNT_CHECKED(vp, -1); CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d", __func__, vp, vp->v_writecount); } vp->v_object = NULL; VI_LOCK(vp); /* * vm_map_entry_set_vnode_text() cannot reach this vnode by * following object->handle. Clear all text references now. * This also clears the transient references from * kern_execve(), which is fine because dead_vnodeops uses nop * for VOP_UNSET_TEXT(). */ if (vp->v_writecount < 0) vp->v_writecount = 0; VI_UNLOCK(vp); VM_OBJECT_WUNLOCK(object); if (refs > 0) vunref(vp); VM_OBJECT_WLOCK(object); } static boolean_t vnode_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after) { struct vnode *vp = object->handle; daddr_t bn; uintptr_t lockstate; int err; daddr_t reqblock; int poff; int bsize; int pagesperblock, blocksperpage; VM_OBJECT_ASSERT_LOCKED(object); /* * If no vp or vp is doomed or marked transparent to VM, we do not * have the page. */ if (vp == NULL || VN_IS_DOOMED(vp)) return FALSE; /* * If the offset is beyond end of file we do * not have the page. */ if (IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size) return FALSE; bsize = vp->v_mount->mnt_stat.f_iosize; pagesperblock = bsize / PAGE_SIZE; blocksperpage = 0; if (pagesperblock > 0) { reqblock = pindex / pagesperblock; } else { blocksperpage = (PAGE_SIZE / bsize); reqblock = pindex * blocksperpage; } lockstate = VM_OBJECT_DROP(object); err = VOP_BMAP(vp, reqblock, NULL, &bn, after, before); VM_OBJECT_PICKUP(object, lockstate); if (err) return TRUE; if (bn == -1) return FALSE; if (pagesperblock > 0) { poff = pindex - (reqblock * pagesperblock); if (before) { *before *= pagesperblock; *before += poff; } if (after) { /* * The BMAP vop can report a partial block in the * 'after', but must not report blocks after EOF. * Assert the latter, and truncate 'after' in case * of the former. */ KASSERT((reqblock + *after) * pagesperblock < roundup2(object->size, pagesperblock), ("%s: reqblock %jd after %d size %ju", __func__, (intmax_t )reqblock, *after, (uintmax_t )object->size)); *after *= pagesperblock; *after += pagesperblock - (poff + 1); if (pindex + *after >= object->size) *after = object->size - 1 - pindex; } } else { if (before) { *before /= blocksperpage; } if (after) { *after /= blocksperpage; } } return TRUE; } /* * Lets the VM system know about a change in size for a file. * We adjust our own internal size and flush any cached pages in * the associated object that are affected by the size change. * * Note: this routine may be invoked as a result of a pager put * operation (possibly at object termination time), so we must be careful. */ void vnode_pager_setsize(struct vnode *vp, vm_ooffset_t nsize) { vm_object_t object; vm_page_t m; vm_pindex_t nobjsize; if ((object = vp->v_object) == NULL) return; #ifdef DEBUG_VFS_LOCKS { struct mount *mp; mp = vp->v_mount; if (mp != NULL && (mp->mnt_kern_flag & MNTK_VMSETSIZE_BUG) == 0) assert_vop_elocked(vp, "vnode_pager_setsize and not locked vnode"); } #endif VM_OBJECT_WLOCK(object); if (object->type == OBJT_DEAD) { VM_OBJECT_WUNLOCK(object); return; } KASSERT(object->type == OBJT_VNODE, ("not vnode-backed object %p", object)); if (nsize == object->un_pager.vnp.vnp_size) { /* * Hasn't changed size */ VM_OBJECT_WUNLOCK(object); return; } nobjsize = OFF_TO_IDX(nsize + PAGE_MASK); if (nsize < object->un_pager.vnp.vnp_size) { /* * File has shrunk. Toss any cached pages beyond the new EOF. */ if (nobjsize < object->size) vm_object_page_remove(object, nobjsize, object->size, 0); /* * this gets rid of garbage at the end of a page that is now * only partially backed by the vnode. * * XXX for some reason (I don't know yet), if we take a * completely invalid page and mark it partially valid * it can screw up NFS reads, so we don't allow the case. */ if (!(nsize & PAGE_MASK)) goto out; m = vm_page_grab(object, OFF_TO_IDX(nsize), VM_ALLOC_NOCREAT); if (m == NULL) goto out; if (!vm_page_none_valid(m)) { int base = (int)nsize & PAGE_MASK; int size = PAGE_SIZE - base; /* * Clear out partial-page garbage in case * the page has been mapped. */ pmap_zero_page_area(m, base, size); /* * Update the valid bits to reflect the blocks that * have been zeroed. Some of these valid bits may * have already been set. */ vm_page_set_valid_range(m, base, size); /* * Round "base" to the next block boundary so that the * dirty bit for a partially zeroed block is not * cleared. */ base = roundup2(base, DEV_BSIZE); /* * Clear out partial-page dirty bits. * * note that we do not clear out the valid * bits. This would prevent bogus_page * replacement from working properly. */ vm_page_clear_dirty(m, base, PAGE_SIZE - base); } vm_page_xunbusy(m); } out: object->un_pager.vnp.vnp_size = nsize; object->size = nobjsize; VM_OBJECT_WUNLOCK(object); } /* * calculate the linear (byte) disk address of specified virtual * file address */ static int vnode_pager_addr(struct vnode *vp, vm_ooffset_t address, daddr_t *rtaddress, int *run) { int bsize; int err; daddr_t vblock; daddr_t voffset; if (address < 0) return -1; if (VN_IS_DOOMED(vp)) return -1; bsize = vp->v_mount->mnt_stat.f_iosize; vblock = address / bsize; voffset = address % bsize; err = VOP_BMAP(vp, vblock, NULL, rtaddress, run, NULL); if (err == 0) { if (*rtaddress != -1) *rtaddress += voffset / DEV_BSIZE; if (run) { *run += 1; *run *= bsize / PAGE_SIZE; *run -= voffset / PAGE_SIZE; } } return (err); } /* * small block filesystem vnode pager input */ static int vnode_pager_input_smlfs(vm_object_t object, vm_page_t m) { struct vnode *vp; struct bufobj *bo; struct buf *bp; struct sf_buf *sf; daddr_t fileaddr; vm_offset_t bsize; vm_page_bits_t bits; int error, i; error = 0; vp = object->handle; if (VN_IS_DOOMED(vp)) return VM_PAGER_BAD; bsize = vp->v_mount->mnt_stat.f_iosize; VOP_BMAP(vp, 0, &bo, 0, NULL, NULL); sf = sf_buf_alloc(m, 0); for (i = 0; i < PAGE_SIZE / bsize; i++) { vm_ooffset_t address; bits = vm_page_bits(i * bsize, bsize); if (m->valid & bits) continue; address = IDX_TO_OFF(m->pindex) + i * bsize; if (address >= object->un_pager.vnp.vnp_size) { fileaddr = -1; } else { error = vnode_pager_addr(vp, address, &fileaddr, NULL); if (error) break; } if (fileaddr != -1) { bp = uma_zalloc(vnode_pbuf_zone, M_WAITOK); /* build a minimal buffer header */ bp->b_iocmd = BIO_READ; bp->b_iodone = bdone; KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred")); KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred")); bp->b_rcred = crhold(curthread->td_ucred); bp->b_wcred = crhold(curthread->td_ucred); bp->b_data = (caddr_t)sf_buf_kva(sf) + i * bsize; bp->b_blkno = fileaddr; pbgetbo(bo, bp); bp->b_vp = vp; bp->b_bcount = bsize; bp->b_bufsize = bsize; bp->b_runningbufspace = bp->b_bufsize; atomic_add_long(&runningbufspace, bp->b_runningbufspace); /* do the input */ bp->b_iooffset = dbtob(bp->b_blkno); bstrategy(bp); bwait(bp, PVM, "vnsrd"); if ((bp->b_ioflags & BIO_ERROR) != 0) error = EIO; /* * free the buffer header back to the swap buffer pool */ bp->b_vp = NULL; pbrelbo(bp); uma_zfree(vnode_pbuf_zone, bp); if (error) break; } else bzero((caddr_t)sf_buf_kva(sf) + i * bsize, bsize); KASSERT((m->dirty & bits) == 0, ("vnode_pager_input_smlfs: page %p is dirty", m)); vm_page_bits_set(m, &m->valid, bits); } sf_buf_free(sf); if (error) { return VM_PAGER_ERROR; } return VM_PAGER_OK; } /* * old style vnode pager input routine */ static int vnode_pager_input_old(vm_object_t object, vm_page_t m) { struct uio auio; struct iovec aiov; int error; int size; struct sf_buf *sf; struct vnode *vp; VM_OBJECT_ASSERT_WLOCKED(object); error = 0; /* * Return failure if beyond current EOF */ if (IDX_TO_OFF(m->pindex) >= object->un_pager.vnp.vnp_size) { return VM_PAGER_BAD; } else { size = PAGE_SIZE; if (IDX_TO_OFF(m->pindex) + size > object->un_pager.vnp.vnp_size) size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(m->pindex); vp = object->handle; VM_OBJECT_WUNLOCK(object); /* * Allocate a kernel virtual address and initialize so that * we can use VOP_READ/WRITE routines. */ sf = sf_buf_alloc(m, 0); aiov.iov_base = (caddr_t)sf_buf_kva(sf); aiov.iov_len = size; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = IDX_TO_OFF(m->pindex); auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_resid = size; auio.uio_td = curthread; error = VOP_READ(vp, &auio, 0, curthread->td_ucred); if (!error) { int count = size - auio.uio_resid; if (count == 0) error = EINVAL; else if (count != PAGE_SIZE) bzero((caddr_t)sf_buf_kva(sf) + count, PAGE_SIZE - count); } sf_buf_free(sf); VM_OBJECT_WLOCK(object); } KASSERT(m->dirty == 0, ("vnode_pager_input_old: page %p is dirty", m)); if (!error) vm_page_valid(m); return error ? VM_PAGER_ERROR : VM_PAGER_OK; } /* * generic vnode pager input routine */ /* * Local media VFS's that do not implement their own VOP_GETPAGES * should have their VOP_GETPAGES call to vnode_pager_generic_getpages() * to implement the previous behaviour. * * All other FS's should use the bypass to get to the local media * backing vp's VOP_GETPAGES. */ static int vnode_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, int *rahead) { struct vnode *vp; int rtval; + /* Handle is stable with paging in progress. */ vp = object->handle; - VM_OBJECT_WUNLOCK(object); rtval = VOP_GETPAGES(vp, m, count, rbehind, rahead); KASSERT(rtval != EOPNOTSUPP, ("vnode_pager: FS getpages not implemented\n")); - VM_OBJECT_WLOCK(object); return rtval; } static int vnode_pager_getpages_async(vm_object_t object, vm_page_t *m, int count, int *rbehind, int *rahead, vop_getpages_iodone_t iodone, void *arg) { struct vnode *vp; int rtval; vp = object->handle; - VM_OBJECT_WUNLOCK(object); rtval = VOP_GETPAGES_ASYNC(vp, m, count, rbehind, rahead, iodone, arg); KASSERT(rtval != EOPNOTSUPP, ("vnode_pager: FS getpages_async not implemented\n")); - VM_OBJECT_WLOCK(object); return (rtval); } /* * The implementation of VOP_GETPAGES() and VOP_GETPAGES_ASYNC() for * local filesystems, where partially valid pages can only occur at * the end of file. */ int vnode_pager_local_getpages(struct vop_getpages_args *ap) { return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead, NULL, NULL)); } int vnode_pager_local_getpages_async(struct vop_getpages_async_args *ap) { return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, ap->a_rahead, ap->a_iodone, ap->a_arg)); } /* * This is now called from local media FS's to operate against their * own vnodes if they fail to implement VOP_GETPAGES. */ int vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count, int *a_rbehind, int *a_rahead, vop_getpages_iodone_t iodone, void *arg) { vm_object_t object; struct bufobj *bo; struct buf *bp; off_t foff; #ifdef INVARIANTS off_t blkno0; #endif int bsize, pagesperblock; int error, before, after, rbehind, rahead, poff, i; int bytecount, secmask; KASSERT(vp->v_type != VCHR && vp->v_type != VBLK, ("%s does not support devices", __func__)); if (VN_IS_DOOMED(vp)) return (VM_PAGER_BAD); object = vp->v_object; foff = IDX_TO_OFF(m[0]->pindex); bsize = vp->v_mount->mnt_stat.f_iosize; pagesperblock = bsize / PAGE_SIZE; KASSERT(foff < object->un_pager.vnp.vnp_size, ("%s: page %p offset beyond vp %p size", __func__, m[0], vp)); KASSERT(count <= nitems(bp->b_pages), ("%s: requested %d pages", __func__, count)); /* * The last page has valid blocks. Invalid part can only * exist at the end of file, and the page is made fully valid * by zeroing in vm_pager_get_pages(). */ if (!vm_page_none_valid(m[count - 1]) && --count == 0) { if (iodone != NULL) iodone(arg, m, 1, 0); return (VM_PAGER_OK); } bp = uma_zalloc(vnode_pbuf_zone, M_WAITOK); /* * Get the underlying device blocks for the file with VOP_BMAP(). * If the file system doesn't support VOP_BMAP, use old way of * getting pages via VOP_READ. */ error = VOP_BMAP(vp, foff / bsize, &bo, &bp->b_blkno, &after, &before); if (error == EOPNOTSUPP) { uma_zfree(vnode_pbuf_zone, bp); VM_OBJECT_WLOCK(object); for (i = 0; i < count; i++) { VM_CNT_INC(v_vnodein); VM_CNT_INC(v_vnodepgsin); error = vnode_pager_input_old(object, m[i]); if (error) break; } VM_OBJECT_WUNLOCK(object); return (error); } else if (error != 0) { uma_zfree(vnode_pbuf_zone, bp); return (VM_PAGER_ERROR); } /* * If the file system supports BMAP, but blocksize is smaller * than a page size, then use special small filesystem code. */ if (pagesperblock == 0) { uma_zfree(vnode_pbuf_zone, bp); for (i = 0; i < count; i++) { VM_CNT_INC(v_vnodein); VM_CNT_INC(v_vnodepgsin); error = vnode_pager_input_smlfs(object, m[i]); if (error) break; } return (error); } /* * A sparse file can be encountered only for a single page request, * which may not be preceded by call to vm_pager_haspage(). */ if (bp->b_blkno == -1) { KASSERT(count == 1, ("%s: array[%d] request to a sparse file %p", __func__, count, vp)); uma_zfree(vnode_pbuf_zone, bp); pmap_zero_page(m[0]); KASSERT(m[0]->dirty == 0, ("%s: page %p is dirty", __func__, m[0])); vm_page_valid(m[0]); return (VM_PAGER_OK); } #ifdef INVARIANTS blkno0 = bp->b_blkno; #endif bp->b_blkno += (foff % bsize) / DEV_BSIZE; /* Recalculate blocks available after/before to pages. */ poff = (foff % bsize) / PAGE_SIZE; before *= pagesperblock; before += poff; after *= pagesperblock; after += pagesperblock - (poff + 1); if (m[0]->pindex + after >= object->size) after = object->size - 1 - m[0]->pindex; KASSERT(count <= after + 1, ("%s: %d pages asked, can do only %d", __func__, count, after + 1)); after -= count - 1; /* Trim requested rbehind/rahead to possible values. */ rbehind = a_rbehind ? *a_rbehind : 0; rahead = a_rahead ? *a_rahead : 0; rbehind = min(rbehind, before); rbehind = min(rbehind, m[0]->pindex); rahead = min(rahead, after); rahead = min(rahead, object->size - m[count - 1]->pindex); /* * Check that total amount of pages fit into buf. Trim rbehind and * rahead evenly if not. */ if (rbehind + rahead + count > nitems(bp->b_pages)) { int trim, sum; trim = rbehind + rahead + count - nitems(bp->b_pages) + 1; sum = rbehind + rahead; if (rbehind == before) { /* Roundup rbehind trim to block size. */ rbehind -= roundup(trim * rbehind / sum, pagesperblock); if (rbehind < 0) rbehind = 0; } else rbehind -= trim * rbehind / sum; rahead -= trim * rahead / sum; } KASSERT(rbehind + rahead + count <= nitems(bp->b_pages), ("%s: behind %d ahead %d count %d", __func__, rbehind, rahead, count)); /* * Fill in the bp->b_pages[] array with requested and optional * read behind or read ahead pages. Read behind pages are looked * up in a backward direction, down to a first cached page. Same * for read ahead pages, but there is no need to shift the array * in case of encountering a cached page. */ i = bp->b_npages = 0; if (rbehind) { vm_pindex_t startpindex, tpindex; vm_page_t p; VM_OBJECT_WLOCK(object); startpindex = m[0]->pindex - rbehind; if ((p = TAILQ_PREV(m[0], pglist, listq)) != NULL && p->pindex >= startpindex) startpindex = p->pindex + 1; /* tpindex is unsigned; beware of numeric underflow. */ for (tpindex = m[0]->pindex - 1; tpindex >= startpindex && tpindex < m[0]->pindex; tpindex--, i++) { p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL); if (p == NULL) { /* Shift the array. */ for (int j = 0; j < i; j++) bp->b_pages[j] = bp->b_pages[j + tpindex + 1 - startpindex]; break; } bp->b_pages[tpindex - startpindex] = p; } bp->b_pgbefore = i; bp->b_npages += i; bp->b_blkno -= IDX_TO_OFF(i) / DEV_BSIZE; } else bp->b_pgbefore = 0; /* Requested pages. */ for (int j = 0; j < count; j++, i++) bp->b_pages[i] = m[j]; bp->b_npages += count; if (rahead) { vm_pindex_t endpindex, tpindex; vm_page_t p; if (!VM_OBJECT_WOWNED(object)) VM_OBJECT_WLOCK(object); endpindex = m[count - 1]->pindex + rahead + 1; if ((p = TAILQ_NEXT(m[count - 1], listq)) != NULL && p->pindex < endpindex) endpindex = p->pindex; if (endpindex > object->size) endpindex = object->size; for (tpindex = m[count - 1]->pindex + 1; tpindex < endpindex; i++, tpindex++) { p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL); if (p == NULL) break; bp->b_pages[i] = p; } bp->b_pgafter = i - bp->b_npages; bp->b_npages = i; } else bp->b_pgafter = 0; if (VM_OBJECT_WOWNED(object)) VM_OBJECT_WUNLOCK(object); /* Report back actual behind/ahead read. */ if (a_rbehind) *a_rbehind = bp->b_pgbefore; if (a_rahead) *a_rahead = bp->b_pgafter; #ifdef INVARIANTS KASSERT(bp->b_npages <= nitems(bp->b_pages), ("%s: buf %p overflowed", __func__, bp)); for (int j = 1, prev = 0; j < bp->b_npages; j++) { if (bp->b_pages[j] == bogus_page) continue; KASSERT(bp->b_pages[j]->pindex - bp->b_pages[prev]->pindex == j - prev, ("%s: pages array not consecutive, bp %p", __func__, bp)); prev = j; } #endif /* * Recalculate first offset and bytecount with regards to read behind. * Truncate bytecount to vnode real size and round up physical size * for real devices. */ foff = IDX_TO_OFF(bp->b_pages[0]->pindex); bytecount = bp->b_npages << PAGE_SHIFT; if ((foff + bytecount) > object->un_pager.vnp.vnp_size) bytecount = object->un_pager.vnp.vnp_size - foff; secmask = bo->bo_bsize - 1; KASSERT(secmask < PAGE_SIZE && secmask > 0, ("%s: sector size %d too large", __func__, secmask + 1)); bytecount = (bytecount + secmask) & ~secmask; /* * And map the pages to be read into the kva, if the filesystem * requires mapped buffers. */ if ((vp->v_mount->mnt_kern_flag & MNTK_UNMAPPED_BUFS) != 0 && unmapped_buf_allowed) { bp->b_data = unmapped_buf; bp->b_offset = 0; } else { bp->b_data = bp->b_kvabase; pmap_qenter((vm_offset_t)bp->b_data, bp->b_pages, bp->b_npages); } /* Build a minimal buffer header. */ bp->b_iocmd = BIO_READ; KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred")); KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred")); bp->b_rcred = crhold(curthread->td_ucred); bp->b_wcred = crhold(curthread->td_ucred); pbgetbo(bo, bp); bp->b_vp = vp; bp->b_bcount = bp->b_bufsize = bp->b_runningbufspace = bytecount; bp->b_iooffset = dbtob(bp->b_blkno); KASSERT(IDX_TO_OFF(m[0]->pindex - bp->b_pages[0]->pindex) == (blkno0 - bp->b_blkno) * DEV_BSIZE + IDX_TO_OFF(m[0]->pindex) % bsize, ("wrong offsets bsize %d m[0] %ju b_pages[0] %ju " "blkno0 %ju b_blkno %ju", bsize, (uintmax_t)m[0]->pindex, (uintmax_t)bp->b_pages[0]->pindex, (uintmax_t)blkno0, (uintmax_t)bp->b_blkno)); atomic_add_long(&runningbufspace, bp->b_runningbufspace); VM_CNT_INC(v_vnodein); VM_CNT_ADD(v_vnodepgsin, bp->b_npages); if (iodone != NULL) { /* async */ bp->b_pgiodone = iodone; bp->b_caller1 = arg; bp->b_iodone = vnode_pager_generic_getpages_done_async; bp->b_flags |= B_ASYNC; BUF_KERNPROC(bp); bstrategy(bp); return (VM_PAGER_OK); } else { bp->b_iodone = bdone; bstrategy(bp); bwait(bp, PVM, "vnread"); error = vnode_pager_generic_getpages_done(bp); for (i = 0; i < bp->b_npages; i++) bp->b_pages[i] = NULL; bp->b_vp = NULL; pbrelbo(bp); uma_zfree(vnode_pbuf_zone, bp); return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK); } } static void vnode_pager_generic_getpages_done_async(struct buf *bp) { int error; error = vnode_pager_generic_getpages_done(bp); /* Run the iodone upon the requested range. */ bp->b_pgiodone(bp->b_caller1, bp->b_pages + bp->b_pgbefore, bp->b_npages - bp->b_pgbefore - bp->b_pgafter, error); for (int i = 0; i < bp->b_npages; i++) bp->b_pages[i] = NULL; bp->b_vp = NULL; pbrelbo(bp); uma_zfree(vnode_pbuf_zone, bp); } static int vnode_pager_generic_getpages_done(struct buf *bp) { vm_object_t object; off_t tfoff, nextoff; int i, error; error = (bp->b_ioflags & BIO_ERROR) != 0 ? EIO : 0; object = bp->b_vp->v_object; if (error == 0 && bp->b_bcount != bp->b_npages * PAGE_SIZE) { if (!buf_mapped(bp)) { bp->b_data = bp->b_kvabase; pmap_qenter((vm_offset_t)bp->b_data, bp->b_pages, bp->b_npages); } bzero(bp->b_data + bp->b_bcount, PAGE_SIZE * bp->b_npages - bp->b_bcount); } if (buf_mapped(bp)) { pmap_qremove((vm_offset_t)bp->b_data, bp->b_npages); bp->b_data = unmapped_buf; } /* Read lock to protect size. */ VM_OBJECT_RLOCK(object); for (i = 0, tfoff = IDX_TO_OFF(bp->b_pages[0]->pindex); i < bp->b_npages; i++, tfoff = nextoff) { vm_page_t mt; nextoff = tfoff + PAGE_SIZE; mt = bp->b_pages[i]; if (mt == bogus_page) continue; if (nextoff <= object->un_pager.vnp.vnp_size) { /* * Read filled up entire page. */ vm_page_valid(mt); KASSERT(mt->dirty == 0, ("%s: page %p is dirty", __func__, mt)); KASSERT(!pmap_page_is_mapped(mt), ("%s: page %p is mapped", __func__, mt)); } else { /* * Read did not fill up entire page. * * Currently we do not set the entire page valid, * we just try to clear the piece that we couldn't * read. */ vm_page_set_valid_range(mt, 0, object->un_pager.vnp.vnp_size - tfoff); KASSERT((mt->dirty & vm_page_bits(0, object->un_pager.vnp.vnp_size - tfoff)) == 0, ("%s: page %p is dirty", __func__, mt)); } if (i < bp->b_pgbefore || i >= bp->b_npages - bp->b_pgafter) vm_page_readahead_finish(mt); } VM_OBJECT_RUNLOCK(object); if (error != 0) printf("%s: I/O read error %d\n", __func__, error); return (error); } /* * EOPNOTSUPP is no longer legal. For local media VFS's that do not * implement their own VOP_PUTPAGES, their VOP_PUTPAGES should call to * vnode_pager_generic_putpages() to implement the previous behaviour. * * All other FS's should use the bypass to get to the local media * backing vp's VOP_PUTPAGES. */ static void vnode_pager_putpages(vm_object_t object, vm_page_t *m, int count, int flags, int *rtvals) { int rtval; struct vnode *vp; int bytes = count * PAGE_SIZE; /* * Force synchronous operation if we are extremely low on memory * to prevent a low-memory deadlock. VOP operations often need to * allocate more memory to initiate the I/O ( i.e. do a BMAP * operation ). The swapper handles the case by limiting the amount * of asynchronous I/O, but that sort of solution doesn't scale well * for the vnode pager without a lot of work. * * Also, the backing vnode's iodone routine may not wake the pageout * daemon up. This should be probably be addressed XXX. */ if (vm_page_count_min()) flags |= VM_PAGER_PUT_SYNC; /* * Call device-specific putpages function */ vp = object->handle; VM_OBJECT_WUNLOCK(object); rtval = VOP_PUTPAGES(vp, m, bytes, flags, rtvals); KASSERT(rtval != EOPNOTSUPP, ("vnode_pager: stale FS putpages\n")); VM_OBJECT_WLOCK(object); } static int vn_off2bidx(vm_ooffset_t offset) { return ((offset & PAGE_MASK) / DEV_BSIZE); } static bool vn_dirty_blk(vm_page_t m, vm_ooffset_t offset) { KASSERT(IDX_TO_OFF(m->pindex) <= offset && offset < IDX_TO_OFF(m->pindex + 1), ("page %p pidx %ju offset %ju", m, (uintmax_t)m->pindex, (uintmax_t)offset)); return ((m->dirty & ((vm_page_bits_t)1 << vn_off2bidx(offset))) != 0); } /* * This is now called from local media FS's to operate against their * own vnodes if they fail to implement VOP_PUTPAGES. * * This is typically called indirectly via the pageout daemon and * clustering has already typically occurred, so in general we ask the * underlying filesystem to write the data out asynchronously rather * then delayed. */ int vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *ma, int bytecount, int flags, int *rtvals) { vm_object_t object; vm_page_t m; vm_ooffset_t maxblksz, next_offset, poffset, prev_offset; struct uio auio; struct iovec aiov; off_t prev_resid, wrsz; int count, error, i, maxsize, ncount, pgoff, ppscheck; bool in_hole; static struct timeval lastfail; static int curfail; object = vp->v_object; count = bytecount / PAGE_SIZE; for (i = 0; i < count; i++) rtvals[i] = VM_PAGER_ERROR; if ((int64_t)ma[0]->pindex < 0) { printf("vnode_pager_generic_putpages: " "attempt to write meta-data 0x%jx(%lx)\n", (uintmax_t)ma[0]->pindex, (u_long)ma[0]->dirty); rtvals[0] = VM_PAGER_BAD; return (VM_PAGER_BAD); } maxsize = count * PAGE_SIZE; ncount = count; poffset = IDX_TO_OFF(ma[0]->pindex); /* * If the page-aligned write is larger then the actual file we * have to invalidate pages occurring beyond the file EOF. However, * there is an edge case where a file may not be page-aligned where * the last page is partially invalid. In this case the filesystem * may not properly clear the dirty bits for the entire page (which * could be VM_PAGE_BITS_ALL due to the page having been mmap()d). * With the page locked we are free to fix-up the dirty bits here. * * We do not under any circumstances truncate the valid bits, as * this will screw up bogus page replacement. */ VM_OBJECT_RLOCK(object); if (maxsize + poffset > object->un_pager.vnp.vnp_size) { if (object->un_pager.vnp.vnp_size > poffset) { maxsize = object->un_pager.vnp.vnp_size - poffset; ncount = btoc(maxsize); if ((pgoff = (int)maxsize & PAGE_MASK) != 0) { pgoff = roundup2(pgoff, DEV_BSIZE); /* * If the page is busy and the following * conditions hold, then the page's dirty * field cannot be concurrently changed by a * pmap operation. */ m = ma[ncount - 1]; vm_page_assert_sbusied(m); KASSERT(!pmap_page_is_write_mapped(m), ("vnode_pager_generic_putpages: page %p is not read-only", m)); MPASS(m->dirty != 0); vm_page_clear_dirty(m, pgoff, PAGE_SIZE - pgoff); } } else { maxsize = 0; ncount = 0; } for (i = ncount; i < count; i++) rtvals[i] = VM_PAGER_BAD; } VM_OBJECT_RUNLOCK(object); auio.uio_iov = &aiov; auio.uio_segflg = UIO_NOCOPY; auio.uio_rw = UIO_WRITE; auio.uio_td = NULL; maxblksz = roundup2(poffset + maxsize, DEV_BSIZE); for (prev_offset = poffset; prev_offset < maxblksz;) { /* Skip clean blocks. */ for (in_hole = true; in_hole && prev_offset < maxblksz;) { m = ma[OFF_TO_IDX(prev_offset - poffset)]; for (i = vn_off2bidx(prev_offset); i < sizeof(vm_page_bits_t) * NBBY && prev_offset < maxblksz; i++) { if (vn_dirty_blk(m, prev_offset)) { in_hole = false; break; } prev_offset += DEV_BSIZE; } } if (in_hole) goto write_done; /* Find longest run of dirty blocks. */ for (next_offset = prev_offset; next_offset < maxblksz;) { m = ma[OFF_TO_IDX(next_offset - poffset)]; for (i = vn_off2bidx(next_offset); i < sizeof(vm_page_bits_t) * NBBY && next_offset < maxblksz; i++) { if (!vn_dirty_blk(m, next_offset)) goto start_write; next_offset += DEV_BSIZE; } } start_write: if (next_offset > poffset + maxsize) next_offset = poffset + maxsize; /* * Getting here requires finding a dirty block in the * 'skip clean blocks' loop. */ MPASS(prev_offset < next_offset); aiov.iov_base = NULL; auio.uio_iovcnt = 1; auio.uio_offset = prev_offset; prev_resid = auio.uio_resid = aiov.iov_len = next_offset - prev_offset; error = VOP_WRITE(vp, &auio, vnode_pager_putpages_ioflags(flags), curthread->td_ucred); wrsz = prev_resid - auio.uio_resid; if (wrsz == 0) { if (ppsratecheck(&lastfail, &curfail, 1) != 0) { vn_printf(vp, "vnode_pager_putpages: " "zero-length write at %ju resid %zd\n", auio.uio_offset, auio.uio_resid); } break; } /* Adjust the starting offset for next iteration. */ prev_offset += wrsz; MPASS(auio.uio_offset == prev_offset); ppscheck = 0; if (error != 0 && (ppscheck = ppsratecheck(&lastfail, &curfail, 1)) != 0) vn_printf(vp, "vnode_pager_putpages: I/O error %d\n", error); if (auio.uio_resid != 0 && (ppscheck != 0 || ppsratecheck(&lastfail, &curfail, 1) != 0)) vn_printf(vp, "vnode_pager_putpages: residual I/O %zd " "at %ju\n", auio.uio_resid, (uintmax_t)ma[0]->pindex); if (error != 0 || auio.uio_resid != 0) break; } write_done: /* Mark completely processed pages. */ for (i = 0; i < OFF_TO_IDX(prev_offset - poffset); i++) rtvals[i] = VM_PAGER_OK; /* Mark partial EOF page. */ if (prev_offset == poffset + maxsize && (prev_offset & PAGE_MASK) != 0) rtvals[i++] = VM_PAGER_OK; /* Unwritten pages in range, free bonus if the page is clean. */ for (; i < ncount; i++) rtvals[i] = ma[i]->dirty == 0 ? VM_PAGER_OK : VM_PAGER_ERROR; VM_CNT_ADD(v_vnodepgsout, i); VM_CNT_INC(v_vnodeout); return (rtvals[0]); } int vnode_pager_putpages_ioflags(int pager_flags) { int ioflags; /* * Pageouts are already clustered, use IO_ASYNC to force a * bawrite() rather then a bdwrite() to prevent paging I/O * from saturating the buffer cache. Dummy-up the sequential * heuristic to cause large ranges to cluster. If neither * IO_SYNC or IO_ASYNC is set, the system decides how to * cluster. */ ioflags = IO_VMIO; if ((pager_flags & (VM_PAGER_PUT_SYNC | VM_PAGER_PUT_INVAL)) != 0) ioflags |= IO_SYNC; else if ((pager_flags & VM_PAGER_CLUSTER_OK) == 0) ioflags |= IO_ASYNC; ioflags |= (pager_flags & VM_PAGER_PUT_INVAL) != 0 ? IO_INVAL: 0; ioflags |= (pager_flags & VM_PAGER_PUT_NOREUSE) != 0 ? IO_NOREUSE : 0; ioflags |= IO_SEQMAX << IO_SEQSHIFT; return (ioflags); } /* * vnode_pager_undirty_pages(). * * A helper to mark pages as clean after pageout that was possibly * done with a short write. The lpos argument specifies the page run * length in bytes, and the written argument specifies how many bytes * were actually written. eof is the offset past the last valid byte * in the vnode using the absolute file position of the first byte in * the run as the base from which it is computed. */ void vnode_pager_undirty_pages(vm_page_t *ma, int *rtvals, int written, off_t eof, int lpos) { vm_object_t obj; int i, pos, pos_devb; if (written == 0 && eof >= lpos) return; obj = ma[0]->object; for (i = 0, pos = 0; pos < written; i++, pos += PAGE_SIZE) { if (pos < trunc_page(written)) { rtvals[i] = VM_PAGER_OK; vm_page_undirty(ma[i]); } else { /* Partially written page. */ rtvals[i] = VM_PAGER_AGAIN; vm_page_clear_dirty(ma[i], 0, written & PAGE_MASK); } } if (eof >= lpos) /* avoid truncation */ return; for (pos = eof, i = OFF_TO_IDX(trunc_page(pos)); pos < lpos; i++) { if (pos != trunc_page(pos)) { /* * The page contains the last valid byte in * the vnode, mark the rest of the page as * clean, potentially making the whole page * clean. */ pos_devb = roundup2(pos & PAGE_MASK, DEV_BSIZE); vm_page_clear_dirty(ma[i], pos_devb, PAGE_SIZE - pos_devb); /* * If the page was cleaned, report the pageout * on it as successful. msync() no longer * needs to write out the page, endlessly * creating write requests and dirty buffers. */ if (ma[i]->dirty == 0) rtvals[i] = VM_PAGER_OK; pos = round_page(pos); } else { /* vm_pageout_flush() clears dirty */ rtvals[i] = VM_PAGER_BAD; pos += PAGE_SIZE; } } } static void vnode_pager_update_writecount(vm_object_t object, vm_offset_t start, vm_offset_t end) { struct vnode *vp; vm_ooffset_t old_wm; VM_OBJECT_WLOCK(object); if (object->type != OBJT_VNODE) { VM_OBJECT_WUNLOCK(object); return; } old_wm = object->un_pager.vnp.writemappings; object->un_pager.vnp.writemappings += (vm_ooffset_t)end - start; vp = object->handle; if (old_wm == 0 && object->un_pager.vnp.writemappings != 0) { ASSERT_VOP_LOCKED(vp, "v_writecount inc"); VOP_ADD_WRITECOUNT_CHECKED(vp, 1); CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d", __func__, vp, vp->v_writecount); } else if (old_wm != 0 && object->un_pager.vnp.writemappings == 0) { ASSERT_VOP_LOCKED(vp, "v_writecount dec"); VOP_ADD_WRITECOUNT_CHECKED(vp, -1); CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d", __func__, vp, vp->v_writecount); } VM_OBJECT_WUNLOCK(object); } static void vnode_pager_release_writecount(vm_object_t object, vm_offset_t start, vm_offset_t end) { struct vnode *vp; struct mount *mp; vm_offset_t inc; VM_OBJECT_WLOCK(object); /* * First, recheck the object type to account for the race when * the vnode is reclaimed. */ if (object->type != OBJT_VNODE) { VM_OBJECT_WUNLOCK(object); return; } /* * Optimize for the case when writemappings is not going to * zero. */ inc = end - start; if (object->un_pager.vnp.writemappings != inc) { object->un_pager.vnp.writemappings -= inc; VM_OBJECT_WUNLOCK(object); return; } vp = object->handle; vhold(vp); VM_OBJECT_WUNLOCK(object); mp = NULL; vn_start_write(vp, &mp, V_WAIT); vn_lock(vp, LK_SHARED | LK_RETRY); /* * Decrement the object's writemappings, by swapping the start * and end arguments for vnode_pager_update_writecount(). If * there was not a race with vnode reclaimation, then the * vnode's v_writecount is decremented. */ vnode_pager_update_writecount(object, end, start); VOP_UNLOCK(vp); vdrop(vp); if (mp != NULL) vn_finished_write(mp); } Index: projects/clang1000-import/sys/x86/x86/mp_x86.c =================================================================== --- projects/clang1000-import/sys/x86/x86/mp_x86.c (revision 356919) +++ projects/clang1000-import/sys/x86/x86/mp_x86.c (revision 356920) @@ -1,1844 +1,1844 @@ /*- * Copyright (c) 1996, by Steve Passe * Copyright (c) 2003, by Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #ifdef __i386__ #include "opt_apic.h" #endif #include "opt_cpu.h" #include "opt_kstack_pages.h" #include "opt_pmap.h" #include "opt_sched.h" #include "opt_smp.h" #include #include #include #include /* cngetc() */ #include #include #ifdef GPROF #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_CPUS, "cpus", "CPU items"); /* lock region used by kernel profiling */ int mcount_lock; int mp_naps; /* # of Applications processors */ int boot_cpu_id = -1; /* designated BSP */ /* AP uses this during bootstrap. Do not staticize. */ char *bootSTK; int bootAP; /* Free these after use */ void *bootstacks[MAXCPU]; void *dpcpu; struct pcb stoppcbs[MAXCPU]; struct susppcb **susppcbs; #ifdef COUNT_IPIS /* Interrupt counts. */ static u_long *ipi_preempt_counts[MAXCPU]; static u_long *ipi_ast_counts[MAXCPU]; u_long *ipi_invltlb_counts[MAXCPU]; u_long *ipi_invlrng_counts[MAXCPU]; u_long *ipi_invlpg_counts[MAXCPU]; u_long *ipi_invlcache_counts[MAXCPU]; u_long *ipi_rendezvous_counts[MAXCPU]; static u_long *ipi_hardclock_counts[MAXCPU]; #endif /* Default cpu_ops implementation. */ struct cpu_ops cpu_ops; /* * Local data and functions. */ static volatile cpuset_t ipi_stop_nmi_pending; volatile cpuset_t resuming_cpus; volatile cpuset_t toresume_cpus; /* used to hold the AP's until we are ready to release them */ struct mtx ap_boot_mtx; /* Set to 1 once we're ready to let the APs out of the pen. */ volatile int aps_ready = 0; /* * Store data from cpu_add() until later in the boot when we actually setup * the APs. */ struct cpu_info *cpu_info; int *apic_cpuids; int cpu_apic_ids[MAXCPU]; _Static_assert(MAXCPU <= MAX_APIC_ID, "MAXCPU cannot be larger that MAX_APIC_ID"); _Static_assert(xAPIC_MAX_APIC_ID <= MAX_APIC_ID, "xAPIC_MAX_APIC_ID cannot be larger that MAX_APIC_ID"); static void release_aps(void *dummy); static void cpustop_handler_post(u_int cpu); static int hyperthreading_allowed = 1; SYSCTL_INT(_machdep, OID_AUTO, hyperthreading_allowed, CTLFLAG_RDTUN, &hyperthreading_allowed, 0, "Use Intel HTT logical CPUs"); static int hyperthreading_intr_allowed = 0; SYSCTL_INT(_machdep, OID_AUTO, hyperthreading_intr_allowed, CTLFLAG_RDTUN, &hyperthreading_intr_allowed, 0, "Allow interrupts on HTT logical CPUs"); static struct topo_node topo_root; static int pkg_id_shift; static int node_id_shift; static int core_id_shift; static int disabled_cpus; struct cache_info { int id_shift; int present; } static caches[MAX_CACHE_LEVELS]; unsigned int boot_address; static bool stop_mwait = false; SYSCTL_BOOL(_machdep, OID_AUTO, stop_mwait, CTLFLAG_RWTUN, &stop_mwait, 0, "Use MONITOR/MWAIT when stopping CPU, if available"); #define MiB(v) (v ## ULL << 20) void mem_range_AP_init(void) { if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP) mem_range_softc.mr_op->initAP(&mem_range_softc); } /* * Round up to the next power of two, if necessary, and then * take log2. * Returns -1 if argument is zero. */ static __inline int mask_width(u_int x) { return (fls(x << (1 - powerof2(x))) - 1); } /* * Add a cache level to the cache topology description. */ static int add_deterministic_cache(int type, int level, int share_count) { if (type == 0) return (0); if (type > 3) { printf("unexpected cache type %d\n", type); return (1); } if (type == 2) /* ignore instruction cache */ return (1); if (level == 0 || level > MAX_CACHE_LEVELS) { printf("unexpected cache level %d\n", type); return (1); } if (caches[level - 1].present) { printf("WARNING: multiple entries for L%u data cache\n", level); printf("%u => %u\n", caches[level - 1].id_shift, mask_width(share_count)); } caches[level - 1].id_shift = mask_width(share_count); caches[level - 1].present = 1; if (caches[level - 1].id_shift > pkg_id_shift) { printf("WARNING: L%u data cache covers more " "APIC IDs than a package (%u > %u)\n", level, caches[level - 1].id_shift, pkg_id_shift); caches[level - 1].id_shift = pkg_id_shift; } if (caches[level - 1].id_shift < core_id_shift) { printf("WARNING: L%u data cache covers fewer " "APIC IDs than a core (%u < %u)\n", level, caches[level - 1].id_shift, core_id_shift); caches[level - 1].id_shift = core_id_shift; } return (1); } /* * Determine topology of processing units and caches for AMD CPUs. * See: * - AMD CPUID Specification (Publication # 25481) * - BKDG for AMD NPT Family 0Fh Processors (Publication # 32559) * - BKDG For AMD Family 10h Processors (Publication # 31116) * - BKDG For AMD Family 15h Models 00h-0Fh Processors (Publication # 42301) * - BKDG For AMD Family 16h Models 00h-0Fh Processors (Publication # 48751) * - PPR For AMD Family 17h Models 00h-0Fh Processors (Publication # 54945) */ static void topo_probe_amd(void) { u_int p[4]; uint64_t v; int level; int nodes_per_socket; int share_count; int type; int i; /* No multi-core capability. */ if ((amd_feature2 & AMDID2_CMP) == 0) return; /* For families 10h and newer. */ pkg_id_shift = (cpu_procinfo2 & AMDID_COREID_SIZE) >> AMDID_COREID_SIZE_SHIFT; /* For 0Fh family. */ if (pkg_id_shift == 0) pkg_id_shift = mask_width((cpu_procinfo2 & AMDID_CMP_CORES) + 1); /* * Families prior to 16h define the following value as * cores per compute unit and we don't really care about the AMD * compute units at the moment. Perhaps we should treat them as * cores and cores within the compute units as hardware threads, * but that's up for debate. * Later families define the value as threads per compute unit, * so we are following AMD's nomenclature here. */ if ((amd_feature2 & AMDID2_TOPOLOGY) != 0 && CPUID_TO_FAMILY(cpu_id) >= 0x16) { cpuid_count(0x8000001e, 0, p); share_count = ((p[1] >> 8) & 0xff) + 1; core_id_shift = mask_width(share_count); /* * For Zen (17h), gather Nodes per Processor. Each node is a * Zeppelin die; TR and EPYC CPUs will have multiple dies per * package. Communication latency between dies is higher than * within them. */ nodes_per_socket = ((p[2] >> 8) & 0x7) + 1; node_id_shift = pkg_id_shift - mask_width(nodes_per_socket); } if ((amd_feature2 & AMDID2_TOPOLOGY) != 0) { for (i = 0; ; i++) { cpuid_count(0x8000001d, i, p); type = p[0] & 0x1f; level = (p[0] >> 5) & 0x7; share_count = 1 + ((p[0] >> 14) & 0xfff); if (!add_deterministic_cache(type, level, share_count)) break; } } else { if (cpu_exthigh >= 0x80000005) { cpuid_count(0x80000005, 0, p); if (((p[2] >> 24) & 0xff) != 0) { caches[0].id_shift = 0; caches[0].present = 1; } } if (cpu_exthigh >= 0x80000006) { cpuid_count(0x80000006, 0, p); if (((p[2] >> 16) & 0xffff) != 0) { caches[1].id_shift = 0; caches[1].present = 1; } if (((p[3] >> 18) & 0x3fff) != 0) { nodes_per_socket = 1; if ((amd_feature2 & AMDID2_NODE_ID) != 0) { /* * Handle multi-node processors that * have multiple chips, each with its * own L3 cache, on the same die. */ v = rdmsr(0xc001100c); nodes_per_socket = 1 + ((v >> 3) & 0x7); } caches[2].id_shift = pkg_id_shift - mask_width(nodes_per_socket); caches[2].present = 1; } } } } /* * Determine topology of processing units for Intel CPUs * using CPUID Leaf 1 and Leaf 4, if supported. * See: * - Intel 64 Architecture Processor Topology Enumeration * - Intel 64 and IA-32 ArchitecturesSoftware Developer’s Manual, * Volume 3A: System Programming Guide, PROGRAMMING CONSIDERATIONS * FOR HARDWARE MULTI-THREADING CAPABLE PROCESSORS */ static void topo_probe_intel_0x4(void) { u_int p[4]; int max_cores; int max_logical; /* Both zero and one here mean one logical processor per package. */ max_logical = (cpu_feature & CPUID_HTT) != 0 ? (cpu_procinfo & CPUID_HTT_CORES) >> 16 : 1; if (max_logical <= 1) return; if (cpu_high >= 0x4) { cpuid_count(0x04, 0, p); max_cores = ((p[0] >> 26) & 0x3f) + 1; } else max_cores = 1; core_id_shift = mask_width(max_logical/max_cores); KASSERT(core_id_shift >= 0, ("intel topo: max_cores > max_logical\n")); pkg_id_shift = core_id_shift + mask_width(max_cores); } /* * Determine topology of processing units for Intel CPUs * using CPUID Leaf 11, if supported. * See: * - Intel 64 Architecture Processor Topology Enumeration * - Intel 64 and IA-32 ArchitecturesSoftware Developer’s Manual, * Volume 3A: System Programming Guide, PROGRAMMING CONSIDERATIONS * FOR HARDWARE MULTI-THREADING CAPABLE PROCESSORS */ static void topo_probe_intel_0xb(void) { u_int p[4]; int bits; int type; int i; /* Fall back if CPU leaf 11 doesn't really exist. */ cpuid_count(0x0b, 0, p); if (p[1] == 0) { topo_probe_intel_0x4(); return; } /* We only support three levels for now. */ for (i = 0; ; i++) { cpuid_count(0x0b, i, p); bits = p[0] & 0x1f; type = (p[2] >> 8) & 0xff; if (type == 0) break; /* TODO: check for duplicate (re-)assignment */ if (type == CPUID_TYPE_SMT) core_id_shift = bits; else if (type == CPUID_TYPE_CORE) pkg_id_shift = bits; else printf("unknown CPU level type %d\n", type); } if (pkg_id_shift < core_id_shift) { printf("WARNING: core covers more APIC IDs than a package\n"); core_id_shift = pkg_id_shift; } } /* * Determine topology of caches for Intel CPUs. * See: * - Intel 64 Architecture Processor Topology Enumeration * - Intel 64 and IA-32 Architectures Software Developer’s Manual * Volume 2A: Instruction Set Reference, A-M, * CPUID instruction */ static void topo_probe_intel_caches(void) { u_int p[4]; int level; int share_count; int type; int i; if (cpu_high < 0x4) { /* * Available cache level and sizes can be determined * via CPUID leaf 2, but that requires a huge table of hardcoded * values, so for now just assume L1 and L2 caches potentially * shared only by HTT processing units, if HTT is present. */ caches[0].id_shift = pkg_id_shift; caches[0].present = 1; caches[1].id_shift = pkg_id_shift; caches[1].present = 1; return; } for (i = 0; ; i++) { cpuid_count(0x4, i, p); type = p[0] & 0x1f; level = (p[0] >> 5) & 0x7; share_count = 1 + ((p[0] >> 14) & 0xfff); if (!add_deterministic_cache(type, level, share_count)) break; } } /* * Determine topology of processing units and caches for Intel CPUs. * See: * - Intel 64 Architecture Processor Topology Enumeration */ static void topo_probe_intel(void) { /* * Note that 0x1 <= cpu_high < 4 case should be * compatible with topo_probe_intel_0x4() logic when * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1) * or it should trigger the fallback otherwise. */ if (cpu_high >= 0xb) topo_probe_intel_0xb(); else if (cpu_high >= 0x1) topo_probe_intel_0x4(); topo_probe_intel_caches(); } /* * Topology information is queried only on BSP, on which this * code runs and for which it can query CPUID information. * Then topology is extrapolated on all packages using an * assumption that APIC ID to hardware component ID mapping is * homogenious. * That doesn't necesserily imply that the topology is uniform. */ void topo_probe(void) { static int cpu_topo_probed = 0; struct x86_topo_layer { int type; int subtype; int id_shift; } topo_layers[MAX_CACHE_LEVELS + 4]; struct topo_node *parent; struct topo_node *node; int layer; int nlayers; int node_id; int i; if (cpu_topo_probed) return; CPU_ZERO(&logical_cpus_mask); if (mp_ncpus <= 1) ; /* nothing */ else if (cpu_vendor_id == CPU_VENDOR_AMD) topo_probe_amd(); else if (cpu_vendor_id == CPU_VENDOR_INTEL) topo_probe_intel(); KASSERT(pkg_id_shift >= core_id_shift, ("bug in APIC topology discovery")); nlayers = 0; bzero(topo_layers, sizeof(topo_layers)); topo_layers[nlayers].type = TOPO_TYPE_PKG; topo_layers[nlayers].id_shift = pkg_id_shift; if (bootverbose) printf("Package ID shift: %u\n", topo_layers[nlayers].id_shift); nlayers++; if (pkg_id_shift > node_id_shift && node_id_shift != 0) { topo_layers[nlayers].type = TOPO_TYPE_GROUP; topo_layers[nlayers].id_shift = node_id_shift; if (bootverbose) printf("Node ID shift: %u\n", topo_layers[nlayers].id_shift); nlayers++; } /* * Consider all caches to be within a package/chip * and "in front" of all sub-components like * cores and hardware threads. */ for (i = MAX_CACHE_LEVELS - 1; i >= 0; --i) { if (caches[i].present) { if (node_id_shift != 0) KASSERT(caches[i].id_shift <= node_id_shift, ("bug in APIC topology discovery")); KASSERT(caches[i].id_shift <= pkg_id_shift, ("bug in APIC topology discovery")); KASSERT(caches[i].id_shift >= core_id_shift, ("bug in APIC topology discovery")); topo_layers[nlayers].type = TOPO_TYPE_CACHE; topo_layers[nlayers].subtype = i + 1; topo_layers[nlayers].id_shift = caches[i].id_shift; if (bootverbose) printf("L%u cache ID shift: %u\n", topo_layers[nlayers].subtype, topo_layers[nlayers].id_shift); nlayers++; } } if (pkg_id_shift > core_id_shift) { topo_layers[nlayers].type = TOPO_TYPE_CORE; topo_layers[nlayers].id_shift = core_id_shift; if (bootverbose) printf("Core ID shift: %u\n", topo_layers[nlayers].id_shift); nlayers++; } topo_layers[nlayers].type = TOPO_TYPE_PU; topo_layers[nlayers].id_shift = 0; nlayers++; topo_init_root(&topo_root); for (i = 0; i <= max_apic_id; ++i) { if (!cpu_info[i].cpu_present) continue; parent = &topo_root; for (layer = 0; layer < nlayers; ++layer) { node_id = i >> topo_layers[layer].id_shift; parent = topo_add_node_by_hwid(parent, node_id, topo_layers[layer].type, topo_layers[layer].subtype); } } parent = &topo_root; for (layer = 0; layer < nlayers; ++layer) { node_id = boot_cpu_id >> topo_layers[layer].id_shift; node = topo_find_node_by_hwid(parent, node_id, topo_layers[layer].type, topo_layers[layer].subtype); topo_promote_child(node); parent = node; } cpu_topo_probed = 1; } /* * Assign logical CPU IDs to local APICs. */ void assign_cpu_ids(void) { struct topo_node *node; u_int smt_mask; int nhyper; smt_mask = (1u << core_id_shift) - 1; /* * Assign CPU IDs to local APIC IDs and disable any CPUs * beyond MAXCPU. CPU 0 is always assigned to the BSP. */ mp_ncpus = 0; nhyper = 0; TOPO_FOREACH(node, &topo_root) { if (node->type != TOPO_TYPE_PU) continue; if ((node->hwid & smt_mask) != (boot_cpu_id & smt_mask)) cpu_info[node->hwid].cpu_hyperthread = 1; if (resource_disabled("lapic", node->hwid)) { if (node->hwid != boot_cpu_id) cpu_info[node->hwid].cpu_disabled = 1; else printf("Cannot disable BSP, APIC ID = %d\n", node->hwid); } if (!hyperthreading_allowed && cpu_info[node->hwid].cpu_hyperthread) cpu_info[node->hwid].cpu_disabled = 1; if (mp_ncpus >= MAXCPU) cpu_info[node->hwid].cpu_disabled = 1; if (cpu_info[node->hwid].cpu_disabled) { disabled_cpus++; continue; } if (cpu_info[node->hwid].cpu_hyperthread) nhyper++; cpu_apic_ids[mp_ncpus] = node->hwid; apic_cpuids[node->hwid] = mp_ncpus; topo_set_pu_id(node, mp_ncpus); mp_ncpus++; } KASSERT(mp_maxid >= mp_ncpus - 1, ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, mp_ncpus)); mp_ncores = mp_ncpus - nhyper; smp_threads_per_core = mp_ncpus / mp_ncores; } /* * Print various information about the SMP system hardware and setup. */ void cpu_mp_announce(void) { struct topo_node *node; const char *hyperthread; struct topo_analysis topology; printf("FreeBSD/SMP: "); if (topo_analyze(&topo_root, 1, &topology)) { printf("%d package(s)", topology.entities[TOPO_LEVEL_PKG]); if (topology.entities[TOPO_LEVEL_GROUP] > 1) printf(" x %d groups", topology.entities[TOPO_LEVEL_GROUP]); if (topology.entities[TOPO_LEVEL_CACHEGROUP] > 1) printf(" x %d cache groups", topology.entities[TOPO_LEVEL_CACHEGROUP]); if (topology.entities[TOPO_LEVEL_CORE] > 0) printf(" x %d core(s)", topology.entities[TOPO_LEVEL_CORE]); if (topology.entities[TOPO_LEVEL_THREAD] > 1) printf(" x %d hardware threads", topology.entities[TOPO_LEVEL_THREAD]); } else { printf("Non-uniform topology"); } printf("\n"); if (disabled_cpus) { printf("FreeBSD/SMP Online: "); if (topo_analyze(&topo_root, 0, &topology)) { printf("%d package(s)", topology.entities[TOPO_LEVEL_PKG]); if (topology.entities[TOPO_LEVEL_GROUP] > 1) printf(" x %d groups", topology.entities[TOPO_LEVEL_GROUP]); if (topology.entities[TOPO_LEVEL_CACHEGROUP] > 1) printf(" x %d cache groups", topology.entities[TOPO_LEVEL_CACHEGROUP]); if (topology.entities[TOPO_LEVEL_CORE] > 0) printf(" x %d core(s)", topology.entities[TOPO_LEVEL_CORE]); if (topology.entities[TOPO_LEVEL_THREAD] > 1) printf(" x %d hardware threads", topology.entities[TOPO_LEVEL_THREAD]); } else { printf("Non-uniform topology"); } printf("\n"); } if (!bootverbose) return; TOPO_FOREACH(node, &topo_root) { switch (node->type) { case TOPO_TYPE_PKG: printf("Package HW ID = %u\n", node->hwid); break; case TOPO_TYPE_CORE: printf("\tCore HW ID = %u\n", node->hwid); break; case TOPO_TYPE_PU: if (cpu_info[node->hwid].cpu_hyperthread) hyperthread = "/HT"; else hyperthread = ""; if (node->subtype == 0) printf("\t\tCPU (AP%s): APIC ID: %u" "(disabled)\n", hyperthread, node->hwid); else if (node->id == 0) printf("\t\tCPU0 (BSP): APIC ID: %u\n", node->hwid); else printf("\t\tCPU%u (AP%s): APIC ID: %u\n", node->id, hyperthread, node->hwid); break; default: /* ignored */ break; } } } /* * Add a scheduling group, a group of logical processors sharing * a particular cache (and, thus having an affinity), to the scheduling * topology. * This function recursively works on lower level caches. */ static void x86topo_add_sched_group(struct topo_node *root, struct cpu_group *cg_root) { struct topo_node *node; int nchildren; int ncores; int i; KASSERT(root->type == TOPO_TYPE_SYSTEM || root->type == TOPO_TYPE_CACHE || root->type == TOPO_TYPE_GROUP, ("x86topo_add_sched_group: bad type: %u", root->type)); CPU_COPY(&root->cpuset, &cg_root->cg_mask); cg_root->cg_count = root->cpu_count; if (root->type == TOPO_TYPE_SYSTEM) cg_root->cg_level = CG_SHARE_NONE; else cg_root->cg_level = root->subtype; /* * Check how many core nodes we have under the given root node. * If we have multiple logical processors, but not multiple * cores, then those processors must be hardware threads. */ ncores = 0; node = root; while (node != NULL) { if (node->type != TOPO_TYPE_CORE) { node = topo_next_node(root, node); continue; } ncores++; node = topo_next_nonchild_node(root, node); } if (cg_root->cg_level != CG_SHARE_NONE && root->cpu_count > 1 && ncores < 2) cg_root->cg_flags = CG_FLAG_SMT; /* * Find out how many cache nodes we have under the given root node. * We ignore cache nodes that cover all the same processors as the * root node. Also, we do not descend below found cache nodes. * That is, we count top-level "non-redundant" caches under the root * node. */ nchildren = 0; node = root; while (node != NULL) { if ((node->type != TOPO_TYPE_GROUP && node->type != TOPO_TYPE_CACHE) || (root->type != TOPO_TYPE_SYSTEM && CPU_CMP(&node->cpuset, &root->cpuset) == 0)) { node = topo_next_node(root, node); continue; } nchildren++; node = topo_next_nonchild_node(root, node); } cg_root->cg_child = smp_topo_alloc(nchildren); cg_root->cg_children = nchildren; /* * Now find again the same cache nodes as above and recursively * build scheduling topologies for them. */ node = root; i = 0; while (node != NULL) { if ((node->type != TOPO_TYPE_GROUP && node->type != TOPO_TYPE_CACHE) || (root->type != TOPO_TYPE_SYSTEM && CPU_CMP(&node->cpuset, &root->cpuset) == 0)) { node = topo_next_node(root, node); continue; } cg_root->cg_child[i].cg_parent = cg_root; x86topo_add_sched_group(node, &cg_root->cg_child[i]); i++; node = topo_next_nonchild_node(root, node); } } /* * Build the MI scheduling topology from the discovered hardware topology. */ struct cpu_group * cpu_topo(void) { struct cpu_group *cg_root; if (mp_ncpus <= 1) return (smp_topo_none()); cg_root = smp_topo_alloc(1); x86topo_add_sched_group(&topo_root, cg_root); return (cg_root); } static void cpu_alloc(void *dummy __unused) { /* * Dynamically allocate the arrays that depend on the * maximum APIC ID. */ cpu_info = malloc(sizeof(*cpu_info) * (max_apic_id + 1), M_CPUS, M_WAITOK | M_ZERO); apic_cpuids = malloc(sizeof(*apic_cpuids) * (max_apic_id + 1), M_CPUS, M_WAITOK | M_ZERO); } SYSINIT(cpu_alloc, SI_SUB_CPU, SI_ORDER_FIRST, cpu_alloc, NULL); /* * Add a logical CPU to the topology. */ void cpu_add(u_int apic_id, char boot_cpu) { if (apic_id > max_apic_id) { panic("SMP: APIC ID %d too high", apic_id); return; } KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %u added twice", apic_id)); cpu_info[apic_id].cpu_present = 1; if (boot_cpu) { KASSERT(boot_cpu_id == -1, ("CPU %u claims to be BSP, but CPU %u already is", apic_id, boot_cpu_id)); boot_cpu_id = apic_id; cpu_info[apic_id].cpu_bsp = 1; } if (bootverbose) printf("SMP: Added CPU %u (%s)\n", apic_id, boot_cpu ? "BSP" : "AP"); } void cpu_mp_setmaxid(void) { /* * mp_ncpus and mp_maxid should be already set by calls to cpu_add(). * If there were no calls to cpu_add() assume this is a UP system. */ if (mp_ncpus == 0) mp_ncpus = 1; } int cpu_mp_probe(void) { /* * Always record BSP in CPU map so that the mbuf init code works * correctly. */ CPU_SETOF(0, &all_cpus); return (mp_ncpus > 1); } /* Allocate memory for the AP trampoline. */ void alloc_ap_trampoline(vm_paddr_t *physmap, unsigned int *physmap_idx) { unsigned int i; bool allocated; allocated = false; for (i = *physmap_idx; i <= *physmap_idx; i -= 2) { /* * Find a memory region big enough and below the 1MB boundary * for the trampoline code. * NB: needs to be page aligned. */ if (physmap[i] >= MiB(1) || (trunc_page(physmap[i + 1]) - round_page(physmap[i])) < round_page(bootMP_size)) continue; allocated = true; /* * Try to steal from the end of the region to mimic previous * behaviour, else fallback to steal from the start. */ if (physmap[i + 1] < MiB(1)) { boot_address = trunc_page(physmap[i + 1]); if ((physmap[i + 1] - boot_address) < bootMP_size) boot_address -= round_page(bootMP_size); physmap[i + 1] = boot_address; } else { boot_address = round_page(physmap[i]); physmap[i] = boot_address + round_page(bootMP_size); } if (physmap[i] == physmap[i + 1] && *physmap_idx != 0) { memmove(&physmap[i], &physmap[i + 2], sizeof(*physmap) * (*physmap_idx - i + 2)); *physmap_idx -= 2; } break; } if (!allocated) { boot_address = basemem * 1024 - bootMP_size; if (bootverbose) printf( "Cannot find enough space for the boot trampoline, placing it at %#x", boot_address); } } /* * AP CPU's call this to initialize themselves. */ void init_secondary_tail(void) { u_int cpuid; pmap_activate_boot(vmspace_pmap(proc0.p_vmspace)); /* * On real hardware, switch to x2apic mode if possible. Do it * after aps_ready was signalled, to avoid manipulating the * mode while BSP might still want to send some IPI to us * (second startup IPI is ignored on modern hardware etc). */ lapic_xapic_mode(); /* Initialize the PAT MSR. */ pmap_init_pat(); /* set up CPU registers and state */ cpu_setregs(); /* set up SSE/NX */ initializecpu(); /* set up FPU state on the AP */ #ifdef __amd64__ fpuinit(); #else npxinit(false); #endif if (cpu_ops.cpu_init) cpu_ops.cpu_init(); /* A quick check from sanity claus */ cpuid = PCPU_GET(cpuid); if (PCPU_GET(apic_id) != lapic_id()) { printf("SMP: cpuid = %d\n", cpuid); printf("SMP: actual apic_id = %d\n", lapic_id()); printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); panic("cpuid mismatch! boom!!"); } /* Initialize curthread. */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); PCPU_SET(curthread, PCPU_GET(idlethread)); mtx_lock_spin(&ap_boot_mtx); mca_init(); /* Init local apic for irq's */ lapic_setup(1); /* Set memory range attributes for this CPU to match the BSP */ mem_range_AP_init(); smp_cpus++; CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid); if (bootverbose) printf("SMP: AP CPU #%d Launched!\n", cpuid); else printf("%s%d%s", smp_cpus == 2 ? "Launching APs: " : "", cpuid, smp_cpus == mp_ncpus ? "\n" : " "); /* Determine if we are a logical CPU. */ if (cpu_info[PCPU_GET(apic_id)].cpu_hyperthread) CPU_SET(cpuid, &logical_cpus_mask); if (bootverbose) lapic_dump("AP"); if (smp_cpus == mp_ncpus) { /* enable IPI's, tlb shootdown, freezes etc */ atomic_store_rel_int(&smp_started, 1); } #ifdef __amd64__ /* * Enable global pages TLB extension * This also implicitly flushes the TLB */ load_cr4(rcr4() | CR4_PGE); if (pmap_pcid_enabled) load_cr4(rcr4() | CR4_PCIDE); load_ds(_udatasel); load_es(_udatasel); load_fs(_ufssel); #endif mtx_unlock_spin(&ap_boot_mtx); /* Wait until all the AP's are up. */ while (atomic_load_acq_int(&smp_started) == 0) ia32_pause(); #ifndef EARLY_AP_STARTUP /* Start per-CPU event timers. */ cpu_initclocks_ap(); #endif kcsan_cpu_init(cpuid); sched_throw(NULL); panic("scheduler returned us to %s", __func__); /* NOTREACHED */ } static void smp_after_idle_runnable(void *arg __unused) { - struct thread *idle_td; + struct pcpu *pc; int cpu; for (cpu = 1; cpu < mp_ncpus; cpu++) { - idle_td = pcpu_find(cpu)->pc_idlethread; - while (atomic_load_int(&idle_td->td_lastcpu) == NOCPU && - atomic_load_int(&idle_td->td_oncpu) == NOCPU) + pc = pcpu_find(cpu); + while (atomic_load_ptr(&pc->pc_curthread) == (uintptr_t)NULL) cpu_spinwait(); kmem_free((vm_offset_t)bootstacks[cpu], kstack_pages * PAGE_SIZE); } } SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY, smp_after_idle_runnable, NULL); /* * We tell the I/O APIC code about all the CPUs we want to receive * interrupts. If we don't want certain CPUs to receive IRQs we * can simply not tell the I/O APIC code about them in this function. * We also do not tell it about the BSP since it tells itself about * the BSP internally to work with UP kernels and on UP machines. */ void set_interrupt_apic_ids(void) { u_int i, apic_id; for (i = 0; i < MAXCPU; i++) { apic_id = cpu_apic_ids[i]; if (apic_id == -1) continue; if (cpu_info[apic_id].cpu_bsp) continue; if (cpu_info[apic_id].cpu_disabled) continue; /* Don't let hyperthreads service interrupts. */ if (cpu_info[apic_id].cpu_hyperthread && !hyperthreading_intr_allowed) continue; intr_add_cpu(i); } } #ifdef COUNT_XINVLTLB_HITS u_int xhits_gbl[MAXCPU]; u_int xhits_pg[MAXCPU]; u_int xhits_rng[MAXCPU]; static SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, sizeof(xhits_gbl), "IU", ""); SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, sizeof(xhits_pg), "IU", ""); SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, sizeof(xhits_rng), "IU", ""); u_int ipi_global; u_int ipi_page; u_int ipi_range; u_int ipi_range_size; SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, 0, ""); #endif /* COUNT_XINVLTLB_HITS */ /* * Init and startup IPI. */ void ipi_startup(int apic_id, int vector) { /* * This attempts to follow the algorithm described in the * Intel Multiprocessor Specification v1.4 in section B.4. * For each IPI, we allow the local APIC ~20us to deliver the * IPI. If that times out, we panic. */ /* * first we do an INIT IPI: this INIT IPI might be run, resetting * and running the target CPU. OR this INIT IPI might be latched (P5 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be * ignored. */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL | APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id); lapic_ipi_wait(100); /* Explicitly deassert the INIT IPI. */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL | APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id); DELAY(10000); /* wait ~10mS */ /* * next we do a STARTUP IPI: the previous INIT IPI might still be * latched, (P5 bug) this 1st STARTUP would then terminate * immediately, and the previously started INIT IPI would continue. OR * the previous INIT IPI has already run. and this STARTUP IPI will * run. OR the previous INIT IPI was ignored. and this STARTUP IPI * will run. */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | vector, apic_id); if (!lapic_ipi_wait(100)) panic("Failed to deliver first STARTUP IPI to APIC %d", apic_id); DELAY(200); /* wait ~200uS */ /* * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is * recognized after hardware RESET or INIT IPI. */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | vector, apic_id); if (!lapic_ipi_wait(100)) panic("Failed to deliver second STARTUP IPI to APIC %d", apic_id); DELAY(200); /* wait ~200uS */ } /* * Send an IPI to specified CPU handling the bitmap logic. */ void ipi_send_cpu(int cpu, u_int ipi) { u_int bitmap, old, new; u_int *cpu_bitmap; - KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); + KASSERT((u_int)cpu < MAXCPU && cpu_apic_ids[cpu] != -1, + ("IPI to non-existent CPU %d", cpu)); if (IPI_IS_BITMAPED(ipi)) { bitmap = 1 << ipi; ipi = IPI_BITMAP_VECTOR; cpu_bitmap = &cpuid_to_pcpu[cpu]->pc_ipi_bitmap; old = *cpu_bitmap; for (;;) { if ((old & bitmap) == bitmap) break; new = old | bitmap; if (atomic_fcmpset_int(cpu_bitmap, &old, new)) break; } if (old) return; } lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); } void ipi_bitmap_handler(struct trapframe frame) { struct trapframe *oldframe; struct thread *td; int cpu = PCPU_GET(cpuid); u_int ipi_bitmap; td = curthread; ipi_bitmap = atomic_readandclear_int(&cpuid_to_pcpu[cpu]-> pc_ipi_bitmap); /* * sched_preempt() must be called to clear the pending preempt * IPI to enable delivery of further preempts. However, the * critical section will cause extra scheduler lock thrashing * when used unconditionally. Only critical_enter() if * hardclock must also run, which requires the section entry. */ if (ipi_bitmap & (1 << IPI_HARDCLOCK)) critical_enter(); td->td_intr_nesting_level++; oldframe = td->td_intr_frame; td->td_intr_frame = &frame; if (ipi_bitmap & (1 << IPI_PREEMPT)) { #ifdef COUNT_IPIS (*ipi_preempt_counts[cpu])++; #endif sched_preempt(td); } if (ipi_bitmap & (1 << IPI_AST)) { #ifdef COUNT_IPIS (*ipi_ast_counts[cpu])++; #endif /* Nothing to do for AST */ } if (ipi_bitmap & (1 << IPI_HARDCLOCK)) { #ifdef COUNT_IPIS (*ipi_hardclock_counts[cpu])++; #endif hardclockintr(); } td->td_intr_frame = oldframe; td->td_intr_nesting_level--; if (ipi_bitmap & (1 << IPI_HARDCLOCK)) critical_exit(); } /* * send an IPI to a set of cpus. */ void ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) CPU_OR_ATOMIC(&ipi_stop_nmi_pending, &cpus); while ((cpu = CPU_FFS(&cpus)) != 0) { cpu--; CPU_CLR(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } } /* * send an IPI to a specific CPU. */ void ipi_cpu(int cpu, u_int ipi) { /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) CPU_SET_ATOMIC(cpu, &ipi_stop_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } /* * send an IPI to all CPUs EXCEPT myself */ void ipi_all_but_self(u_int ipi) { cpuset_t other_cpus; other_cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &other_cpus); if (IPI_IS_BITMAPED(ipi)) { ipi_selected(other_cpus, ipi); return; } /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) CPU_OR_ATOMIC(&ipi_stop_nmi_pending, &other_cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); } int ipi_nmi_handler(void) { u_int cpuid; /* * As long as there is not a simple way to know about a NMI's * source, if the bitmask for the current CPU is present in * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. */ cpuid = PCPU_GET(cpuid); if (!CPU_ISSET(cpuid, &ipi_stop_nmi_pending)) return (1); CPU_CLR_ATOMIC(cpuid, &ipi_stop_nmi_pending); cpustop_handler(); return (0); } int nmi_kdb_lock; void nmi_call_kdb_smp(u_int type, struct trapframe *frame) { int cpu; bool call_post; cpu = PCPU_GET(cpuid); if (atomic_cmpset_acq_int(&nmi_kdb_lock, 0, 1)) { nmi_call_kdb(cpu, type, frame); call_post = false; } else { savectx(&stoppcbs[cpu]); CPU_SET_ATOMIC(cpu, &stopped_cpus); while (!atomic_cmpset_acq_int(&nmi_kdb_lock, 0, 1)) ia32_pause(); call_post = true; } atomic_store_rel_int(&nmi_kdb_lock, 0); if (call_post) cpustop_handler_post(cpu); } /* * Handle an IPI_STOP by saving our current context and spinning (or mwaiting, * if available) until we are resumed. */ void cpustop_handler(void) { struct monitorbuf *mb; u_int cpu; bool use_mwait; cpu = PCPU_GET(cpuid); savectx(&stoppcbs[cpu]); use_mwait = (stop_mwait && (cpu_feature2 & CPUID2_MON) != 0 && !mwait_cpustop_broken); if (use_mwait) { mb = PCPU_PTR(monitorbuf); atomic_store_int(&mb->stop_state, MONITOR_STOPSTATE_STOPPED); } /* Indicate that we are stopped */ CPU_SET_ATOMIC(cpu, &stopped_cpus); /* Wait for restart */ while (!CPU_ISSET(cpu, &started_cpus)) { if (use_mwait) { cpu_monitor(mb, 0, 0); if (atomic_load_int(&mb->stop_state) == MONITOR_STOPSTATE_STOPPED) cpu_mwait(0, MWAIT_C1); continue; } ia32_pause(); /* * Halt non-BSP CPUs on panic -- we're never going to need them * again, and might as well save power / release resources * (e.g., overprovisioned VM infrastructure). */ while (__predict_false(!IS_BSP() && KERNEL_PANICKED())) halt(); } cpustop_handler_post(cpu); } static void cpustop_handler_post(u_int cpu) { CPU_CLR_ATOMIC(cpu, &started_cpus); CPU_CLR_ATOMIC(cpu, &stopped_cpus); /* * We don't broadcast TLB invalidations to other CPUs when they are * stopped. Hence, we clear the TLB before resuming. */ invltlb_glob(); #if defined(__amd64__) && defined(DDB) amd64_db_resume_dbreg(); #endif if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); cpustop_restartfunc = NULL; } } /* * Handle an IPI_SUSPEND by saving our current context and spinning until we * are resumed. */ void cpususpend_handler(void) { u_int cpu; mtx_assert(&smp_ipi_mtx, MA_NOTOWNED); cpu = PCPU_GET(cpuid); if (savectx(&susppcbs[cpu]->sp_pcb)) { #ifdef __amd64__ fpususpend(susppcbs[cpu]->sp_fpususpend); #else npxsuspend(susppcbs[cpu]->sp_fpususpend); #endif /* * suspended_cpus is cleared shortly after each AP is restarted * by a Startup IPI, so that the BSP can proceed to restarting * the next AP. * * resuming_cpus gets cleared when the AP completes * initialization after having been released by the BSP. * resuming_cpus is probably not the best name for the * variable, because it is actually a set of processors that * haven't resumed yet and haven't necessarily started resuming. * * Note that suspended_cpus is meaningful only for ACPI suspend * as it's not really used for Xen suspend since the APs are * automatically restored to the running state and the correct * context. For the same reason resumectx is never called in * that case. */ CPU_SET_ATOMIC(cpu, &suspended_cpus); CPU_SET_ATOMIC(cpu, &resuming_cpus); /* * Invalidate the cache after setting the global status bits. * The last AP to set its bit may end up being an Owner of the * corresponding cache line in MOESI protocol. The AP may be * stopped before the cache line is written to the main memory. */ wbinvd(); } else { #ifdef __amd64__ fpuresume(susppcbs[cpu]->sp_fpususpend); #else npxresume(susppcbs[cpu]->sp_fpususpend); #endif pmap_init_pat(); initializecpu(); PCPU_SET(switchtime, 0); PCPU_SET(switchticks, ticks); /* Indicate that we have restarted and restored the context. */ CPU_CLR_ATOMIC(cpu, &suspended_cpus); } /* Wait for resume directive */ while (!CPU_ISSET(cpu, &toresume_cpus)) ia32_pause(); /* Re-apply microcode updates. */ ucode_reload(); #ifdef __i386__ /* Finish removing the identity mapping of low memory for this AP. */ invltlb_glob(); #endif if (cpu_ops.cpu_resume) cpu_ops.cpu_resume(); #ifdef __amd64__ if (vmm_resume_p) vmm_resume_p(); #endif /* Resume MCA and local APIC */ lapic_xapic_mode(); mca_resume(); lapic_setup(0); /* Indicate that we are resumed */ CPU_CLR_ATOMIC(cpu, &resuming_cpus); CPU_CLR_ATOMIC(cpu, &suspended_cpus); CPU_CLR_ATOMIC(cpu, &toresume_cpus); } void invlcache_handler(void) { uint32_t generation; #ifdef COUNT_IPIS (*ipi_invlcache_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ /* * Reading the generation here allows greater parallelism * since wbinvd is a serializing instruction. Without the * temporary, we'd wait for wbinvd to complete, then the read * would execute, then the dependent write, which must then * complete before return from interrupt. */ generation = smp_tlb_generation; wbinvd(); PCPU_SET(smp_tlb_done, generation); } /* * This is called once the rest of the system is up and running and we're * ready to let the AP's out of the pen. */ static void release_aps(void *dummy __unused) { if (mp_ncpus == 1) return; atomic_store_rel_int(&aps_ready, 1); while (smp_started == 0) ia32_pause(); } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); #ifdef COUNT_IPIS /* * Setup interrupt counters for IPI handlers. */ static void mp_ipi_intrcnt(void *dummy) { char buf[64]; int i; CPU_FOREACH(i) { snprintf(buf, sizeof(buf), "cpu%d:invltlb", i); intrcnt_add(buf, &ipi_invltlb_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:invlrng", i); intrcnt_add(buf, &ipi_invlrng_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:invlpg", i); intrcnt_add(buf, &ipi_invlpg_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:invlcache", i); intrcnt_add(buf, &ipi_invlcache_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:preempt", i); intrcnt_add(buf, &ipi_preempt_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:ast", i); intrcnt_add(buf, &ipi_ast_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i); intrcnt_add(buf, &ipi_rendezvous_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:hardclock", i); intrcnt_add(buf, &ipi_hardclock_counts[i]); } } SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL); #endif /* * Flush the TLB on other CPU's */ /* Variables needed for SMP tlb shootdown. */ vm_offset_t smp_tlb_addr1, smp_tlb_addr2; pmap_t smp_tlb_pmap; volatile uint32_t smp_tlb_generation; #ifdef __amd64__ #define read_eflags() read_rflags() #endif static void smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2) { cpuset_t other_cpus; volatile uint32_t *p_cpudone; uint32_t generation; int cpu; /* It is not necessary to signal other CPUs while in the debugger. */ if (kdb_active || KERNEL_PANICKED()) return; /* * Check for other cpus. Return if none. */ if (CPU_ISFULLSET(&mask)) { if (mp_ncpus <= 1) return; } else { CPU_CLR(PCPU_GET(cpuid), &mask); if (CPU_EMPTY(&mask)) return; } if (!(read_eflags() & PSL_I)) panic("%s: interrupts disabled", __func__); mtx_lock_spin(&smp_ipi_mtx); smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; smp_tlb_pmap = pmap; generation = ++smp_tlb_generation; if (CPU_ISFULLSET(&mask)) { ipi_all_but_self(vector); other_cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &other_cpus); } else { other_cpus = mask; while ((cpu = CPU_FFS(&mask)) != 0) { cpu--; CPU_CLR(cpu, &mask); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, vector); ipi_send_cpu(cpu, vector); } } while ((cpu = CPU_FFS(&other_cpus)) != 0) { cpu--; CPU_CLR(cpu, &other_cpus); p_cpudone = &cpuid_to_pcpu[cpu]->pc_smp_tlb_done; while (*p_cpudone != generation) ia32_pause(); } mtx_unlock_spin(&smp_ipi_mtx); } void smp_masked_invltlb(cpuset_t mask, pmap_t pmap) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0); #ifdef COUNT_XINVLTLB_HITS ipi_global++; #endif } } void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0); #ifdef COUNT_XINVLTLB_HITS ipi_page++; #endif } } void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2, pmap_t pmap) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap, addr1, addr2); #ifdef COUNT_XINVLTLB_HITS ipi_range++; ipi_range_size += (addr2 - addr1) / PAGE_SIZE; #endif } } void smp_cache_flush(void) { if (smp_started) { smp_targeted_tlb_shootdown(all_cpus, IPI_INVLCACHE, NULL, 0, 0); } } /* * Handlers for TLB related IPIs */ void invltlb_handler(void) { uint32_t generation; #ifdef COUNT_XINVLTLB_HITS xhits_gbl[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ #ifdef COUNT_IPIS (*ipi_invltlb_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ /* * Reading the generation here allows greater parallelism * since invalidating the TLB is a serializing operation. */ generation = smp_tlb_generation; if (smp_tlb_pmap == kernel_pmap) invltlb_glob(); #ifdef __amd64__ else invltlb(); #endif PCPU_SET(smp_tlb_done, generation); } void invlpg_handler(void) { uint32_t generation; #ifdef COUNT_XINVLTLB_HITS xhits_pg[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ #ifdef COUNT_IPIS (*ipi_invlpg_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ generation = smp_tlb_generation; /* Overlap with serialization */ #ifdef __i386__ if (smp_tlb_pmap == kernel_pmap) #endif invlpg(smp_tlb_addr1); PCPU_SET(smp_tlb_done, generation); } void invlrng_handler(void) { vm_offset_t addr, addr2; uint32_t generation; #ifdef COUNT_XINVLTLB_HITS xhits_rng[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ #ifdef COUNT_IPIS (*ipi_invlrng_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ addr = smp_tlb_addr1; addr2 = smp_tlb_addr2; generation = smp_tlb_generation; /* Overlap with serialization */ #ifdef __i386__ if (smp_tlb_pmap == kernel_pmap) #endif do { invlpg(addr); addr += PAGE_SIZE; } while (addr < addr2); PCPU_SET(smp_tlb_done, generation); } Index: projects/clang1000-import/tools/build/options/WITHOUT_PC_SYSINSTALL =================================================================== --- projects/clang1000-import/tools/build/options/WITHOUT_PC_SYSINSTALL (revision 356919) +++ projects/clang1000-import/tools/build/options/WITHOUT_PC_SYSINSTALL (nonexistent) @@ -1,4 +0,0 @@ -.\" $FreeBSD$ -Set to not build -.Xr pc-sysinstall 8 -and related programs. Property changes on: projects/clang1000-import/tools/build/options/WITHOUT_PC_SYSINSTALL ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang1000-import/tools/build/options/WITHOUT_BINUTILS =================================================================== --- projects/clang1000-import/tools/build/options/WITHOUT_BINUTILS (revision 356919) +++ projects/clang1000-import/tools/build/options/WITHOUT_BINUTILS (revision 356920) @@ -1,8 +1,7 @@ .\" $FreeBSD$ Do not build or install GNU .Xr as 1 , .Xr ld.bfd 1 , and .Xr objdump 1 as part of the normal system build. -The resulting system cannot build programs from source. Index: projects/clang1000-import/tools/build/options/WITHOUT_BINUTILS_BOOTSTRAP =================================================================== --- projects/clang1000-import/tools/build/options/WITHOUT_BINUTILS_BOOTSTRAP (revision 356919) +++ projects/clang1000-import/tools/build/options/WITHOUT_BINUTILS_BOOTSTRAP (revision 356920) @@ -1,7 +1,3 @@ .\" $FreeBSD$ Do not build binutils (as, ld.bfd, and objdump) as part of the bootstrap process. -.Bf -symbolic -The option does not work for build targets unless some alternative -toolchain is provided. -.Ef Index: projects/clang1000-import/tools/build/options/WITH_BINUTILS =================================================================== --- projects/clang1000-import/tools/build/options/WITH_BINUTILS (revision 356919) +++ projects/clang1000-import/tools/build/options/WITH_BINUTILS (revision 356920) @@ -1,8 +1,8 @@ .\" $FreeBSD$ -Set to build and install GNU -.Xr as 1 , +Build and install GNU +.Xr as 1 +on i386 and amd64, .Xr objdump 1 , -and, on powerpc, +and .Xr ld.bfd 1 -as part -of the normal system build. +on powerpc as part of the normal system build. Index: projects/clang1000-import/tools/build/options/WITH_BINUTILS_BOOTSTRAP =================================================================== --- projects/clang1000-import/tools/build/options/WITH_BINUTILS_BOOTSTRAP (revision 356919) +++ projects/clang1000-import/tools/build/options/WITH_BINUTILS_BOOTSTRAP (revision 356920) @@ -1,3 +1,3 @@ .\" $FreeBSD$ -Set build binutils (as, objdump, and on powerpc ld) +Build binutils (as on i386 and amd64, objdump, and ld on powerpc) as part of the bootstrap process. Index: projects/clang1000-import/usr.sbin/bsdinstall/partedit/partedit.c =================================================================== --- projects/clang1000-import/usr.sbin/bsdinstall/partedit/partedit.c (revision 356919) +++ projects/clang1000-import/usr.sbin/bsdinstall/partedit/partedit.c (revision 356920) @@ -1,579 +1,579 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include "diskeditor.h" #include "partedit.h" struct pmetadata_head part_metadata; int tmpdfd; static int sade_mode = 0; static int apply_changes(struct gmesh *mesh); static void apply_workaround(struct gmesh *mesh); static struct partedit_item *read_geom_mesh(struct gmesh *mesh, int *nitems); static void add_geom_children(struct ggeom *gp, int recurse, struct partedit_item **items, int *nitems); static void init_fstab_metadata(void); static void get_mount_points(struct partedit_item *items, int nitems); static int validate_setup(void); static void sigint_handler(int sig) { struct gmesh mesh; /* Revert all changes and exit dialog-mode cleanly on SIGINT */ geom_gettree(&mesh); gpart_revert_all(&mesh); geom_deletetree(&mesh); end_dialog(); close(tmpdfd); exit(1); } int main(int argc, const char **argv) { struct partition_metadata *md; const char *progname, *prompt, *tmpdir; struct partedit_item *items = NULL; struct gmesh mesh; int i, op, nitems, nscroll; int error; progname = getprogname(); if (strcmp(progname, "sade") == 0) sade_mode = 1; TAILQ_INIT(&part_metadata); tmpdir = getenv("TMPDIR"); if (tmpdir == NULL) tmpdir = "/tmp"; - tmpdfd = open(tmpdir, O_RDWR | O_DIRECTORY); + tmpdfd = open(tmpdir, O_DIRECTORY); if (tmpdfd < 0) err(EX_OSERR, "%s", tmpdir); unlinkat(tmpdfd, "bsdinstall-esps", 0); init_fstab_metadata(); init_dialog(stdin, stdout); if (!sade_mode) dialog_vars.backtitle = __DECONST(char *, "FreeBSD Installer"); dialog_vars.item_help = TRUE; nscroll = i = 0; /* Revert changes on SIGINT */ signal(SIGINT, sigint_handler); if (strcmp(progname, "autopart") == 0) { /* Guided */ prompt = "Please review the disk setup. When complete, press " "the Finish button."; /* Experimental ZFS autopartition support */ if (argc > 1 && strcmp(argv[1], "zfs") == 0) { part_wizard("zfs"); } else { part_wizard("ufs"); } } else if (strcmp(progname, "scriptedpart") == 0) { error = scripted_editor(argc, argv); prompt = NULL; if (error != 0) { end_dialog(); return (error); } } else { prompt = "Create partitions for FreeBSD. No changes will be " "made until you select Finish."; } /* Show the part editor either immediately, or to confirm wizard */ while (prompt != NULL) { dlg_clear(); dlg_put_backtitle(); error = geom_gettree(&mesh); if (error == 0) items = read_geom_mesh(&mesh, &nitems); if (error || items == NULL) { dialog_msgbox("Error", "No disks found. If you need to " "install a kernel driver, choose Shell at the " "installation menu.", 0, 0, TRUE); break; } get_mount_points(items, nitems); if (i >= nitems) i = nitems - 1; op = diskeditor_show("Partition Editor", prompt, items, nitems, &i, &nscroll); switch (op) { case 0: /* Create */ gpart_create((struct gprovider *)(items[i].cookie), NULL, NULL, NULL, NULL, 1); break; case 1: /* Delete */ gpart_delete((struct gprovider *)(items[i].cookie)); break; case 2: /* Modify */ gpart_edit((struct gprovider *)(items[i].cookie)); break; case 3: /* Revert */ gpart_revert_all(&mesh); while ((md = TAILQ_FIRST(&part_metadata)) != NULL) { if (md->fstab != NULL) { free(md->fstab->fs_spec); free(md->fstab->fs_file); free(md->fstab->fs_vfstype); free(md->fstab->fs_mntops); free(md->fstab->fs_type); free(md->fstab); } if (md->newfs != NULL) free(md->newfs); free(md->name); TAILQ_REMOVE(&part_metadata, md, metadata); free(md); } init_fstab_metadata(); break; case 4: /* Auto */ part_wizard("ufs"); break; } error = 0; if (op == 5) { /* Finished */ dialog_vars.ok_label = __DECONST(char *, "Commit"); dialog_vars.extra_label = __DECONST(char *, "Revert & Exit"); dialog_vars.extra_button = TRUE; dialog_vars.cancel_label = __DECONST(char *, "Back"); op = dialog_yesno("Confirmation", "Your changes will " "now be written to disk. If you have chosen to " "overwrite existing data, it will be PERMANENTLY " "ERASED. Are you sure you want to commit your " "changes?", 0, 0); dialog_vars.ok_label = NULL; dialog_vars.extra_button = FALSE; dialog_vars.cancel_label = NULL; if (op == 0 && validate_setup()) { /* Save */ error = apply_changes(&mesh); if (!error) apply_workaround(&mesh); break; } else if (op == 3) { /* Quit */ gpart_revert_all(&mesh); error = -1; break; } } geom_deletetree(&mesh); free(items); } if (prompt == NULL) { error = geom_gettree(&mesh); if (validate_setup()) { error = apply_changes(&mesh); } else { gpart_revert_all(&mesh); error = -1; } } geom_deletetree(&mesh); free(items); end_dialog(); close(tmpdfd); return (error); } struct partition_metadata * get_part_metadata(const char *name, int create) { struct partition_metadata *md; TAILQ_FOREACH(md, &part_metadata, metadata) if (md->name != NULL && strcmp(md->name, name) == 0) break; if (md == NULL && create) { md = calloc(1, sizeof(*md)); md->name = strdup(name); TAILQ_INSERT_TAIL(&part_metadata, md, metadata); } return (md); } void delete_part_metadata(const char *name) { struct partition_metadata *md; TAILQ_FOREACH(md, &part_metadata, metadata) { if (md->name != NULL && strcmp(md->name, name) == 0) { if (md->fstab != NULL) { free(md->fstab->fs_spec); free(md->fstab->fs_file); free(md->fstab->fs_vfstype); free(md->fstab->fs_mntops); free(md->fstab->fs_type); free(md->fstab); } if (md->newfs != NULL) free(md->newfs); free(md->name); TAILQ_REMOVE(&part_metadata, md, metadata); free(md); break; } } } static int validate_setup(void) { struct partition_metadata *md, *root = NULL; int cancel; TAILQ_FOREACH(md, &part_metadata, metadata) { if (md->fstab != NULL && strcmp(md->fstab->fs_file, "/") == 0) root = md; /* XXX: Check for duplicate mountpoints */ } if (root == NULL) { dialog_msgbox("Error", "No root partition was found. " "The root FreeBSD partition must have a mountpoint of '/'.", 0, 0, TRUE); return (FALSE); } /* * Check for root partitions that we aren't formatting, which is * usually a mistake */ if (root->newfs == NULL && !sade_mode) { dialog_vars.defaultno = TRUE; cancel = dialog_yesno("Warning", "The chosen root partition " "has a preexisting filesystem. If it contains an existing " "FreeBSD system, please update it with freebsd-update " "instead of installing a new system on it. The partition " "can also be erased by pressing \"No\" and then deleting " "and recreating it. Are you sure you want to proceed?", 0, 0); dialog_vars.defaultno = FALSE; if (cancel) return (FALSE); } return (TRUE); } static int apply_changes(struct gmesh *mesh) { struct partition_metadata *md; char message[512]; int i, nitems, error; const char **items; const char *fstab_path; FILE *fstab; nitems = 1; /* Partition table changes */ TAILQ_FOREACH(md, &part_metadata, metadata) { if (md->newfs != NULL) nitems++; } items = calloc(nitems * 2, sizeof(const char *)); items[0] = "Writing partition tables"; items[1] = "7"; /* In progress */ i = 1; TAILQ_FOREACH(md, &part_metadata, metadata) { if (md->newfs != NULL) { char *item; item = malloc(255); sprintf(item, "Initializing %s", md->name); items[i*2] = item; items[i*2 + 1] = "Pending"; i++; } } i = 0; dialog_mixedgauge("Initializing", "Initializing file systems. Please wait.", 0, 0, i*100/nitems, nitems, __DECONST(char **, items)); gpart_commit(mesh); items[i*2 + 1] = "3"; i++; if (getenv("BSDINSTALL_LOG") == NULL) setenv("BSDINSTALL_LOG", "/dev/null", 1); TAILQ_FOREACH(md, &part_metadata, metadata) { if (md->newfs != NULL) { items[i*2 + 1] = "7"; /* In progress */ dialog_mixedgauge("Initializing", "Initializing file systems. Please wait.", 0, 0, i*100/nitems, nitems, __DECONST(char **, items)); sprintf(message, "(echo %s; %s) >>%s 2>>%s", md->newfs, md->newfs, getenv("BSDINSTALL_LOG"), getenv("BSDINSTALL_LOG")); error = system(message); items[i*2 + 1] = (error == 0) ? "3" : "1"; i++; } } dialog_mixedgauge("Initializing", "Initializing file systems. Please wait.", 0, 0, i*100/nitems, nitems, __DECONST(char **, items)); for (i = 1; i < nitems; i++) free(__DECONST(char *, items[i*2])); free(items); if (getenv("PATH_FSTAB") != NULL) fstab_path = getenv("PATH_FSTAB"); else fstab_path = "/etc/fstab"; fstab = fopen(fstab_path, "w+"); if (fstab == NULL) { sprintf(message, "Cannot open fstab file %s for writing (%s)\n", getenv("PATH_FSTAB"), strerror(errno)); dialog_msgbox("Error", message, 0, 0, TRUE); return (-1); } fprintf(fstab, "# Device\tMountpoint\tFStype\tOptions\tDump\tPass#\n"); TAILQ_FOREACH(md, &part_metadata, metadata) { if (md->fstab != NULL) fprintf(fstab, "%s\t%s\t\t%s\t%s\t%d\t%d\n", md->fstab->fs_spec, md->fstab->fs_file, md->fstab->fs_vfstype, md->fstab->fs_mntops, md->fstab->fs_freq, md->fstab->fs_passno); } fclose(fstab); return (0); } static void apply_workaround(struct gmesh *mesh) { struct gclass *classp; struct ggeom *gp; struct gconfig *gc; const char *scheme = NULL, *modified = NULL; LIST_FOREACH(classp, &mesh->lg_class, lg_class) { if (strcmp(classp->lg_name, "PART") == 0) break; } if (strcmp(classp->lg_name, "PART") != 0) { dialog_msgbox("Error", "gpart not found!", 0, 0, TRUE); return; } LIST_FOREACH(gp, &classp->lg_geom, lg_geom) { LIST_FOREACH(gc, &gp->lg_config, lg_config) { if (strcmp(gc->lg_name, "scheme") == 0) { scheme = gc->lg_val; } else if (strcmp(gc->lg_name, "modified") == 0) { modified = gc->lg_val; } } if (scheme && strcmp(scheme, "GPT") == 0 && modified && strcmp(modified, "true") == 0) { if (getenv("WORKAROUND_LENOVO")) gpart_set_root(gp->lg_name, "lenovofix"); if (getenv("WORKAROUND_GPTACTIVE")) gpart_set_root(gp->lg_name, "active"); } } } static struct partedit_item * read_geom_mesh(struct gmesh *mesh, int *nitems) { struct gclass *classp; struct ggeom *gp; struct partedit_item *items; *nitems = 0; items = NULL; /* * Build the device table. First add all disks (and CDs). */ LIST_FOREACH(classp, &mesh->lg_class, lg_class) { if (strcmp(classp->lg_name, "DISK") != 0 && strcmp(classp->lg_name, "MD") != 0) continue; /* Now recurse into all children */ LIST_FOREACH(gp, &classp->lg_geom, lg_geom) add_geom_children(gp, 0, &items, nitems); } return (items); } static void add_geom_children(struct ggeom *gp, int recurse, struct partedit_item **items, int *nitems) { struct gconsumer *cp; struct gprovider *pp; struct gconfig *gc; if (strcmp(gp->lg_class->lg_name, "PART") == 0 && !LIST_EMPTY(&gp->lg_config)) { LIST_FOREACH(gc, &gp->lg_config, lg_config) { if (strcmp(gc->lg_name, "scheme") == 0) (*items)[*nitems-1].type = gc->lg_val; } } if (LIST_EMPTY(&gp->lg_provider)) return; LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { if (strcmp(gp->lg_class->lg_name, "LABEL") == 0) continue; /* Skip WORM media */ if (strncmp(pp->lg_name, "cd", 2) == 0) continue; *items = realloc(*items, (*nitems+1)*sizeof(struct partedit_item)); (*items)[*nitems].indentation = recurse; (*items)[*nitems].name = pp->lg_name; (*items)[*nitems].size = pp->lg_mediasize; (*items)[*nitems].mountpoint = NULL; (*items)[*nitems].type = ""; (*items)[*nitems].cookie = pp; LIST_FOREACH(gc, &pp->lg_config, lg_config) { if (strcmp(gc->lg_name, "type") == 0) (*items)[*nitems].type = gc->lg_val; } /* Skip swap-backed MD devices */ if (strcmp(gp->lg_class->lg_name, "MD") == 0 && strcmp((*items)[*nitems].type, "swap") == 0) continue; (*nitems)++; LIST_FOREACH(cp, &pp->lg_consumers, lg_consumers) add_geom_children(cp->lg_geom, recurse+1, items, nitems); /* Only use first provider for acd */ if (strcmp(gp->lg_class->lg_name, "ACD") == 0) break; } } static void init_fstab_metadata(void) { struct fstab *fstab; struct partition_metadata *md; setfsent(); while ((fstab = getfsent()) != NULL) { md = calloc(1, sizeof(struct partition_metadata)); md->name = NULL; if (strncmp(fstab->fs_spec, "/dev/", 5) == 0) md->name = strdup(&fstab->fs_spec[5]); md->fstab = malloc(sizeof(struct fstab)); md->fstab->fs_spec = strdup(fstab->fs_spec); md->fstab->fs_file = strdup(fstab->fs_file); md->fstab->fs_vfstype = strdup(fstab->fs_vfstype); md->fstab->fs_mntops = strdup(fstab->fs_mntops); md->fstab->fs_type = strdup(fstab->fs_type); md->fstab->fs_freq = fstab->fs_freq; md->fstab->fs_passno = fstab->fs_passno; md->newfs = NULL; TAILQ_INSERT_TAIL(&part_metadata, md, metadata); } } static void get_mount_points(struct partedit_item *items, int nitems) { struct partition_metadata *md; int i; for (i = 0; i < nitems; i++) { TAILQ_FOREACH(md, &part_metadata, metadata) { if (md->name != NULL && md->fstab != NULL && strcmp(md->name, items[i].name) == 0) { items[i].mountpoint = md->fstab->fs_file; break; } } } } Index: projects/clang1000-import =================================================================== --- projects/clang1000-import (revision 356919) +++ projects/clang1000-import (revision 356920) Property changes on: projects/clang1000-import ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r356848-356919