Index: projects/ipsec/Makefile.inc1 =================================================================== --- projects/ipsec/Makefile.inc1 (revision 313186) +++ projects/ipsec/Makefile.inc1 (revision 313187) @@ -1,2629 +1,2628 @@ # # $FreeBSD$ # # Make command line options: # -DNO_CLEANDIR run ${MAKE} clean, instead of ${MAKE} cleandir # -DNO_CLEAN do not clean at all # -DDB_FROM_SRC use the user/group databases in src/etc instead of # the system database when installing. # -DNO_SHARE do not go into share subdir # -DKERNFAST define NO_KERNEL{CONFIG,CLEAN,OBJ} # -DNO_KERNELCONFIG do not run config in ${MAKE} buildkernel # -DNO_KERNELCLEAN do not run ${MAKE} clean in ${MAKE} buildkernel # -DNO_KERNELOBJ do not run ${MAKE} obj in ${MAKE} buildkernel # -DNO_PORTSUPDATE do not update ports in ${MAKE} update # -DNO_ROOT install without using root privilege # -DNO_DOCUPDATE do not update doc in ${MAKE} update # -DWITHOUT_CTF do not run the DTrace CTF conversion tools on built objects # LOCAL_DIRS="list of dirs" to add additional dirs to the SUBDIR list # LOCAL_ITOOLS="list of tools" to add additional tools to the ITOOLS list # LOCAL_LIB_DIRS="list of dirs" to add additional dirs to libraries target # LOCAL_MTREE="list of mtree files" to process to allow local directories # to be created before files are installed # LOCAL_TOOL_DIRS="list of dirs" to add additional dirs to the build-tools # list # LOCAL_XTOOL_DIRS="list of dirs" to add additional dirs to the # cross-tools target # METALOG="path to metadata log" to write permission and ownership # when NO_ROOT is set. (default: ${DESTDIR}/METALOG) # TARGET="machine" to cross-build world for a different machine type # TARGET_ARCH= may be required when a TARGET supports multiple endians # BUILDENV_SHELL= shell to launch for the buildenv target (def:${SHELL}) # WORLD_FLAGS= additional flags to pass to make(1) during buildworld # KERNEL_FLAGS= additional flags to pass to make(1) during buildkernel # SUBDIR_OVERRIDE="list of dirs" to build rather than everything. # All libraries and includes, and some build tools will still build. # # The intended user-driven targets are: # buildworld - rebuild *everything*, including glue to help do upgrades # installworld - install everything built by "buildworld" # checkworld - run test suite on installed world # doxygen - build API documentation of the kernel # update - convenient way to update your source tree (e.g. svn/svnup) # # Standard targets (not defined here) are documented in the makefiles in # /usr/share/mk. These include: # obj depend all install clean cleandepend cleanobj .if !defined(TARGET) || !defined(TARGET_ARCH) .error "Both TARGET and TARGET_ARCH must be defined." .endif SRCDIR?= ${.CURDIR} LOCALBASE?= /usr/local # Cross toolchain changes must be in effect before bsd.compiler.mk # so that it gets the right CC, and pass CROSS_TOOLCHAIN to submakes. .if defined(CROSS_TOOLCHAIN) .include "${LOCALBASE}/share/toolchains/${CROSS_TOOLCHAIN}.mk" CROSSENV+=CROSS_TOOLCHAIN="${CROSS_TOOLCHAIN}" .endif .if defined(CROSS_TOOLCHAIN_PREFIX) CROSS_COMPILER_PREFIX?=${CROSS_TOOLCHAIN_PREFIX} .endif XCOMPILERS= CC CXX CPP .for COMPILER in ${XCOMPILERS} .if defined(CROSS_COMPILER_PREFIX) X${COMPILER}?= ${CROSS_COMPILER_PREFIX}${${COMPILER}} .else X${COMPILER}?= ${${COMPILER}} .endif .endfor # If a full path to an external cross compiler is given, don't build # a cross compiler. .if ${XCC:N${CCACHE_BIN}:M/*} MK_CLANG_BOOTSTRAP= no MK_GCC_BOOTSTRAP= no .endif # Pull in COMPILER_TYPE and COMPILER_FREEBSD_VERSION early.
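# (bsd.compiler.mk probes the compiler named by CC/XCC and provides # COMPILER_TYPE and COMPILER_VERSION; src.opts.mk then derives the MK_* # option knobs used throughout this file.)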
.include <bsd.compiler.mk> .include "share/mk/src.opts.mk" # Check if there is a local compiler that can serve as the external compiler. # Which compiler is expected to be used? .if ${MK_CLANG_BOOTSTRAP} == "yes" WANT_COMPILER_TYPE= clang .elif ${MK_GCC_BOOTSTRAP} == "yes" WANT_COMPILER_TYPE= gcc .else WANT_COMPILER_TYPE= .endif .if !defined(WANT_COMPILER_FREEBSD_VERSION) .if ${WANT_COMPILER_TYPE} == "clang" WANT_COMPILER_FREEBSD_VERSION_FILE= lib/clang/freebsd_cc_version.h WANT_COMPILER_FREEBSD_VERSION!= \ awk '$$2 == "FREEBSD_CC_VERSION" {printf("%d\n", $$3)}' \ ${SRCDIR}/${WANT_COMPILER_FREEBSD_VERSION_FILE} || echo unknown WANT_COMPILER_VERSION_FILE= lib/clang/include/clang/Basic/Version.inc WANT_COMPILER_VERSION!= \ awk '$$2 == "CLANG_VERSION" {split($$3, a, "."); print a[1] * 10000 + a[2] * 100 + a[3]}' \ ${SRCDIR}/${WANT_COMPILER_VERSION_FILE} || echo unknown .elif ${WANT_COMPILER_TYPE} == "gcc" WANT_COMPILER_FREEBSD_VERSION_FILE= gnu/usr.bin/cc/cc_tools/freebsd-native.h WANT_COMPILER_FREEBSD_VERSION!= \ awk '$$2 == "FBSD_CC_VER" {printf("%d\n", $$3)}' \ ${SRCDIR}/${WANT_COMPILER_FREEBSD_VERSION_FILE} || echo unknown WANT_COMPILER_VERSION_FILE= contrib/gcc/BASE-VER WANT_COMPILER_VERSION!= \ awk -F. '{print $$1 * 10000 + $$2 * 100 + $$3}' \ ${SRCDIR}/${WANT_COMPILER_VERSION_FILE} || echo unknown .endif .export WANT_COMPILER_FREEBSD_VERSION WANT_COMPILER_VERSION .endif # !defined(WANT_COMPILER_FREEBSD_VERSION) # It needs to be the same revision as we would build for the bootstrap compiler. # If the expected compiler differs from CC then we can't skip the bootstrap. # GCC cannot be used for cross-arch yet. For clang we pass -target later if # TARGET_ARCH!=MACHINE_ARCH. .if ${MK_SYSTEM_COMPILER} == "yes" && \ (${MK_CLANG_BOOTSTRAP} == "yes" || ${MK_GCC_BOOTSTRAP} == "yes") && \ !make(showconfig) && !make(native-xtools) && !make(xdev*) && \ ${WANT_COMPILER_TYPE} == ${COMPILER_TYPE} && \ (${COMPILER_TYPE} == "clang" || ${TARGET_ARCH} == ${MACHINE_ARCH}) && \ ${COMPILER_VERSION} == ${WANT_COMPILER_VERSION} && \ ${COMPILER_FREEBSD_VERSION} == ${WANT_COMPILER_FREEBSD_VERSION} # Everything matches, disable the bootstrap compiler. MK_CLANG_BOOTSTRAP= no MK_GCC_BOOTSTRAP= no USING_SYSTEM_COMPILER= yes .endif # ${WANT_COMPILER_TYPE} == ${COMPILER_TYPE} USING_SYSTEM_COMPILER?= no TEST_SYSTEM_COMPILER_VARS= \ USING_SYSTEM_COMPILER MK_SYSTEM_COMPILER \ MK_CROSS_COMPILER MK_CLANG_BOOTSTRAP MK_GCC_BOOTSTRAP \ WANT_COMPILER_TYPE WANT_COMPILER_VERSION WANT_COMPILER_VERSION_FILE \ WANT_COMPILER_FREEBSD_VERSION WANT_COMPILER_FREEBSD_VERSION_FILE \ CC COMPILER_TYPE COMPILER_VERSION COMPILER_FREEBSD_VERSION test-system-compiler: .PHONY .for v in ${TEST_SYSTEM_COMPILER_VARS} ${_+_}@printf "%-35s= %s\n" "${v}" "${${v}}" .endfor .if ${USING_SYSTEM_COMPILER} == "yes" && \ (make(buildworld) || make(buildkernel) || make(kernel-toolchain) || \ make(toolchain) || make(_cross-tools)) .info SYSTEM_COMPILER: Determined that CC=${CC} matches the source tree. Not bootstrapping a cross-compiler. .endif # For installworld we need to ensure that the looked-up compiler metadata is # passed along rather than trying to run cc from the restricted # STRICTTMPPATH.
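# For illustration only (the values here are hypothetical): with a matching # system clang, submakes inherit e.g. COMPILER_TYPE=clang # COMPILER_VERSION=30900 COMPILER_FREEBSD_VERSION=4, so install stages never # have to execute cc out of the restricted STRICTTMPPATH.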
.if ${MK_CLANG_BOOTSTRAP} == "no" && ${MK_GCC_BOOTSTRAP} == "no" .if !defined(X_COMPILER_TYPE) CROSSENV+= COMPILER_VERSION=${COMPILER_VERSION} \ COMPILER_TYPE=${COMPILER_TYPE} \ COMPILER_FREEBSD_VERSION=${COMPILER_FREEBSD_VERSION} .else CROSSENV+= COMPILER_VERSION=${X_COMPILER_VERSION} \ COMPILER_TYPE=${X_COMPILER_TYPE} \ COMPILER_FREEBSD_VERSION=${X_COMPILER_FREEBSD_VERSION} .endif .endif # Handle external binutils. .if defined(CROSS_TOOLCHAIN_PREFIX) CROSS_BINUTILS_PREFIX?=${CROSS_TOOLCHAIN_PREFIX} .endif # If we do not have a bootstrap binutils (because the in-tree one does not # support the target architecture), provide a default cross-binutils prefix. # This allows aarch64 builds, for example, to automatically use the # aarch64-binutils port or package. .if !make(showconfig) .if !empty(BROKEN_OPTIONS:MBINUTILS_BOOTSTRAP) && \ !defined(CROSS_BINUTILS_PREFIX) CROSS_BINUTILS_PREFIX=/usr/local/${TARGET_ARCH}-freebsd/bin/ .if !exists(${CROSS_BINUTILS_PREFIX}) .error In-tree binutils does not support the ${TARGET_ARCH} architecture. Install the ${TARGET_ARCH}-binutils port or package or set CROSS_BINUTILS_PREFIX. .endif .endif .endif XBINUTILS= AS AR LD NM OBJCOPY RANLIB SIZE STRINGS .for BINUTIL in ${XBINUTILS} .if defined(CROSS_BINUTILS_PREFIX) && \ exists(${CROSS_BINUTILS_PREFIX}${${BINUTIL}}) X${BINUTIL}?= ${CROSS_BINUTILS_PREFIX}${${BINUTIL}} .else X${BINUTIL}?= ${${BINUTIL}} .endif .endfor # We must do lib/ and libexec/ before bin/ in case of a mid-install error to # keep the user's system reasonably usable. For static->dynamic root upgrades, # we don't want to install a dynamic binary without rtld and the needed # libraries. More commonly, for dynamic root, we don't want to install a # binary that requires a newer library version that hasn't been installed yet. # This ordering is not a guarantee though. The only guarantee of a working # system here would require fine-grained ordering of all components based # on their dependencies. .if !empty(SUBDIR_OVERRIDE) SUBDIR= ${SUBDIR_OVERRIDE} .else SUBDIR= lib libexec .if !defined(NO_ROOT) && (make(installworld) || make(install)) # Ensure libraries are installed before progressing. SUBDIR+=.WAIT .endif SUBDIR+=bin .if ${MK_CDDL} != "no" SUBDIR+=cddl .endif SUBDIR+=gnu include .if ${MK_KERBEROS} != "no" SUBDIR+=kerberos5 .endif .if ${MK_RESCUE} != "no" SUBDIR+=rescue .endif SUBDIR+=sbin .if ${MK_CRYPT} != "no" SUBDIR+=secure .endif .if !defined(NO_SHARE) SUBDIR+=share .endif SUBDIR+=sys usr.bin usr.sbin .if ${MK_TESTS} != "no" SUBDIR+= tests .endif .if ${MK_OFED} != "no" SUBDIR+=contrib/ofed .endif # Local directories are last, since it is nice to at least get the base # system rebuilt before you do them. .for _DIR in ${LOCAL_DIRS} .if exists(${.CURDIR}/${_DIR}/Makefile) SUBDIR+= ${_DIR} .endif .endfor # Add LOCAL_LIB_DIRS, but only if they will not be picked up as a SUBDIR # of a LOCAL_DIRS directory. This allows LOCAL_DIRS=foo and # LOCAL_LIB_DIRS=foo/lib to behave as expected. .for _DIR in ${LOCAL_DIRS:M*/} ${LOCAL_DIRS:N*/:S|$|/|} -_REDUNDENT_LIB_DIRS+= ${LOCAL_LIB_DIRS:M${_DIR}*} +_REDUNDANT_LIB_DIRS+= ${LOCAL_LIB_DIRS:M${_DIR}*} .endfor .for _DIR in ${LOCAL_LIB_DIRS} -.if empty(_REDUNDENT_LIB_DIRS:M${_DIR}) && exists(${.CURDIR}/${_DIR}/Makefile) +.if empty(_REDUNDANT_LIB_DIRS:M${_DIR}) && exists(${.CURDIR}/${_DIR}/Makefile) SUBDIR+= ${_DIR} -.else -.warning ${_DIR} not added to SUBDIR list. See UPDATING 20141121. .endif .endfor # We must do etc/ last as it hooks into building the man whatis file # by calling 'makedb' in share/man.
This is only relevant for # install/distribute so they build the whatis file after every manpage is # installed. .if make(installworld) || make(install) SUBDIR+=.WAIT .endif SUBDIR+=etc .endif # !empty(SUBDIR_OVERRIDE) .if defined(NOCLEAN) .warning NOCLEAN option is deprecated. Use NO_CLEAN instead. NO_CLEAN= ${NOCLEAN} .endif .if defined(NO_CLEANDIR) CLEANDIR= clean cleandepend .else CLEANDIR= cleandir .endif .if ${MK_META_MODE} == "yes" # If filemon is used then we can rely on the build being incremental-safe. # The .meta files will also track the build command and rebuild should # it change. .if empty(.MAKE.MODE:Mnofilemon) NO_CLEAN= t .endif .endif LOCAL_TOOL_DIRS?= PACKAGEDIR?= ${DESTDIR}/${DISTDIR} .if empty(SHELL:M*csh*) BUILDENV_SHELL?=${SHELL} .else BUILDENV_SHELL?=/bin/sh .endif .if !defined(SVN) || empty(SVN) . for _P in /usr/bin /usr/local/bin . for _S in svn svnlite . if exists(${_P}/${_S}) SVN= ${_P}/${_S} . endif . endfor . endfor .endif SVNFLAGS?= -r HEAD MAKEOBJDIRPREFIX?= /usr/obj .if !defined(OSRELDATE) .if exists(/usr/include/osreldate.h) OSRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ /usr/include/osreldate.h .else OSRELDATE= 0 .endif .export OSRELDATE .endif # Set VERSION for CTFMERGE to use via the default CTFFLAGS=-L VERSION. .if !defined(_REVISION) _REVISION!= MK_AUTO_OBJ=no ${MAKE} -C ${SRCDIR}/release -V REVISION .export _REVISION .endif .if !defined(_BRANCH) _BRANCH!= MK_AUTO_OBJ=no ${MAKE} -C ${SRCDIR}/release -V BRANCH .export _BRANCH .endif .if !defined(SRCRELDATE) SRCRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ ${SRCDIR}/sys/sys/param.h .export SRCRELDATE .endif .if !defined(VERSION) VERSION= FreeBSD ${_REVISION}-${_BRANCH:C/-p[0-9]+$//} ${TARGET_ARCH} ${SRCRELDATE} .export VERSION .endif .if !defined(PKG_VERSION) .if ${_BRANCH:MSTABLE*} || ${_BRANCH:MCURRENT*} || ${_BRANCH:MALPHA*} TIMENOW= %Y%m%d%H%M%S EXTRA_REVISION= .s${TIMENOW:gmtime} .endif .if ${_BRANCH:M*-p*} EXTRA_REVISION= _${_BRANCH:C/.*-p([0-9]+$)/\1/} .endif PKG_VERSION= ${_REVISION}${EXTRA_REVISION} .endif KNOWN_ARCHES?= aarch64/arm64 \ amd64 \ arm \ armeb/arm \ armv6/arm \ i386 \ mips \ mipsel/mips \ mips64el/mips \ mipsn32el/mips \ mips64/mips \ mipsn32/mips \ mipshf/mips \ mipselhf/mips \ mips64elhf/mips \ mips64hf/mips \ powerpc \ powerpc64/powerpc \ powerpcspe/powerpc \ riscv64/riscv \ riscv64sf/riscv \ sparc64 .if ${TARGET} == ${TARGET_ARCH} _t= ${TARGET} .else _t= ${TARGET_ARCH}/${TARGET} .endif .for _t in ${_t} .if empty(KNOWN_ARCHES:M${_t}) .error Unknown target ${TARGET_ARCH}:${TARGET}. .endif .endfor .if ${TARGET} == ${MACHINE} TARGET_CPUTYPE?=${CPUTYPE} .else TARGET_CPUTYPE?= .endif .if !empty(TARGET_CPUTYPE) _TARGET_CPUTYPE=${TARGET_CPUTYPE} .else _TARGET_CPUTYPE=dummy .endif _CPUTYPE!= MK_AUTO_OBJ=no MAKEFLAGS= CPUTYPE=${_TARGET_CPUTYPE} ${MAKE} \ -f /dev/null -m ${.CURDIR}/share/mk -V CPUTYPE .if ${_CPUTYPE} != ${_TARGET_CPUTYPE} .error CPUTYPE global should be set with ?=. .endif .if make(buildworld) BUILD_ARCH!= uname -p .if ${MACHINE_ARCH} != ${BUILD_ARCH} .error To cross-build, set TARGET_ARCH. 
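# (For example, a cross-build would be invoked as 'make buildworld # TARGET=arm64 TARGET_ARCH=aarch64'; the values are illustrative.)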
.endif .endif .if ${MACHINE} == ${TARGET} && ${MACHINE_ARCH} == ${TARGET_ARCH} && !defined(CROSS_BUILD_TESTING) OBJTREE= ${MAKEOBJDIRPREFIX} .else OBJTREE= ${MAKEOBJDIRPREFIX}/${TARGET}.${TARGET_ARCH} .endif WORLDTMP= ${OBJTREE}${.CURDIR}/tmp BPATH= ${WORLDTMP}/legacy/usr/sbin:${WORLDTMP}/legacy/usr/bin:${WORLDTMP}/legacy/bin XPATH= ${WORLDTMP}/usr/sbin:${WORLDTMP}/usr/bin STRICTTMPPATH= ${BPATH}:${XPATH} TMPPATH= ${STRICTTMPPATH}:${PATH} # # Avoid running mktemp(1) unless actually needed. # It may not be functional, e.g., due to new ABI # when in the middle of installing over this system. # .if make(distributeworld) || make(installworld) || make(stageworld) INSTALLTMP!= /usr/bin/mktemp -d -u -t install .endif .if make(stagekernel) || make(distributekernel) TAGS+= kernel PACKAGE= kernel .endif # # Building a world goes through the following stages # # 1.1. legacy stage [BMAKE] # This stage is responsible for creating compatibility # shims that are needed by the bootstrap-tools, # build-tools and cross-tools stages. These are generally # APIs that tools from one of those three stages need to # build that aren't present on the host. # 1.2. bootstrap-tools stage [BMAKE] # This stage is responsible for creating programs that # are needed for backward compatibility reasons. They # are not built as cross-tools. # 2. build-tools stage [TMAKE] # This stage is responsible for creating the object # tree and building any tools that are needed during # the build process. Some programs are listed during # this phase because they build binaries to generate # files needed to build these programs. This stage also # builds the 'build-tools' target rather than 'all'. # 3. cross-tools stage [XMAKE] # This stage is responsible for creating any tools that # are needed for building the system. A cross-compiler is one # of them. This differs from build tools in two ways: # 1. the 'all' target is built rather than 'build-tools' # 2. these tools are installed into TMPPATH for stage 4. # 4. world stage [WMAKE] # This stage actually builds the world. # 5. install stage (optional) [IMAKE] # This stage installs a previously built world. # BOOTSTRAPPING?= 0 # Keep these in sync -- see below for special case exception MINIMUM_SUPPORTED_OSREL?= 900044 MINIMUM_SUPPORTED_REL?= 9.1 # Common environment for world related stages CROSSENV+= MAKEOBJDIRPREFIX=${OBJTREE} \ MACHINE_ARCH=${TARGET_ARCH} \ MACHINE=${TARGET} \ CPUTYPE=${TARGET_CPUTYPE} .if ${MK_META_MODE} != "no" # Don't rebuild build-tools targets during normal build.
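# (.NOMETA_CMP makes META_MODE skip the build-command comparison for these # targets, so a changed command line alone does not force the host # build-tools to rebuild.)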
CROSSENV+= BUILD_TOOLS_META=.NOMETA_CMP .endif .if ${MK_GROFF} != "no" CROSSENV+= GROFF_BIN_PATH=${WORLDTMP}/legacy/usr/bin \ GROFF_FONT_PATH=${WORLDTMP}/legacy/usr/share/groff_font \ GROFF_TMAC_PATH=${WORLDTMP}/legacy/usr/share/tmac .endif .if defined(TARGET_CFLAGS) CROSSENV+= ${TARGET_CFLAGS} .endif # bootstrap-tools stage BMAKEENV= INSTALL="sh ${.CURDIR}/tools/install.sh" \ TOOLS_PREFIX=${WORLDTMP} \ PATH=${BPATH}:${PATH} \ WORLDTMP=${WORLDTMP} \ MAKEFLAGS="-m ${.CURDIR}/tools/build/mk ${.MAKEFLAGS}" # need to keep this in sync with targets/pseudo/bootstrap-tools/Makefile BSARGS= DESTDIR= \ BOOTSTRAPPING=${OSRELDATE} \ SSP_CFLAGS= \ MK_HTML=no NO_LINT=yes MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_TESTS=no \ MK_INCLUDES=yes BMAKE= MAKEOBJDIRPREFIX=${WORLDTMP} \ ${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ ${BSARGS} # build-tools stage TMAKE= MAKEOBJDIRPREFIX=${OBJTREE} \ ${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ DESTDIR= \ BOOTSTRAPPING=${OSRELDATE} \ SSP_CFLAGS= \ -DNO_LINT \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_TESTS=no # cross-tools stage XMAKE= TOOLS_PREFIX=${WORLDTMP} ${BMAKE} \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ MK_GDB=no MK_TESTS=no MK_LLD_IS_LD=no # kernel-tools stage KTMAKEENV= INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${BPATH}:${PATH} \ WORLDTMP=${WORLDTMP} KTMAKE= TOOLS_PREFIX=${WORLDTMP} MAKEOBJDIRPREFIX=${WORLDTMP} \ ${KTMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ DESTDIR= \ BOOTSTRAPPING=${OSRELDATE} \ SSP_CFLAGS= \ MK_HTML=no -DNO_LINT MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no # world stage WMAKEENV= ${CROSSENV} \ INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${TMPPATH} # make hierarchy HMAKE= PATH=${TMPPATH} ${MAKE} LOCAL_MTREE=${LOCAL_MTREE:Q} .if defined(NO_ROOT) HMAKE+= PATH=${TMPPATH} METALOG=${METALOG} -DNO_ROOT .endif CROSSENV+= CC="${XCC} ${XCFLAGS}" CXX="${XCXX} ${XCXXFLAGS} ${XCFLAGS}" \ CPP="${XCPP} ${XCFLAGS}" \ AS="${XAS}" AR="${XAR}" LD="${XLD}" LLVM_LINK="${XLLVM_LINK}" \ NM=${XNM} OBJCOPY="${XOBJCOPY}" \ RANLIB=${XRANLIB} STRINGS=${XSTRINGS} \ SIZE="${XSIZE}" .if defined(CROSS_BINUTILS_PREFIX) && exists(${CROSS_BINUTILS_PREFIX}) # In the case of xdev-build tools, CROSS_BINUTILS_PREFIX won't be a # directory, but the compiler will look in the right place for its # tools so we don't need to tell it where to look. BFLAGS+= -B${CROSS_BINUTILS_PREFIX} .endif # The internal bootstrap compiler has a default sysroot set by TOOLS_PREFIX # and target set by TARGET/TARGET_ARCH. However, there are several reasons to # always pass an explicit --sysroot and -target. # - External compiler needs sysroot and target flags. # - External ld needs sysroot. # - To be clear about the use of a sysroot when using the internal compiler. # - Easier debugging. # - Allowing WITH_SYSTEM_COMPILER+WITH_META_MODE to work together due to # the flip-flopping build command when sometimes using external and # sometimes using internal. # - Allow using lld which has no support for default paths.
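# The net effect, illustratively, is an invocation along the lines of # 'cc -target x86_64-unknown-freebsd12.0 --sysroot=${WORLDTMP} # -B${WORLDTMP}/usr/bin ...', assembled by the XCFLAGS/BFLAGS logic below.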
.if !defined(CROSS_BINUTILS_PREFIX) || !exists(${CROSS_BINUTILS_PREFIX}) BFLAGS+= -B${WORLDTMP}/usr/bin .endif .if ${TARGET} == "arm" .if ${TARGET_ARCH:Marmv6*} != "" && ${TARGET_CPUTYPE:M*soft*} == "" TARGET_ABI= gnueabihf .else TARGET_ABI= gnueabi .endif .endif .if ${WANT_COMPILER_TYPE} == gcc || \ (defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == gcc) # GCC requires -isystem and -L when using a cross-compiler. --sysroot # won't set header path and -L is used to ensure the base library path # is added before the port PREFIX library path. XCFLAGS+= -isystem ${WORLDTMP}/usr/include -L${WORLDTMP}/usr/lib # GCC requires -B to find /usr/lib/crti.o when using a cross-compiler # combined with --sysroot. XCFLAGS+= -B${WORLDTMP}/usr/lib # Force using libc++ for external GCC. # XXX: This should be checking MK_GNUCXX == no .if ${X_COMPILER_VERSION} >= 40800 XCXXFLAGS+= -isystem ${WORLDTMP}/usr/include/c++/v1 -std=c++11 \ -nostdinc++ .endif .elif ${WANT_COMPILER_TYPE} == clang || \ (defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == clang) TARGET_ABI?= unknown TARGET_TRIPLE?= ${TARGET_ARCH:C/amd64/x86_64/}-${TARGET_ABI}-freebsd12.0 XCFLAGS+= -target ${TARGET_TRIPLE} .endif XCFLAGS+= --sysroot=${WORLDTMP} .if !empty(BFLAGS) XCFLAGS+= ${BFLAGS} .endif .if ${MK_LIB32} != "no" && (${TARGET_ARCH} == "amd64" || \ ${TARGET_ARCH} == "powerpc64") || ${TARGET_ARCH:Mmips64*} != "" LIBCOMPAT= 32 .include "Makefile.libcompat" .elif ${MK_LIBSOFT} != "no" && ${TARGET_ARCH} == "armv6" LIBCOMPAT= SOFT .include "Makefile.libcompat" .endif WMAKE= ${WMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 DESTDIR=${WORLDTMP} IMAKEENV= ${CROSSENV} IMAKE= ${IMAKEENV} ${MAKE} -f Makefile.inc1 \ ${IMAKE_INSTALL} ${IMAKE_MTREE} .if empty(.MAKEFLAGS:M-n) IMAKEENV+= PATH=${STRICTTMPPATH}:${INSTALLTMP} \ LD_LIBRARY_PATH=${INSTALLTMP} \ PATH_LOCALE=${INSTALLTMP}/locale IMAKE+= __MAKE_SHELL=${INSTALLTMP}/sh .else IMAKEENV+= PATH=${TMPPATH}:${INSTALLTMP} .endif .if defined(DB_FROM_SRC) INSTALLFLAGS+= -N ${.CURDIR}/etc MTREEFLAGS+= -N ${.CURDIR}/etc .endif _INSTALL_DDIR= ${DESTDIR}/${DISTDIR} INSTALL_DDIR= ${_INSTALL_DDIR:S://:/:g:C:/$::} .if defined(NO_ROOT) METALOG?= ${DESTDIR}/${DISTDIR}/METALOG IMAKE+= -DNO_ROOT METALOG=${METALOG} INSTALLFLAGS+= -U -M ${METALOG} -D ${INSTALL_DDIR} MTREEFLAGS+= -W .endif .if defined(BUILD_PKGS) INSTALLFLAGS+= -h sha256 .endif .if defined(DB_FROM_SRC) || defined(NO_ROOT) IMAKE_INSTALL= INSTALL="install ${INSTALLFLAGS}" IMAKE_MTREE= MTREE_CMD="mtree ${MTREEFLAGS}" .endif # kernel stage KMAKEENV= ${WMAKEENV} KMAKE= ${KMAKEENV} ${MAKE} ${.MAKEFLAGS} ${KERNEL_FLAGS} KERNEL=${INSTKERNNAME} # # buildworld # # Attempt to rebuild the entire system, with reasonable chance of # success, regardless of how old your existing system is. # _worldtmp: .PHONY .if ${.CURDIR:C/[^,]//g} != "" # The m4 build of sendmail files doesn't like it if ',' is used # anywhere in the path of its files.
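# (${.CURDIR:C/[^,]//g} deletes every character that is not a comma, so the # expansion is non-empty exactly when the path contains a ','.)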
@echo @echo "*** Error: path to source tree contains a comma ','" @echo false .endif @echo @echo "--------------------------------------------------------------" @echo ">>> Rebuilding the temporary build tree" @echo "--------------------------------------------------------------" .if !defined(NO_CLEAN) rm -rf ${WORLDTMP} .if defined(LIBCOMPAT) rm -rf ${LIBCOMPATTMP} .endif .else rm -rf ${WORLDTMP}/legacy/usr/include .endif .for _dir in \ lib lib/casper usr legacy/bin legacy/usr mkdir -p ${WORLDTMP}/${_dir} .endfor mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${WORLDTMP}/legacy/usr >/dev/null .if ${MK_GROFF} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.groff.dist \ -p ${WORLDTMP}/legacy/usr >/dev/null .endif mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${WORLDTMP}/legacy/usr/include >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${WORLDTMP}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${WORLDTMP}/usr/include >/dev/null ln -sf ${.CURDIR}/sys ${WORLDTMP} .if ${MK_DEBUG_FILES} != "no" # We could instead disable debug files for these build stages mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${WORLDTMP}/legacy/usr/lib >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${WORLDTMP}/usr/lib >/dev/null .endif .if defined(LIBCOMPAT) mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${WORLDTMP}/usr >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${WORLDTMP}/legacy/usr/lib/debug/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${WORLDTMP}/usr/lib/debug/usr >/dev/null .endif .endif .if ${MK_TESTS} != "no" mkdir -p ${WORLDTMP}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${WORLDTMP}${TESTSBASE} >/dev/null .if ${MK_DEBUG_FILES} != "no" mkdir -p ${WORLDTMP}/usr/lib/debug/${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${WORLDTMP}/usr/lib/debug/${TESTSBASE} >/dev/null .endif .endif .for _mtree in ${LOCAL_MTREE} mtree -deU -f ${.CURDIR}/${_mtree} -p ${WORLDTMP} > /dev/null .endfor _legacy: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1.1: legacy release compatibility shims" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${BMAKE} legacy _bootstrap-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1.2: bootstrap tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${BMAKE} bootstrap-tools _cleanobj: .if !defined(NO_CLEAN) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.1: cleaning up the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} ${CLEANDIR} .if defined(LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${LIBCOMPATWMAKE} -f Makefile.inc1 ${CLEANDIR} .endif .endif _obj: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.2: rebuilding the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} obj _build-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.3: build tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${TMAKE} build-tools _cross-tools: @echo @echo 
"--------------------------------------------------------------" @echo ">>> stage 3: cross tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${XMAKE} cross-tools ${_+_}cd ${.CURDIR}; ${XMAKE} kernel-tools _includes: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.1: building includes" @echo "--------------------------------------------------------------" # Special handling for SUBDIR_OVERRIDE in buildworld as they most likely need # headers from default SUBDIR. Do SUBDIR_OVERRIDE includes last. ${_+_}cd ${.CURDIR}; ${WMAKE} SUBDIR_OVERRIDE= SHARED=symlinks \ MK_INCLUDES=yes includes .if !empty(SUBDIR_OVERRIDE) && make(buildworld) ${_+_}cd ${.CURDIR}; ${WMAKE} MK_INCLUDES=yes SHARED=symlinks includes .endif _libraries: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.2: building libraries" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; \ ${WMAKE} -DNO_FSCHG MK_HTML=no -DNO_LINT MK_MAN=no \ MK_PROFILE=no MK_TESTS=no MK_TESTS_SUPPORT=${MK_TESTS} libraries everything: .PHONY @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.3: building everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; _PARALLEL_SUBDIR_OK=1 ${WMAKE} all WMAKE_TGTS= WMAKE_TGTS+= _worldtmp _legacy .if empty(SUBDIR_OVERRIDE) WMAKE_TGTS+= _bootstrap-tools .endif WMAKE_TGTS+= _cleanobj _obj _build-tools _cross-tools WMAKE_TGTS+= _includes _libraries WMAKE_TGTS+= everything .if defined(LIBCOMPAT) && empty(SUBDIR_OVERRIDE) WMAKE_TGTS+= build${libcompat} .endif buildworld: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue .PHONY .ORDER: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue buildworld_prologue: .PHONY @echo "--------------------------------------------------------------" @echo ">>> World build started on `LC_ALL=C date`" @echo "--------------------------------------------------------------" buildworld_epilogue: .PHONY @echo @echo "--------------------------------------------------------------" @echo ">>> World build completed on `LC_ALL=C date`" @echo "--------------------------------------------------------------" # # We need to have this as a target because the indirection between Makefile # and Makefile.inc1 causes the correct PATH to be used, rather than a # modification of the current environment's PATH. In addition, we need # to quote multiword values. # buildenvvars: .PHONY @echo ${WMAKEENV:Q} ${.MAKE.EXPORTED:@v@$v=\"${$v}\"@} .if ${.TARGETS:Mbuildenv} .if ${.MAKEFLAGS:M-j} .error The buildenv target is incompatible with -j .endif .endif BUILDENV_DIR?= ${.CURDIR} buildenv: .PHONY @echo Entering world for ${TARGET_ARCH}:${TARGET} .if ${BUILDENV_SHELL:M*zsh*} @echo For ZSH you must run: export CPUTYPE=${TARGET_CPUTYPE} .endif @cd ${BUILDENV_DIR} && env ${WMAKEENV} BUILDENV=1 ${BUILDENV_SHELL} \ || true TOOLCHAIN_TGTS= ${WMAKE_TGTS:Neverything:Nbuild${libcompat}} toolchain: ${TOOLCHAIN_TGTS} .PHONY kernel-toolchain: ${TOOLCHAIN_TGTS:N_includes:N_libraries} .PHONY # # installcheck # # Checks to be sure system is ready for installworld/installkernel. # installcheck: _installcheck_world _installcheck_kernel .PHONY _installcheck_world: .PHONY _installcheck_kernel: .PHONY # # Require DESTDIR to be set if installing for a different architecture or # using the user/group database in the source tree. 
# .if ${TARGET_ARCH} != ${MACHINE_ARCH} || ${TARGET} != ${MACHINE} || \ defined(DB_FROM_SRC) .if !make(distributeworld) _installcheck_world: __installcheck_DESTDIR _installcheck_kernel: __installcheck_DESTDIR __installcheck_DESTDIR: .PHONY .if !defined(DESTDIR) || empty(DESTDIR) @echo "ERROR: Please set DESTDIR!"; \ false .endif .endif .endif .if !defined(DB_FROM_SRC) # # Check for missing UIDs/GIDs. # CHECK_UIDS= auditdistd CHECK_GIDS= audit .if ${MK_SENDMAIL} != "no" CHECK_UIDS+= smmsp CHECK_GIDS+= smmsp .endif .if ${MK_PF} != "no" CHECK_UIDS+= proxy CHECK_GIDS+= proxy authpf .endif .if ${MK_UNBOUND} != "no" CHECK_UIDS+= unbound CHECK_GIDS+= unbound .endif _installcheck_world: __installcheck_UGID __installcheck_UGID: .PHONY .for uid in ${CHECK_UIDS} @if ! `id -u ${uid} >/dev/null 2>&1`; then \ echo "ERROR: Required ${uid} user is missing, see /usr/src/UPDATING."; \ false; \ fi .endfor .for gid in ${CHECK_GIDS} @if ! `find / -prune -group ${gid} >/dev/null 2>&1`; then \ echo "ERROR: Required ${gid} group is missing, see /usr/src/UPDATING."; \ false; \ fi .endfor .endif # # Required install tools to be saved in a scratch dir for safety. # .if ${MK_ZONEINFO} != "no" _zoneinfo= zic tzsetup .endif ITOOLS= [ awk cap_mkdb cat chflags chmod chown cmp cp \ date echo egrep find grep id install ${_install-info} \ ln make mkdir mtree mv pwd_mkdb \ rm sed services_mkdb sh strip sysctl test true uname wc ${_zoneinfo} \ ${LOCAL_ITOOLS} # Needed for share/man .if ${MK_MAN_UTILS} != "no" ITOOLS+=makewhatis .endif # # distributeworld # # Distributes everything compiled by a `buildworld'. # # installworld # # Installs everything compiled by a 'buildworld'. # # Non-base distributions produced by the base system EXTRA_DISTRIBUTIONS= doc .if defined(LIBCOMPAT) EXTRA_DISTRIBUTIONS+= lib${libcompat} .endif .if ${MK_TESTS} != "no" EXTRA_DISTRIBUTIONS+= tests .endif DEBUG_DISTRIBUTIONS= .if ${MK_DEBUG_FILES} != "no" DEBUG_DISTRIBUTIONS+= base ${EXTRA_DISTRIBUTIONS:S,doc,,:S,tests,,} .endif MTREE_MAGIC?= mtree 2.0 distributeworld installworld stageworld: _installcheck_world .PHONY mkdir -p ${INSTALLTMP} progs=$$(for prog in ${ITOOLS}; do \ if progpath=`which $$prog`; then \ echo $$progpath; \ else \ echo "Required tool $$prog not found in PATH." >&2; \ exit 1; \ fi; \ done); \ libs=$$(ldd -f "%o %p\n" -f "%o %p\n" $$progs 2>/dev/null | sort -u | \ while read line; do \ set -- $$line; \ if [ "$$2 $$3" != "not found" ]; then \ echo $$2; \ else \ echo "Required library $$1 not found." 
>&2; \ exit 1; \ fi; \ done); \ cp $$libs $$progs ${INSTALLTMP} cp -R $${PATH_LOCALE:-"/usr/share/locale"} ${INSTALLTMP}/locale .if defined(NO_ROOT) -mkdir -p ${METALOG:H} echo "#${MTREE_MAGIC}" > ${METALOG} .endif .if make(distributeworld) .for dist in ${EXTRA_DISTRIBUTIONS} -mkdir ${DESTDIR}/${DISTDIR}/${dist} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.root.dist \ -p ${DESTDIR}/${DISTDIR}/${dist} >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/include >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib >/dev/null .endif .if defined(LIBCOMPAT) mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/usr >/dev/null .endif .endif .if ${MK_TESTS} != "no" && ${dist} == "tests" -mkdir -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/${TESTSBASE} >/dev/null .endif .endif .if defined(NO_ROOT) ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.root.dist | \ sed -e 's#^\./#./${dist}/#' >> ${METALOG} ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.usr.dist | \ sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG} ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.include.dist | \ sed -e 's#^\./#./${dist}/usr/include/#' >> ${METALOG} .if defined(LIBCOMPAT) ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist | \ sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG} .endif .endif .endfor -mkdir ${DESTDIR}/${DISTDIR}/base ${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \ METALOG=${METALOG} ${IMAKE_INSTALL} ${IMAKE_MTREE} \ DISTBASE=/base DESTDIR=${DESTDIR}/${DISTDIR}/base \ LOCAL_MTREE=${LOCAL_MTREE:Q} distrib-dirs .endif ${_+_}cd ${.CURDIR}; ${IMAKE} re${.TARGET:S/world$//}; \ ${IMAKEENV} rm -rf ${INSTALLTMP} .if make(distributeworld) .for dist in ${EXTRA_DISTRIBUTIONS} find ${DESTDIR}/${DISTDIR}/${dist} -mindepth 1 -type d -empty -delete .endfor .if defined(NO_ROOT) .for dist in base ${EXTRA_DISTRIBUTIONS} @# For each file that exists in this dist, print the corresponding @# line from the METALOG. This relies on the fact that @# a line containing only the filename will sort immediately before @# the relevant mtree line. cd ${DESTDIR}/${DISTDIR}; \ find ./${dist} | sort -u ${METALOG} - | \ awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \ ${DESTDIR}/${DISTDIR}/${dist}.meta .endfor .for dist in ${DEBUG_DISTRIBUTIONS} @# For each file that exists in this dist, print the corresponding @# line from the METALOG. This relies on the fact that @# a line containing only the filename will sort immediately before @# the relevant mtree line. 
cd ${DESTDIR}/${DISTDIR}; \ find ./${dist}/usr/lib/debug | sort -u ${METALOG} - | \ awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \ ${DESTDIR}/${DISTDIR}/${dist}.debug.meta .endfor .endif .endif packageworld: .PHONY .for dist in base ${EXTRA_DISTRIBUTIONS} .if defined(NO_ROOT) ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - --exclude usr/lib/debug \ @${DESTDIR}/${DISTDIR}/${dist}.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz .else ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - --exclude usr/lib/debug . | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz .endif .endfor .for dist in ${DEBUG_DISTRIBUTIONS} . if defined(NO_ROOT) ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - @${DESTDIR}/${DISTDIR}/${dist}.debug.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz . else ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvLf - usr/lib/debug | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz . endif .endfor # # reinstall # # If you have a build server, you can NFS mount the source and obj directories # and do a 'make reinstall' on the *client* to install new binaries from the # most recent server build. # restage reinstall: .MAKE .PHONY @echo "--------------------------------------------------------------" @echo ">>> Making hierarchy" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 \ LOCAL_MTREE=${LOCAL_MTREE:Q} hierarchy .if make(restage) @echo "--------------------------------------------------------------" @echo ">>> Making distribution" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 \ LOCAL_MTREE=${LOCAL_MTREE:Q} distribution .endif @echo @echo "--------------------------------------------------------------" @echo ">>> Installing everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install .if defined(LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install${libcompat} .endif redistribute: .MAKE .PHONY @echo "--------------------------------------------------------------" @echo ">>> Distributing everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute .if defined(LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute${libcompat} \ DISTRIBUTION=lib${libcompat} .endif distrib-dirs distribution: .MAKE .PHONY ${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \ ${IMAKE_INSTALL} ${IMAKE_MTREE} METALOG=${METALOG} ${.TARGET} .if make(distribution) ${_+_}cd ${.CURDIR}; ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} -f Makefile.inc1 ${IMAKE_INSTALL} \ METALOG=${METALOG} MK_TESTS=no installconfig .endif # # buildkernel and installkernel # # Which kernels to build and/or install is specified by setting # KERNCONF. If not defined, a GENERIC kernel is built/installed. # Only the existing (depending on TARGET) config files are used # for building kernels and only the first of these is designated # as the one being installed. # # Note that we have to use TARGET instead of TARGET_ARCH when # we're in kernel-land. Since only TARGET_ARCH is (expected to be) # set to cross-build, we have to make sure TARGET is set # properly.
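# Typical usage (the KERNCONF value is illustrative): # make buildkernel KERNCONF=MYKERNEL # make installkernel KERNCONF=MYKERNEL # KERNFAST=MYKERNEL additionally implies the NO_KERNEL* shortcuts below for # quick edit-and-rebuild cycles.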
.if defined(KERNFAST) NO_KERNELCLEAN= t NO_KERNELCONFIG= t NO_KERNELOBJ= t # Shortcut for KERNCONF=Blah -DKERNFAST is now KERNFAST=Blah .if !defined(KERNCONF) && ${KERNFAST} != "1" KERNCONF=${KERNFAST} .endif .endif .if ${TARGET_ARCH} == "powerpc64" KERNCONF?= GENERIC64 .else KERNCONF?= GENERIC .endif INSTKERNNAME?= kernel KERNSRCDIR?= ${.CURDIR}/sys KRNLCONFDIR= ${KERNSRCDIR}/${TARGET}/conf KRNLOBJDIR= ${OBJTREE}${KERNSRCDIR} KERNCONFDIR?= ${KRNLCONFDIR} BUILDKERNELS= INSTALLKERNEL= .if defined(NO_INSTALLKERNEL) # All of the BUILDKERNELS loops start at index 1. BUILDKERNELS+= dummy .endif .for _kernel in ${KERNCONF} .if exists(${KERNCONFDIR}/${_kernel}) BUILDKERNELS+= ${_kernel} .if empty(INSTALLKERNEL) && !defined(NO_INSTALLKERNEL) INSTALLKERNEL= ${_kernel} .endif .endif .endfor ${WMAKE_TGTS:N_worldtmp:Nbuild${libcompat}} ${.ALLTARGETS:M_*:N_worldtmp}: .MAKE .PHONY # # buildkernel # # Builds all kernels defined by BUILDKERNELS. # buildkernel: .MAKE .PHONY .if empty(BUILDKERNELS:Ndummy) @echo "ERROR: Missing kernel configuration file(s) (${KERNCONF})."; \ false .endif @echo .for _kernel in ${BUILDKERNELS:Ndummy} @echo "--------------------------------------------------------------" @echo ">>> Kernel build for ${_kernel} started on `LC_ALL=C date`" @echo "--------------------------------------------------------------" @echo "===> ${_kernel}" mkdir -p ${KRNLOBJDIR} .if !defined(NO_KERNELCONFIG) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1: configuring the kernel" @echo "--------------------------------------------------------------" cd ${KRNLCONFDIR}; \ PATH=${TMPPATH} \ config ${CONFIGARGS} -d ${KRNLOBJDIR}/${_kernel} \ -I '${KERNCONFDIR}' '${KERNCONFDIR}/${_kernel}' .endif .if !defined(NO_CLEAN) && !defined(NO_KERNELCLEAN) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.1: cleaning up the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} ${CLEANDIR} .endif .if !defined(NO_KERNELOBJ) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.2: rebuilding the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} obj .endif @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.3: build tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${KTMAKE} kernel-tools @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3.1: building everything" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} all -DNO_MODULES_OBJ @echo "--------------------------------------------------------------" @echo ">>> Kernel build for ${_kernel} completed on `LC_ALL=C date`" @echo "--------------------------------------------------------------" .endfor NO_INSTALLEXTRAKERNELS?= yes # # installkernel, etc. 
# # Install the kernel defined by INSTALLKERNEL # installkernel installkernel.debug \ reinstallkernel reinstallkernel.debug: _installcheck_kernel .PHONY .if !defined(NO_INSTALLKERNEL) .if empty(INSTALLKERNEL) @echo "ERROR: No kernel \"${KERNCONF}\" to install."; \ false .endif @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${INSTALLKERNEL}" @echo "--------------------------------------------------------------" cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \ ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME} ${.TARGET:S/kernel//} .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${_kernel}" @echo "--------------------------------------------------------------" cd ${KRNLOBJDIR}/${_kernel}; \ ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME}.${_kernel} ${.TARGET:S/kernel//} .endfor .endif distributekernel distributekernel.debug: .PHONY .if !defined(NO_INSTALLKERNEL) .if empty(INSTALLKERNEL) @echo "ERROR: No kernel \"${KERNCONF}\" to install."; \ false .endif mkdir -p ${DESTDIR}/${DISTDIR} .if defined(NO_ROOT) @echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.premeta .endif cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \ ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.premeta/} \ ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} KERNEL=${INSTKERNNAME} \ DESTDIR=${INSTALL_DDIR}/kernel \ ${.TARGET:S/distributekernel/install/} .if defined(NO_ROOT) @sed -e 's|^./kernel|.|' ${DESTDIR}/${DISTDIR}/kernel.premeta > \ ${DESTDIR}/${DISTDIR}/kernel.meta .endif .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} .if defined(NO_ROOT) @echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta .endif cd ${KRNLOBJDIR}/${_kernel}; \ ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.${_kernel}.premeta/} \ ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} \ KERNEL=${INSTKERNNAME}.${_kernel} \ DESTDIR=${INSTALL_DDIR}/kernel.${_kernel} \ ${.TARGET:S/distributekernel/install/} .if defined(NO_ROOT) @sed -e "s|^./kernel.${_kernel}|.|" \ ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta > \ ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta .endif .endfor .endif packagekernel: .PHONY .if defined(NO_ROOT) .if !defined(NO_INSTALLKERNEL) cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --exclude '*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz .endif cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --include '*/*/*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.meta | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --exclude '*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --include '*/*/*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz .endfor .endif .else .if !defined(NO_INSTALLKERNEL) cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --exclude '*.debug' . | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz .endif cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --include '*/*/*.debug' $$(eval find .) 
| \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --exclude '*.debug' . | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --include '*/*/*.debug' $$(eval find .) | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz .endfor .endif .endif stagekernel: .PHONY ${_+_}${MAKE} -C ${.CURDIR} ${.MAKEFLAGS} distributekernel PORTSDIR?= /usr/ports WSTAGEDIR?= ${MAKEOBJDIRPREFIX}${.CURDIR}/${TARGET}.${TARGET_ARCH}/worldstage KSTAGEDIR?= ${MAKEOBJDIRPREFIX}${.CURDIR}/${TARGET}.${TARGET_ARCH}/kernelstage REPODIR?= ${MAKEOBJDIRPREFIX}${.CURDIR}/repo PKGSIGNKEY?= # empty .ORDER: stage-packages create-packages .ORDER: create-packages create-world-packages .ORDER: create-packages create-kernel-packages .ORDER: create-packages sign-packages _pkgbootstrap: .PHONY .if !exists(${LOCALBASE}/sbin/pkg) @env ASSUME_ALWAYS_YES=YES pkg bootstrap .endif packages: .PHONY ${_+_}${MAKE} -C ${.CURDIR} PKG_VERSION=${PKG_VERSION} real-packages package-pkg: .PHONY rm -rf /tmp/ports.${TARGET} || : env ${WMAKEENV:Q} SRCDIR=${.CURDIR} PORTSDIR=${PORTSDIR} REVISION=${_REVISION} \ PKG_CMD=${PKG_CMD} PKG_VERSION=${PKG_VERSION} REPODIR=${REPODIR} \ WSTAGEDIR=${WSTAGEDIR} \ sh ${.CURDIR}/release/scripts/make-pkg-package.sh real-packages: stage-packages create-packages sign-packages .PHONY stage-packages: .PHONY @mkdir -p ${REPODIR} ${WSTAGEDIR} ${KSTAGEDIR} ${_+_}@cd ${.CURDIR}; \ ${MAKE} DESTDIR=${WSTAGEDIR} -DNO_ROOT -B stageworld ; \ ${MAKE} DESTDIR=${KSTAGEDIR} -DNO_ROOT -B stagekernel create-packages: _pkgbootstrap .PHONY @mkdir -p ${REPODIR} ${_+_}@cd ${.CURDIR}; \ ${MAKE} DESTDIR=${WSTAGEDIR} \ PKG_VERSION=${PKG_VERSION} create-world-packages ; \ ${MAKE} DESTDIR=${KSTAGEDIR} \ PKG_VERSION=${PKG_VERSION} DISTDIR=kernel \ create-kernel-packages create-world-packages: _pkgbootstrap .PHONY @rm -f ${WSTAGEDIR}/*.plist 2>/dev/null || : @cd ${WSTAGEDIR} ; \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ ${WSTAGEDIR}/METALOG @for plist in ${WSTAGEDIR}/*.plist; do \ plist=$${plist##*/} ; \ pkgname=$${plist%.plist} ; \ sh ${SRCDIR}/release/packages/generate-ucl.sh -o $${pkgname} \ -s ${SRCDIR} -u ${WSTAGEDIR}/$${pkgname}.ucl ; \ done @for plist in ${WSTAGEDIR}/*.plist; do \ plist=$${plist##*/} ; \ pkgname=$${plist%.plist} ; \ awk -F\" ' \ /^name/ { printf("===> Creating %s-", $$2); next } \ /^version/ { print $$2; next } \ ' ${WSTAGEDIR}/$${pkgname}.ucl ; \ ${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh -o ALLOW_BASE_SHLIBS=yes \ create -M ${WSTAGEDIR}/$${pkgname}.ucl \ -p ${WSTAGEDIR}/$${pkgname}.plist \ -r ${WSTAGEDIR} \ -o ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} ; \ done create-kernel-packages: _pkgbootstrap .PHONY .if exists(${KSTAGEDIR}/kernel.meta) .for flavor in "" -debug @cd ${KSTAGEDIR}/${DISTDIR} ; \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ -v kernel=yes -v _kernconf=${INSTALLKERNEL} \ ${KSTAGEDIR}/kernel.meta ; \ cap_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VCAP_MKDB_ENDIAN` ; \ pwd_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VPWD_MKDB_ENDIAN` ; \ sed -e "s/%VERSION%/${PKG_VERSION}/" \ -e "s/%PKGNAME%/kernel-${INSTALLKERNEL:tl}${flavor}/" \ -e "s/%COMMENT%/FreeBSD ${INSTALLKERNEL} kernel ${flavor}/" \ -e "s/%DESC%/FreeBSD ${INSTALLKERNEL} kernel ${flavor}/" \ -e "s/%CAP_MKDB_ENDIAN%/$${cap_arg}/g" \ -e "s/%PWD_MKDB_ENDIAN%/$${pwd_arg}/g" \ 
${SRCDIR}/release/packages/kernel.ucl \ > ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl ; \ awk -F\" ' \ /name/ { printf("===> Creating %s-", $$2); next } \ /version/ {print $$2; next } ' \ ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl ; \ ${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh -o ALLOW_BASE_SHLIBS=yes \ create -M ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl \ -p ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.plist \ -r ${KSTAGEDIR}/${DISTDIR} \ -o ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} .endfor .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} .if exists(${KSTAGEDIR}/kernel.${_kernel}.meta) .for flavor in "" -debug @cd ${KSTAGEDIR}/kernel.${_kernel} ; \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ -v kernel=yes -v _kernconf=${_kernel} \ ${KSTAGEDIR}/kernel.${_kernel}.meta ; \ cap_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VCAP_MKDB_ENDIAN` ; \ pwd_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VPWD_MKDB_ENDIAN` ; \ sed -e "s/%VERSION%/${PKG_VERSION}/" \ -e "s/%PKGNAME%/kernel-${_kernel:tl}${flavor}/" \ -e "s/%COMMENT%/FreeBSD ${_kernel} kernel ${flavor}/" \ -e "s/%DESC%/FreeBSD ${_kernel} kernel ${flavor}/" \ -e "s/%CAP_MKDB_ENDIAN%/$${cap_arg}/g" \ -e "s/%PWD_MKDB_ENDIAN%/$${pwd_arg}/g" \ ${SRCDIR}/release/packages/kernel.ucl \ > ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl ; \ awk -F\" ' \ /name/ { printf("===> Creating %s-", $$2); next } \ /version/ {print $$2; next } ' \ ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl ; \ ${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh -o ALLOW_BASE_SHLIBS=yes \ create -M ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl \ -p ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.plist \ -r ${KSTAGEDIR}/kernel.${_kernel} \ -o ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} .endfor .endif .endfor .endif sign-packages: _pkgbootstrap .PHONY @[ -L "${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/latest" ] && \ unlink ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/latest ; \ ${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh repo \ -o ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} \ ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} \ ${PKGSIGNKEY} ; \ ln -s ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} \ ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/latest # # # checkworld # # Run test suite on installed world. # checkworld: .PHONY @if [ ! -x ${LOCALBASE}/bin/kyua ]; then \ echo "You need kyua (devel/kyua) to run the test suite." | /usr/bin/fmt; \ exit 1; \ fi ${_+_}${LOCALBASE}/bin/kyua test -k ${TESTSBASE}/Kyuafile # # # doxygen # # Build the API documentation with doxygen # doxygen: .PHONY @if [ ! -x ${LOCALBASE}/bin/doxygen ]; then \ echo "You need doxygen (devel/doxygen) to generate the API documentation of the kernel." | /usr/bin/fmt; \ exit 1; \ fi ${_+_}cd ${.CURDIR}/tools/kerneldoc/subsys; ${MAKE} obj all # # update # # Update the source tree(s), by running svn/svnup to update to the # latest copy. 
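# (The target is a no-op unless SVN_UPDATE is defined, e.g. # 'make update SVN_UPDATE=yes'.)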
# update: .PHONY .if defined(SVN_UPDATE) @echo "--------------------------------------------------------------" @echo ">>> Updating ${.CURDIR} using Subversion" @echo "--------------------------------------------------------------" @(cd ${.CURDIR}; ${SVN} update ${SVNFLAGS}) .endif # # ------------------------------------------------------------------------ # # From here onwards are utility targets used by the 'make world' and # related targets. If your 'world' breaks, you may want to try to fix # the problem and manually run the following targets to attempt to # complete the build. Beware: this is *not* guaranteed to work; you # need to have a pretty good grip on the current state of the system # to attempt to manually finish it. If in doubt, 'make world' again. # # # legacy: Build compatibility shims for the next three targets. This is a # minimal set of tools and shims necessary to compensate for older systems # which don't have the APIs required by the targets built in bootstrap-tools, # build-tools or cross-tools. # # ELF Tool Chain libraries are needed for ELF tools and dtrace tools. # r296685 fix cross-endian objcopy .if ${BOOTSTRAPPING} < 1100102 _elftoolchain_libs= lib/libelf lib/libdwarf .endif legacy: .PHONY # Temporary special case for automatically detecting the clang compiler issue # Note: 9.x didn't have FreeBSD_version bumps often enough, so you may need to # set BOOTSTRAPPING to 0 if your stable/9 tree post-dates r286035 but is before # the version bump in r296219 (from July 29, 2015 -> Feb 29, 2016). .if ${BOOTSTRAPPING} != 0 && \ ${WANT_COMPILER_TYPE} == "clang" && ${COMPILER_TYPE} == "clang" && ${COMPILER_VERSION} < 30601 .if ${BOOTSTRAPPING} > 1000000 && ${BOOTSTRAPPING} < 1002501 @echo "ERROR: Source upgrades from stable/10 prior to r286033 are not supported."; false .elif ${BOOTSTRAPPING} > 900000 && ${BOOTSTRAPPING} < 903509 @echo "ERROR: Source upgrades from stable/9 prior to r286035 are not supported."; false .endif .endif .if ${BOOTSTRAPPING} < ${MINIMUM_SUPPORTED_OSREL} && ${BOOTSTRAPPING} != 0 @echo "ERROR: Source upgrades from versions prior to ${MINIMUM_SUPPORTED_REL} are not supported."; \ false .endif .for _tool in tools/build ${_elftoolchain_libs} ${_+_}@${ECHODIR} "===> ${_tool} (obj,includes,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy includes; \ ${MAKE} DIRPRFX=${_tool}/ MK_INCLUDES=no all; \ ${MAKE} DIRPRFX=${_tool}/ MK_INCLUDES=no \ DESTDIR=${MAKEOBJDIRPREFIX}/legacy install .endfor # # bootstrap-tools: Build tools needed for compatibility. These are binaries that # are built to build other binaries in the system. However, the focus of these # binaries is usually quite narrow. Bootstrap tools use the host's compiler and # libraries, augmented by -legacy.
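# (BOOTSTRAPPING carries the host's __FreeBSD_version, passed in as # BOOTSTRAPPING=${OSRELDATE} by BMAKE above; the version gates below only # bootstrap a tool when the host's own copy is too old.)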
# _bt= _bootstrap-tools .if ${MK_GAMES} != "no" _strfile= usr.bin/fortune/strfile .endif .if ${MK_GCC} != "no" && ${MK_CXX} != "no" _gperf= gnu/usr.bin/gperf .endif .if ${MK_SHAREDOCS} != "no" && ${MK_GROFF} != "no" _groff= gnu/usr.bin/groff \ usr.bin/soelim .endif .if ${MK_VT} != "no" _vtfontcvt= usr.bin/vtfontcvt .endif .if ${BOOTSTRAPPING} < 1000033 _libopenbsd= lib/libopenbsd _m4= usr.bin/m4 _lex= usr.bin/lex ${_bt}-usr.bin/m4: ${_bt}-lib/libopenbsd ${_bt}-usr.bin/lex: ${_bt}-usr.bin/m4 .endif .if ${BOOTSTRAPPING} < 1000026 _nmtree= lib/libnetbsd \ usr.sbin/nmtree ${_bt}-usr.sbin/nmtree: ${_bt}-lib/libnetbsd .endif .if ${BOOTSTRAPPING} < 1000027 _cat= bin/cat .endif # r277259 crunchide: Correct 64-bit section header offset # r281674 crunchide: always include both 32- and 64-bit ELF support .if ${BOOTSTRAPPING} < 1100078 _crunchide= usr.sbin/crunch/crunchide .endif # r285986 crunchgen: use STRIPBIN rather than STRIP # 1100113: Support MK_AUTO_OBJ # 1200006: META_MODE fixes .if ${BOOTSTRAPPING} < 1100078 || \ (${MK_AUTO_OBJ} == "yes" && ${BOOTSTRAPPING} < 1100114) || \ (${MK_META_MODE} == "yes" && ${BOOTSTRAPPING} < 1200006) _crunchgen= usr.sbin/crunch/crunchgen .endif # r296926 -P keymap search path, MFC to stable/10 in r298297 .if ${BOOTSTRAPPING} < 1003501 || \ (${BOOTSTRAPPING} >= 1100000 && ${BOOTSTRAPPING} < 1100103) _kbdcontrol= usr.sbin/kbdcontrol .endif _yacc= lib/liby \ usr.bin/yacc ${_bt}-usr.bin/yacc: ${_bt}-lib/liby .if ${MK_BSNMP} != "no" _gensnmptree= usr.sbin/bsnmpd/gensnmptree .endif # We need to build tblgen when we're building clang either as # the bootstrap compiler, or as part of the normal build. .if ${MK_CLANG_BOOTSTRAP} != "no" || ${MK_CLANG} != "no" _clang_tblgen= \ lib/clang/libllvmminimal \ usr.bin/clang/llvm-tblgen \ usr.bin/clang/clang-tblgen ${_bt}-usr.bin/clang/clang-tblgen: ${_bt}-lib/clang/libllvmminimal ${_bt}-usr.bin/clang/llvm-tblgen: ${_bt}-lib/clang/libllvmminimal .endif # Default to building the GPL DTC, but build the BSDL one if users explicitly # request it. _dtc= usr.bin/dtc .if ${MK_GPL_DTC} != "no" _dtc= gnu/usr.bin/dtc .endif .if ${MK_KERBEROS} != "no" _kerberos5_bootstrap_tools= \ kerberos5/tools/make-roken \ kerberos5/lib/libroken \ kerberos5/lib/libvers \ kerberos5/tools/asn1_compile \ kerberos5/tools/slc \ usr.bin/compile_et .ORDER: ${_kerberos5_bootstrap_tools:C/^/${_bt}-/g} .endif # r283777 makewhatis(1) replaced with mandoc version which builds a database. .if ${MK_MANDOCDB} != "no" _libopenbsd?= lib/libopenbsd _makewhatis= usr.bin/mandoc ${_bt}-usr.bin/mandoc: ${_bt}-lib/libopenbsd .endif bootstrap-tools: .PHONY # Please document (add comment) why something is in 'bootstrap-tools'. # Try to bound the building of the bootstrap-tool to just the # FreeBSD versions that need the tool built at this stage of the build.
.for _tool in \ ${_clang_tblgen} \ ${_kerberos5_bootstrap_tools} \ ${_strfile} \ ${_gperf} \ ${_groff} \ ${_dtc} \ ${_cat} \ ${_kbdcontrol} \ usr.bin/lorder \ ${_libopenbsd} \ ${_makewhatis} \ usr.bin/rpcgen \ ${_yacc} \ ${_m4} \ ${_lex} \ usr.bin/xinstall \ ${_gensnmptree} \ usr.sbin/config \ ${_crunchide} \ ${_crunchgen} \ ${_nmtree} \ ${_vtfontcvt} \ usr.bin/localedef ${_bt}-${_tool}: .PHONY .MAKE ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ all; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy install bootstrap-tools: ${_bt}-${_tool} .endfor # # build-tools: Build special purpose build tools # .if !defined(NO_SHARE) _share= share/syscons/scrnmaps .endif .if ${MK_GCC} != "no" _gcc_tools= gnu/usr.bin/cc/cc_tools .endif .if ${MK_RESCUE} != "no" # rescue includes programs that have build-tools targets _rescue=rescue/rescue .endif .for _tool in \ bin/csh \ bin/sh \ ${LOCAL_TOOL_DIRS} \ lib/ncurses/ncurses \ lib/ncurses/ncursesw \ ${_rescue} \ ${_share} \ usr.bin/awk \ lib/libmagic \ usr.bin/mkesdb_static \ usr.bin/mkcsmapper_static \ usr.bin/vi/catalog build-tools_${_tool}: .PHONY ${_+_}@${ECHODIR} "===> ${_tool} (obj,build-tools)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ build-tools build-tools: build-tools_${_tool} .endfor .for _tool in \ ${_gcc_tools} build-tools_${_tool}: .PHONY ${_+_}@${ECHODIR} "===> ${_tool} (obj,all)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ all build-tools: build-tools_${_tool} .endfor # # kernel-tools: Build kernel-building tools # kernel-tools: .PHONY mkdir -p ${MAKEOBJDIRPREFIX}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${MAKEOBJDIRPREFIX}/usr >/dev/null # # cross-tools: All the tools needed to build the rest of the system after # we get done with the earlier stages. It is the last set of tools needed # to begin building the target binaries. # .if ${TARGET_ARCH} != ${MACHINE_ARCH} .if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "i386" _btxld= usr.sbin/btxld .endif .endif # Rebuild ctfconvert and ctfmerge to avoid difficult-to-diagnose failures # resulting from missing bug fixes or ELF Toolchain updates. .if ${MK_CDDL} != "no" _dtrace_tools= cddl/lib/libctf cddl/usr.bin/ctfconvert \ cddl/usr.bin/ctfmerge .endif # If we're given an XAS, don't build binutils. .if ${XAS:M/*} == "" .if ${MK_BINUTILS_BOOTSTRAP} != "no" _binutils= gnu/usr.bin/binutils .endif .if ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no" _elftctools= lib/libelftc \ lib/libpe \ usr.bin/elfcopy \ usr.bin/nm \ usr.bin/size \ usr.bin/strings # These are not required by the build, but can be useful for developers who # cross-build on a FreeBSD 10 host: _elftctools+= usr.bin/addr2line .endif .elif ${TARGET_ARCH} != ${MACHINE_ARCH} && ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no" # If cross-building with an external binutils we still need to build strip for # the target (for at least crunchide). 
_elftctools= lib/libelftc \ lib/libpe \ usr.bin/elfcopy .endif .if ${MK_CLANG_BOOTSTRAP} != "no" _clang= usr.bin/clang _clang_libs= lib/clang .endif .if ${MK_GCC_BOOTSTRAP} != "no" _cc= gnu/usr.bin/cc .endif .if ${MK_USB} != "no" _usb_tools= sys/boot/usb/tools .endif cross-tools: .MAKE .PHONY .for _tool in \ ${LOCAL_XTOOL_DIRS} \ ${_clang_libs} \ ${_clang} \ ${_binutils} \ ${_elftctools} \ ${_dtrace_tools} \ ${_cc} \ ${_btxld} \ ${_usb_tools} ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ all; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX} install .endfor NXBDESTDIR= ${OBJTREE}/nxb-bin NXBENV= MAKEOBJDIRPREFIX=${OBJTREE}/nxb \ TOOLS_PREFIX= \ INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${PATH}:${OBJTREE}/gperf_for_gcc/usr/bin NXBMAKE= ${NXBENV} ${MAKE} \ LLVM_TBLGEN=${NXBDESTDIR}/usr/bin/llvm-tblgen \ CLANG_TBLGEN=${NXBDESTDIR}/usr/bin/clang-tblgen \ MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH} \ MK_GDB=no MK_TESTS=no \ SSP_CFLAGS= \ MK_HTML=no NO_LINT=yes MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_DEBUG_FILES=no # native-xtools is the current target for qemu-user cross builds of ports # via poudriere and the imgact_binmisc kernel module. # For non-clang enabled targets that are still using the in tree gcc # we must build a gperf binary for one instance of its Makefiles. On # clang-enabled systems, the gperf binary is obsolete. native-xtools: .PHONY .if ${MK_GCC_BOOTSTRAP} != "no" mkdir -p ${OBJTREE}/gperf_for_gcc/usr/bin ${_+_}@${ECHODIR} "===> ${_gperf} (obj,all,install)"; \ cd ${.CURDIR}/${_gperf}; \ ${NXBMAKE} DIRPRFX=${_gperf}/ obj; \ ${NXBMAKE} DIRPRFX=${_gperf}/ all; \ ${NXBMAKE} DIRPRFX=${_gperf}/ DESTDIR=${OBJTREE}/gperf_for_gcc install .endif mkdir -p ${NXBDESTDIR}/bin ${NXBDESTDIR}/sbin ${NXBDESTDIR}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${NXBDESTDIR}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${NXBDESTDIR}/usr/include >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${NXBDESTDIR}/usr/lib >/dev/null .endif .for _tool in \ bin/cat \ bin/chmod \ bin/cp \ bin/csh \ bin/echo \ bin/expr \ bin/hostname \ bin/ln \ bin/ls \ bin/mkdir \ bin/mv \ bin/ps \ bin/realpath \ bin/rm \ bin/rmdir \ bin/sh \ bin/sleep \ ${_clang_tblgen} \ usr.bin/ar \ ${_binutils} \ ${_elftctools} \ ${_cc} \ ${_gcc_tools} \ ${_clang_libs} \ ${_clang} \ sbin/md5 \ sbin/sysctl \ gnu/usr.bin/diff \ usr.bin/awk \ usr.bin/basename \ usr.bin/bmake \ usr.bin/bzip2 \ usr.bin/cmp \ usr.bin/dirname \ usr.bin/env \ usr.bin/fetch \ usr.bin/find \ usr.bin/grep \ usr.bin/gzip \ usr.bin/id \ usr.bin/lex \ usr.bin/limits \ usr.bin/lorder \ usr.bin/mktemp \ usr.bin/mt \ usr.bin/patch \ + usr.bin/readelf \ usr.bin/sed \ usr.bin/sort \ usr.bin/tar \ usr.bin/touch \ usr.bin/tr \ usr.bin/true \ usr.bin/uniq \ usr.bin/unzip \ usr.bin/xargs \ usr.bin/xinstall \ usr.bin/xz \ usr.bin/yacc \ usr.sbin/chown ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${NXBMAKE} DIRPRFX=${_tool}/ obj; \ ${NXBMAKE} DIRPRFX=${_tool}/ all; \ ${NXBMAKE} DIRPRFX=${_tool}/ DESTDIR=${NXBDESTDIR} install .endfor # # hierarchy - ensure that all the needed directories are present # hierarchy hier: .MAKE .PHONY ${_+_}cd ${.CURDIR}/etc; ${HMAKE} distrib-dirs # # libraries - build all libraries, and install them under ${DESTDIR}. 
# # The list of libraries with dependents (${_prebuild_libs}) and their # interdependencies (__L) are built automatically by the # ${.CURDIR}/tools/make_libdeps.sh script. # libraries: .MAKE .PHONY ${_+_}cd ${.CURDIR}; \ ${MAKE} -f Makefile.inc1 _prereq_libs; \ ${MAKE} -f Makefile.inc1 _startup_libs; \ ${MAKE} -f Makefile.inc1 _prebuild_libs; \ ${MAKE} -f Makefile.inc1 _generic_libs # # static libgcc.a prerequisite for shared libc # _prereq_libs= lib/libcompiler_rt .if ${MK_SSP} != "no" _prereq_libs+= gnu/lib/libssp/libssp_nonshared .endif # These dependencies are not automatically generated: # # gnu/lib/csu, gnu/lib/libgcc, lib/csu and lib/libc must be built before # all shared libraries for ELF. # _startup_libs= gnu/lib/csu _startup_libs+= lib/csu _startup_libs+= lib/libcompiler_rt _startup_libs+= lib/libc _startup_libs+= lib/libc_nonshared .if ${MK_LIBCPLUSPLUS} != "no" _startup_libs+= lib/libcxxrt .endif .if ${MK_LLVM_LIBUNWIND} != "no" _prereq_libs+= lib/libgcc_eh lib/libgcc_s _startup_libs+= lib/libgcc_eh lib/libgcc_s lib/libgcc_s__L: lib/libc__L lib/libgcc_s__L: lib/libc_nonshared__L .if ${MK_LIBCPLUSPLUS} != "no" lib/libcxxrt__L: lib/libgcc_s__L .endif .else # MK_LLVM_LIBUNWIND == no _prereq_libs+= gnu/lib/libgcc _startup_libs+= gnu/lib/libgcc gnu/lib/libgcc__L: lib/libc__L gnu/lib/libgcc__L: lib/libc_nonshared__L .if ${MK_LIBCPLUSPLUS} != "no" lib/libcxxrt__L: gnu/lib/libgcc__L .endif .endif _prebuild_libs= ${_kerberos5_lib_libasn1} \ ${_kerberos5_lib_libhdb} \ ${_kerberos5_lib_libheimbase} \ ${_kerberos5_lib_libheimntlm} \ ${_libsqlite3} \ ${_kerberos5_lib_libheimipcc} \ ${_kerberos5_lib_libhx509} ${_kerberos5_lib_libkrb5} \ ${_kerberos5_lib_libroken} \ ${_kerberos5_lib_libwind} \ lib/libbz2 ${_libcom_err} lib/libcrypt \ lib/libelf lib/libexpat \ lib/libfigpar \ ${_lib_libgssapi} \ lib/libkiconv lib/libkvm lib/liblzma lib/libmd lib/libnv \ ${_lib_casper} \ lib/ncurses/ncurses lib/ncurses/ncursesw \ lib/libopie lib/libpam/libpam ${_lib_libthr} \ ${_lib_libradius} lib/libsbuf lib/libtacplus \ lib/libgeom \ ${_cddl_lib_libumem} ${_cddl_lib_libnvpair} \ ${_cddl_lib_libuutil} \ ${_cddl_lib_libavl} \ ${_cddl_lib_libzfs_core} \ ${_cddl_lib_libctf} \ lib/libutil lib/libpjdlog ${_lib_libypclnt} lib/libz lib/msun \ ${_secure_lib_libcrypto} ${_lib_libldns} \ ${_secure_lib_libssh} ${_secure_lib_libssl} .if ${MK_GNUCXX} != "no" _prebuild_libs+= gnu/lib/libstdc++ gnu/lib/libsupc++ gnu/lib/libstdc++__L: lib/msun__L gnu/lib/libsupc++__L: gnu/lib/libstdc++__L .endif .if ${MK_DIALOG} != "no" _prebuild_libs+= gnu/lib/libdialog gnu/lib/libdialog__L: lib/msun__L lib/ncurses/ncursesw__L .endif .if ${MK_LIBCPLUSPLUS} != "no" _prebuild_libs+= lib/libc++ .endif lib/libgeom__L: lib/libexpat__L lib/libkvm__L: lib/libelf__L .if ${MK_LIBTHR} != "no" _lib_libthr= lib/libthr .endif .if ${MK_RADIUS_SUPPORT} != "no" _lib_libradius= lib/libradius .endif .if ${MK_OFED} != "no" _ofed_lib= contrib/ofed/usr.lib _prebuild_libs+= contrib/ofed/usr.lib/libosmcomp _prebuild_libs+= contrib/ofed/usr.lib/libopensm _prebuild_libs+= contrib/ofed/usr.lib/libibcommon _prebuild_libs+= contrib/ofed/usr.lib/libibverbs _prebuild_libs+= contrib/ofed/usr.lib/libibumad contrib/ofed/usr.lib/libopensm__L: lib/libthr__L contrib/ofed/usr.lib/libosmcomp__L: lib/libthr__L contrib/ofed/usr.lib/libibumad__L: contrib/ofed/usr.lib/libibcommon__L .endif .if ${MK_CASPER} != "no" _lib_casper= lib/libcasper .endif lib/libpjdlog__L: lib/libutil__L lib/libcasper__L: lib/libnv__L lib/liblzma__L: lib/libthr__L _generic_libs= ${_cddl_lib} gnu/lib 
${_kerberos5_lib} lib ${_secure_lib} usr.bin/lex/lib ${_ofed_lib} .for _DIR in ${LOCAL_LIB_DIRS} .if exists(${.CURDIR}/${_DIR}/Makefile) && empty(_generic_libs:M${_DIR}) _generic_libs+= ${_DIR} .endif .endfor lib/libopie__L lib/libtacplus__L: lib/libmd__L .if ${MK_CDDL} != "no" _cddl_lib_libumem= cddl/lib/libumem _cddl_lib_libnvpair= cddl/lib/libnvpair _cddl_lib_libavl= cddl/lib/libavl _cddl_lib_libuutil= cddl/lib/libuutil _cddl_lib_libzfs_core= cddl/lib/libzfs_core _cddl_lib_libctf= cddl/lib/libctf _cddl_lib= cddl/lib cddl/lib/libzfs_core__L: cddl/lib/libnvpair__L cddl/lib/libzfs__L: lib/libgeom__L cddl/lib/libctf__L: lib/libz__L .endif # cddl/lib/libdtrace requires lib/libproc and lib/librtld_db; it's only built # on select architectures though (see cddl/lib/Makefile) .if ${MACHINE_CPUARCH} != "sparc64" _prebuild_libs+= lib/libprocstat lib/libproc lib/librtld_db lib/libprocstat__L: lib/libelf__L lib/libkvm__L lib/libutil__L lib/libproc__L: lib/libprocstat__L lib/librtld_db__L: lib/libprocstat__L .endif .if ${MK_CRYPT} != "no" .if ${MK_OPENSSL} != "no" _secure_lib_libcrypto= secure/lib/libcrypto _secure_lib_libssl= secure/lib/libssl lib/libradius__L secure/lib/libssl__L: secure/lib/libcrypto__L .if ${MK_LDNS} != "no" _lib_libldns= lib/libldns lib/libldns__L: secure/lib/libcrypto__L .endif .if ${MK_OPENSSH} != "no" _secure_lib_libssh= secure/lib/libssh secure/lib/libssh__L: lib/libz__L secure/lib/libcrypto__L lib/libcrypt__L .if ${MK_LDNS} != "no" secure/lib/libssh__L: lib/libldns__L .endif .if ${MK_GSSAPI} != "no" && ${MK_KERBEROS_SUPPORT} != "no" secure/lib/libssh__L: lib/libgssapi__L kerberos5/lib/libkrb5__L \ kerberos5/lib/libhx509__L kerberos5/lib/libasn1__L lib/libcom_err__L \ lib/libmd__L kerberos5/lib/libroken__L .endif .endif .endif _secure_lib= secure/lib .endif .if ${MK_KERBEROS} != "no" kerberos5/lib/libasn1__L: lib/libcom_err__L kerberos5/lib/libroken__L kerberos5/lib/libhdb__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ kerberos5/lib/libkrb5__L kerberos5/lib/libroken__L \ kerberos5/lib/libwind__L lib/libsqlite3__L kerberos5/lib/libheimntlm__L: secure/lib/libcrypto__L kerberos5/lib/libkrb5__L \ kerberos5/lib/libroken__L lib/libcom_err__L kerberos5/lib/libhx509__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ secure/lib/libcrypto__L kerberos5/lib/libroken__L kerberos5/lib/libwind__L kerberos5/lib/libkrb5__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ lib/libcrypt__L secure/lib/libcrypto__L kerberos5/lib/libhx509__L \ kerberos5/lib/libroken__L kerberos5/lib/libwind__L \ kerberos5/lib/libheimbase__L kerberos5/lib/libheimipcc__L kerberos5/lib/libroken__L: lib/libcrypt__L kerberos5/lib/libwind__L: kerberos5/lib/libroken__L lib/libcom_err__L kerberos5/lib/libheimbase__L: lib/libthr__L kerberos5/lib/libheimipcc__L: kerberos5/lib/libroken__L kerberos5/lib/libheimbase__L lib/libthr__L .endif lib/libsqlite3__L: lib/libthr__L .if ${MK_GSSAPI} != "no" _lib_libgssapi= lib/libgssapi .endif .if ${MK_KERBEROS} != "no" _kerberos5_lib= kerberos5/lib _kerberos5_lib_libasn1= kerberos5/lib/libasn1 _kerberos5_lib_libhdb= kerberos5/lib/libhdb _kerberos5_lib_libheimbase= kerberos5/lib/libheimbase _kerberos5_lib_libkrb5= kerberos5/lib/libkrb5 _kerberos5_lib_libhx509= kerberos5/lib/libhx509 _kerberos5_lib_libroken= kerberos5/lib/libroken _kerberos5_lib_libheimntlm= kerberos5/lib/libheimntlm _libsqlite3= lib/libsqlite3 _kerberos5_lib_libheimipcc= kerberos5/lib/libheimipcc _kerberos5_lib_libwind= kerberos5/lib/libwind _libcom_err= lib/libcom_err .endif .if ${MK_NIS} != "no" _lib_libypclnt= 
lib/libypclnt .endif .if ${MK_OPENSSL} == "no" lib/libradius__L: lib/libmd__L .endif lib/libproc__L: \ ${_cddl_lib_libctf:D${_cddl_lib_libctf}__L} lib/libelf__L lib/librtld_db__L lib/libutil__L .if ${MK_CXX} != "no" .if ${MK_LIBCPLUSPLUS} != "no" lib/libproc__L: lib/libcxxrt__L .else # This implies MK_GNUCXX != "no"; see lib/libproc lib/libproc__L: gnu/lib/libsupc++__L .endif .endif .for _lib in ${_prereq_libs} ${_lib}__PL: .PHONY .MAKE .if exists(${.CURDIR}/${_lib}) ${_+_}@${ECHODIR} "===> ${_lib} (obj,all,install)"; \ cd ${.CURDIR}/${_lib}; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; \ ${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \ DIRPRFX=${_lib}/ all; \ ${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \ DIRPRFX=${_lib}/ install .endif .endfor .for _lib in ${_startup_libs} ${_prebuild_libs} ${_generic_libs} ${_lib}__L: .PHONY .MAKE .if exists(${.CURDIR}/${_lib}) ${_+_}@${ECHODIR} "===> ${_lib} (obj,all,install)"; \ cd ${.CURDIR}/${_lib}; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ all; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ install .endif .endfor _prereq_libs: ${_prereq_libs:S/$/__PL/} _startup_libs: ${_startup_libs:S/$/__L/} _prebuild_libs: ${_prebuild_libs:S/$/__L/} _generic_libs: ${_generic_libs:S/$/__L/} # Enable SUBDIR_PARALLEL when not calling 'make all', unless called from # 'everything' with _PARALLEL_SUBDIR_OK set. This is because it is unlikely # that running 'make all' from the top-level, especially with a SUBDIR_OVERRIDE # or LOCAL_DIRS set, will have a reliable build if SUBDIRs are built in # parallel. This is safe for the world stage of buildworld though since it has # already built libraries in a proper order and installed includes into # WORLDTMP. Special handling is done for SUBDIR ordering for 'install*' to # avoid trashing a system if it crashes mid-install. .if !make(all) || defined(_PARALLEL_SUBDIR_OK) SUBDIR_PARALLEL= .endif .include .if make(check-old) || make(check-old-dirs) || \ make(check-old-files) || make(check-old-libs) || \ make(delete-old) || make(delete-old-dirs) || \ make(delete-old-files) || make(delete-old-libs) # # check for / delete old files section # .include "ObsoleteFiles.inc" OLD_LIBS_MESSAGE="Please be sure no application still uses those libraries, \ else you can not start such an application. Consult UPDATING for more \ information regarding how to cope with the removal/revision bump of a \ specific library." .if !defined(BATCH_DELETE_OLD_FILES) RM_I=-i .else RM_I=-v .endif delete-old-files: .PHONY @echo ">>> Removing old files (only deletes safe to delete libs)" # Ask for every old file if the user really wants to remove it. # It's annoying, but better safe than sorry. # NB: We cannot pass the list of OLD_FILES as a parameter because the # argument list will get too long. Using .for/.endfor make "loops" will make # the Makefile parser segfault. @exec 3<&0; \ cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \ rm ${RM_I} "${DESTDIR}/$${file}" <&3; \ fi; \ for ext in debug symbols; do \ if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \ "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \ <&3; \ fi; \ done; \ done # Remove catpages without corresponding manpages. @exec 3<&0; \ find ${DESTDIR}/usr/share/man/cat* ! 
-type d | \ sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \ while read catpage; do \ read manpage; \ if [ ! -e "$${manpage}" ]; then \ rm ${RM_I} $${catpage} <&3; \ fi; \ done @echo ">>> Old files removed" check-old-files: .PHONY @echo ">>> Checking for old files" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ echo "${DESTDIR}/$${file}"; \ fi; \ for ext in debug symbols; do \ if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \ fi; \ done; \ done # Check for catpages without corresponding manpages. @find ${DESTDIR}/usr/share/man/cat* ! -type d | \ sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \ while read catpage; do \ read manpage; \ if [ ! -e "$${manpage}" ]; then \ echo $${catpage}; \ fi; \ done delete-old-libs: .PHONY @echo ">>> Removing old libraries" @echo "${OLD_LIBS_MESSAGE}" | fmt @exec 3<&0; \ cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_LIBS | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \ rm ${RM_I} "${DESTDIR}/$${file}" <&3; \ fi; \ for ext in debug symbols; do \ if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \ "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \ <&3; \ fi; \ done; \ done @echo ">>> Old libraries removed" check-old-libs: .PHONY @echo ">>> Checking for old libraries" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_LIBS | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ echo "${DESTDIR}/$${file}"; \ fi; \ for ext in debug symbols; do \ if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \ fi; \ done; \ done delete-old-dirs: .PHONY @echo ">>> Removing old directories" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_DIRS | xargs -n1 | sort -r | \ while read dir; do \ if [ -d "${DESTDIR}/$${dir}" ]; then \ rmdir -v "${DESTDIR}/$${dir}" || true; \ elif [ -L "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ if [ -d "${DESTDIR}${DEBUGDIR}/$${dir}" ]; then \ rmdir -v "${DESTDIR}${DEBUGDIR}/$${dir}" || true; \ elif [ -L "${DESTDIR}${DEBUGDIR}/$${dir}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ done @echo ">>> Old directories removed" check-old-dirs: .PHONY @echo ">>> Checking for old directories" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_DIRS | xargs -n1 | \ while read dir; do \ if [ -d "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir}"; \ elif [ -L "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ if [ -d "${DESTDIR}${DEBUGDIR}/$${dir}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${dir}"; \ elif [ -L "${DESTDIR}${DEBUGDIR}/$${dir}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ done delete-old: delete-old-files delete-old-dirs .PHONY @echo "To remove old libraries run '${MAKE_CMD} delete-old-libs'." 
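# The check-old* targets below are the non-destructive counterparts of the
# delete-old* targets: they only report what would be removed.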
check-old: check-old-files check-old-libs check-old-dirs .PHONY @echo "To remove old files and directories run '${MAKE_CMD} delete-old'." @echo "To remove old libraries run '${MAKE_CMD} delete-old-libs'." .endif # # showconfig - show build configuration. # showconfig: .PHONY @(${MAKE} -n -f ${.CURDIR}/sys/conf/kern.opts.mk -V dummy -dg1; \ ${MAKE} -n -f ${.CURDIR}/share/mk/src.opts.mk -V dummy -dg1) 2>&1 | grep ^MK_ | sort -u .if !empty(KRNLOBJDIR) && !empty(KERNCONF) DTBOUTPUTPATH= ${KRNLOBJDIR}/${KERNCONF}/ .if !defined(FDT_DTS_FILE) || empty(FDT_DTS_FILE) .if exists(${KERNCONFDIR}/${KERNCONF}) FDT_DTS_FILE!= awk 'BEGIN {FS="="} /^makeoptions[[:space:]]+FDT_DTS_FILE/ {print $$2}' \ '${KERNCONFDIR}/${KERNCONF}' ; echo .endif .endif .endif .if !defined(DTBOUTPUTPATH) || !exists(${DTBOUTPUTPATH}) DTBOUTPUTPATH= ${.CURDIR} .endif # # Build 'standalone' Device Tree Blob # builddtb: .PHONY @PATH=${TMPPATH} MACHINE=${TARGET} \ ${.CURDIR}/sys/tools/fdt/make_dtb.sh ${.CURDIR}/sys \ "${FDT_DTS_FILE}" ${DTBOUTPUTPATH} ############### # cleanworld # In the following, the first 'rm' in a series will usually remove all # files and directories. If it does not, then there are probably some # files with file flags set, so this unsets them and tries the 'rm' a # second time. There are situations where this target will be cleaning # some directories via more than one method, but that duplication is # needed to correctly handle all the possible situations. Removing all # files without file flags set in the first 'rm' instance saves time, # because 'chflags' will need to operate on fewer files afterwards. # # It is expected that BW_CANONICALOBJDIR == the CANONICALOBJDIR as would be # created by bsd.obj.mk, except that we don't want to .include that file # in this makefile. 
# BW_CANONICALOBJDIR:=${OBJTREE}${.CURDIR} cleanworld: .PHONY .if exists(${BW_CANONICALOBJDIR}/) -rm -rf ${BW_CANONICALOBJDIR}/* -chflags -R 0 ${BW_CANONICALOBJDIR} rm -rf ${BW_CANONICALOBJDIR}/* .endif .if ${.CURDIR} == ${.OBJDIR} || ${.CURDIR}/obj == ${.OBJDIR} # To be safe in this case, fall back to a 'make cleandir' ${_+_}@cd ${.CURDIR}; ${MAKE} cleandir .endif .if defined(TARGET) && defined(TARGET_ARCH) .if ${TARGET} == ${MACHINE} && ${TARGET_ARCH} == ${MACHINE_ARCH} XDEV_CPUTYPE?=${CPUTYPE} .else XDEV_CPUTYPE?=${TARGET_CPUTYPE} .endif NOFUN=-DNO_FSCHG MK_HTML=no -DNO_LINT \ MK_MAN=no MK_NLS=no MK_PROFILE=no \ MK_KERBEROS=no MK_RESCUE=no MK_TESTS=no MK_WARNS=no \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ CPUTYPE=${XDEV_CPUTYPE} XDDIR=${TARGET_ARCH}-freebsd XDTP?=/usr/${XDDIR} .if ${XDTP:N/*} .error XDTP variable should be an absolute path .endif CDBENV=MAKEOBJDIRPREFIX=${MAKEOBJDIRPREFIX}/${XDDIR} \ INSTALL="sh ${.CURDIR}/tools/install.sh" CDENV= ${CDBENV} \ TOOLS_PREFIX=${XDTP} CD2CFLAGS=-isystem ${XDDESTDIR}/usr/include -L${XDDESTDIR}/usr/lib \ --sysroot=${XDDESTDIR}/ -B${XDDESTDIR}/usr/libexec \ -B${XDDESTDIR}/usr/bin -B${XDDESTDIR}/usr/lib CD2ENV=${CDENV} CC="${CC} ${CD2CFLAGS}" CXX="${CXX} ${CD2CFLAGS}" \ CPP="${CPP} ${CD2CFLAGS}" \ MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH} CDTMP= ${MAKEOBJDIRPREFIX}/${XDDIR}/${.CURDIR}/tmp CDMAKE=${CDENV} PATH=${CDTMP}/usr/bin:${PATH} ${MAKE} ${NOFUN} CD2MAKE=${CD2ENV} PATH=${CDTMP}/usr/bin:${XDDESTDIR}/usr/bin:${PATH} ${MAKE} ${NOFUN} XDDESTDIR=${DESTDIR}/${XDTP} .if !defined(OSREL) OSREL!= uname -r | sed -e 's/[-(].*//' .endif .ORDER: xdev-build xdev-install xdev-links xdev: xdev-build xdev-install .PHONY .ORDER: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools xdev-build: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools .PHONY _xb-worldtmp: .PHONY mkdir -p ${CDTMP}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${CDTMP}/usr >/dev/null _xb-bootstrap-tools: .PHONY .for _tool in \ ${_clang_tblgen} \ ${_gperf} ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ obj; \ ${CDMAKE} DIRPRFX=${_tool}/ all; \ ${CDMAKE} DIRPRFX=${_tool}/ DESTDIR=${CDTMP} install .endfor _xb-build-tools: .PHONY ${_+_}@cd ${.CURDIR}; \ ${CDBENV} ${MAKE} -f Makefile.inc1 ${NOFUN} build-tools _xb-cross-tools: .PHONY .for _tool in \ ${_binutils} \ ${_elftctools} \ usr.bin/ar \ ${_clang_libs} \ ${_clang} \ ${_cc} ${_+_}@${ECHODIR} "===> xdev ${_tool} (obj,all)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ obj; \ ${CDMAKE} DIRPRFX=${_tool}/ all .endfor _xi-mtree: .PHONY ${_+_}@${ECHODIR} "mtree populating ${XDDESTDIR}" mkdir -p ${XDDESTDIR} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.root.dist \ -p ${XDDESTDIR} >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${XDDESTDIR}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${XDDESTDIR}/usr/include >/dev/null .if defined(LIBCOMPAT) mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${XDDESTDIR}/usr >/dev/null .endif .if ${MK_TESTS} != "no" mkdir -p ${XDDESTDIR}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${XDDESTDIR}${TESTSBASE} >/dev/null .endif .ORDER: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries xdev-install: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries .PHONY _xi-cross-tools: .PHONY @echo "_xi-cross-tools" .for _tool in \ ${_binutils} \ ${_elftctools} \ usr.bin/ar \ ${_clang_libs} \ ${_clang} \ ${_cc} 
${_+_}@${ECHODIR} "===> xdev ${_tool} (install)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ install DESTDIR=${XDDESTDIR} .endfor _xi-includes: .PHONY ${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 includes \ DESTDIR=${XDDESTDIR} _xi-libraries: .PHONY ${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 libraries \ DESTDIR=${XDDESTDIR} xdev-links: .PHONY ${_+_}cd ${XDDESTDIR}/usr/bin; \ mkdir -p ../../../../usr/bin; \ for i in *; do \ ln -sf ../../${XDTP}/usr/bin/$$i \ ../../../../usr/bin/${XDDIR}-$$i; \ ln -sf ../../${XDTP}/usr/bin/$$i \ ../../../../usr/bin/${XDDIR}${OSREL}-$$i; \ done .else xdev xdev-build xdev-install xdev-links: .PHONY @echo "*** Error: Both TARGET and TARGET_ARCH must be defined for \"${.TARGET}\" target" .endif Index: projects/ipsec/lib/libc/aarch64/gen/setjmp.S =================================================================== --- projects/ipsec/lib/libc/aarch64/gen/setjmp.S (revision 313186) +++ projects/ipsec/lib/libc/aarch64/gen/setjmp.S (revision 313187) @@ -1,123 +1,123 @@ /*- * Copyright (c) 2014 Andrew Turner * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Andrew Turner * under sponsorship from the FreeBSD Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include ENTRY(setjmp) sub sp, sp, #16 stp x0, lr, [sp] /* Store the signal mask */ add x2, x0, #(_JB_SIGMASK * 8) /* oset */ mov x1, #0 /* set */ mov x0, #1 /* SIG_BLOCK */ bl sigprocmask ldp x0, lr, [sp] add sp, sp, #16 /* Store the magic value and stack pointer */ ldr x8, .Lmagic mov x9, sp stp x8, x9, [x0], #16 /* Store the general purpose registers and lr */ stp x19, x20, [x0], #16 stp x21, x22, [x0], #16 stp x23, x24, [x0], #16 stp x25, x26, [x0], #16 stp x27, x28, [x0], #16 stp x29, lr, [x0], #16 /* Store the vfp registers */ stp d8, d9, [x0], #16 stp d10, d11, [x0], #16 stp d12, d13, [x0], #16 stp d14, d15, [x0] /* Return value */ mov x0, #0 ret .align 3 .Lmagic: .quad _JB_MAGIC_SETJMP END(setjmp) ENTRY(longjmp) sub sp, sp, #32 stp x0, lr, [sp] str x1, [sp, #16] /* Restore the signal mask */ mov x2, #0 /* oset */ add x1, x0, #(_JB_SIGMASK * 8) /* set */ - mov x0, #3 /* SIG_BLOCK */ + mov x0, #3 /* SIG_SETMASK */ bl sigprocmask ldr x1, [sp, #16] ldp x0, lr, [sp] add sp, sp, #32 /* Check the magic value */ ldr x8, [x0], #8 ldr x9, .Lmagic cmp x8, x9 b.ne botch /* Restore the stack pointer */ ldr x8, [x0], #8 mov sp, x8 /* Restore the general purpose registers and lr */ ldp x19, x20, [x0], #16 ldp x21, x22, [x0], #16 ldp x23, x24, [x0], #16 ldp x25, x26, [x0], #16 ldp x27, x28, [x0], #16 ldp x29, lr, [x0], #16 /* Restore the vfp registers */ ldp d8, d9, [x0], #16 ldp d10, d11, [x0], #16 ldp d12, d13, [x0], #16 ldp d14, d15, [x0] /* Load the return value */ mov x0, x1 ret botch: bl _C_LABEL(longjmperror) bl _C_LABEL(abort) END(longjmp) Index: projects/ipsec/lib/libc/gen/getpeereid.3 =================================================================== --- projects/ipsec/lib/libc/gen/getpeereid.3 (revision 313186) +++ projects/ipsec/lib/libc/gen/getpeereid.3 (revision 313187) @@ -1,137 +1,137 @@ .\" .\" Copyright (c) 2001 Dima Dorfman. .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" -.Dd July 15, 2001 +.Dd February 3, 2017 .Dt GETPEEREID 3 .Os .Sh NAME .Nm getpeereid .Nd get the effective credentials of a UNIX-domain peer .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In sys/types.h .In unistd.h .Ft int .Fn getpeereid "int s" "uid_t *euid" "gid_t *egid" .Sh DESCRIPTION The .Fn getpeereid function returns the effective user and group IDs of the peer connected to a .Ux Ns -domain socket. The argument .Fa s must be a .Ux Ns -domain socket .Pq Xr unix 4 of type .Dv SOCK_STREAM on which either .Xr connect 2 or .Xr listen 2 -have been called. -The effective used ID is placed in +has been called. +The effective user ID is placed in .Fa euid , and the effective group ID in .Fa egid . .Pp The credentials returned to the .Xr listen 2 caller are those of its peer at the time it called .Xr connect 2 ; the credentials returned to the .Xr connect 2 caller are those of its peer at the time it called .Xr listen 2 . This mechanism is reliable; there is no way for either side to influence the credentials returned to its peer except by calling the appropriate system call (i.e., either .Xr connect 2 or .Xr listen 2 ) under different effective credentials. .Pp One common use of this routine is for a .Ux Ns -domain server to verify the credentials of its client. Likewise, the client can verify the credentials of the server. .Sh IMPLEMENTATION NOTES On .Fx , .Fn getpeereid is implemented in terms of the .Dv LOCAL_PEERCRED .Xr unix 4 socket option. .Sh RETURN VALUES .Rv -std getpeereid .Sh ERRORS The .Fn getpeereid function fails if: .Bl -tag -width Er .It Bq Er EBADF The argument .Fa s is not a valid descriptor. .It Bq Er ENOTSOCK The argument .Fa s is a file, not a socket. .It Bq Er ENOTCONN The argument .Fa s does not refer to a socket on which .Xr connect 2 or .Xr listen 2 have been called. .It Bq Er EINVAL The argument .Fa s does not refer to a socket of type .Dv SOCK_STREAM , or the kernel returned invalid data. .El .Sh SEE ALSO .Xr connect 2 , .Xr getpeername 2 , .Xr getsockname 2 , .Xr getsockopt 2 , .Xr listen 2 , .Xr unix 4 .Sh HISTORY The .Fn getpeereid function appeared in .Fx 4.6 . Index: projects/ipsec/lib/libc/sys/recv.2 =================================================================== --- projects/ipsec/lib/libc/sys/recv.2 (revision 313186) +++ projects/ipsec/lib/libc/sys/recv.2 (revision 313187) @@ -1,423 +1,376 @@ .\" Copyright (c) 1983, 1990, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 4. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" @(#)recv.2 8.3 (Berkeley) 2/21/94
.\" $FreeBSD$
.\"
-.Dd August 18, 2016
+.Dd February 3, 2017
.Dt RECV 2
.Os
.Sh NAME
.Nm recv ,
.Nm recvfrom ,
.Nm recvmsg ,
.Nm recvmmsg
.Nd receive message(s) from a socket
.Sh LIBRARY
.Lb libc
.Sh SYNOPSIS
.In sys/socket.h
.Ft ssize_t
.Fn recv "int s" "void *buf" "size_t len" "int flags"
.Ft ssize_t
.Fn recvfrom "int s" "void *buf" "size_t len" "int flags" "struct sockaddr * restrict from" "socklen_t * restrict fromlen"
.Ft ssize_t
.Fn recvmsg "int s" "struct msghdr *msg" "int flags"
.Ft ssize_t
.Fn recvmmsg "int s" "struct mmsghdr * restrict msgvec" "size_t vlen" "int flags" "const struct timespec * restrict timeout"
.Sh DESCRIPTION
The
.Fn recvfrom ,
.Fn recvmsg ,
and
.Fn recvmmsg
system calls are used to receive messages from a socket,
and may be used to receive data on a socket whether or not
it is connection-oriented.
.Pp
If
.Fa from
is not a null pointer
and the socket is not connection-oriented,
the source address of the message is filled in.
The
.Fa fromlen
argument is a value-result argument,
initialized to the size of the buffer associated with
.Fa from ,
and modified on return to indicate the actual size of the
address stored there.
.Pp
The
.Fn recv
function is normally used only on a
.Em connected
socket (see
.Xr connect 2 )
and is identical to
.Fn recvfrom
with a null pointer passed as its
.Fa from
argument.
.Pp
The
.Fn recvmmsg
function is used to receive multiple messages in a single call.
Their number is supplied by
.Fa vlen .
Received messages are placed in the buffers described by the
.Fa msgvec
vector.
The size of each received message is placed in the
.Fa msg_len
field of each element of the vector.
If
.Fa timeout
is NULL, the call blocks until data is available for each supplied
message buffer.
Otherwise it waits for data for the specified amount of time.
If the timeout expires and no data has been received,
a value of 0 is returned.
The
.Xr ppoll 2
system call is used to implement the timeout mechanism,
before the first receive is performed.
.Pp
The
.Fn recv ,
.Fn recvfrom
and
.Fn recvmsg
system calls return the length of the message on successful completion,
whereas
.Fn recvmmsg
returns the number of received messages.
If a message is too long to fit in the supplied buffer,
excess bytes may be discarded depending on the type of socket
the message is received from (see
.Xr socket 2 ) .
.Pp
If no messages are available at the socket, the
receive call waits for a message to arrive, unless
the socket is non-blocking (see
.Xr fcntl 2 )
in which case the value
\-1
is returned and the global variable
.Va errno
is set to
.Er EAGAIN .
The receive calls except
.Fn recvmmsg
normally return any data available,
up to the requested amount,
rather than waiting for receipt of the full amount requested;
this behavior is affected by the socket-level options
.Dv SO_RCVLOWAT
and
.Dv SO_RCVTIMEO
described in
.Xr getsockopt 2 .
The
.Fn recvmmsg
function implements this behaviour for each message in the vector.
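.Pp
The following fragment is a minimal sketch of a batched receive;
the socket
.Fa s ,
the batch size, and the one-second timeout are assumptions made for
the example.
.Bd -literal
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

#define VLEN	8
#define BUFSZ	1500

static char bufs[VLEN][BUFSZ];

ssize_t
drain_batch(int s)
{
	struct mmsghdr msgvec[VLEN];
	struct iovec iov[VLEN];
	struct timespec timeout = { 1, 0 };	/* wait at most one second */
	ssize_t n;
	int i;

	memset(msgvec, 0, sizeof(msgvec));
	for (i = 0; i < VLEN; i++) {
		iov[i].iov_base = bufs[i];
		iov[i].iov_len = BUFSZ;
		msgvec[i].msg_hdr.msg_iov = &iov[i];
		msgvec[i].msg_hdr.msg_iovlen = 1;
	}
	/* Receive up to VLEN messages; msg_len is set per message. */
	n = recvmmsg(s, msgvec, VLEN, 0, &timeout);
	if (n > 0)
		for (i = 0; i < n; i++)
			printf("message %d: %zd bytes\en", i,
			    (ssize_t)msgvec[i].msg_len);
	return (n);
}
.Ed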
.Pp The .Xr select 2 system call may be used to determine when more data arrives. .Pp The .Fa flags argument to a .Fn recv function is formed by .Em or Ap ing one or more of the values: .Bl -column ".Dv MSG_CMSG_CLOEXEC" -offset indent .It Dv MSG_OOB Ta process out-of-band data .It Dv MSG_PEEK Ta peek at incoming message .It Dv MSG_WAITALL Ta wait for full request or error .It Dv MSG_DONTWAIT Ta do not block .It Dv MSG_CMSG_CLOEXEC Ta set received fds close-on-exec .It Dv MSG_WAITFORONE Ta do not block after receiving the first message (only for .Fn recvmmsg ) .El .Pp The .Dv MSG_OOB flag requests receipt of out-of-band data that would not be received in the normal data stream. Some protocols place expedited data at the head of the normal data queue, and thus this flag cannot be used with such protocols. The .Dv MSG_PEEK flag causes the receive operation to return data from the beginning of the receive queue without removing that data from the queue. Thus, a subsequent receive call will return the same data. The .Dv MSG_WAITALL flag requests that the operation block until the full request is satisfied. However, the call may still return less data than requested if a signal is caught, an error or disconnect occurs, or the next data to be received is of a different type than that returned. The .Dv MSG_DONTWAIT flag requests the call to return when it would block otherwise. If no data is available, .Va errno is set to .Er EAGAIN . This flag is not available in strict .Tn ANSI or C99 compilation mode. The .Dv MSG_WAITFORONE flag sets MSG_DONTWAIT after the first message has been received. This flag is only relevant for .Fn recvmmsg . .Pp The .Fn recvmsg system call uses a .Fa msghdr structure to minimize the number of directly supplied arguments. This structure has the following form, as defined in .In sys/socket.h : .Bd -literal struct msghdr { void *msg_name; /* optional address */ socklen_t msg_namelen; /* size of address */ struct iovec *msg_iov; /* scatter/gather array */ int msg_iovlen; /* # elements in msg_iov */ void *msg_control; /* ancillary data, see below */ socklen_t msg_controllen;/* ancillary data buffer len */ int msg_flags; /* flags on received message */ }; .Ed .Pp Here .Fa msg_name and .Fa msg_namelen specify the destination address if the socket is unconnected; .Fa msg_name may be given as a null pointer if no names are desired or required. The .Fa msg_iov and .Fa msg_iovlen arguments describe scatter gather locations, as discussed in .Xr read 2 . The .Fa msg_control argument, which has length .Fa msg_controllen , points to a buffer for other protocol control related messages or other miscellaneous ancillary data. The messages are of the form: .Bd -literal struct cmsghdr { socklen_t cmsg_len; /* data byte count, including hdr */ int cmsg_level; /* originating protocol */ int cmsg_type; /* protocol-specific type */ /* followed by u_char cmsg_data[]; */ }; .Ed .Pp As an example, one could use this to learn of changes in the data-stream in XNS/SPP, or in ISO, to obtain user-connection-request data by requesting a .Fn recvmsg with no data buffer provided immediately after an .Fn accept system call. .Pp -Open file descriptors are now passed as ancillary data for +With .Dv AF_UNIX -domain sockets, with -.Fa cmsg_level -set to -.Dv SOL_SOCKET -and -.Fa cmsg_type -set to -.Dv SCM_RIGHTS . -The close-on-exec flag on received descriptors is set according to the -.Dv MSG_CMSG_CLOEXEC -flag passed to -.Fn recvmsg . 
-.Pp -Process credentials can also be passed as ancillary data for -.Dv AF_UNIX -domain sockets using a -.Fa cmsg_type -of -.Dv SCM_CREDS . -In this case, -.Fa cmsg_data -should be a structure of type -.Fa cmsgcred , -which is defined in -.In sys/socket.h -as follows: -.Bd -literal -struct cmsgcred { - pid_t cmcred_pid; /* PID of sending process */ - uid_t cmcred_uid; /* real UID of sending process */ - uid_t cmcred_euid; /* effective UID of sending process */ - gid_t cmcred_gid; /* real GID of sending process */ - short cmcred_ngroups; /* number or groups */ - gid_t cmcred_groups[CMGROUP_MAX]; /* groups */ -}; -.Ed -.Pp -If a sender supplies ancillary data with enough space for the above struct -tagged as -.Dv SCM_CREDS -control message type to the -.Fn sendmsg -system call, then kernel will fill in the credential information of the -sending process and deliver it to the receiver. -Since receiver usually has no control over a sender, this method of retrieving -credential information isn't reliable. -For reliable retrieval of remote side credentials it is advised to use the -.Dv LOCAL_CREDS -socket option on the receiving socket. +domain sockets, ancillary data can be used to pass file descriptors and +process credentials. See .Xr unix 4 for details. .Pp The .Fa msg_flags field is set on return according to the message received. .Dv MSG_EOR indicates end-of-record; the data returned completed a record (generally used with sockets of type .Dv SOCK_SEQPACKET ) . .Dv MSG_TRUNC indicates that the trailing portion of a datagram was discarded because the datagram was larger than the buffer supplied. .Dv MSG_CTRUNC indicates that some control data were discarded due to lack of space in the buffer for ancillary data. .Dv MSG_OOB is returned to indicate that expedited or out-of-band data were received. .Pp The .Fn recvmmsg system call uses the .Fa mmsghdr structure, defined as follows in the .In sys/socket.h header : .Bd -literal struct mmsghdr { struct msghdr msg_hdr; /* message header */ ssize_t msg_len; /* message length */ }; .Ed .Pp On data reception the .Fa msg_len field is updated to the length of the received message. .Sh RETURN VALUES These calls except .Fn recvmmsg return the number of bytes received. .Fn recvmmsg returns the number of messages received. A value of -1 is returned if an error occurred. .Sh ERRORS The calls fail if: .Bl -tag -width Er .It Bq Er EBADF The argument .Fa s is an invalid descriptor. .It Bq Er ECONNRESET The remote socket end is forcibly closed. .It Bq Er ENOTCONN The socket is associated with a connection-oriented protocol and has not been connected (see .Xr connect 2 and .Xr accept 2 ) . .It Bq Er ENOTSOCK The argument .Fa s does not refer to a socket. .It Bq Er EMSGSIZE The .Fn recvmsg system call was used to receive rights (file descriptors) that were in flight on the connection. However, the receiving program did not have enough free file descriptor slots to accept them. In this case the descriptors are closed, any pending data can be returned by another call to .Fn recvmsg . .It Bq Er EAGAIN The socket is marked non-blocking and the receive operation would block, or a receive timeout had been set and the timeout expired before data were received. .It Bq Er EINTR The receive was interrupted by delivery of a signal before any data were available. .It Bq Er EFAULT The receive buffer pointer(s) point outside the process's address space. 
.El .Sh SEE ALSO .Xr fcntl 2 , .Xr getsockopt 2 , .Xr read 2 , .Xr select 2 , .Xr socket 2 , .Xr unix 4 .Sh HISTORY The .Fn recv function appeared in .Bx 4.2 . The .Fn recvmmsg function appeared in .Fx 11.0 . Index: projects/ipsec/sbin/resolvconf/Makefile =================================================================== --- projects/ipsec/sbin/resolvconf/Makefile (revision 313186) +++ projects/ipsec/sbin/resolvconf/Makefile (revision 313187) @@ -1,39 +1,39 @@ # $FreeBSD$ PACKAGE=runtime DIST= ${.CURDIR}/../../contrib/openresolv .PATH: ${DIST} SCRIPTS= resolvconf FILES= libc dnsmasq named pdnsd pdns_recursor unbound FILESDIR= /libexec/resolvconf MAN= resolvconf.conf.5 resolvconf.8 CLEANFILES= ${SCRIPTS} ${FILES} ${MAN} SYSCONFDIR= /etc RCDIR= ${SYSCONFDIR}/rc.d VARDIR= /var/run/resolvconf SBINDIR= /sbin # We don't assume to restart the services in /sbin. So, though # our service(8) is in /usr/sbin, we can use it, here. -CMD1= \1 onestatus >/dev/null 2>\&1 -CMD2= \1 restart -RESTARTCMD= /usr/sbin/service ${CMD1} \&\& /usr/sbin/service ${CMD2} +CMD1= \\$$1 onestatus >/dev/null 2>\&1 +CMD2= \\$$1 restart +RESTARTCMD= "/usr/sbin/service ${CMD1} \&\& /usr/sbin/service ${CMD2}" .for f in ${SCRIPTS} ${FILES} ${MAN} ${f}: ${f}.in sed -e 's:@SYSCONFDIR@:${SYSCONFDIR}:g' \ -e 's:@LIBEXECDIR@:${FILESDIR}:g' \ -e 's:@VARDIR@:${VARDIR}:g' \ - -e 's:@RESTARTCMD \(.*\)@:${RESTARTCMD}:g' \ + -e 's:@RESTARTCMD@:${RESTARTCMD}:g' \ -e 's:@RCDIR@:${RCDIR}:g' \ -e 's:@SBINDIR@:${SBINDIR}:g' \ -e 's: vpn : ng[0-9]*&:g' \ ${DIST}/$@.in > $@ .endfor .include Index: projects/ipsec/share/man/man4/unix.4 =================================================================== --- projects/ipsec/share/man/man4/unix.4 (revision 313186) +++ projects/ipsec/share/man/man4/unix.4 (revision 313187) @@ -1,300 +1,363 @@ .\" Copyright (c) 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .\" @(#)unix.4 8.1 (Berkeley) 6/9/93 .\" $FreeBSD$ .\" -.Dd March 19, 2013 +.Dd February 3, 2017 .Dt UNIX 4 .Os .Sh NAME .Nm unix .Nd UNIX-domain protocol family .Sh SYNOPSIS .In sys/types.h .In sys/un.h .Sh DESCRIPTION The .Ux Ns -domain protocol family is a collection of protocols that provides local (on-machine) interprocess communication through the normal .Xr socket 2 mechanisms. The .Ux Ns -domain family supports the .Dv SOCK_STREAM , .Dv SOCK_SEQPACKET , and .Dv SOCK_DGRAM socket types and uses file system pathnames for addressing. .Sh ADDRESSING .Ux Ns -domain addresses are variable-length file system pathnames of at most 104 characters. The include file .In sys/un.h defines this address: .Bd -literal -offset indent struct sockaddr_un { u_char sun_len; u_char sun_family; char sun_path[104]; }; .Ed .Pp Binding a name to a .Ux Ns -domain socket with .Xr bind 2 causes a socket file to be created in the file system. This file is .Em not removed when the socket is closed \(em .Xr unlink 2 must be used to remove the file. .Pp The length of .Ux Ns -domain address, required by .Xr bind 2 and .Xr connect 2 , can be calculated by the macro .Fn SUN_LEN defined in .In sys/un.h . The .Va sun_path field must be terminated by a .Dv NUL character to be used with .Fn SUN_LEN , but the terminating .Dv NUL is .Em not part of the address. .Pp The .Ux Ns -domain protocol family does not support broadcast addressing or any form of .Dq wildcard matching on incoming messages. All addresses are absolute- or relative-pathnames of other .Ux Ns -domain sockets. Normal file system access-control mechanisms are also applied when referencing pathnames; e.g., the destination of a .Xr connect 2 or .Xr sendto 2 must be writable. -.Sh PASSING FILE DESCRIPTORS +.Sh CONTROL MESSAGES The .Ux Ns -domain sockets support the communication of .Ux -file descriptors through the use of the +file descriptors and process credentials through the use of the .Va msg_control field in the .Fa msg argument to .Xr sendmsg 2 and .Xr recvmsg 2 . -.Pp -Any valid descriptor may be sent in a message. -The file descriptor(s) to be passed are described using a +The items to be passed are described using a .Vt "struct cmsghdr" that is defined in the include file .In sys/socket.h . -The type of the message is +.Pp +To send file descriptors, the type of the message is .Dv SCM_RIGHTS , and the data portion of the messages is an array of integers representing the file descriptors to be passed. The number of descriptors being passed is defined by the length field of the message; the length field is the sum of the size of the header plus the size of the array of file descriptors. .Pp The received descriptor is a .Em duplicate of the sender's descriptor, as if it were created via .Li dup(fd) or .Li fcntl(fd, F_DUPFD_CLOEXEC, 0) depending on whether .Dv MSG_CMSG_CLOEXEC is passed in the .Xr recvmsg 2 call. Descriptors that are awaiting delivery, or that are purposely not received, are automatically closed by the system when the destination socket is closed. 
+.Pp +Credentials of the sending process can be transmitted explicitly using a +control message of type +.Dv SCM_CREDS +with a data portion of type +.Vt "struct cmsgcred" , +defined in +.In sys/socket.h +as follows: +.Bd -literal +struct cmsgcred { + pid_t cmcred_pid; /* PID of sending process */ + uid_t cmcred_uid; /* real UID of sending process */ + uid_t cmcred_euid; /* effective UID of sending process */ + gid_t cmcred_gid; /* real GID of sending process */ + short cmcred_ngroups; /* number of groups */ + gid_t cmcred_groups[CMGROUP_MAX]; /* groups */ +}; +.Ed +.Pp +The sender should pass a zeroed buffer which will be filled in by the system. +.Pp +The group list is truncated to at most +.Dv CMGROUP_MAX +GIDs. +.Pp +The process ID +.Fa cmcred_pid +should not be looked up (such as via the +.Dv KERN_PROC_PID +sysctl) for making security decisions. +The sending process could have exited and its process ID already been +reused for a new process. .Sh SOCKET OPTIONS .Tn UNIX domain sockets support a number of socket options which can be set with .Xr setsockopt 2 and tested with .Xr getsockopt 2 : .Bl -tag -width ".Dv LOCAL_CONNWAIT" .It Dv LOCAL_CREDS This option may be enabled on .Dv SOCK_DGRAM , .Dv SOCK_SEQPACKET , or a .Dv SOCK_STREAM socket. This option provides a mechanism for the receiver to -receive the credentials of the process as a +receive the credentials of the process calling +.Xr write 2 , +.Xr send 2 , +.Xr sendto 2 +or +.Xr sendmsg 2 +as a .Xr recvmsg 2 control message. The .Va msg_control field in the .Vt msghdr structure points to a buffer that contains a .Vt cmsghdr structure followed by a variable length .Vt sockcred structure, defined in .In sys/socket.h as follows: .Bd -literal struct sockcred { uid_t sc_uid; /* real user id */ uid_t sc_euid; /* effective user id */ gid_t sc_gid; /* real group id */ gid_t sc_egid; /* effective group id */ int sc_ngroups; /* number of supplemental groups */ gid_t sc_groups[1]; /* variable length */ }; .Ed .Pp +The current implementation truncates the group list to at most +.Dv CMGROUP_MAX +groups. +.Pp The .Fn SOCKCREDSIZE macro computes the size of the .Vt sockcred structure for a specified number of groups. The .Vt cmsghdr fields have the following values: .Bd -literal cmsg_len = CMSG_LEN(SOCKCREDSIZE(ngroups)) cmsg_level = SOL_SOCKET cmsg_type = SCM_CREDS .Ed .Pp On .Dv SOCK_STREAM and .Dv SOCK_SEQPACKET sockets credentials are passed only on the first read from a socket, -then system clears the option on socket. +then the system clears the option on the socket. +.Pp +This option and the above explicit +.Vt "struct cmsgcred" +both use the same value +.Dv SCM_CREDS +but incompatible control messages. +If this option is enabled and the sender attached a +.Dv SCM_CREDS +control message with a +.Vt "struct cmsgcred" , +it will be discarded and a +.Vt "struct sockcred" +will be included. +.Pp +Many setuid programs will +.Xr write 2 +data at least partially controlled by the invoker, +such as error messages. +Therefore, a message accompanied by a particular +.Fa sc_euid +value should not be trusted as being from that user. .It Dv LOCAL_CONNWAIT Used with .Dv SOCK_STREAM sockets, this option causes the .Xr connect 2 function to block until .Xr accept 2 has been called on the listening socket. .It Dv LOCAL_PEERCRED Requested via .Xr getsockopt 2 on a .Dv SOCK_STREAM socket returns credentials of the remote side. 
The credentials arrive in the form of a filled-in
.Vt xucred
structure, defined in
.In sys/ucred.h
as follows:
.Bd -literal
struct xucred {
	u_int	cr_version;		/* structure layout version */
	uid_t	cr_uid;			/* effective user id */
	short	cr_ngroups;		/* number of groups */
	gid_t	cr_groups[XU_NGROUPS];	/* groups */
};
.Ed
The
.Va cr_version
field should be checked against the
.Dv XUCRED_VERSION
define.
.Pp
The credentials presented to the server (the
.Xr listen 2
caller) are those of the client when it called
.Xr connect 2 ;
the credentials presented to the client (the
.Xr connect 2
caller) are those of the server when it called
.Xr listen 2 .
This mechanism is reliable; there is no way for either party to
influence the credentials presented to its peer except by calling the
appropriate system call (e.g.,
.Xr connect 2
or
.Xr listen 2 )
under different effective credentials.
.Pp
To reliably obtain peer credentials on a
.Dv SOCK_DGRAM
socket, refer to the
.Dv LOCAL_CREDS
socket option.
.El
.Sh SEE ALSO
.Xr connect 2 ,
.Xr dup 2 ,
.Xr fcntl 2 ,
.Xr getsockopt 2 ,
.Xr listen 2 ,
.Xr recvmsg 2 ,
.Xr sendto 2 ,
.Xr setsockopt 2 ,
.Xr socket 2 ,
.Xr intro 4
.Rs
.%T "An Introductory 4.3 BSD Interprocess Communication Tutorial"
.%B PS1
.%N 7
.Re
.Rs
.%T "An Advanced 4.3 BSD Interprocess Communication Tutorial"
.%B PS1
.%N 8
.Re
Index: projects/ipsec/sys/arm/include/asmacros.h
===================================================================
--- projects/ipsec/sys/arm/include/asmacros.h	(revision 313186)
+++ projects/ipsec/sys/arm/include/asmacros.h	(revision 313187)
@@ -1,63 +1,63 @@
/*-
 * Copyright (c) 2012 Olivier Houchard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _MACHINE_ASMACROS_H_
#define _MACHINE_ASMACROS_H_

#include

#ifdef _KERNEL
#ifdef LOCORE

-#ifdef _ARM_ARCH_6
+#if __ARM_ARCH >= 6
#define GET_CURTHREAD_PTR(tmp) \
	mrc	p15, 0, tmp, c13, c0, 4
#else
#define GET_CURTHREAD_PTR(tmp) \
	ldr	tmp, =_C_LABEL(__pcpu);\
	ldr	tmp, [tmp, #PC_CURTHREAD]
#endif

#define ELFNOTE(section, type, vendor, desctype, descdata...)	\
	.pushsection section	;				\
	.balign 4		;				\
	.long 2f - 1f		/* namesz */	;		\
	.long 4f - 3f		/* descsz */	;		\
	.long type		/* type */	;		\
1:	.asciz vendor		/* vendor name */ ;		\
2:	.balign 4		;				\
3:	desctype descdata	/* node */	;		\
4:	.balign 4		;				\
	.popsection

#endif /* LOCORE */
#endif /* _KERNEL */
#endif /* !_MACHINE_ASMACROS_H_ */
Index: projects/ipsec/sys/arm/include/atomic-v4.h
===================================================================
--- projects/ipsec/sys/arm/include/atomic-v4.h	(revision 313186)
+++ projects/ipsec/sys/arm/include/atomic-v4.h	(revision 313187)
@@ -1,535 +1,538 @@
/* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */

/*-
 * Copyright (C) 2003-2004 Olivier Houchard
 * Copyright (C) 1994-1997 Mark Brinicombe
 * Copyright (C) 1994 Brini
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Brini.
 * 4. The name of Brini may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* * $FreeBSD$ */ #ifndef _MACHINE_ATOMIC_V4_H_ #define _MACHINE_ATOMIC_V4_H_ #ifndef _MACHINE_ATOMIC_H_ #error Do not include this file directly, use #endif #if __ARM_ARCH <= 5 #define isb() __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory") #define dsb() __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory") #define dmb() dsb() #else #error Only use this file with ARMv5 and earlier #endif #define mb() dmb() #define wmb() dmb() #define rmb() dmb() #define __with_interrupts_disabled(expr) \ do { \ u_int cpsr_save, tmp; \ \ __asm __volatile( \ "mrs %0, cpsr;" \ "orr %1, %0, %2;" \ "msr cpsr_fsxc, %1;" \ : "=r" (cpsr_save), "=r" (tmp) \ : "I" (PSR_I | PSR_F) \ : "cc" ); \ (expr); \ __asm __volatile( \ "msr cpsr_fsxc, %0" \ : /* no output */ \ : "r" (cpsr_save) \ : "cc" ); \ } while(0) static __inline uint32_t __swp(uint32_t val, volatile uint32_t *ptr) { __asm __volatile("swp %0, %2, [%3]" : "=&r" (val), "=m" (*ptr) : "r" (val), "r" (ptr), "m" (*ptr) : "memory"); return (val); } #ifdef _KERNEL #define ARM_HAVE_ATOMIC64 static __inline void atomic_add_32(volatile u_int32_t *p, u_int32_t val) { __with_interrupts_disabled(*p += val); } static __inline void atomic_add_64(volatile u_int64_t *p, u_int64_t val) { __with_interrupts_disabled(*p += val); } static __inline void atomic_clear_32(volatile uint32_t *address, uint32_t clearmask) { __with_interrupts_disabled(*address &= ~clearmask); } static __inline void atomic_clear_64(volatile uint64_t *address, uint64_t clearmask) { __with_interrupts_disabled(*address &= ~clearmask); } static __inline int atomic_fcmpset_32(volatile u_int32_t *p, volatile u_int32_t *cmpval, volatile u_int32_t newval) { u_int32_t ret; __with_interrupts_disabled( { ret = *p; if (*p == *cmpval) { *p = newval; ret = 1; } else { *cmpval = *p; ret = 0; } }); return (ret); } static __inline int atomic_fcmpset_64(volatile u_int64_t *p, volatile u_int64_t *cmpval, volatile u_int64_t newval) { u_int64_t ret; __with_interrupts_disabled( { if (*p == *cmpval) { *p = newval; ret = 1; } else { *cmpval = *p; ret = 0; } }); return (ret); } static __inline u_int32_t atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval) { int ret; __with_interrupts_disabled( { if (*p == cmpval) { *p = newval; ret = 1; } else { ret = 0; } }); return (ret); } static __inline u_int64_t atomic_cmpset_64(volatile u_int64_t *p, volatile u_int64_t cmpval, volatile u_int64_t newval) { int ret; __with_interrupts_disabled( { if (*p == cmpval) { *p = newval; ret = 1; } else { ret = 0; } }); return (ret); } static __inline uint32_t atomic_fetchadd_32(volatile uint32_t *p, uint32_t v) { uint32_t value; __with_interrupts_disabled( { value = *p; *p += v; }); return (value); } static __inline uint64_t atomic_fetchadd_64(volatile uint64_t *p, uint64_t v) { uint64_t value; __with_interrupts_disabled( { value = *p; *p += v; }); return (value); } static __inline uint64_t atomic_load_64(volatile uint64_t *p) { uint64_t value; __with_interrupts_disabled(value = *p); return (value); } static __inline void atomic_set_32(volatile uint32_t *address, uint32_t setmask) { __with_interrupts_disabled(*address |= setmask); } static __inline void atomic_set_64(volatile uint64_t *address, uint64_t setmask) { __with_interrupts_disabled(*address |= setmask); } static __inline void atomic_store_64(volatile uint64_t *p, uint64_t value) { __with_interrupts_disabled(*p = value); } static __inline void atomic_subtract_32(volatile u_int32_t *p, u_int32_t val) { 
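	/* As above: a plain read-modify-write made atomic by masking interrupts. */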
__with_interrupts_disabled(*p -= val); } static __inline void atomic_subtract_64(volatile u_int64_t *p, u_int64_t val) { __with_interrupts_disabled(*p -= val); } #else /* !_KERNEL */ static __inline void atomic_add_32(volatile u_int32_t *p, u_int32_t val) { int start, ras_start = ARM_RAS_START; __asm __volatile("1:\n" "adr %1, 1b\n" "str %1, [%0]\n" "adr %1, 2f\n" "str %1, [%0, #4]\n" "ldr %1, [%2]\n" "add %1, %1, %3\n" "str %1, [%2]\n" "2:\n" "mov %1, #0\n" "str %1, [%0]\n" "mov %1, #0xffffffff\n" "str %1, [%0, #4]\n" : "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val) : : "memory"); } static __inline void atomic_clear_32(volatile uint32_t *address, uint32_t clearmask) { int start, ras_start = ARM_RAS_START; __asm __volatile("1:\n" "adr %1, 1b\n" "str %1, [%0]\n" "adr %1, 2f\n" "str %1, [%0, #4]\n" "ldr %1, [%2]\n" "bic %1, %1, %3\n" "str %1, [%2]\n" "2:\n" "mov %1, #0\n" "str %1, [%0]\n" "mov %1, #0xffffffff\n" "str %1, [%0, #4]\n" : "+r" (ras_start), "=r" (start), "+r" (address), "+r" (clearmask) : : "memory"); } static __inline u_int32_t atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval) { register int done, ras_start = ARM_RAS_START; __asm __volatile("1:\n" "adr %1, 1b\n" "str %1, [%0]\n" "adr %1, 2f\n" "str %1, [%0, #4]\n" "ldr %1, [%2]\n" "cmp %1, %3\n" "streq %4, [%2]\n" "2:\n" "mov %1, #0\n" "str %1, [%0]\n" "mov %1, #0xffffffff\n" "str %1, [%0, #4]\n" "moveq %1, #1\n" "movne %1, #0\n" : "+r" (ras_start), "=r" (done) ,"+r" (p), "+r" (cmpval), "+r" (newval) : : "cc", "memory"); return (done); } static __inline uint32_t atomic_fetchadd_32(volatile uint32_t *p, uint32_t v) { uint32_t start, tmp, ras_start = ARM_RAS_START; __asm __volatile("1:\n" "adr %1, 1b\n" "str %1, [%0]\n" "adr %1, 2f\n" "str %1, [%0, #4]\n" "ldr %1, [%3]\n" "mov %2, %1\n" "add %2, %2, %4\n" "str %2, [%3]\n" "2:\n" "mov %2, #0\n" "str %2, [%0]\n" "mov %2, #0xffffffff\n" "str %2, [%0, #4]\n" : "+r" (ras_start), "=r" (start), "=r" (tmp), "+r" (p), "+r" (v) : : "memory"); return (start); } static __inline void atomic_set_32(volatile uint32_t *address, uint32_t setmask) { int start, ras_start = ARM_RAS_START; __asm __volatile("1:\n" "adr %1, 1b\n" "str %1, [%0]\n" "adr %1, 2f\n" "str %1, [%0, #4]\n" "ldr %1, [%2]\n" "orr %1, %1, %3\n" "str %1, [%2]\n" "2:\n" "mov %1, #0\n" "str %1, [%0]\n" "mov %1, #0xffffffff\n" "str %1, [%0, #4]\n" : "+r" (ras_start), "=r" (start), "+r" (address), "+r" (setmask) : : "memory"); } static __inline void atomic_subtract_32(volatile u_int32_t *p, u_int32_t val) { int start, ras_start = ARM_RAS_START; __asm __volatile("1:\n" "adr %1, 1b\n" "str %1, [%0]\n" "adr %1, 2f\n" "str %1, [%0, #4]\n" "ldr %1, [%2]\n" "sub %1, %1, %3\n" "str %1, [%2]\n" "2:\n" "mov %1, #0\n" "str %1, [%0]\n" "mov %1, #0xffffffff\n" "str %1, [%0, #4]\n" : "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val) : : "memory"); } #endif /* _KERNEL */ static __inline uint32_t atomic_readandclear_32(volatile u_int32_t *p) { return (__swp(0, p)); } static __inline uint32_t atomic_swap_32(volatile u_int32_t *p, u_int32_t v) { return (__swp(v, p)); } #define atomic_fcmpset_rel_32 atomic_fcmpset_32 #define atomic_fcmpset_acq_32 atomic_fcmpset_32 #define atomic_fcmpset_rel_64 atomic_fcmpset_64 #define atomic_fcmpset_acq_64 atomic_fcmpset_64 #define atomic_fcmpset_acq_long atomic_fcmpset_long #define atomic_fcmpset_rel_long atomic_fcmpset_long #define atomic_cmpset_rel_32 atomic_cmpset_32 #define atomic_cmpset_acq_32 atomic_cmpset_32 #define atomic_cmpset_rel_64 atomic_cmpset_64 
#define atomic_cmpset_acq_64 atomic_cmpset_64 #define atomic_set_rel_32 atomic_set_32 #define atomic_set_acq_32 atomic_set_32 #define atomic_clear_rel_32 atomic_clear_32 #define atomic_clear_acq_32 atomic_clear_32 #define atomic_add_rel_32 atomic_add_32 #define atomic_add_acq_32 atomic_add_32 #define atomic_subtract_rel_32 atomic_subtract_32 #define atomic_subtract_acq_32 atomic_subtract_32 #define atomic_store_rel_32 atomic_store_32 #define atomic_store_rel_long atomic_store_long #define atomic_load_acq_32 atomic_load_32 #define atomic_load_acq_long atomic_load_long #define atomic_add_acq_long atomic_add_long #define atomic_add_rel_long atomic_add_long #define atomic_subtract_acq_long atomic_subtract_long #define atomic_subtract_rel_long atomic_subtract_long #define atomic_clear_acq_long atomic_clear_long #define atomic_clear_rel_long atomic_clear_long #define atomic_set_acq_long atomic_set_long #define atomic_set_rel_long atomic_set_long #define atomic_cmpset_acq_long atomic_cmpset_long #define atomic_cmpset_rel_long atomic_cmpset_long #define atomic_load_acq_long atomic_load_long #undef __with_interrupts_disabled static __inline void atomic_add_long(volatile u_long *p, u_long v) { atomic_add_32((volatile uint32_t *)p, v); } static __inline void atomic_clear_long(volatile u_long *p, u_long v) { atomic_clear_32((volatile uint32_t *)p, v); } static __inline int atomic_cmpset_long(volatile u_long *dst, u_long old, u_long newe) { return (atomic_cmpset_32((volatile uint32_t *)dst, old, newe)); } +#ifdef _KERNEL +/* atomic_fcmpset_32 is only defined for the kernel */ static __inline u_long atomic_fcmpset_long(volatile u_long *dst, u_long *old, u_long newe) { return (atomic_fcmpset_32((volatile uint32_t *)dst, (uint32_t *)old, newe)); } +#endif static __inline u_long atomic_fetchadd_long(volatile u_long *p, u_long v) { return (atomic_fetchadd_32((volatile uint32_t *)p, v)); } static __inline void atomic_readandclear_long(volatile u_long *p) { atomic_readandclear_32((volatile uint32_t *)p); } static __inline void atomic_set_long(volatile u_long *p, u_long v) { atomic_set_32((volatile uint32_t *)p, v); } static __inline void atomic_subtract_long(volatile u_long *p, u_long v) { atomic_subtract_32((volatile uint32_t *)p, v); } /* * ARMv5 does not support SMP. For both kernel and user modes, only a * compiler barrier is needed for fences, since CPU is always * self-consistent. 
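 * (__compiler_membar() is an empty asm statement with a "memory"
 * clobber: it emits no instructions, and only prevents the compiler
 * from reordering memory accesses across the fence.)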
*/ static __inline void atomic_thread_fence_acq(void) { __compiler_membar(); } static __inline void atomic_thread_fence_rel(void) { __compiler_membar(); } static __inline void atomic_thread_fence_acq_rel(void) { __compiler_membar(); } static __inline void atomic_thread_fence_seq_cst(void) { __compiler_membar(); } #endif /* _MACHINE_ATOMIC_H_ */ Index: projects/ipsec/sys/boot/efi/libefi/Makefile =================================================================== --- projects/ipsec/sys/boot/efi/libefi/Makefile (revision 313186) +++ projects/ipsec/sys/boot/efi/libefi/Makefile (revision 313187) @@ -1,51 +1,52 @@ # $FreeBSD$ .include .if ${MK_FORTH} != "no" CFLAGS+= -DBOOT_FORTH .include "${.CURDIR}/../../Makefile.ficl" .endif LIB= efi INTERNALLIB= WARNS?= 2 SRCS= delay.c devpath.c efi_console.c efinet.c efipart.c env.c errno.c \ handles.c wchar.c libefi.c .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" SRCS+= time.c .elif ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "arm" SRCS+= time_event.c .endif # We implement a slightly non-standard %S in that it always takes a # CHAR16 that's common in UEFI-land instead of a wchar_t. This only # seems to matter on arm64 where wchar_t defaults to an int instead # of a short. There's no good cast to use here so just ignore the # warnings for now. CWARNFLAGS.efinet.c+= -Wno-format +CWARNFLAGS.env.c+= -Wno-format .if ${MACHINE_CPUARCH} == "aarch64" CFLAGS+= -msoft-float -mgeneral-regs-only .endif .if ${MACHINE_ARCH} == "amd64" CFLAGS+= -fPIC -mno-red-zone .endif CFLAGS+= -I${.CURDIR}/../include CFLAGS+= -I${.CURDIR}/../include/${MACHINE} CFLAGS+= -I${.CURDIR}/../../../../lib/libstand # Pick up the bootstrap header for some interface items CFLAGS+= -I${.CURDIR}/../../common # Handle FreeBSD specific %b and %D printf format specifiers CFLAGS+= ${FORMAT_EXTENSIONS} # Do not use TERM_EMU on arm and arm64 as it doesn't behave well with serial console .if ${MACHINE_CPUARCH} != "arm" && ${MACHINE_CPUARCH} != "aarch64" CFLAGS+= -DTERM_EMU .endif .include Index: projects/ipsec/sys/boot/efi/libefi/env.c =================================================================== --- projects/ipsec/sys/boot/efi/libefi/env.c (revision 313186) +++ projects/ipsec/sys/boot/efi/libefi/env.c (revision 313187) @@ -1,532 +1,534 @@ /* * Copyright (c) 2015 Netflix, Inc. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include

#include "bootstrap.h"

#ifdef BOOT_FORTH
#include "ficl.h"
#endif

/*
 * Simple wrappers to the underlying UEFI functions.
 * See http://wiki.phoenix.com/wiki/index.php/EFI_RUNTIME_SERVICES
 * for details.
 */
EFI_STATUS
efi_get_next_variable_name(UINTN *variable_name_size, CHAR16 *variable_name,
    EFI_GUID *vendor_guid)
{
	return (RS->GetNextVariableName(variable_name_size, variable_name,
	    vendor_guid));
}

EFI_STATUS
efi_get_variable(CHAR16 *variable_name, EFI_GUID *vendor_guid,
    UINT32 *attributes, UINTN *data_size, void *data)
{
	return (RS->GetVariable(variable_name, vendor_guid, attributes,
	    data_size, data));
}

EFI_STATUS
efi_set_variable(CHAR16 *variable_name, EFI_GUID *vendor_guid,
    UINT32 attributes, UINTN data_size, void *data)
{
	return (RS->SetVariable(variable_name, vendor_guid, attributes,
	    data_size, data));
}

void
efi_init_environment(void)
{
	char var[128];

	snprintf(var, sizeof(var), "%d.%02d", ST->Hdr.Revision >> 16,
	    ST->Hdr.Revision & 0xffff);
	env_setenv("efi-version", EV_VOLATILE, var, env_noset, env_nounset);
}

COMMAND_SET(efishow, "efi-show", "print some or all EFI variables",
    command_efi_show);

static int
efi_print_var(CHAR16 *varnamearg, EFI_GUID *matchguid, int lflag)
{
	UINTN datasz, i;
	EFI_STATUS status;
	UINT32 attr;
	CHAR16 *data;
	char *str;
	uint32_t uuid_status;
	int is_ascii;

	datasz = 0;
	status = RS->GetVariable(varnamearg, matchguid, &attr, &datasz, NULL);
	if (status != EFI_BUFFER_TOO_SMALL) {
-		printf("Can't get the variable: error %#lx\n", status);
+		printf("Can't get the variable: error %#lx\n",
+		    EFI_ERROR_CODE(status));
		return (CMD_ERROR);
	}
	data = malloc(datasz);
	status = RS->GetVariable(varnamearg, matchguid, &attr, &datasz, data);
	if (status != EFI_SUCCESS) {
-		printf("Can't get the variable: error %#lx\n", status);
+		printf("Can't get the variable: error %#lx\n",
+		    EFI_ERROR_CODE(status));
		return (CMD_ERROR);
	}
	uuid_to_string((uuid_t *)matchguid, &str, &uuid_status);
	if (lflag) {
		printf("%s 0x%x %S", str, attr, varnamearg);
	} else {
		printf("%s 0x%x %S=", str, attr, varnamearg);
		is_ascii = 1;
		free(str);
		str = (char *)data;
		for (i = 0; i < datasz - 1; i++) {
			/*
			 * Quick hack to see if this ascii-ish string is in
			 * the printable range, plus tab, cr and lf.
			 */
			if ((str[i] < 32 || str[i] > 126) && str[i] != 9 &&
			    str[i] != 10 && str[i] != 13) {
				is_ascii = 0;
				break;
			}
		}
		if (str[datasz - 1] != '\0')
			is_ascii = 0;
		if (is_ascii)
			printf("%s", str);
		else {
			for (i = 0; i < datasz / 2; i++) {
				if (isalnum(data[i]) || isspace(data[i]))
					printf("%c", data[i]);
				else
					printf("\\x%02x", data[i]);
			}
		}
	}
	free(data);
	if (pager_output("\n"))
		return (CMD_WARN);
	return (CMD_OK);
}

static int
command_efi_show(int argc, char *argv[])
{
	/*
	 * efi-show [-a]
	 *	print all the env
	 * efi-show -g UUID
	 *	print all the env vars tagged with UUID
	 * efi-show -v var
	 *	search all the env vars and print the ones matching var
	 * efi-show -g UUID -v var
	 * efi-show var UUID
	 *	print all the env vars that match UUID and var
	 */
	/*
	 * NB: We assume EFI_GUID is the same as uuid_t.
	 */
	int aflag = 0, gflag = 0, lflag = 0, vflag = 0;
	int ch, rv;
	unsigned i;
	EFI_STATUS status;
	EFI_GUID varguid = { 0,0,0,{0,0,0,0,0,0,0,0} };
	EFI_GUID matchguid = { 0,0,0,{0,0,0,0,0,0,0,0} };
	uint32_t uuid_status;
	CHAR16 *varname;
	CHAR16 *newnm;
	CHAR16 varnamearg[128];
	UINTN varalloc;
	UINTN varsz;

	while ((ch = getopt(argc, argv, "ag:lv:")) != -1) {
		switch (ch) {
		case 'a':
			aflag = 1;
			break;
		case 'g':
			gflag = 1;
			uuid_from_string(optarg, (uuid_t *)&matchguid,
			    &uuid_status);
			if (uuid_status != uuid_s_ok) {
				printf("uuid %s could not be parsed\n", optarg);
				return (CMD_ERROR);
			}
			break;
		case 'l':
			lflag = 1;
			break;
		case 'v':
			vflag = 1;
			if (strlen(optarg) >= nitems(varnamearg)) {
				printf("Variable %s is longer than %zd characters\n",
				    optarg, nitems(varnamearg));
				return (CMD_ERROR);
			}
			for (i = 0; i < strlen(optarg); i++)
				varnamearg[i] = optarg[i];
			varnamearg[i] = 0;
			break;
		default:
			printf("Invalid argument %c\n", ch);
			return (CMD_ERROR);
		}
	}

	if (aflag && (gflag || vflag)) {
		printf("-a isn't compatible with -g or -v\n");
		return (CMD_ERROR);
	}

	if (aflag && optind < argc) {
		printf("-a doesn't take any args\n");
		return (CMD_ERROR);
	}

	if (optind == argc)
		aflag = 1;

	argc -= optind;
	argv += optind;

	pager_open();
	if (vflag && gflag) {
		rv = efi_print_var(varnamearg, &matchguid, lflag);
		pager_close();
		return (rv);
	}

	if (argc == 2) {
		optarg = argv[0];
		if (strlen(optarg) >= nitems(varnamearg)) {
			printf("Variable %s is longer than %zd characters\n",
			    optarg, nitems(varnamearg));
			pager_close();
			return (CMD_ERROR);
		}
		for (i = 0; i < strlen(optarg); i++)
			varnamearg[i] = optarg[i];
		varnamearg[i] = 0;
		optarg = argv[1];
		uuid_from_string(optarg, (uuid_t *)&matchguid, &uuid_status);
		if (uuid_status != uuid_s_ok) {
			printf("uuid %s could not be parsed\n", optarg);
			pager_close();
			return (CMD_ERROR);
		}
		rv = efi_print_var(varnamearg, &matchguid, lflag);
		pager_close();
		return (rv);
	}

	if (argc > 0) {
		printf("Too many args %d\n", argc);
		pager_close();
		return (CMD_ERROR);
	}

	/*
	 * Initiate the search -- note the standard takes pains
	 * to specify that the initial call must be a pointer to a NULL
	 * character.
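	 * In the loop below, varname[0] is therefore set to the empty
	 * string before the first GetNextVariableName() call, and the
	 * buffer is grown and the call retried whenever the firmware
	 * returns EFI_BUFFER_TOO_SMALL.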
*/ varalloc = 1024; varname = malloc(varalloc); if (varname == NULL) { printf("Can't allocate memory to get variables\n"); pager_close(); return (CMD_ERROR); } varname[0] = 0; while (1) { varsz = varalloc; status = RS->GetNextVariableName(&varsz, varname, &varguid); if (status == EFI_BUFFER_TOO_SMALL) { varalloc = varsz; newnm = realloc(varname, varalloc); if (newnm == NULL) { printf("Can't allocate memory to get variables\n"); free(varname); pager_close(); return (CMD_ERROR); } varname = newnm; continue; /* Try again with bigger buffer */ } if (status != EFI_SUCCESS) break; if (aflag) { if (efi_print_var(varname, &varguid, lflag) != CMD_OK) break; continue; } if (vflag) { if (wcscmp(varnamearg, varname) == 0) { if (efi_print_var(varname, &varguid, lflag) != CMD_OK) break; continue; } } if (gflag) { if (memcmp(&varguid, &matchguid, sizeof(varguid)) == 0) { if (efi_print_var(varname, &varguid, lflag) != CMD_OK) break; continue; } } } free(varname); pager_close(); return (CMD_OK); } COMMAND_SET(efiset, "efi-set", "set EFI variables", command_efi_set); static int command_efi_set(int argc, char *argv[]) { char *uuid, *var, *val; CHAR16 wvar[128]; EFI_GUID guid; uint32_t status; EFI_STATUS err; if (argc != 4) { printf("efi-set uuid var new-value\n"); return (CMD_ERROR); } uuid = argv[1]; var = argv[2]; val = argv[3]; uuid_from_string(uuid, (uuid_t *)&guid, &status); if (status != uuid_s_ok) { printf("Invalid uuid %s %d\n", uuid, status); return (CMD_ERROR); } cpy8to16(var, wvar, sizeof(wvar)); err = RS->SetVariable(wvar, &guid, EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_RUNTIME_ACCESS | EFI_VARIABLE_BOOTSERVICE_ACCESS, strlen(val) + 1, val); if (EFI_ERROR(err)) { printf("Failed to set variable: error %lu\n", EFI_ERROR_CODE(err)); return (CMD_ERROR); } return (CMD_OK); } COMMAND_SET(efiunset, "efi-unset", "delete / unset EFI variables", command_efi_unset); static int command_efi_unset(int argc, char *argv[]) { char *uuid, *var; CHAR16 wvar[128]; EFI_GUID guid; uint32_t status; EFI_STATUS err; if (argc != 3) { printf("efi-unset uuid var\n"); return (CMD_ERROR); } uuid = argv[1]; var = argv[2]; uuid_from_string(uuid, (uuid_t *)&guid, &status); if (status != uuid_s_ok) { printf("Invalid uuid %s\n", uuid); return (CMD_ERROR); } cpy8to16(var, wvar, sizeof(wvar)); err = RS->SetVariable(wvar, &guid, 0, 0, NULL); if (EFI_ERROR(err)) { printf("Failed to unset variable: error %lu\n", EFI_ERROR_CODE(err)); return (CMD_ERROR); } return (CMD_OK); } #ifdef BOOT_FORTH /* * FreeBSD's loader interaction words and extras * * efi-setenv ( value n name n guid n attr -- 0 | -1) * efi-getenv ( guid n addr n -- addr' n' | -1 ) * efi-unsetenv ( name n guid n'' -- ) */ /* * efi-setenv * efi-setenv ( value n name n guid n attr -- 0 | -1) * * Set environment variables using the SetVariable EFI runtime service. * * Value and guid are passed through in binary form (so guid needs to be * converted to binary form from its string form). Name is converted from * ASCII to CHAR16. Since ficl doesn't have support for internationalization, * there's no native CHAR16 interface provided. * * attr is an int in the bitmask of the following attributes for this variable. * * 1 Non volatile * 2 Boot service access * 4 Run time access * (corresponding to the same bits in the UEFI spec). 
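 *
 * For example (an illustrative sketch only -- the variable name and
 * value are invented, and the GUID shown is the standard EFI global
 * variable GUID), a variable that is non-volatile and visible at both
 * boot time and run time could be created from the loader prompt with:
 *
 *	s" hello" s" MyVar" s" 8be4df61-93ca-11d2-aa0d-00e098032b8c" 7
 *	efi-setenv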
*/ static void ficlEfiSetenv(FICL_VM *pVM) { char *value = NULL, *guid = NULL; CHAR16 *name = NULL; int i; char *namep, *valuep, *guidp; int names, values, guids, attr; EFI_STATUS status; uuid_t u; uint32_t ustatus; bool error = true; #if FICL_ROBUST > 1 vmCheckStack(pVM, 6, 0); #endif attr = stackPopINT(pVM->pStack); guids = stackPopINT(pVM->pStack); guidp = (char*)stackPopPtr(pVM->pStack); names = stackPopINT(pVM->pStack); namep = (char*)stackPopPtr(pVM->pStack); values = stackPopINT(pVM->pStack); valuep = (char*)stackPopPtr(pVM->pStack); guid = (char*)ficlMalloc(guids); if (guid == NULL) goto out; memcpy(guid, guidp, guids); uuid_from_string(guid, &u, &ustatus); if (ustatus != uuid_s_ok) { stackPushINT(pVM->pStack, -1); goto out; } name = ficlMalloc((names + 1) * sizeof(CHAR16)); if (name == NULL) goto out; for (i = 0; i < names; i++) name[i] = namep[i]; name[names] = 0; value = ficlMalloc(values + 1); if (value == NULL) goto out; memcpy(value, valuep, values); status = efi_set_variable(name, (EFI_GUID *)&u, attr, values, value); if (status == EFI_SUCCESS) stackPushINT(pVM->pStack, 0); else stackPushINT(pVM->pStack, -1); error = false; out: ficlFree(name); ficlFree(value); ficlFree(guid); if (error == true) vmThrowErr(pVM, "Error: out of memory"); } static void ficlEfiGetenv(FICL_VM *pVM) { char *name, *value; char *namep; int names; #if FICL_ROBUST > 1 vmCheckStack(pVM, 2, 2); #endif names = stackPopINT(pVM->pStack); namep = (char*) stackPopPtr(pVM->pStack); name = (char*) ficlMalloc(names+1); if (name == NULL) vmThrowErr(pVM, "Error: out of memory"); strncpy(name, namep, names); name[names] = '\0'; value = getenv(name); ficlFree(name); if(value != NULL) { stackPushPtr(pVM->pStack, value); stackPushINT(pVM->pStack, strlen(value)); } else stackPushINT(pVM->pStack, -1); } static void ficlEfiUnsetenv(FICL_VM *pVM) { char *name; char *namep; int names; #if FICL_ROBUST > 1 vmCheckStack(pVM, 2, 0); #endif names = stackPopINT(pVM->pStack); namep = (char*) stackPopPtr(pVM->pStack); name = (char*) ficlMalloc(names+1); if (name == NULL) vmThrowErr(pVM, "Error: out of memory"); strncpy(name, namep, names); name[names] = '\0'; unsetenv(name); ficlFree(name); } /************************************************************************** ** Add FreeBSD UEFI platform extensions into the system dictionary **************************************************************************/ void ficlEfiCompilePlatform(FICL_SYSTEM *pSys) { FICL_DICT *dp = pSys->dp; assert (dp); dictAppendWord(dp, "efi-setenv", ficlEfiSetenv, FW_DEFAULT); dictAppendWord(dp, "efi-getenv", ficlEfiGetenv, FW_DEFAULT); dictAppendWord(dp, "efi-unsetenv", ficlEfiUnsetenv, FW_DEFAULT); } FICL_COMPILE_SET(ficlEfiCompilePlatform); #endif /* BOOT_FORTH */ Index: projects/ipsec/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c =================================================================== --- projects/ipsec/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c (revision 313186) +++ projects/ipsec/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c (revision 313187) @@ -1,18333 +1,18352 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. 
* * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * $FreeBSD$ */ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, Joyent, Inc. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved. */ /* * DTrace - Dynamic Tracing for Solaris * * This is the implementation of the Solaris Dynamic Tracing framework * (DTrace). The user-visible interface to DTrace is described at length in * the "Solaris Dynamic Tracing Guide". The interfaces between the libdtrace * library, the in-kernel DTrace framework, and the DTrace providers are * described in the block comments in the header file. The * internal architecture of DTrace is described in the block comments in the * header file. The comments contained within the DTrace * implementation very much assume mastery of all of these sources; if one has * an unanswered question about the implementation, one should consult them * first. * * The functions here are ordered roughly as follows: * * - Probe context functions * - Probe hashing functions * - Non-probe context utility functions * - Matching functions * - Provider-to-Framework API functions * - Probe management functions * - DIF object functions * - Format functions * - Predicate functions * - ECB functions * - Buffer functions * - Enabling functions * - DOF functions * - Anonymous enabling functions * - Consumer state functions * - Helper functions * - Hook functions * - Driver cookbook functions * * Each group of functions begins with a block comment labelled the "DTrace * [Group] Functions", allowing one to find each block by searching forward * on capital-f functions. */ #include #ifndef illumos #include #endif #include #include #include #include #ifdef illumos #include #include #endif #include #include #ifdef illumos #include #endif #include #include #include #include #ifdef illumos #include #include #endif #include #ifdef illumos #include #include #endif #include #ifdef illumos #include #include #endif #include #ifdef illumos #include #include #endif #include #include #include #include "strtolctype.h" /* FreeBSD includes: */ #ifndef illumos #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include "dtrace_cddl.h" #include "dtrace_debug.c" #endif +#include "dtrace_xoroshiro128_plus.h" + /* * DTrace Tunable Variables * * The following variables may be tuned by adding a line to /etc/system that * includes both the name of the DTrace module ("dtrace") and the name of the * variable. For example: * * set dtrace:dtrace_destructive_disallow = 1 * * In general, the only variables that one should be tuning this way are those * that affect system-wide DTrace behavior, and for which the default behavior * is undesirable. Most of these variables are tunable on a per-consumer * basis using DTrace options, and need not be tuned on a system-wide basis. * When tuning these variables, avoid pathological values; while some attempt * is made to verify the integrity of these variables, they are not considered * part of the supported interface to DTrace, and they are therefore not * checked comprehensively. 
Further, these variables should not be tuned * dynamically via "mdb -kw" or other means; they should only be tuned via * /etc/system. */ int dtrace_destructive_disallow = 0; #ifndef illumos /* Positive logic version of dtrace_destructive_disallow for loader tunable */ int dtrace_allow_destructive = 1; #endif dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024); size_t dtrace_difo_maxsize = (256 * 1024); dtrace_optval_t dtrace_dof_maxsize = (8 * 1024 * 1024); size_t dtrace_statvar_maxsize = (16 * 1024); size_t dtrace_actions_max = (16 * 1024); size_t dtrace_retain_max = 1024; dtrace_optval_t dtrace_helper_actions_max = 128; dtrace_optval_t dtrace_helper_providers_max = 32; dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024); size_t dtrace_strsize_default = 256; dtrace_optval_t dtrace_cleanrate_default = 9900990; /* 101 hz */ dtrace_optval_t dtrace_cleanrate_min = 200000; /* 5000 hz */ dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC; /* 1/minute */ dtrace_optval_t dtrace_aggrate_default = NANOSEC; /* 1 hz */ dtrace_optval_t dtrace_statusrate_default = NANOSEC; /* 1 hz */ dtrace_optval_t dtrace_statusrate_max = (hrtime_t)10 * NANOSEC; /* 6/minute */ dtrace_optval_t dtrace_switchrate_default = NANOSEC; /* 1 hz */ dtrace_optval_t dtrace_nspec_default = 1; dtrace_optval_t dtrace_specsize_default = 32 * 1024; dtrace_optval_t dtrace_stackframes_default = 20; dtrace_optval_t dtrace_ustackframes_default = 20; dtrace_optval_t dtrace_jstackframes_default = 50; dtrace_optval_t dtrace_jstackstrsize_default = 512; int dtrace_msgdsize_max = 128; hrtime_t dtrace_chill_max = MSEC2NSEC(500); /* 500 ms */ hrtime_t dtrace_chill_interval = NANOSEC; /* 1000 ms */ int dtrace_devdepth_max = 32; int dtrace_err_verbose; hrtime_t dtrace_deadman_interval = NANOSEC; hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC; hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC; hrtime_t dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC; #ifndef illumos int dtrace_memstr_max = 4096; #endif /* * DTrace External Variables * * As dtrace(7D) is a kernel module, any DTrace variables are obviously * available to DTrace consumers via the backtick (`) syntax. One of these, * dtrace_zero, is made deliberately so: it is provided as a source of * well-known, zero-filled memory. While this variable is not documented, * it is used by some translators as an implementation detail. */ const char dtrace_zero[256] = { 0 }; /* zero-filled memory */ /* * DTrace Internal Variables */ #ifdef illumos static dev_info_t *dtrace_devi; /* device info */ #endif #ifdef illumos static vmem_t *dtrace_arena; /* probe ID arena */ static vmem_t *dtrace_minor; /* minor number arena */ #else static taskq_t *dtrace_taskq; /* task queue */ static struct unrhdr *dtrace_arena; /* Probe ID number. 
*/ #endif static dtrace_probe_t **dtrace_probes; /* array of all probes */ static int dtrace_nprobes; /* number of probes */ static dtrace_provider_t *dtrace_provider; /* provider list */ static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */ static int dtrace_opens; /* number of opens */ static int dtrace_helpers; /* number of helpers */ static int dtrace_getf; /* number of unpriv getf()s */ #ifdef illumos static void *dtrace_softstate; /* softstate pointer */ #endif static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */ static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */ static dtrace_hash_t *dtrace_byname; /* probes hashed by name */ static dtrace_toxrange_t *dtrace_toxrange; /* toxic range array */ static int dtrace_toxranges; /* number of toxic ranges */ static int dtrace_toxranges_max; /* size of toxic range array */ static dtrace_anon_t dtrace_anon; /* anonymous enabling */ static kmem_cache_t *dtrace_state_cache; /* cache for dynamic state */ static uint64_t dtrace_vtime_references; /* number of vtimestamp refs */ static kthread_t *dtrace_panicked; /* panicking thread */ static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */ static dtrace_genid_t dtrace_probegen; /* current probe generation */ static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */ static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */ static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */ static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */ static int dtrace_dynvar_failclean; /* dynvars failed to clean */ #ifndef illumos static struct mtx dtrace_unr_mtx; MTX_SYSINIT(dtrace_unr_mtx, &dtrace_unr_mtx, "Unique resource identifier", MTX_DEF); static eventhandler_tag dtrace_kld_load_tag; static eventhandler_tag dtrace_kld_unload_try_tag; #endif /* * DTrace Locking * DTrace is protected by three (relatively coarse-grained) locks: * * (1) dtrace_lock is required to manipulate essentially any DTrace state, * including enabling state, probes, ECBs, consumer state, helper state, * etc. Importantly, dtrace_lock is _not_ required when in probe context; * probe context is lock-free -- synchronization is handled via the * dtrace_sync() cross call mechanism. * * (2) dtrace_provider_lock is required when manipulating provider state, or * when provider state must be held constant. * * (3) dtrace_meta_lock is required when manipulating meta provider state, or * when meta provider state must be held constant. * * The lock ordering between these three locks is dtrace_meta_lock before * dtrace_provider_lock before dtrace_lock. (In particular, there are * several places where dtrace_provider_lock is held by the framework as it * calls into the providers -- which then call back into the framework, * grabbing dtrace_lock.) * * There are two other locks in the mix: mod_lock and cpu_lock. With respect * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical * role as a coarse-grained lock; it is acquired before both of these locks. * With respect to dtrace_meta_lock, its behavior is stranger: cpu_lock must * be acquired _between_ dtrace_meta_lock and any other DTrace locks. * mod_lock is similar with respect to dtrace_provider_lock in that it must be * acquired _between_ dtrace_provider_lock and dtrace_lock. 
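 *
 * Taken together (a summary derived from the rules above, not an
 * additional rule), the overall acquisition order works out to:
 *
 *	dtrace_meta_lock -> cpu_lock -> dtrace_provider_lock ->
 *	mod_lock -> dtrace_lock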
*/ static kmutex_t dtrace_lock; /* probe state lock */ static kmutex_t dtrace_provider_lock; /* provider state lock */ static kmutex_t dtrace_meta_lock; /* meta-provider state lock */ #ifndef illumos /* XXX FreeBSD hacks. */ #define cr_suid cr_svuid #define cr_sgid cr_svgid #define ipaddr_t in_addr_t #define mod_modname pathname #define vuprintf vprintf #define ttoproc(_a) ((_a)->td_proc) #define crgetzoneid(_a) 0 -#define NCPU MAXCPU #define SNOCD 0 #define CPU_ON_INTR(_a) 0 #define PRIV_EFFECTIVE (1 << 0) #define PRIV_DTRACE_KERNEL (1 << 1) #define PRIV_DTRACE_PROC (1 << 2) #define PRIV_DTRACE_USER (1 << 3) #define PRIV_PROC_OWNER (1 << 4) #define PRIV_PROC_ZONE (1 << 5) #define PRIV_ALL ~0 SYSCTL_DECL(_debug_dtrace); SYSCTL_DECL(_kern_dtrace); #endif #ifdef illumos #define curcpu CPU->cpu_id #endif /* * DTrace Provider Variables * * These are the variables relating to DTrace as a provider (that is, the * provider of the BEGIN, END, and ERROR probes). */ static dtrace_pattr_t dtrace_provider_attr = { { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, }; static void dtrace_nullop(void) {} static dtrace_pops_t dtrace_provider_ops = { (void (*)(void *, dtrace_probedesc_t *))dtrace_nullop, (void (*)(void *, modctl_t *))dtrace_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, NULL, NULL, NULL, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop }; static dtrace_id_t dtrace_probeid_begin; /* special BEGIN probe */ static dtrace_id_t dtrace_probeid_end; /* special END probe */ dtrace_id_t dtrace_probeid_error; /* special ERROR probe */ /* * DTrace Helper Tracing Variables * * These variables should be set dynamically to enable helper tracing. The * only variables that should be set are dtrace_helptrace_enable (which should * be set to a non-zero value to allocate helper tracing buffers on the next * open of /dev/dtrace) and dtrace_helptrace_disable (which should be set to a * non-zero value to deallocate helper tracing buffers on the next close of * /dev/dtrace). When (and only when) helper tracing is disabled, the * buffer size may also be set via dtrace_helptrace_bufsize. */ int dtrace_helptrace_enable = 0; int dtrace_helptrace_disable = 0; int dtrace_helptrace_bufsize = 16 * 1024 * 1024; uint32_t dtrace_helptrace_nlocals; static dtrace_helptrace_t *dtrace_helptrace_buffer; static uint32_t dtrace_helptrace_next = 0; static int dtrace_helptrace_wrapped = 0; /* * DTrace Error Hashing * * On DEBUG kernels, DTrace will track the errors that has seen in a hash * table. This is very useful for checking coverage of tests that are * expected to induce DIF or DOF processing errors, and may be useful for * debugging problems in the DIF code generator or in DOF generation . The * error hash may be examined with the ::dtrace_errhash MDB dcmd. 
*/ #ifdef DEBUG static dtrace_errhash_t dtrace_errhash[DTRACE_ERRHASHSZ]; static const char *dtrace_errlast; static kthread_t *dtrace_errthread; static kmutex_t dtrace_errlock; #endif /* * DTrace Macros and Constants * * These are various macros that are useful in various spots in the * implementation, along with a few random constants that have no meaning * outside of the implementation. There is no real structure to this cpp * mishmash -- but is there ever? */ #define DTRACE_HASHSTR(hash, probe) \ dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs))) #define DTRACE_HASHNEXT(hash, probe) \ (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs) #define DTRACE_HASHPREV(hash, probe) \ (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs) #define DTRACE_HASHEQ(hash, lhs, rhs) \ (strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \ *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0) #define DTRACE_AGGHASHSIZE_SLEW 17 #define DTRACE_V4MAPPED_OFFSET (sizeof (uint32_t) * 3) /* * The key for a thread-local variable consists of the lower 61 bits of the * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL. * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never * equal to a variable identifier. This is necessary (but not sufficient) to * assure that global associative arrays never collide with thread-local * variables. To guarantee that they cannot collide, we must also define the * order for keying dynamic variables. That order is: * * [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ] * * Because the variable-key and the tls-key are in orthogonal spaces, there is * no way for a global variable key signature to match a thread-local key * signature. */ #ifdef illumos #define DTRACE_TLS_THRKEY(where) { \ uint_t intr = 0; \ uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \ for (; actv; actv >>= 1) \ intr++; \ ASSERT(intr < (1 << 3)); \ (where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \ (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ } #else #define DTRACE_TLS_THRKEY(where) { \ solaris_cpu_t *_c = &solaris_cpu[curcpu]; \ uint_t intr = 0; \ uint_t actv = _c->cpu_intr_actv; \ for (; actv; actv >>= 1) \ intr++; \ ASSERT(intr < (1 << 3)); \ (where) = ((curthread->td_tid + DIF_VARIABLE_MAX) & \ (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ } #endif #define DT_BSWAP_8(x) ((x) & 0xff) #define DT_BSWAP_16(x) ((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8)) #define DT_BSWAP_32(x) ((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16)) #define DT_BSWAP_64(x) ((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32)) #define DT_MASK_LO 0x00000000FFFFFFFFULL #define DTRACE_STORE(type, tomax, offset, what) \ *((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what); #ifndef __x86 #define DTRACE_ALIGNCHECK(addr, size, flags) \ if (addr & (size - 1)) { \ *flags |= CPU_DTRACE_BADALIGN; \ cpu_core[curcpu].cpuc_dtrace_illval = addr; \ return (0); \ } #else #define DTRACE_ALIGNCHECK(addr, size, flags) #endif /* * Test whether a range of memory starting at testaddr of size testsz falls * within the range of memory described by addr, sz. We take care to avoid * problems with overflow and underflow of the unsigned quantities, and * disallow all negative sizes. Ranges of size 0 are allowed. 
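 *
 * As a worked example (with illustrative numbers, not values from the
 * code): for baseaddr 0xff00 and basesz 0x100, a probe of testaddr
 * 0xffc0 with testsz 0x80 is rejected by the second clause, since
 * 0xffc0 + 0x80 - 0xff00 = 0x140 exceeds 0x100; a wrapping testsz such
 * as (size_t)-1 is rejected by the third clause, since testaddr +
 * testsz overflows to a value below testaddr.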
*/ #define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \ ((testaddr) - (uintptr_t)(baseaddr) < (basesz) && \ (testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \ (testaddr) + (testsz) >= (testaddr)) #define DTRACE_RANGE_REMAIN(remp, addr, baseaddr, basesz) \ do { \ if ((remp) != NULL) { \ *(remp) = (uintptr_t)(baseaddr) + (basesz) - (addr); \ } \ _NOTE(CONSTCOND) } while (0) /* * Test whether alloc_sz bytes will fit in the scratch region. We isolate * alloc_sz on the righthand side of the comparison in order to avoid overflow * or underflow in the comparison with it. This is simpler than the INRANGE * check above, because we know that the dtms_scratch_ptr is valid in the * range. Allocations of size zero are allowed. */ #define DTRACE_INSCRATCH(mstate, alloc_sz) \ ((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \ (mstate)->dtms_scratch_ptr >= (alloc_sz)) #define DTRACE_LOADFUNC(bits) \ /*CSTYLED*/ \ uint##bits##_t \ dtrace_load##bits(uintptr_t addr) \ { \ size_t size = bits / NBBY; \ /*CSTYLED*/ \ uint##bits##_t rval; \ int i; \ volatile uint16_t *flags = (volatile uint16_t *) \ &cpu_core[curcpu].cpuc_dtrace_flags; \ \ DTRACE_ALIGNCHECK(addr, size, flags); \ \ for (i = 0; i < dtrace_toxranges; i++) { \ if (addr >= dtrace_toxrange[i].dtt_limit) \ continue; \ \ if (addr + size <= dtrace_toxrange[i].dtt_base) \ continue; \ \ /* \ * This address falls within a toxic region; return 0. \ */ \ *flags |= CPU_DTRACE_BADADDR; \ cpu_core[curcpu].cpuc_dtrace_illval = addr; \ return (0); \ } \ \ *flags |= CPU_DTRACE_NOFAULT; \ /*CSTYLED*/ \ rval = *((volatile uint##bits##_t *)addr); \ *flags &= ~CPU_DTRACE_NOFAULT; \ \ return (!(*flags & CPU_DTRACE_FAULT) ? rval : 0); \ } #ifdef _LP64 #define dtrace_loadptr dtrace_load64 #else #define dtrace_loadptr dtrace_load32 #endif #define DTRACE_DYNHASH_FREE 0 #define DTRACE_DYNHASH_SINK 1 #define DTRACE_DYNHASH_VALID 2 #define DTRACE_MATCH_NEXT 0 #define DTRACE_MATCH_DONE 1 #define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0') #define DTRACE_STATE_ALIGN 64 #define DTRACE_FLAGS2FLT(flags) \ (((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR : \ ((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP : \ ((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO : \ ((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV : \ ((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV : \ ((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW : \ ((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN : \ ((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH : \ ((flags) & CPU_DTRACE_BADSTACK) ? 
DTRACEFLT_BADSTACK : \ DTRACEFLT_UNKNOWN) #define DTRACEACT_ISSTRING(act) \ ((act)->dta_kind == DTRACEACT_DIFEXPR && \ (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) /* Function prototype definitions: */ static size_t dtrace_strlen(const char *, size_t); static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id); static void dtrace_enabling_provide(dtrace_provider_t *); static int dtrace_enabling_match(dtrace_enabling_t *, int *); static void dtrace_enabling_matchall(void); static void dtrace_enabling_reap(void); static dtrace_state_t *dtrace_anon_grab(void); static uint64_t dtrace_helper(int, dtrace_mstate_t *, dtrace_state_t *, uint64_t, uint64_t); static dtrace_helpers_t *dtrace_helpers_create(proc_t *); static void dtrace_buffer_drop(dtrace_buffer_t *); static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when); static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t, dtrace_state_t *, dtrace_mstate_t *); static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t, dtrace_optval_t); static int dtrace_ecb_create_enable(dtrace_probe_t *, void *); static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *); uint16_t dtrace_load16(uintptr_t); uint32_t dtrace_load32(uintptr_t); uint64_t dtrace_load64(uintptr_t); uint8_t dtrace_load8(uintptr_t); void dtrace_dynvar_clean(dtrace_dstate_t *); dtrace_dynvar_t *dtrace_dynvar(dtrace_dstate_t *, uint_t, dtrace_key_t *, size_t, dtrace_dynvar_op_t, dtrace_mstate_t *, dtrace_vstate_t *); uintptr_t dtrace_dif_varstr(uintptr_t, dtrace_state_t *, dtrace_mstate_t *); static int dtrace_priv_proc(dtrace_state_t *); static void dtrace_getf_barrier(void); static int dtrace_canload_remains(uint64_t, size_t, size_t *, dtrace_mstate_t *, dtrace_vstate_t *); static int dtrace_canstore_remains(uint64_t, size_t, size_t *, dtrace_mstate_t *, dtrace_vstate_t *); /* * DTrace Probe Context Functions * * These functions are called from probe context. Because probe context is * any context in which C may be called, arbitrarily locks may be held, * interrupts may be disabled, we may be in arbitrary dispatched state, etc. * As a result, functions called from probe context may only call other DTrace * support functions -- they may not interact at all with the system at large. * (Note that the ASSERT macro is made probe-context safe by redefining it in * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary * loads are to be performed from probe context, they _must_ be in terms of * the safe dtrace_load*() variants. * * Some functions in this block are not actually called from probe context; * for these functions, there will be a comment above the function reading * "Note: not called from probe context." */ void dtrace_panic(const char *format, ...) { va_list alist; va_start(alist, format); #ifdef __FreeBSD__ vpanic(format, alist); #else dtrace_vpanic(format, alist); #endif va_end(alist); } int dtrace_assfail(const char *a, const char *f, int l) { dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l); /* * We just need something here that even the most clever compiler * cannot optimize away. */ return (a[(uintptr_t)f]); } /* * Atomically increment a specified error counter from probe context. */ static void dtrace_error(uint32_t *counter) { /* * Most counters stored to in probe context are per-CPU counters. * However, there are some error conditions that are sufficiently * arcane that they don't merit per-CPU storage. 
If these counters * are incremented concurrently on different CPUs, scalability will be * adversely affected -- but we don't expect them to be white-hot in a * correctly constructed enabling... */ uint32_t oval, nval; do { oval = *counter; if ((nval = oval + 1) == 0) { /* * If the counter would wrap, set it to 1 -- assuring * that the counter is never zero when we have seen * errors. (The counter must be 32-bits because we * aren't guaranteed a 64-bit compare&swap operation.) * To save this code both the infamy of being fingered * by a priggish news story and the indignity of being * the target of a neo-puritan witch trial, we're * carefully avoiding any colorful description of the * likelihood of this condition -- but suffice it to * say that it is only slightly more likely than the * overflow of predicate cache IDs, as discussed in * dtrace_predicate_create(). */ nval = 1; } } while (dtrace_cas32(counter, oval, nval) != oval); } /* * Use the DTRACE_LOADFUNC macro to define functions for each of loading a * uint8_t, a uint16_t, a uint32_t and a uint64_t. */ /* BEGIN CSTYLED */ DTRACE_LOADFUNC(8) DTRACE_LOADFUNC(16) DTRACE_LOADFUNC(32) DTRACE_LOADFUNC(64) /* END CSTYLED */ static int dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate) { if (dest < mstate->dtms_scratch_base) return (0); if (dest + size < dest) return (0); if (dest + size > mstate->dtms_scratch_ptr) return (0); return (1); } static int dtrace_canstore_statvar(uint64_t addr, size_t sz, size_t *remain, dtrace_statvar_t **svars, int nsvars) { int i; size_t maxglobalsize, maxlocalsize; if (nsvars == 0) return (0); maxglobalsize = dtrace_statvar_maxsize + sizeof (uint64_t); maxlocalsize = maxglobalsize * NCPU; for (i = 0; i < nsvars; i++) { dtrace_statvar_t *svar = svars[i]; uint8_t scope; size_t size; if (svar == NULL || (size = svar->dtsv_size) == 0) continue; scope = svar->dtsv_var.dtdv_scope; /* * We verify that our size is valid in the spirit of providing * defense in depth: we want to prevent attackers from using * DTrace to escalate an orthogonal kernel heap corruption bug * into the ability to store to arbitrary locations in memory. */ VERIFY((scope == DIFV_SCOPE_GLOBAL && size <= maxglobalsize) || (scope == DIFV_SCOPE_LOCAL && size <= maxlocalsize)); if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size)) { DTRACE_RANGE_REMAIN(remain, addr, svar->dtsv_data, svar->dtsv_size); return (1); } } return (0); } /* * Check to see if the address is within a memory region to which a store may * be issued. This includes the DTrace scratch areas, and any DTrace variable * region. The caller of dtrace_canstore() is responsible for performing any * alignment checks that are needed before stores are actually executed. */ static int dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { return (dtrace_canstore_remains(addr, sz, NULL, mstate, vstate)); } /* * Implementation of dtrace_canstore which communicates the upper bound of the * allowed memory region. */ static int dtrace_canstore_remains(uint64_t addr, size_t sz, size_t *remain, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { /* * First, check to see if the address is in scratch space... */ if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base, mstate->dtms_scratch_size)) { DTRACE_RANGE_REMAIN(remain, addr, mstate->dtms_scratch_base, mstate->dtms_scratch_size); return (1); } /* * Now check to see if it's a dynamic variable. 
This check will pick * up both thread-local variables and any global dynamically-allocated * variables. */ if (DTRACE_INRANGE(addr, sz, vstate->dtvs_dynvars.dtds_base, vstate->dtvs_dynvars.dtds_size)) { dtrace_dstate_t *dstate = &vstate->dtvs_dynvars; uintptr_t base = (uintptr_t)dstate->dtds_base + (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t)); uintptr_t chunkoffs; dtrace_dynvar_t *dvar; /* * Before we assume that we can store here, we need to make * sure that it isn't in our metadata -- storing to our * dynamic variable metadata would corrupt our state. For * the range to not include any dynamic variable metadata, * it must: * * (1) Start above the hash table that is at the base of * the dynamic variable space * * (2) Have a starting chunk offset that is beyond the * dtrace_dynvar_t that is at the base of every chunk * * (3) Not span a chunk boundary * * (4) Not be in the tuple space of a dynamic variable * */ if (addr < base) return (0); chunkoffs = (addr - base) % dstate->dtds_chunksize; if (chunkoffs < sizeof (dtrace_dynvar_t)) return (0); if (chunkoffs + sz > dstate->dtds_chunksize) return (0); dvar = (dtrace_dynvar_t *)((uintptr_t)addr - chunkoffs); if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) return (0); if (chunkoffs < sizeof (dtrace_dynvar_t) + ((dvar->dtdv_tuple.dtt_nkeys - 1) * sizeof (dtrace_key_t))) return (0); DTRACE_RANGE_REMAIN(remain, addr, dvar, dstate->dtds_chunksize); return (1); } /* * Finally, check the static local and global variables. These checks * take the longest, so we perform them last. */ if (dtrace_canstore_statvar(addr, sz, remain, vstate->dtvs_locals, vstate->dtvs_nlocals)) return (1); if (dtrace_canstore_statvar(addr, sz, remain, vstate->dtvs_globals, vstate->dtvs_nglobals)) return (1); return (0); } /* * Convenience routine to check to see if the address is within a memory * region in which a load may be issued given the user's privilege level; * if not, it sets the appropriate error flags and loads 'addr' into the * illegal value slot. * * DTrace subroutines (DIF_SUBR_*) should use this helper to implement * appropriate memory access protection. */ static int dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { return (dtrace_canload_remains(addr, sz, NULL, mstate, vstate)); } /* * Implementation of dtrace_canload which communicates the uppoer bound of the * allowed memory region. */ static int dtrace_canload_remains(uint64_t addr, size_t sz, size_t *remain, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { volatile uintptr_t *illval = &cpu_core[curcpu].cpuc_dtrace_illval; file_t *fp; /* * If we hold the privilege to read from kernel memory, then * everything is readable. */ if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { DTRACE_RANGE_REMAIN(remain, addr, addr, sz); return (1); } /* * You can obviously read that which you can store. */ if (dtrace_canstore_remains(addr, sz, remain, mstate, vstate)) return (1); /* * We're allowed to read from our own string table. */ if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab, mstate->dtms_difo->dtdo_strlen)) { DTRACE_RANGE_REMAIN(remain, addr, mstate->dtms_difo->dtdo_strtab, mstate->dtms_difo->dtdo_strlen); return (1); } if (vstate->dtvs_state != NULL && dtrace_priv_proc(vstate->dtvs_state)) { proc_t *p; /* * When we have privileges to the current process, there are * several context-related kernel structures that are safe to * read, even absent the privilege to read from kernel memory. 
* These reads are safe because these structures contain only * state that (1) we're permitted to read, (2) is harmless or * (3) contains pointers to additional kernel state that we're * not permitted to read (and as such, do not present an * opportunity for privilege escalation). Finally (and * critically), because of the nature of their relation with * the current thread context, the memory associated with these * structures cannot change over the duration of probe context, * and it is therefore impossible for this memory to be * deallocated and reallocated as something else while it's * being operated upon. */ if (DTRACE_INRANGE(addr, sz, curthread, sizeof (kthread_t))) { DTRACE_RANGE_REMAIN(remain, addr, curthread, sizeof (kthread_t)); return (1); } if ((p = curthread->t_procp) != NULL && DTRACE_INRANGE(addr, sz, curthread->t_procp, sizeof (proc_t))) { DTRACE_RANGE_REMAIN(remain, addr, curthread->t_procp, sizeof (proc_t)); return (1); } if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz, curthread->t_cred, sizeof (cred_t))) { DTRACE_RANGE_REMAIN(remain, addr, curthread->t_cred, sizeof (cred_t)); return (1); } #ifdef illumos if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz, &(p->p_pidp->pid_id), sizeof (pid_t))) { DTRACE_RANGE_REMAIN(remain, addr, &(p->p_pidp->pid_id), sizeof (pid_t)); return (1); } if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz, curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) { DTRACE_RANGE_REMAIN(remain, addr, curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread)); return (1); } #endif } if ((fp = mstate->dtms_getf) != NULL) { uintptr_t psz = sizeof (void *); vnode_t *vp; vnodeops_t *op; /* * When getf() returns a file_t, the enabling is implicitly * granted the (transient) right to read the returned file_t * as well as the v_path and v_op->vnop_name of the underlying * vnode. These accesses are allowed after a successful * getf() because the members that they refer to cannot change * once set -- and the barrier logic in the kernel's closef() * path assures that the file_t and its referenced vode_t * cannot themselves be stale (that is, it impossible for * either dtms_getf itself or its f_vnode member to reference * freed memory). */ if (DTRACE_INRANGE(addr, sz, fp, sizeof (file_t))) { DTRACE_RANGE_REMAIN(remain, addr, fp, sizeof (file_t)); return (1); } if ((vp = fp->f_vnode) != NULL) { size_t slen; #ifdef illumos if (DTRACE_INRANGE(addr, sz, &vp->v_path, psz)) { DTRACE_RANGE_REMAIN(remain, addr, &vp->v_path, psz); return (1); } slen = strlen(vp->v_path) + 1; if (DTRACE_INRANGE(addr, sz, vp->v_path, slen)) { DTRACE_RANGE_REMAIN(remain, addr, vp->v_path, slen); return (1); } #endif if (DTRACE_INRANGE(addr, sz, &vp->v_op, psz)) { DTRACE_RANGE_REMAIN(remain, addr, &vp->v_op, psz); return (1); } #ifdef illumos if ((op = vp->v_op) != NULL && DTRACE_INRANGE(addr, sz, &op->vnop_name, psz)) { DTRACE_RANGE_REMAIN(remain, addr, &op->vnop_name, psz); return (1); } if (op != NULL && op->vnop_name != NULL && DTRACE_INRANGE(addr, sz, op->vnop_name, (slen = strlen(op->vnop_name) + 1))) { DTRACE_RANGE_REMAIN(remain, addr, op->vnop_name, slen); return (1); } #endif } } DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV); *illval = addr; return (0); } /* * Convenience routine to check to see if a given string is within a memory * region in which a load may be issued given the user's privilege level; * this exists so that we don't need to issue unnecessary dtrace_strlen() * calls in the event that the user has all privileges. 
*/ static int dtrace_strcanload(uint64_t addr, size_t sz, size_t *remain, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { size_t rsize; /* * If we hold the privilege to read from kernel memory, then * everything is readable. */ if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { DTRACE_RANGE_REMAIN(remain, addr, addr, sz); return (1); } /* * Even if the caller is uninterested in querying the remaining valid * range, it is required to ensure that the access is allowed. */ if (remain == NULL) { remain = &rsize; } if (dtrace_canload_remains(addr, 0, remain, mstate, vstate)) { size_t strsz; /* * Perform the strlen after determining the length of the * memory region which is accessible. This prevents timing * information from being used to find NULs in memory which is * not accessible to the caller. */ strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, MIN(sz, *remain)); if (strsz <= *remain) { return (1); } } return (0); } /* * Convenience routine to check to see if a given variable is within a memory * region in which a load may be issued given the user's privilege level. */ static int dtrace_vcanload(void *src, dtrace_diftype_t *type, size_t *remain, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { size_t sz; ASSERT(type->dtdt_flags & DIF_TF_BYREF); /* * Calculate the max size before performing any checks since even * DTRACE_ACCESS_KERNEL-credentialed callers expect that this function * return the max length via 'remain'. */ if (type->dtdt_kind == DIF_TYPE_STRING) { dtrace_state_t *state = vstate->dtvs_state; if (state != NULL) { sz = state->dts_options[DTRACEOPT_STRSIZE]; } else { /* * In helper context, we have a NULL state; fall back * to using the system-wide default for the string size * in this case. */ sz = dtrace_strsize_default; } } else { sz = type->dtdt_size; } /* * If we hold the privilege to read from kernel memory, then * everything is readable. */ if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { DTRACE_RANGE_REMAIN(remain, (uintptr_t)src, src, sz); return (1); } if (type->dtdt_kind == DIF_TYPE_STRING) { return (dtrace_strcanload((uintptr_t)src, sz, remain, mstate, vstate)); } return (dtrace_canload_remains((uintptr_t)src, sz, remain, mstate, vstate)); } /* * Convert a string to a signed integer using safe loads. * * NOTE: This function uses various macros from strtolctype.h to manipulate * digit values, etc -- these have all been checked to ensure they make * no additional function calls. */ static int64_t dtrace_strtoll(char *input, int base, size_t limit) { uintptr_t pos = (uintptr_t)input; int64_t val = 0; int x; boolean_t neg = B_FALSE; char c, cc, ccc; uintptr_t end = pos + limit; /* * Consume any whitespace preceding digits. */ while ((c = dtrace_load8(pos)) == ' ' || c == '\t') pos++; /* * Handle an explicit sign if one is present. */ if (c == '-' || c == '+') { if (c == '-') neg = B_TRUE; c = dtrace_load8(++pos); } /* * Check for an explicit hexadecimal prefix ("0x" or "0X") and skip it * if present. */ if (base == 16 && c == '0' && ((cc = dtrace_load8(pos + 1)) == 'x' || cc == 'X') && isxdigit(ccc = dtrace_load8(pos + 2))) { pos += 2; c = ccc; } /* * Read in contiguous digits until the first non-digit character. */ for (; pos < end && c != '\0' && lisalnum(c) && (x = DIGIT(c)) < base; c = dtrace_load8(++pos)) val = val * base + x; return (neg ? -val : val); } /* * Compare two strings using safe loads. 
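 */

/*
 * The clamping pattern in dtrace_strcanload() above can be shown in
 * isolation: bound the scan by the readable remainder first, then require
 * the terminating NUL to fall inside it, so that bytes outside the
 * permitted window are never examined. A user-space sketch (the names are
 * illustrative, not from this file):
 */
#include <stddef.h>

static int
str_fits(const char *s, size_t strsize, size_t remain)
{
	size_t lim = strsize < remain ? strsize : remain;
	size_t len;

	/* Bounded scan: never look past the readable window. */
	for (len = 0; len < lim && s[len] != '\0'; len++)
		;

	/* The string, including its NUL, must lie within 'remain'. */
	return (len < lim && len + 1 <= remain);
}

/*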
*/ static int dtrace_strncmp(char *s1, char *s2, size_t limit) { uint8_t c1, c2; volatile uint16_t *flags; if (s1 == s2 || limit == 0) return (0); flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; do { if (s1 == NULL) { c1 = '\0'; } else { c1 = dtrace_load8((uintptr_t)s1++); } if (s2 == NULL) { c2 = '\0'; } else { c2 = dtrace_load8((uintptr_t)s2++); } if (c1 != c2) return (c1 - c2); } while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT)); return (0); } /* * Compute strlen(s) for a string using safe memory accesses. The additional * lim parameter is used to specify a maximum length to ensure completion. */ static size_t dtrace_strlen(const char *s, size_t lim) { uint_t len; for (len = 0; len != lim; len++) { if (dtrace_load8((uintptr_t)s++) == '\0') break; } return (len); } /* * Check if an address falls within a toxic region. */ static int dtrace_istoxic(uintptr_t kaddr, size_t size) { uintptr_t taddr, tsize; int i; for (i = 0; i < dtrace_toxranges; i++) { taddr = dtrace_toxrange[i].dtt_base; tsize = dtrace_toxrange[i].dtt_limit - taddr; if (kaddr - taddr < tsize) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = kaddr; return (1); } if (taddr - kaddr < size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = taddr; return (1); } } return (0); } /* * Copy src to dst using safe memory accesses. The src is assumed to be unsafe * memory specified by the DIF program. The dst is assumed to be safe memory * that we can store to directly because it is managed by DTrace. As with * standard bcopy, overlapping copies are handled properly. */ static void dtrace_bcopy(const void *src, void *dst, size_t len) { if (len != 0) { uint8_t *s1 = dst; const uint8_t *s2 = src; if (s1 <= s2) { do { *s1++ = dtrace_load8((uintptr_t)s2++); } while (--len != 0); } else { s2 += len; s1 += len; do { *--s1 = dtrace_load8((uintptr_t)--s2); } while (--len != 0); } } } /* * Copy src to dst using safe memory accesses, up to either the specified * length, or the point that a nul byte is encountered. The src is assumed to * be unsafe memory specified by the DIF program. The dst is assumed to be * safe memory that we can store to directly because it is managed by DTrace. * Unlike dtrace_bcopy(), overlapping regions are not handled. */ static void dtrace_strcpy(const void *src, void *dst, size_t len) { if (len != 0) { uint8_t *s1 = dst, c; const uint8_t *s2 = src; do { *s1++ = c = dtrace_load8((uintptr_t)s2++); } while (--len != 0 && c != '\0'); } } /* * Copy src to dst, deriving the size and type from the specified (BYREF) * variable type. The src is assumed to be unsafe memory specified by the DIF * program. The dst is assumed to be DTrace variable memory that is of the * specified type; we assume that we can store to directly. */ static void dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type, size_t limit) { ASSERT(type->dtdt_flags & DIF_TF_BYREF); if (type->dtdt_kind == DIF_TYPE_STRING) { dtrace_strcpy(src, dst, MIN(type->dtdt_size, limit)); } else { dtrace_bcopy(src, dst, MIN(type->dtdt_size, limit)); } } /* * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be * unsafe memory specified by the DIF program. The s2 data is assumed to be * safe memory that we can access directly because it is managed by DTrace.
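 */

/*
 * dtrace_istoxic() above leans on a compact unsigned idiom: "a - b < bsz"
 * is true exactly when a falls inside [b, b + bsz), with wraparound
 * performing the lower-bound test for free. A small user-space sketch of
 * the resulting two-sided overlap test (the names are illustrative):
 */
#include <stdint.h>
#include <stddef.h>

/* Does [a, a + asz) overlap [b, b + bsz)? */
static int
ranges_overlap(uintptr_t a, size_t asz, uintptr_t b, size_t bsz)
{
	return (a - b < bsz ||	/* a starts inside [b, b + bsz) */
	    b - a < asz);	/* b starts inside [a, a + asz) */
}

/*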
*/ static int dtrace_bcmp(const void *s1, const void *s2, size_t len) { volatile uint16_t *flags; flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; if (s1 == s2) return (0); if (s1 == NULL || s2 == NULL) return (1); if (s1 != s2 && len != 0) { const uint8_t *ps1 = s1; const uint8_t *ps2 = s2; do { if (dtrace_load8((uintptr_t)ps1++) != *ps2++) return (1); } while (--len != 0 && !(*flags & CPU_DTRACE_FAULT)); } return (0); } /* * Zero the specified region using a simple byte-by-byte loop. Note that this * is for safe DTrace-managed memory only. */ static void dtrace_bzero(void *dst, size_t len) { uchar_t *cp; for (cp = dst; len != 0; len--) *cp++ = 0; } static void dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum) { uint64_t result[2]; result[0] = addend1[0] + addend2[0]; result[1] = addend1[1] + addend2[1] + (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0); sum[0] = result[0]; sum[1] = result[1]; } /* * Shift the 128-bit value in a by b. If b is positive, shift left. * If b is negative, shift right. */ static void dtrace_shift_128(uint64_t *a, int b) { uint64_t mask; if (b == 0) return; if (b < 0) { b = -b; if (b >= 64) { a[0] = a[1] >> (b - 64); a[1] = 0; } else { a[0] >>= b; mask = 1LL << (64 - b); mask -= 1; a[0] |= ((a[1] & mask) << (64 - b)); a[1] >>= b; } } else { if (b >= 64) { a[1] = a[0] << (b - 64); a[0] = 0; } else { a[1] <<= b; mask = a[0] >> (64 - b); a[1] |= mask; a[0] <<= b; } } } /* * The basic idea is to break the 2 64-bit values into 4 32-bit values, * use native multiplication on those, and then re-combine into the * resulting 128-bit value. * * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) = * hi1 * hi2 << 64 + * hi1 * lo2 << 32 + * hi2 * lo1 << 32 + * lo1 * lo2 */ static void dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product) { uint64_t hi1, hi2, lo1, lo2; uint64_t tmp[2]; hi1 = factor1 >> 32; hi2 = factor2 >> 32; lo1 = factor1 & DT_MASK_LO; lo2 = factor2 & DT_MASK_LO; product[0] = lo1 * lo2; product[1] = hi1 * hi2; tmp[0] = hi1 * lo2; tmp[1] = 0; dtrace_shift_128(tmp, 32); dtrace_add_128(product, tmp, product); tmp[0] = hi2 * lo1; tmp[1] = 0; dtrace_shift_128(tmp, 32); dtrace_add_128(product, tmp, product); } /* * This privilege check should be used by actions and subroutines to * verify that the user credentials of the process that enabled the * invoking ECB match the target credentials */ static int dtrace_priv_proc_common_user(dtrace_state_t *state) { cred_t *cr, *s_cr = state->dts_cred.dcr_cred; /* * We should always have a non-NULL state cred here, since if cred * is null (anonymous tracing), we fast-path bypass this routine. */ ASSERT(s_cr != NULL); if ((cr = CRED()) != NULL && s_cr->cr_uid == cr->cr_uid && s_cr->cr_uid == cr->cr_ruid && s_cr->cr_uid == cr->cr_suid && s_cr->cr_gid == cr->cr_gid && s_cr->cr_gid == cr->cr_rgid && s_cr->cr_gid == cr->cr_sgid) return (1); return (0); } /* * This privilege check should be used by actions and subroutines to * verify that the zone of the process that enabled the invoking ECB * matches the target credentials */ static int dtrace_priv_proc_common_zone(dtrace_state_t *state) { #ifdef illumos cred_t *cr, *s_cr = state->dts_cred.dcr_cred; /* * We should always have a non-NULL state cred here, since if cred * is null (anonymous tracing), we fast-path bypass this routine. 
*/ ASSERT(s_cr != NULL); if ((cr = CRED()) != NULL && s_cr->cr_zone == cr->cr_zone) return (1); return (0); #else return (1); #endif } /* * This privilege check should be used by actions and subroutines to * verify that the process has not setuid or changed credentials. */ static int dtrace_priv_proc_common_nocd(void) { proc_t *proc; if ((proc = ttoproc(curthread)) != NULL && !(proc->p_flag & SNOCD)) return (1); return (0); } static int dtrace_priv_proc_destructive(dtrace_state_t *state) { int action = state->dts_cred.dcr_action; if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) && dtrace_priv_proc_common_zone(state) == 0) goto bad; if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) && dtrace_priv_proc_common_user(state) == 0) goto bad; if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) && dtrace_priv_proc_common_nocd() == 0) goto bad; return (1); bad: cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; return (0); } static int dtrace_priv_proc_control(dtrace_state_t *state) { if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL) return (1); if (dtrace_priv_proc_common_zone(state) && dtrace_priv_proc_common_user(state) && dtrace_priv_proc_common_nocd()) return (1); cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; return (0); } static int dtrace_priv_proc(dtrace_state_t *state) { if (state->dts_cred.dcr_action & DTRACE_CRA_PROC) return (1); cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; return (0); } static int dtrace_priv_kernel(dtrace_state_t *state) { if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL) return (1); cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV; return (0); } static int dtrace_priv_kernel_destructive(dtrace_state_t *state) { if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE) return (1); cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV; return (0); } /* * Determine if the dte_cond of the specified ECB allows for processing of * the current probe to continue. Note that this routine may allow continued * processing, but with access(es) stripped from the mstate's dtms_access * field. */ static int dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate, dtrace_ecb_t *ecb) { dtrace_probe_t *probe = ecb->dte_probe; dtrace_provider_t *prov = probe->dtpr_provider; dtrace_pops_t *pops = &prov->dtpv_pops; int mode = DTRACE_MODE_NOPRIV_DROP; ASSERT(ecb->dte_cond); #ifdef illumos if (pops->dtps_mode != NULL) { mode = pops->dtps_mode(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); ASSERT((mode & DTRACE_MODE_USER) || (mode & DTRACE_MODE_KERNEL)); ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) || (mode & DTRACE_MODE_NOPRIV_DROP)); } /* * If the dte_cond bits indicate that this consumer is only allowed to * see user-mode firings of this probe, call the provider's dtps_mode() * entry point to check that the probe was fired while in a user * context. If that's not the case, use the policy specified by the * provider to determine if we drop the probe or merely restrict * operation. */ if (ecb->dte_cond & DTRACE_COND_USERMODE) { ASSERT(mode != DTRACE_MODE_NOPRIV_DROP); if (!(mode & DTRACE_MODE_USER)) { if (mode & DTRACE_MODE_NOPRIV_DROP) return (0); mstate->dtms_access &= ~DTRACE_ACCESS_ARGS; } } #endif /* * This is more subtle than it looks. We have to be absolutely certain * that CRED() isn't going to change out from under us so it's only * legit to examine that structure if we're in constrained situations. 
* Currently, the only times we'll do this check is if a non-super-user * has enabled the profile or syscall providers -- providers that * allow visibility of all processes. For the profile case, the check * above will ensure that we're examining a user context. */ if (ecb->dte_cond & DTRACE_COND_OWNER) { cred_t *cr; cred_t *s_cr = state->dts_cred.dcr_cred; proc_t *proc; ASSERT(s_cr != NULL); if ((cr = CRED()) == NULL || s_cr->cr_uid != cr->cr_uid || s_cr->cr_uid != cr->cr_ruid || s_cr->cr_uid != cr->cr_suid || s_cr->cr_gid != cr->cr_gid || s_cr->cr_gid != cr->cr_rgid || s_cr->cr_gid != cr->cr_sgid || (proc = ttoproc(curthread)) == NULL || (proc->p_flag & SNOCD)) { if (mode & DTRACE_MODE_NOPRIV_DROP) return (0); #ifdef illumos mstate->dtms_access &= ~DTRACE_ACCESS_PROC; #endif } } #ifdef illumos /* * If our dte_cond is set to DTRACE_COND_ZONEOWNER and we are not * in our zone, check to see if our mode policy is to restrict rather * than to drop; if to restrict, strip away both DTRACE_ACCESS_PROC * and DTRACE_ACCESS_ARGS */ if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) { cred_t *cr; cred_t *s_cr = state->dts_cred.dcr_cred; ASSERT(s_cr != NULL); if ((cr = CRED()) == NULL || s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) { if (mode & DTRACE_MODE_NOPRIV_DROP) return (0); mstate->dtms_access &= ~(DTRACE_ACCESS_PROC | DTRACE_ACCESS_ARGS); } } #endif return (1); } /* * Note: not called from probe context. This function is called * asynchronously (and at a regular interval) from outside of probe context to * clean the dirty dynamic variable lists on all CPUs. Dynamic variable * cleaning is explained in detail in <sys/dtrace_impl.h>. */ void dtrace_dynvar_clean(dtrace_dstate_t *dstate) { dtrace_dynvar_t *dirty; dtrace_dstate_percpu_t *dcpu; dtrace_dynvar_t **rinsep; int i, j, work = 0; for (i = 0; i < NCPU; i++) { dcpu = &dstate->dtds_percpu[i]; rinsep = &dcpu->dtdsc_rinsing; /* * If the dirty list is NULL, there is no dirty work to do. */ if (dcpu->dtdsc_dirty == NULL) continue; if (dcpu->dtdsc_rinsing != NULL) { /* * If the rinsing list is non-NULL, then it is because * this CPU was selected to accept another CPU's * dirty list -- and since that time, dirty buffers * have accumulated. This is a highly unlikely * condition, but we choose to ignore the dirty * buffers -- they'll be picked up in a future cleanse. */ continue; } if (dcpu->dtdsc_clean != NULL) { /* * If the clean list is non-NULL, then we're in a * situation where a CPU has done deallocations (we * have a non-NULL dirty list) but no allocations (we * also have a non-NULL clean list). We can't simply * move the dirty list into the clean list on this * CPU, yet we also don't want to allow this condition * to persist, lest a short clean list prevent a * massive dirty list from being cleaned (which in * turn could lead to otherwise avoidable dynamic * drops). To deal with this, we look for some CPU * with a NULL clean list, NULL dirty list, and NULL * rinsing list -- and then we borrow this CPU to * rinse our dirty list. */ for (j = 0; j < NCPU; j++) { dtrace_dstate_percpu_t *rinser; rinser = &dstate->dtds_percpu[j]; if (rinser->dtdsc_rinsing != NULL) continue; if (rinser->dtdsc_dirty != NULL) continue; if (rinser->dtdsc_clean != NULL) continue; rinsep = &rinser->dtdsc_rinsing; break; } if (j == NCPU) { /* * We were unable to find another CPU that * could accept this dirty list -- we are * therefore unable to clean it now. */ dtrace_dynvar_failclean++; continue; } } work = 1; /* * Atomically move the dirty list aside.
*/ do { dirty = dcpu->dtdsc_dirty; /* * Before we zap the dirty list, set the rinsing list. * (This allows for a potential assertion in * dtrace_dynvar(): if a free dynamic variable appears * on a hash chain, either the dirty list or the * rinsing list for some CPU must be non-NULL.) */ *rinsep = dirty; dtrace_membar_producer(); } while (dtrace_casptr(&dcpu->dtdsc_dirty, dirty, NULL) != dirty); } if (!work) { /* * We have no work to do; we can simply return. */ return; } dtrace_sync(); for (i = 0; i < NCPU; i++) { dcpu = &dstate->dtds_percpu[i]; if (dcpu->dtdsc_rinsing == NULL) continue; /* * We are now guaranteed that no hash chain contains a pointer * into this dirty list; we can make it clean. */ ASSERT(dcpu->dtdsc_clean == NULL); dcpu->dtdsc_clean = dcpu->dtdsc_rinsing; dcpu->dtdsc_rinsing = NULL; } /* * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make * sure that all CPUs have seen all of the dtdsc_clean pointers. * This prevents a race whereby a CPU incorrectly decides that * the state should be something other than DTRACE_DSTATE_CLEAN * after dtrace_dynvar_clean() has completed. */ dtrace_sync(); dstate->dtds_state = DTRACE_DSTATE_CLEAN; } /* * Depending on the value of the op parameter, this function looks up, * allocates, or deallocates an arbitrarily-keyed dynamic variable. If an * allocation is requested, this function will return a pointer to a * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no * variable can be allocated. If NULL is returned, the appropriate counter * will be incremented. */ dtrace_dynvar_t * dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { uint64_t hashval = DTRACE_DYNHASH_VALID; dtrace_dynhash_t *hash = dstate->dtds_hash; dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL; processorid_t me = curcpu, cpu = me; dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me]; size_t bucket, ksize; size_t chunksize = dstate->dtds_chunksize; uintptr_t kdata, lock, nstate; uint_t i; ASSERT(nkeys != 0); /* * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time" * algorithm. For the by-value portions, we perform the algorithm in * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a * bit, and seems to have only a minute effect on distribution. For * the by-reference data, we perform "One-at-a-time" iterating (safely) * over each referenced byte. It's painful to do this, but it's much * better than pathological hash distribution. The efficacy of the * hashing algorithm (and a comparison with other algorithms) may be * found by running the ::dtrace_dynstat MDB dcmd. */ for (i = 0; i < nkeys; i++) { if (key[i].dttk_size == 0) { uint64_t val = key[i].dttk_value; hashval += (val >> 48) & 0xffff; hashval += (hashval << 10); hashval ^= (hashval >> 6); hashval += (val >> 32) & 0xffff; hashval += (hashval << 10); hashval ^= (hashval >> 6); hashval += (val >> 16) & 0xffff; hashval += (hashval << 10); hashval ^= (hashval >> 6); hashval += val & 0xffff; hashval += (hashval << 10); hashval ^= (hashval >> 6); } else { /* * This is incredibly painful, but it beats the hell * out of the alternative.
*/ uint64_t j, size = key[i].dttk_size; uintptr_t base = (uintptr_t)key[i].dttk_value; if (!dtrace_canload(base, size, mstate, vstate)) break; for (j = 0; j < size; j++) { hashval += dtrace_load8(base + j); hashval += (hashval << 10); hashval ^= (hashval >> 6); } } } if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) return (NULL); hashval += (hashval << 3); hashval ^= (hashval >> 11); hashval += (hashval << 15); /* * There is a remote chance (ideally, 1 in 2^31) that our hashval * comes out to be one of our two sentinel hash values. If this * actually happens, we set the hashval to be a value known to be a * non-sentinel value. */ if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK) hashval = DTRACE_DYNHASH_VALID; /* * Yes, it's painful to do a divide here. If the cycle count becomes * important here, tricks can be pulled to reduce it. (However, it's * critical that hash collisions be kept to an absolute minimum; * they're much more painful than a divide.) It's better to have a * solution that generates few collisions and still keeps things * relatively simple. */ bucket = hashval % dstate->dtds_hashsize; if (op == DTRACE_DYNVAR_DEALLOC) { volatile uintptr_t *lockp = &hash[bucket].dtdh_lock; for (;;) { while ((lock = *lockp) & 1) continue; if (dtrace_casptr((volatile void *)lockp, (volatile void *)lock, (volatile void *)(lock + 1)) == (void *)lock) break; } dtrace_membar_producer(); } top: prev = NULL; lock = hash[bucket].dtdh_lock; dtrace_membar_consumer(); start = hash[bucket].dtdh_chain; ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK || start->dtdv_hashval != DTRACE_DYNHASH_FREE || op != DTRACE_DYNVAR_DEALLOC)); for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) { dtrace_tuple_t *dtuple = &dvar->dtdv_tuple; dtrace_key_t *dkey = &dtuple->dtt_key[0]; if (dvar->dtdv_hashval != hashval) { if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) { /* * We've reached the sink, and therefore the * end of the hash chain; we can kick out of * the loop knowing that we have seen a valid * snapshot of state. */ ASSERT(dvar->dtdv_next == NULL); ASSERT(dvar == &dtrace_dynhash_sink); break; } if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) { /* * We've gone off the rails: somewhere along * the line, one of the members of this hash * chain was deleted. Note that we could also * detect this by simply letting this loop run * to completion, as we would eventually hit * the end of the dirty list. However, we * want to avoid running the length of the * dirty list unnecessarily (it might be quite * long), so we catch this as early as * possible by detecting the hash marker. In * this case, we simply set dvar to NULL and * break; the conditional after the loop will * send us back to top. 
*/ dvar = NULL; break; } goto next; } if (dtuple->dtt_nkeys != nkeys) goto next; for (i = 0; i < nkeys; i++, dkey++) { if (dkey->dttk_size != key[i].dttk_size) goto next; /* size or type mismatch */ if (dkey->dttk_size != 0) { if (dtrace_bcmp( (void *)(uintptr_t)key[i].dttk_value, (void *)(uintptr_t)dkey->dttk_value, dkey->dttk_size)) goto next; } else { if (dkey->dttk_value != key[i].dttk_value) goto next; } } if (op != DTRACE_DYNVAR_DEALLOC) return (dvar); ASSERT(dvar->dtdv_next == NULL || dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE); if (prev != NULL) { ASSERT(hash[bucket].dtdh_chain != dvar); ASSERT(start != dvar); ASSERT(prev->dtdv_next == dvar); prev->dtdv_next = dvar->dtdv_next; } else { if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar->dtdv_next) != start) { /* * We have failed to atomically swing the * hash table head pointer, presumably because * of a conflicting allocation on another CPU. * We need to reread the hash chain and try * again. */ goto top; } } dtrace_membar_producer(); /* * Now set the hash value to indicate that it's free. */ ASSERT(hash[bucket].dtdh_chain != dvar); dvar->dtdv_hashval = DTRACE_DYNHASH_FREE; dtrace_membar_producer(); /* * Set the next pointer to point at the dirty list, and * atomically swing the dirty pointer to the newly freed dvar. */ do { next = dcpu->dtdsc_dirty; dvar->dtdv_next = next; } while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next); /* * Finally, unlock this hash bucket. */ ASSERT(hash[bucket].dtdh_lock == lock); ASSERT(lock & 1); hash[bucket].dtdh_lock++; return (NULL); next: prev = dvar; continue; } if (dvar == NULL) { /* * If dvar is NULL, it is because we went off the rails: * one of the elements that we traversed in the hash chain * was deleted while we were traversing it. In this case, * we assert that we aren't doing a dealloc (deallocs lock * the hash bucket to prevent themselves from racing with * one another), and retry the hash chain traversal. */ ASSERT(op != DTRACE_DYNVAR_DEALLOC); goto top; } if (op != DTRACE_DYNVAR_ALLOC) { /* * If we are not to allocate a new variable, we want to * return NULL now. Before we return, check that the value * of the lock word hasn't changed. If it has, we may have * seen an inconsistent snapshot. */ if (op == DTRACE_DYNVAR_NOALLOC) { if (hash[bucket].dtdh_lock != lock) goto top; } else { ASSERT(op == DTRACE_DYNVAR_DEALLOC); ASSERT(hash[bucket].dtdh_lock == lock); ASSERT(lock & 1); hash[bucket].dtdh_lock++; } return (NULL); } /* * We need to allocate a new dynamic variable. The size we need is the * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the * size of any auxiliary key data (rounded up to 8-byte alignment) plus * the size of any referred-to data (dsize). We then round the final * size up to the chunksize for allocation. */ for (ksize = 0, i = 0; i < nkeys; i++) ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t)); /* * This should be pretty much impossible, but could happen if, say, * strange DIF specified the tuple. Ideally, this should be an * assertion and not an error condition -- but that requires that the * chunksize calculation in dtrace_difo_chunksize() be absolutely * bullet-proof. (That is, it must not be able to be fooled by * malicious DIF.) Given the lack of backwards branches in DIF, * solving this would presumably not amount to solving the Halting * Problem -- but it still seems awfully hard. 
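 *
 * To make the check below concrete with illustrative numbers (assumptions,
 * not values from this file): with nkeys = 2, one by-value key (which
 * contributes nothing to ksize) and one 16-byte by-reference key (ksize
 * rounded up to 16), and dsize = 32, the chunk must hold
 * sizeof (dtrace_dynvar_t) + 1 * sizeof (dtrace_key_t) + 16 + 32 bytes;
 * if that exceeds dtds_chunksize, the allocation is counted as a drop
 * rather than asserted on.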
*/ if (sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1) + ksize + dsize > chunksize) { dcpu->dtdsc_drops++; return (NULL); } nstate = DTRACE_DSTATE_EMPTY; do { retry: free = dcpu->dtdsc_free; if (free == NULL) { dtrace_dynvar_t *clean = dcpu->dtdsc_clean; void *rval; if (clean == NULL) { /* * We're out of dynamic variable space on * this CPU. Unless we have tried all CPUs, * we'll try to allocate from a different * CPU. */ switch (dstate->dtds_state) { case DTRACE_DSTATE_CLEAN: { void *sp = &dstate->dtds_state; if (++cpu >= NCPU) cpu = 0; if (dcpu->dtdsc_dirty != NULL && nstate == DTRACE_DSTATE_EMPTY) nstate = DTRACE_DSTATE_DIRTY; if (dcpu->dtdsc_rinsing != NULL) nstate = DTRACE_DSTATE_RINSING; dcpu = &dstate->dtds_percpu[cpu]; if (cpu != me) goto retry; (void) dtrace_cas32(sp, DTRACE_DSTATE_CLEAN, nstate); /* * To increment the correct bean * counter, take another lap. */ goto retry; } case DTRACE_DSTATE_DIRTY: dcpu->dtdsc_dirty_drops++; break; case DTRACE_DSTATE_RINSING: dcpu->dtdsc_rinsing_drops++; break; case DTRACE_DSTATE_EMPTY: dcpu->dtdsc_drops++; break; } DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP); return (NULL); } /* * The clean list appears to be non-empty. We want to * move the clean list to the free list; we start by * moving the clean pointer aside. */ if (dtrace_casptr(&dcpu->dtdsc_clean, clean, NULL) != clean) { /* * We are in one of two situations: * * (a) The clean list was switched to the * free list by another CPU. * * (b) The clean list was added to by the * cleansing cyclic. * * In either of these situations, we can * just reattempt the free list allocation. */ goto retry; } ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE); /* * Now we'll move the clean list to our free list. * It's impossible for this to fail: the only way * the free list can be updated is through this * code path, and only one CPU can own the clean list. * Thus, it would only be possible for this to fail if * this code were racing with dtrace_dynvar_clean(). * (That is, if dtrace_dynvar_clean() updated the clean * list, and we ended up racing to update the free * list.) This race is prevented by the dtrace_sync() * in dtrace_dynvar_clean() -- which flushes the * owners of the clean lists out before resetting * the clean lists. */ dcpu = &dstate->dtds_percpu[me]; rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean); ASSERT(rval == NULL); goto retry; } dvar = free; new_free = dvar->dtdv_next; } while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free); /* * We have now allocated a new chunk. We copy the tuple keys into the * tuple array and copy any referenced key data into the data space * following the tuple array. As we do this, we relocate dttk_value * in the final tuple to point to the key data address in the chunk. */ kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys]; dvar->dtdv_data = (void *)(kdata + ksize); dvar->dtdv_tuple.dtt_nkeys = nkeys; for (i = 0; i < nkeys; i++) { dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i]; size_t kesize = key[i].dttk_size; if (kesize != 0) { dtrace_bcopy( (const void *)(uintptr_t)key[i].dttk_value, (void *)kdata, kesize); dkey->dttk_value = kdata; kdata += P2ROUNDUP(kesize, sizeof (uint64_t)); } else { dkey->dttk_value = key[i].dttk_value; } dkey->dttk_size = kesize; } ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE); dvar->dtdv_hashval = hashval; dvar->dtdv_next = start; if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start) return (dvar); /* * The cas has failed. 
Either another CPU is adding an element to * this hash chain, or another CPU is deleting an element from this * hash chain. The simplest way to deal with both of these cases * (though not necessarily the most efficient) is to free our * allocated block and re-attempt it all. Note that the free is * to the dirty list and _not_ to the free list. This is to prevent * races with allocators, above. */ dvar->dtdv_hashval = DTRACE_DYNHASH_FREE; dtrace_membar_producer(); do { free = dcpu->dtdsc_dirty; dvar->dtdv_next = free; } while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free); goto top; } /*ARGSUSED*/ static void dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg) { if ((int64_t)nval < (int64_t)*oval) *oval = nval; } /*ARGSUSED*/ static void dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg) { if ((int64_t)nval > (int64_t)*oval) *oval = nval; } static void dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr) { int i, zero = DTRACE_QUANTIZE_ZEROBUCKET; int64_t val = (int64_t)nval; if (val < 0) { for (i = 0; i < zero; i++) { if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) { quanta[i] += incr; return; } } } else { for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) { if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) { quanta[i - 1] += incr; return; } } quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr; return; } ASSERT(0); } static void dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr) { uint64_t arg = *lquanta++; int32_t base = DTRACE_LQUANTIZE_BASE(arg); uint16_t step = DTRACE_LQUANTIZE_STEP(arg); uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg); int32_t val = (int32_t)nval, level; ASSERT(step != 0); ASSERT(levels != 0); if (val < base) { /* * This is an underflow. */ lquanta[0] += incr; return; } level = (val - base) / step; if (level < levels) { lquanta[level + 1] += incr; return; } /* * This is an overflow. */ lquanta[levels + 1] += incr; } static int dtrace_aggregate_llquantize_bucket(uint16_t factor, uint16_t low, uint16_t high, uint16_t nsteps, int64_t value) { int64_t this = 1, last, next; int base = 1, order; ASSERT(factor <= nsteps); ASSERT(nsteps % factor == 0); for (order = 0; order < low; order++) this *= factor; /* * If our value is less than our factor taken to the power of the * low order of magnitude, it goes into the zeroth bucket. */ if (value < (last = this)) return (0); for (this *= factor; order <= high; order++) { int nbuckets = this > nsteps ? nsteps : this; if ((next = this * factor) < this) { /* * We should not generally get log/linear quantizations * with a high magnitude that allows 64-bits to * overflow, but we nonetheless protect against this * by explicitly checking for overflow, and clamping * our value accordingly. */ value = this - 1; } if (value < this) { /* * If our value lies within this order of magnitude, * determine its position by taking the offset within * the order of magnitude, dividing by the bucket * width, and adding to our (accumulated) base. */ return (base + (value - last) / (this / nbuckets)); } base += nbuckets - (nbuckets / factor); last = this; this = next; } /* * Our value is greater than or equal to our factor taken to the * power of one plus the high magnitude -- return the top bucket. 
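 *
 * A worked example with illustrative parameters (not taken from this
 * file): with factor = 10, low = 0, high = 2 and nsteps = 10, bucket 0
 * holds values below 1; values 1-9 map to buckets 1-9 (width 1); values
 * 10-99 map to buckets 10-18 (width 10); values 100-999 map to buckets
 * 19-27 (width 100); and everything at or above 1000 lands in the top
 * bucket, 28. For instance, the value 47 maps to bucket
 * 10 + (47 - 10) / 10 = 13.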
*/ return (base); } static void dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr) { uint64_t arg = *llquanta++; uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg); uint16_t low = DTRACE_LLQUANTIZE_LOW(arg); uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg); uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg); llquanta[dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, nval)] += incr; } /*ARGSUSED*/ static void dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg) { data[0]++; data[1] += nval; } /*ARGSUSED*/ static void dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg) { int64_t snval = (int64_t)nval; uint64_t tmp[2]; data[0]++; data[1] += nval; /* * What we want to say here is: * * data[2] += nval * nval; * * But given that nval is 64-bit, we could easily overflow, so * we do this as 128-bit arithmetic. */ if (snval < 0) snval = -snval; dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp); dtrace_add_128(data + 2, tmp, data + 2); } /*ARGSUSED*/ static void dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg) { *oval = *oval + 1; } /*ARGSUSED*/ static void dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg) { *oval += nval; } /* * Aggregate given the tuple in the principal data buffer, and the aggregating * action denoted by the specified dtrace_aggregation_t. The aggregation * buffer is specified as the buf parameter. This routine does not return * failure; if there is no space in the aggregation buffer, the data will be * dropped, and a corresponding counter incremented. */ static void dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf, intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg) { dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec; uint32_t i, ndx, size, fsize; uint32_t align = sizeof (uint64_t) - 1; dtrace_aggbuffer_t *agb; dtrace_aggkey_t *key; uint32_t hashval = 0, limit, isstr; caddr_t tomax, data, kdata; dtrace_actkind_t action; dtrace_action_t *act; uintptr_t offs; if (buf == NULL) return; if (!agg->dtag_hasarg) { /* * Currently, only quantize() and lquantize() take additional * arguments, and they have the same semantics: an increment * value that defaults to 1 when not present. If additional * aggregating actions take arguments, the setting of the * default argument value will presumably have to become more * sophisticated... */ arg = 1; } action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION; size = rec->dtrd_offset - agg->dtag_base; fsize = size + rec->dtrd_size; ASSERT(dbuf->dtb_tomax != NULL); data = dbuf->dtb_tomax + offset + agg->dtag_base; if ((tomax = buf->dtb_tomax) == NULL) { dtrace_buffer_drop(buf); return; } /* * The metastructure is always at the bottom of the buffer. */ agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size - sizeof (dtrace_aggbuffer_t)); if (buf->dtb_offset == 0) { /* * We just kludge up approximately 1/8th of the size to be * buckets. If this guess ends up being routinely * off-the-mark, we may need to dynamically readjust this * based on past performance. */ uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t); if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) < (uintptr_t)tomax || hashsize == 0) { /* * We've been given a ludicrously small buffer; * increment our drop count and leave. */ dtrace_buffer_drop(buf); return; } /* * And now, a pathetic attempt to try to get an odd (or * perchance, a prime) hash size for better hash distribution.
*/ if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3)) hashsize -= DTRACE_AGGHASHSIZE_SLEW; agb->dtagb_hashsize = hashsize; agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb - agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *)); agb->dtagb_free = (uintptr_t)agb->dtagb_hash; for (i = 0; i < agb->dtagb_hashsize; i++) agb->dtagb_hash[i] = NULL; } ASSERT(agg->dtag_first != NULL); ASSERT(agg->dtag_first->dta_intuple); /* * Calculate the hash value based on the key. Note that we _don't_ * include the aggid in the hashing (but we will store it as part of * the key). The hashing algorithm is Bob Jenkins' "One-at-a-time" * algorithm: a simple, quick algorithm that has no known funnels, and * gets good distribution in practice. The efficacy of the hashing * algorithm (and a comparison with other algorithms) may be found by * running the ::dtrace_aggstat MDB dcmd. */ for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) { i = act->dta_rec.dtrd_offset - agg->dtag_base; limit = i + act->dta_rec.dtrd_size; ASSERT(limit <= size); isstr = DTRACEACT_ISSTRING(act); for (; i < limit; i++) { hashval += data[i]; hashval += (hashval << 10); hashval ^= (hashval >> 6); if (isstr && data[i] == '\0') break; } } hashval += (hashval << 3); hashval ^= (hashval >> 11); hashval += (hashval << 15); /* * Yes, the divide here is expensive -- but it's generally the least * of the performance issues given the amount of data that we iterate * over to compute hash values, compare data, etc. */ ndx = hashval % agb->dtagb_hashsize; for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) { ASSERT((caddr_t)key >= tomax); ASSERT((caddr_t)key < tomax + buf->dtb_size); if (hashval != key->dtak_hashval || key->dtak_size != size) continue; kdata = key->dtak_data; ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size); for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) { i = act->dta_rec.dtrd_offset - agg->dtag_base; limit = i + act->dta_rec.dtrd_size; ASSERT(limit <= size); isstr = DTRACEACT_ISSTRING(act); for (; i < limit; i++) { if (kdata[i] != data[i]) goto next; if (isstr && data[i] == '\0') break; } } if (action != key->dtak_action) { /* * We are aggregating on the same value in the same * aggregation with two different aggregating actions. * (This should have been picked up in the compiler, * so we may be dealing with errant or devious DIF.) * This is an error condition; we indicate as much, * and return. */ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return; } /* * This is a hit: we need to apply the aggregator to * the value at this key. */ agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg); return; next: continue; } /* * We didn't find it. We need to allocate some zero-filled space, * link it into the hash table appropriately, and apply the aggregator * to the (zero-filled) value. */ offs = buf->dtb_offset; while (offs & (align - 1)) offs += sizeof (uint32_t); /* * If we don't have enough room to both allocate a new key _and_ * its associated data, increment the drop count and return. */ if ((uintptr_t)tomax + offs + fsize > agb->dtagb_free - sizeof (dtrace_aggkey_t)) { dtrace_buffer_drop(buf); return; } /*CONSTCOND*/ ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1))); key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t)); agb->dtagb_free -= sizeof (dtrace_aggkey_t); key->dtak_data = kdata = tomax + offs; buf->dtb_offset = offs + fsize; /* * Now copy the data across. 
*/ *((dtrace_aggid_t *)kdata) = agg->dtag_id; for (i = sizeof (dtrace_aggid_t); i < size; i++) kdata[i] = data[i]; /* * Because strings are not zeroed out by default, we need to iterate * looking for actions that store strings, and we need to explicitly * pad these strings out with zeroes. */ for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) { int nul; if (!DTRACEACT_ISSTRING(act)) continue; i = act->dta_rec.dtrd_offset - agg->dtag_base; limit = i + act->dta_rec.dtrd_size; ASSERT(limit <= size); for (nul = 0; i < limit; i++) { if (nul) { kdata[i] = '\0'; continue; } if (data[i] != '\0') continue; nul = 1; } } for (i = size; i < fsize; i++) kdata[i] = 0; key->dtak_hashval = hashval; key->dtak_size = size; key->dtak_action = action; key->dtak_next = agb->dtagb_hash[ndx]; agb->dtagb_hash[ndx] = key; /* * Finally, apply the aggregator. */ *((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial; agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg); } /* * Given consumer state, this routine finds a speculation in the INACTIVE * state and transitions it into the ACTIVE state. If there is no speculation * in the INACTIVE state, 0 is returned. In this case, no error counter is * incremented -- it is up to the caller to take appropriate action. */ static int dtrace_speculation(dtrace_state_t *state) { int i = 0; dtrace_speculation_state_t current; uint32_t *stat = &state->dts_speculations_unavail, count; while (i < state->dts_nspeculations) { dtrace_speculation_t *spec = &state->dts_speculations[i]; current = spec->dtsp_state; if (current != DTRACESPEC_INACTIVE) { if (current == DTRACESPEC_COMMITTINGMANY || current == DTRACESPEC_COMMITTING || current == DTRACESPEC_DISCARDING) stat = &state->dts_speculations_busy; i++; continue; } if (dtrace_cas32((uint32_t *)&spec->dtsp_state, current, DTRACESPEC_ACTIVE) == current) return (i + 1); } /* * We couldn't find a speculation. If we found as much as a single * busy speculation buffer, we'll attribute this failure as "busy" * instead of "unavail". */ do { count = *stat; } while (dtrace_cas32(stat, count, count + 1) != count); return (0); } /* * This routine commits an active speculation. If the specified speculation * is not in a valid state to perform a commit(), this routine will silently do * nothing. The state of the specified speculation is transitioned according * to the state transition diagram outlined in <sys/dtrace_impl.h>. */ static void dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu, dtrace_specid_t which) { dtrace_speculation_t *spec; dtrace_buffer_t *src, *dest; uintptr_t daddr, saddr, dlimit, slimit; dtrace_speculation_state_t current, new = 0; intptr_t offs; uint64_t timestamp; if (which == 0) return; if (which > state->dts_nspeculations) { cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return; } spec = &state->dts_speculations[which - 1]; src = &spec->dtsp_buffer[cpu]; dest = &state->dts_buffer[cpu]; do { current = spec->dtsp_state; if (current == DTRACESPEC_COMMITTINGMANY) break; switch (current) { case DTRACESPEC_INACTIVE: case DTRACESPEC_DISCARDING: return; case DTRACESPEC_COMMITTING: /* * This is only possible if we are (a) commit()'ing * without having done a prior speculate() on this CPU * and (b) racing with another commit() on a different * CPU. There's nothing to do -- we just assert that * our offset is 0. */ ASSERT(src->dtb_offset == 0); return; case DTRACESPEC_ACTIVE: new = DTRACESPEC_COMMITTING; break; case DTRACESPEC_ACTIVEONE: /* * This speculation is active on one CPU.
If our * buffer offset is non-zero, we know that the one CPU * must be us. Otherwise, we are committing on a * different CPU from the speculate(), and we must * rely on being asynchronously cleaned. */ if (src->dtb_offset != 0) { new = DTRACESPEC_COMMITTING; break; } /*FALLTHROUGH*/ case DTRACESPEC_ACTIVEMANY: new = DTRACESPEC_COMMITTINGMANY; break; default: ASSERT(0); } } while (dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new) != current); /* * We have set the state to indicate that we are committing this * speculation. Now reserve the necessary space in the destination * buffer. */ if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset, sizeof (uint64_t), state, NULL)) < 0) { dtrace_buffer_drop(dest); goto out; } /* * We have sufficient space to copy the speculative buffer into the * primary buffer. First, modify the speculative buffer, filling * in the timestamp of all entries with the current time. The data * must have the commit() time rather than the time it was traced, * so that all entries in the primary buffer are in timestamp order. */ timestamp = dtrace_gethrtime(); saddr = (uintptr_t)src->dtb_tomax; slimit = saddr + src->dtb_offset; while (saddr < slimit) { size_t size; dtrace_rechdr_t *dtrh = (dtrace_rechdr_t *)saddr; if (dtrh->dtrh_epid == DTRACE_EPIDNONE) { saddr += sizeof (dtrace_epid_t); continue; } ASSERT3U(dtrh->dtrh_epid, <=, state->dts_necbs); size = state->dts_ecbs[dtrh->dtrh_epid - 1]->dte_size; ASSERT3U(saddr + size, <=, slimit); ASSERT3U(size, >=, sizeof (dtrace_rechdr_t)); ASSERT3U(DTRACE_RECORD_LOAD_TIMESTAMP(dtrh), ==, UINT64_MAX); DTRACE_RECORD_STORE_TIMESTAMP(dtrh, timestamp); saddr += size; } /* * Copy the buffer across. (Note that this is a * highly suboptimal bcopy(); in the unlikely event that this becomes * a serious performance issue, a high-performance DTrace-specific * bcopy() should obviously be invented.) */ daddr = (uintptr_t)dest->dtb_tomax + offs; dlimit = daddr + src->dtb_offset; saddr = (uintptr_t)src->dtb_tomax; /* * First, the aligned portion. */ while (dlimit - daddr >= sizeof (uint64_t)) { *((uint64_t *)daddr) = *((uint64_t *)saddr); daddr += sizeof (uint64_t); saddr += sizeof (uint64_t); } /* * Now any left-over bit... */ while (dlimit - daddr) *((uint8_t *)daddr++) = *((uint8_t *)saddr++); /* * Finally, commit the reserved space in the destination buffer. */ dest->dtb_offset = offs + src->dtb_offset; out: /* * If we're lucky enough to be the only active CPU on this speculation * buffer, we can just set the state back to DTRACESPEC_INACTIVE. */ if (current == DTRACESPEC_ACTIVE || (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) { uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state, DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE); ASSERT(rval == DTRACESPEC_COMMITTING); } src->dtb_offset = 0; src->dtb_xamot_drops += src->dtb_drops; src->dtb_drops = 0; } /* * This routine discards an active speculation. If the specified speculation * is not in a valid state to perform a discard(), this routine will silently * do nothing.
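 */

/*
 * The commit path above and the discard path below share the same
 * lock-free shape: read the state, compute a successor, and retry the
 * compare-and-swap until it lands (or until an observed state makes the
 * operation a no-op). A minimal user-space sketch of that shape in C11
 * atomics (the types and names are illustrative, not the kernel's):
 */
#include <stdatomic.h>

enum spec_state { SPEC_INACTIVE, SPEC_ACTIVE, SPEC_DISCARDING };

/* Transition ACTIVE -> DISCARDING; give up in any other state. */
static int
discard_transition(_Atomic int *statep)
{
	int cur;

	do {
		cur = atomic_load(statep);
		if (cur != SPEC_ACTIVE)
			return (0);	/* nothing to discard */
	} while (!atomic_compare_exchange_weak(statep, &cur, SPEC_DISCARDING));

	return (1);
}

/*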
The state of the specified speculation is transitioned * according to the state transition diagram outlined in * <sys/dtrace_impl.h>. */ static void dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu, dtrace_specid_t which) { dtrace_speculation_t *spec; dtrace_speculation_state_t current, new = 0; dtrace_buffer_t *buf; if (which == 0) return; if (which > state->dts_nspeculations) { cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return; } spec = &state->dts_speculations[which - 1]; buf = &spec->dtsp_buffer[cpu]; do { current = spec->dtsp_state; switch (current) { case DTRACESPEC_INACTIVE: case DTRACESPEC_COMMITTINGMANY: case DTRACESPEC_COMMITTING: case DTRACESPEC_DISCARDING: return; case DTRACESPEC_ACTIVE: case DTRACESPEC_ACTIVEMANY: new = DTRACESPEC_DISCARDING; break; case DTRACESPEC_ACTIVEONE: if (buf->dtb_offset != 0) { new = DTRACESPEC_INACTIVE; } else { new = DTRACESPEC_DISCARDING; } break; default: ASSERT(0); } } while (dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new) != current); buf->dtb_offset = 0; buf->dtb_drops = 0; } /* * Note: not called from probe context. This function is called * asynchronously from cross call context to clean any speculations that are * in the COMMITTINGMANY or DISCARDING states. These speculations may not be * transitioned back to the INACTIVE state until all CPUs have cleaned the * speculation. */ static void dtrace_speculation_clean_here(dtrace_state_t *state) { dtrace_icookie_t cookie; processorid_t cpu = curcpu; dtrace_buffer_t *dest = &state->dts_buffer[cpu]; dtrace_specid_t i; cookie = dtrace_interrupt_disable(); if (dest->dtb_tomax == NULL) { dtrace_interrupt_enable(cookie); return; } for (i = 0; i < state->dts_nspeculations; i++) { dtrace_speculation_t *spec = &state->dts_speculations[i]; dtrace_buffer_t *src = &spec->dtsp_buffer[cpu]; if (src->dtb_tomax == NULL) continue; if (spec->dtsp_state == DTRACESPEC_DISCARDING) { src->dtb_offset = 0; continue; } if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY) continue; if (src->dtb_offset == 0) continue; dtrace_speculation_commit(state, cpu, i + 1); } dtrace_interrupt_enable(cookie); } /* * Note: not called from probe context. This function is called * asynchronously (and at a regular interval) to clean any speculations that * are in the COMMITTINGMANY or DISCARDING states. If it discovers that there * is work to be done, it cross calls all CPUs to perform that work; * COMMITTINGMANY and DISCARDING speculations may not be transitioned back to * the INACTIVE state until they have been cleaned by all CPUs. */ static void dtrace_speculation_clean(dtrace_state_t *state) { int work = 0, rv; dtrace_specid_t i; for (i = 0; i < state->dts_nspeculations; i++) { dtrace_speculation_t *spec = &state->dts_speculations[i]; ASSERT(!spec->dtsp_cleaning); if (spec->dtsp_state != DTRACESPEC_DISCARDING && spec->dtsp_state != DTRACESPEC_COMMITTINGMANY) continue; work++; spec->dtsp_cleaning = 1; } if (!work) return; dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_speculation_clean_here, state); /* * We now know that all CPUs have committed or discarded their * speculation buffers, as appropriate. We can now set the state * to inactive.
*/ for (i = 0; i < state->dts_nspeculations; i++) { dtrace_speculation_t *spec = &state->dts_speculations[i]; dtrace_speculation_state_t current, new; if (!spec->dtsp_cleaning) continue; current = spec->dtsp_state; ASSERT(current == DTRACESPEC_DISCARDING || current == DTRACESPEC_COMMITTINGMANY); new = DTRACESPEC_INACTIVE; rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new); ASSERT(rv == current); spec->dtsp_cleaning = 0; } } /* * Called as part of a speculate() to get the speculative buffer associated * with a given speculation. Returns NULL if the specified speculation is not * in an ACTIVE state. If the speculation is in the ACTIVEONE state -- and * the active CPU is not the specified CPU -- the speculation will be * atomically transitioned into the ACTIVEMANY state. */ static dtrace_buffer_t * dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid, dtrace_specid_t which) { dtrace_speculation_t *spec; dtrace_speculation_state_t current, new = 0; dtrace_buffer_t *buf; if (which == 0) return (NULL); if (which > state->dts_nspeculations) { cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return (NULL); } spec = &state->dts_speculations[which - 1]; buf = &spec->dtsp_buffer[cpuid]; do { current = spec->dtsp_state; switch (current) { case DTRACESPEC_INACTIVE: case DTRACESPEC_COMMITTINGMANY: case DTRACESPEC_DISCARDING: return (NULL); case DTRACESPEC_COMMITTING: ASSERT(buf->dtb_offset == 0); return (NULL); case DTRACESPEC_ACTIVEONE: /* * This speculation is currently active on one CPU. * Check the offset in the buffer; if it's non-zero, * that CPU must be us (and we leave the state alone). * If it's zero, assume that we're starting on a new * CPU -- and change the state to indicate that the * speculation is active on more than one CPU. */ if (buf->dtb_offset != 0) return (buf); new = DTRACESPEC_ACTIVEMANY; break; case DTRACESPEC_ACTIVEMANY: return (buf); case DTRACESPEC_ACTIVE: new = DTRACESPEC_ACTIVEONE; break; default: ASSERT(0); } } while (dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new) != current); ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY); return (buf); } /* * Return a string. In the event that the user lacks the privilege to access * arbitrary kernel memory, we copy the string out to scratch memory so that we * don't fail access checking. * * dtrace_dif_variable() uses this routine as a helper for various * builtin values such as 'execname' and 'probefunc.' */ uintptr_t dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state, dtrace_mstate_t *mstate) { uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t ret; size_t strsz; /* * The easy case: this probe is allowed to read all of memory, so * we can just return this as a vanilla pointer. */ if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) return (addr); /* * This is the tougher case: we copy the string in question from * kernel memory into scratch memory and return it that way: this * ensures that we won't trip up when access checking tests the * BYREF return value. */ strsz = dtrace_strlen((char *)addr, size) + 1; if (mstate->dtms_scratch_ptr + strsz > mstate->dtms_scratch_base + mstate->dtms_scratch_size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); return (0); } dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr, strsz); ret = mstate->dtms_scratch_ptr; mstate->dtms_scratch_ptr += strsz; return (ret); } /* * Return a string from a memory address which is known to have one or * more concatenated, individually zero terminated, sub-strings.
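 */

/*
 * dtrace_dif_varstr() above returns strings through per-probe scratch
 * space managed as a simple bump allocator: fail fast if the copy will
 * not fit, copy, then advance the cursor. A user-space sketch of that
 * discipline (the structure and names are illustrative, not the kernel's):
 */
#include <stddef.h>
#include <string.h>

struct scratch {
	char	*base;	/* start of the region */
	size_t	 size;	/* total bytes available */
	size_t	 used;	/* bytes already handed out */
};

/*
 * Copy a string of 'len' bytes into the scratch region, returning its new
 * address, or NULL when the region is exhausted (the kernel code instead
 * returns 0 and sets CPU_DTRACE_NOSCRATCH).
 */
static char *
scratch_strcpy(struct scratch *sp, const char *s, size_t len)
{
	char *ret;

	if (len + 1 > sp->size - sp->used)
		return (NULL);
	ret = sp->base + sp->used;
	memcpy(ret, s, len);
	ret[len] = '\0';
	sp->used += len + 1;
	return (ret);
}

/*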
* In the event that the user lacks the privilege to access * arbitrary kernel memory, we copy the string out to scratch memory so that we * don't fail access checking. * * dtrace_dif_variable() uses this routine as a helper for various * builtin values such as 'execargs'. */ static uintptr_t dtrace_dif_varstrz(uintptr_t addr, size_t strsz, dtrace_state_t *state, dtrace_mstate_t *mstate) { char *p; size_t i; uintptr_t ret; if (mstate->dtms_scratch_ptr + strsz > mstate->dtms_scratch_base + mstate->dtms_scratch_size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); return (0); } dtrace_bcopy((const void *)addr, (void *)mstate->dtms_scratch_ptr, strsz); /* Replace sub-string termination characters with a space. */ for (p = (char *) mstate->dtms_scratch_ptr, i = 0; i < strsz - 1; p++, i++) if (*p == '\0') *p = ' '; ret = mstate->dtms_scratch_ptr; mstate->dtms_scratch_ptr += strsz; return (ret); } /* * This function implements the DIF emulator's variable lookups. The emulator * passes a reserved variable identifier and optional built-in array index. */ static uint64_t dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, uint64_t ndx) { /* * If we're accessing one of the uncached arguments, we'll turn this * into a reference in the args array. */ if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) { ndx = v - DIF_VAR_ARG0; v = DIF_VAR_ARGS; } switch (v) { case DIF_VAR_ARGS: ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS); if (ndx >= sizeof (mstate->dtms_arg) / sizeof (mstate->dtms_arg[0])) { int aframes = mstate->dtms_probe->dtpr_aframes + 2; dtrace_provider_t *pv; uint64_t val; pv = mstate->dtms_probe->dtpr_provider; if (pv->dtpv_pops.dtps_getargval != NULL) val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg, mstate->dtms_probe->dtpr_id, mstate->dtms_probe->dtpr_arg, ndx, aframes); else val = dtrace_getarg(ndx, aframes); /* * This is regrettably required to keep the compiler * from tail-optimizing the call to dtrace_getarg(). * The condition always evaluates to true, but the * compiler has no way of figuring that out a priori. * (None of this would be necessary if the compiler * could be relied upon to _always_ tail-optimize * the call to dtrace_getarg() -- but it can't.) 
*/ if (mstate->dtms_probe != NULL) return (val); ASSERT(0); } return (mstate->dtms_arg[ndx]); #ifdef illumos case DIF_VAR_UREGS: { klwp_t *lwp; if (!dtrace_priv_proc(state)) return (0); if ((lwp = curthread->t_lwp) == NULL) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = NULL; return (0); } return (dtrace_getreg(lwp->lwp_regs, ndx)); } #else case DIF_VAR_UREGS: { struct trapframe *tframe; if (!dtrace_priv_proc(state)) return (0); if ((tframe = curthread->td_frame) == NULL) { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); cpu_core[curcpu].cpuc_dtrace_illval = 0; return (0); } return (dtrace_getreg(tframe, ndx)); } #endif case DIF_VAR_CURTHREAD: if (!dtrace_priv_proc(state)) return (0); return ((uint64_t)(uintptr_t)curthread); case DIF_VAR_TIMESTAMP: if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) { mstate->dtms_timestamp = dtrace_gethrtime(); mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP; } return (mstate->dtms_timestamp); case DIF_VAR_VTIMESTAMP: ASSERT(dtrace_vtime_references != 0); return (curthread->t_dtrace_vtime); case DIF_VAR_WALLTIMESTAMP: if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) { mstate->dtms_walltimestamp = dtrace_gethrestime(); mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP; } return (mstate->dtms_walltimestamp); #ifdef illumos case DIF_VAR_IPL: if (!dtrace_priv_kernel(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) { mstate->dtms_ipl = dtrace_getipl(); mstate->dtms_present |= DTRACE_MSTATE_IPL; } return (mstate->dtms_ipl); #endif case DIF_VAR_EPID: ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID); return (mstate->dtms_epid); case DIF_VAR_ID: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); return (mstate->dtms_probe->dtpr_id); case DIF_VAR_STACKDEPTH: if (!dtrace_priv_kernel(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) { int aframes = mstate->dtms_probe->dtpr_aframes + 2; mstate->dtms_stackdepth = dtrace_getstackdepth(aframes); mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH; } return (mstate->dtms_stackdepth); case DIF_VAR_USTACKDEPTH: if (!dtrace_priv_proc(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) { /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) { mstate->dtms_ustackdepth = 0; } else { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); mstate->dtms_ustackdepth = dtrace_getustackdepth(); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH; } return (mstate->dtms_ustackdepth); case DIF_VAR_CALLER: if (!dtrace_priv_kernel(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) { int aframes = mstate->dtms_probe->dtpr_aframes + 2; if (!DTRACE_ANCHORED(mstate->dtms_probe)) { /* * If this is an unanchored probe, we are * required to go through the slow path: * dtrace_caller() only guarantees correct * results for anchored probes. */ pc_t caller[2] = {0, 0}; dtrace_getpcstack(caller, 2, aframes, (uint32_t *)(uintptr_t)mstate->dtms_arg[0]); mstate->dtms_caller = caller[1]; } else if ((mstate->dtms_caller = dtrace_caller(aframes)) == -1) { /* * We have failed to do this the quick way; * we must resort to the slower approach of * calling dtrace_getpcstack().
*/ pc_t caller = 0; dtrace_getpcstack(&caller, 1, aframes, NULL); mstate->dtms_caller = caller; } mstate->dtms_present |= DTRACE_MSTATE_CALLER; } return (mstate->dtms_caller); case DIF_VAR_UCALLER: if (!dtrace_priv_proc(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) { uint64_t ustack[3]; /* * dtrace_getupcstack() fills in the first uint64_t * with the current PID. The second uint64_t will * be the program counter at user-level. The third * uint64_t will contain the caller, which is what * we're after. */ ustack[2] = 0; DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_getupcstack(ustack, 3); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); mstate->dtms_ucaller = ustack[2]; mstate->dtms_present |= DTRACE_MSTATE_UCALLER; } return (mstate->dtms_ucaller); case DIF_VAR_PROBEPROV: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); return (dtrace_dif_varstr( (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name, state, mstate)); case DIF_VAR_PROBEMOD: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); return (dtrace_dif_varstr( (uintptr_t)mstate->dtms_probe->dtpr_mod, state, mstate)); case DIF_VAR_PROBEFUNC: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); return (dtrace_dif_varstr( (uintptr_t)mstate->dtms_probe->dtpr_func, state, mstate)); case DIF_VAR_PROBENAME: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); return (dtrace_dif_varstr( (uintptr_t)mstate->dtms_probe->dtpr_name, state, mstate)); case DIF_VAR_PID: if (!dtrace_priv_proc(state)) return (0); #ifdef illumos /* * Note that we are assuming that an unanchored probe is * always due to a high-level interrupt. (And we're assuming * that there is only a single high level interrupt.) */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (pid0.pid_id); /* * It is always safe to dereference one's own t_procp pointer: * it always points to a valid, allocated proc structure. * Further, it is always safe to dereference the p_pidp member * of one's own proc structure. (These are truisms because * threads and processes don't clean up their own state -- * they leave that task to whomever reaps them.) */ return ((uint64_t)curthread->t_procp->p_pidp->pid_id); #else return ((uint64_t)curproc->p_pid); #endif case DIF_VAR_PPID: if (!dtrace_priv_proc(state)) return (0); #ifdef illumos /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (pid0.pid_id); /* * It is always safe to dereference one's own t_procp pointer: * it always points to a valid, allocated proc structure. * (This is true because threads don't clean up their own * state -- they leave that task to whomever reaps them.) */ return ((uint64_t)curthread->t_procp->p_ppid); #else if (curproc->p_pid == proc0.p_pid) return (curproc->p_pid); else return (curproc->p_pptr->p_pid); #endif case DIF_VAR_TID: #ifdef illumos /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (0); #endif return ((uint64_t)curthread->t_tid); case DIF_VAR_EXECARGS: { struct pargs *p_args = curthread->td_proc->p_args; if (p_args == NULL) return (0); return (dtrace_dif_varstrz( (uintptr_t) p_args->ar_args, p_args->ar_length, state, mstate)); } case DIF_VAR_EXECNAME: #ifdef illumos if (!dtrace_priv_proc(state)) return (0); /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return ((uint64_t)(uintptr_t)p0.p_user.u_comm); /* * It is always safe to dereference one's own t_procp pointer: * it always points to a valid, allocated proc structure.
* (This is true because threads don't clean up their own * state -- they leave that task to whomever reaps them.) */ return (dtrace_dif_varstr( (uintptr_t)curthread->t_procp->p_user.u_comm, state, mstate)); #else return (dtrace_dif_varstr( (uintptr_t) curthread->td_proc->p_comm, state, mstate)); #endif case DIF_VAR_ZONENAME: #ifdef illumos if (!dtrace_priv_proc(state)) return (0); /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return ((uint64_t)(uintptr_t)p0.p_zone->zone_name); /* * It is always safe to dereference one's own t_procp pointer: * it always points to a valid, allocated proc structure. * (This is true because threads don't clean up their own * state -- they leave that task to whomever reaps them.) */ return (dtrace_dif_varstr( (uintptr_t)curthread->t_procp->p_zone->zone_name, state, mstate)); #else return (0); #endif case DIF_VAR_UID: if (!dtrace_priv_proc(state)) return (0); #ifdef illumos /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return ((uint64_t)p0.p_cred->cr_uid); /* * It is always safe to dereference one's own t_procp pointer: * it always points to a valid, allocated proc structure. * (This is true because threads don't clean up their own * state -- they leave that task to whomever reaps them.) * * Additionally, it is safe to dereference one's own process * credential, since this is never NULL after process birth. */ return ((uint64_t)curthread->t_procp->p_cred->cr_uid); #else return ((uint64_t)curthread->td_ucred->cr_uid); #endif case DIF_VAR_GID: if (!dtrace_priv_proc(state)) return (0); #ifdef illumos /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return ((uint64_t)p0.p_cred->cr_gid); /* * It is always safe to dereference one's own t_procp pointer: * it always points to a valid, allocated proc structure. * (This is true because threads don't clean up their own * state -- they leave that task to whomever reaps them.) * * Additionally, it is safe to dereference one's own process * credential, since this is never NULL after process birth. */ return ((uint64_t)curthread->t_procp->p_cred->cr_gid); #else return ((uint64_t)curthread->td_ucred->cr_gid); #endif case DIF_VAR_ERRNO: { #ifdef illumos klwp_t *lwp; if (!dtrace_priv_proc(state)) return (0); /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (0); /* * It is always safe to dereference one's own t_lwp pointer in * the event that this pointer is non-NULL. (This is true * because threads and lwps don't clean up their own state -- * they leave that task to whomever reaps them.) */ if ((lwp = curthread->t_lwp) == NULL) return (0); return ((uint64_t)lwp->lwp_errno); #else return (curthread->td_errno); #endif } #ifndef illumos case DIF_VAR_CPU: { return curcpu; } #endif default: DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return (0); } } typedef enum dtrace_json_state { DTRACE_JSON_REST = 1, DTRACE_JSON_OBJECT, DTRACE_JSON_STRING, DTRACE_JSON_STRING_ESCAPE, DTRACE_JSON_STRING_ESCAPE_UNICODE, DTRACE_JSON_COLON, DTRACE_JSON_COMMA, DTRACE_JSON_VALUE, DTRACE_JSON_IDENTIFIER, DTRACE_JSON_NUMBER, DTRACE_JSON_NUMBER_FRAC, DTRACE_JSON_NUMBER_EXP, DTRACE_JSON_COLLECT_OBJECT } dtrace_json_state_t; /* * This function possesses just enough knowledge about JSON to extract a single * value from a JSON string and store it in the scratch buffer. It is able * to extract nested object values, and members of arrays by index. 
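 * For example (illustrative), the D expression
 *
 *	json("{\"pass\": [1, 2, {\"fail\": \"yes\"}]}", "pass[2].fail")
 *
 * is serviced by this function and returns the string "yes".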
* * elemlist is a list of JSON keys, stored as packed NUL-terminated strings, to * be looked up as we descend into the object tree. e.g. * * foo[0].bar.baz[32] --> "foo" NUL "0" NUL "bar" NUL "baz" NUL "32" NUL * with nelems = 5. * * The run time of this function must be bounded above by strsize to limit the * amount of work done in probe context. As such, it is implemented as a * simple state machine, reading one character at a time using safe loads * until we find the requested element, hit a parsing error or run off the * end of the object or string. * * As there is no way for a subroutine to return an error without interrupting * clause execution, we simply return NULL in the event of a missing key or any * other error condition. Each NULL return in this function is commented with * the error condition it represents -- parsing or otherwise. * * The set of states for the state machine closely matches the JSON * specification (http://json.org/). Briefly: * * DTRACE_JSON_REST: * Skip whitespace until we find either a top-level Object, moving * to DTRACE_JSON_OBJECT; or an Array, moving to DTRACE_JSON_VALUE. * * DTRACE_JSON_OBJECT: * Locate the next key String in an Object. Sets a flag to denote * the next String as a key string and moves to DTRACE_JSON_STRING. * * DTRACE_JSON_COLON: * Skip whitespace until we find the colon that separates key Strings * from their values. Once found, move to DTRACE_JSON_VALUE. * * DTRACE_JSON_VALUE: * Detects the type of the next value (String, Number, Identifier, Object * or Array) and routes to the states that process that type. Here we also * deal with the element selector list if we are requested to traverse down * into the object tree. * * DTRACE_JSON_COMMA: * Skip whitespace until we find the comma that separates key-value pairs * in Objects (returning to DTRACE_JSON_OBJECT) or values in Arrays * (similarly DTRACE_JSON_VALUE). All following literal value processing * states return to this state at the end of their value, unless otherwise * noted. * * DTRACE_JSON_NUMBER, DTRACE_JSON_NUMBER_FRAC, DTRACE_JSON_NUMBER_EXP: * Processes a Number literal from the JSON, including any exponent * component that may be present. Numbers are returned as strings, which * may be passed to strtoll() if an integer is required. * * DTRACE_JSON_IDENTIFIER: * Processes a "true", "false" or "null" literal in the JSON. * * DTRACE_JSON_STRING, DTRACE_JSON_STRING_ESCAPE, * DTRACE_JSON_STRING_ESCAPE_UNICODE: * Processes a String literal from the JSON, whether the String denotes * a key, a value or part of a larger Object. Handles all escape sequences * present in the specification, including four-digit unicode characters, * but merely includes the escape sequence without converting it to the * actual escaped character. If the String is flagged as a key, we * move to DTRACE_JSON_COLON rather than DTRACE_JSON_COMMA. * * DTRACE_JSON_COLLECT_OBJECT: * This state collects an entire Object (or Array), correctly handling * embedded strings. If the full element selector list matches this nested * object, we return the Object in full as a string. If not, we use this * state to skip to the next value at this level and continue processing. * * NOTE: This function uses various macros from strtolctype.h to manipulate * digit values, etc -- these have all been checked to ensure they make * no additional function calls. 
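 *
 * As a concrete (illustrative) walk-through, extracting the selector
 * "a" NUL "1" NUL "b" (nelems = 3) from {"a": [true, {"b": 42}]}
 * proceeds REST -> OBJECT -> STRING (key "a" matches) -> COLON ->
 * VALUE (an Array; array selector "1") -> IDENTIFIER ("true") ->
 * COMMA (array position now matches) -> VALUE -> OBJECT (selector
 * "b") -> STRING -> COLON -> VALUE -> NUMBER, returning the string
 * "42".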
*/ static char * dtrace_json(uint64_t size, uintptr_t json, char *elemlist, int nelems, char *dest) { dtrace_json_state_t state = DTRACE_JSON_REST; int64_t array_elem = INT64_MIN; int64_t array_pos = 0; uint8_t escape_unicount = 0; boolean_t string_is_key = B_FALSE; boolean_t collect_object = B_FALSE; boolean_t found_key = B_FALSE; boolean_t in_array = B_FALSE; uint32_t braces = 0, brackets = 0; char *elem = elemlist; char *dd = dest; uintptr_t cur; for (cur = json; cur < json + size; cur++) { char cc = dtrace_load8(cur); if (cc == '\0') return (NULL); switch (state) { case DTRACE_JSON_REST: if (isspace(cc)) break; if (cc == '{') { state = DTRACE_JSON_OBJECT; break; } if (cc == '[') { in_array = B_TRUE; array_pos = 0; array_elem = dtrace_strtoll(elem, 10, size); found_key = array_elem == 0 ? B_TRUE : B_FALSE; state = DTRACE_JSON_VALUE; break; } /* * ERROR: expected to find a top-level object or array. */ return (NULL); case DTRACE_JSON_OBJECT: if (isspace(cc)) break; if (cc == '"') { state = DTRACE_JSON_STRING; string_is_key = B_TRUE; break; } /* * ERROR: either the object did not start with a key * string, or we've run off the end of the object * without finding the requested key. */ return (NULL); case DTRACE_JSON_STRING: if (cc == '\\') { *dd++ = '\\'; state = DTRACE_JSON_STRING_ESCAPE; break; } if (cc == '"') { if (collect_object) { /* * We don't reset the dest here, as * the string is part of a larger * object being collected. */ *dd++ = cc; collect_object = B_FALSE; state = DTRACE_JSON_COLLECT_OBJECT; break; } *dd = '\0'; dd = dest; /* reset string buffer */ if (string_is_key) { if (dtrace_strncmp(dest, elem, size) == 0) found_key = B_TRUE; } else if (found_key) { if (nelems > 1) { /* * We expected an object, not * this string. */ return (NULL); } return (dest); } state = string_is_key ? DTRACE_JSON_COLON : DTRACE_JSON_COMMA; string_is_key = B_FALSE; break; } *dd++ = cc; break; case DTRACE_JSON_STRING_ESCAPE: *dd++ = cc; if (cc == 'u') { escape_unicount = 0; state = DTRACE_JSON_STRING_ESCAPE_UNICODE; } else { state = DTRACE_JSON_STRING; } break; case DTRACE_JSON_STRING_ESCAPE_UNICODE: if (!isxdigit(cc)) { /* * ERROR: invalid unicode escape, expected * four valid hexadecimal digits. */ return (NULL); } *dd++ = cc; if (++escape_unicount == 4) state = DTRACE_JSON_STRING; break; case DTRACE_JSON_COLON: if (isspace(cc)) break; if (cc == ':') { state = DTRACE_JSON_VALUE; break; } /* * ERROR: expected a colon. */ return (NULL); case DTRACE_JSON_COMMA: if (isspace(cc)) break; if (cc == ',') { if (in_array) { state = DTRACE_JSON_VALUE; if (++array_pos == array_elem) found_key = B_TRUE; } else { state = DTRACE_JSON_OBJECT; } break; } /* * ERROR: either we hit an unexpected character, or * we reached the end of the object or array without * finding the requested key. */ return (NULL); case DTRACE_JSON_IDENTIFIER: if (islower(cc)) { *dd++ = cc; break; } *dd = '\0'; dd = dest; /* reset string buffer */ if (dtrace_strncmp(dest, "true", 5) == 0 || dtrace_strncmp(dest, "false", 6) == 0 || dtrace_strncmp(dest, "null", 5) == 0) { if (found_key) { if (nelems > 1) { /* * ERROR: We expected an object, * not this identifier. */ return (NULL); } return (dest); } else { cur--; state = DTRACE_JSON_COMMA; break; } } /* * ERROR: we did not recognise the identifier as one * of those in the JSON specification.
*/ return (NULL); case DTRACE_JSON_NUMBER: if (cc == '.') { *dd++ = cc; state = DTRACE_JSON_NUMBER_FRAC; break; } if (cc == 'x' || cc == 'X') { /* * ERROR: specification explicitly excludes * hexadecimal or octal numbers. */ return (NULL); } /* FALLTHRU */ case DTRACE_JSON_NUMBER_FRAC: if (cc == 'e' || cc == 'E') { *dd++ = cc; state = DTRACE_JSON_NUMBER_EXP; break; } if (cc == '+' || cc == '-') { /* * ERROR: expect sign as part of exponent only. */ return (NULL); } /* FALLTHRU */ case DTRACE_JSON_NUMBER_EXP: if (isdigit(cc) || cc == '+' || cc == '-') { *dd++ = cc; break; } *dd = '\0'; dd = dest; /* reset string buffer */ if (found_key) { if (nelems > 1) { /* * ERROR: We expected an object, not * this number. */ return (NULL); } return (dest); } cur--; state = DTRACE_JSON_COMMA; break; case DTRACE_JSON_VALUE: if (isspace(cc)) break; if (cc == '{' || cc == '[') { if (nelems > 1 && found_key) { in_array = cc == '[' ? B_TRUE : B_FALSE; /* * If our element selector directs us * to descend into this nested object, * then move to the next selector * element in the list and restart the * state machine. */ while (*elem != '\0') elem++; elem++; /* skip the inter-element NUL */ nelems--; dd = dest; if (in_array) { state = DTRACE_JSON_VALUE; array_pos = 0; array_elem = dtrace_strtoll( elem, 10, size); found_key = array_elem == 0 ? B_TRUE : B_FALSE; } else { found_key = B_FALSE; state = DTRACE_JSON_OBJECT; } break; } /* * Otherwise, we wish to either skip this * nested object or return it in full. */ if (cc == '[') brackets = 1; else braces = 1; *dd++ = cc; state = DTRACE_JSON_COLLECT_OBJECT; break; } if (cc == '"') { state = DTRACE_JSON_STRING; break; } if (islower(cc)) { /* * Here we deal with true, false and null. */ *dd++ = cc; state = DTRACE_JSON_IDENTIFIER; break; } if (cc == '-' || isdigit(cc)) { *dd++ = cc; state = DTRACE_JSON_NUMBER; break; } /* * ERROR: unexpected character at start of value. */ return (NULL); case DTRACE_JSON_COLLECT_OBJECT: if (cc == '\0') /* * ERROR: unexpected end of input. */ return (NULL); *dd++ = cc; if (cc == '"') { collect_object = B_TRUE; state = DTRACE_JSON_STRING; break; } if (cc == ']') { if (brackets-- == 0) { /* * ERROR: unbalanced brackets. */ return (NULL); } } else if (cc == '}') { if (braces-- == 0) { /* * ERROR: unbalanced braces. */ return (NULL); } } else if (cc == '{') { braces++; } else if (cc == '[') { brackets++; } if (brackets == 0 && braces == 0) { if (found_key) { *dd = '\0'; return (dest); } dd = dest; /* reset string buffer */ state = DTRACE_JSON_COMMA; } break; } } return (NULL); } /* * Emulate the execution of DTrace DIF subroutines invoked by the call opcode. * Notice that we don't bother validating the proper number of arguments or * their types in the tuple stack. This isn't needed because all argument * interpretation is safe thanks to our load safety -- the worst that can * happen is that a bogus program can obtain bogus results.
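 *
 * One note on rand() below: it now draws from the per-CPU
 * xoroshiro128+ state in state->dts_rstate[curcpu] rather than the
 * old linear-congruential expression. As a sketch -- assuming
 * dtrace_xoroshiro128_plus_next() follows the stock xoroshiro128+
 * reference constants (55, 14, 36) -- the generator step is:
 *
 *	s0 = s[0]; s1 = s[1];
 *	result = s0 + s1;
 *	s1 ^= s0;
 *	s[0] = ((s0 << 55) | (s0 >> 9)) ^ s1 ^ (s1 << 14);
 *	s[1] = (s1 << 36) | (s1 >> 28);
 *	return (result);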
*/ static void dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, dtrace_key_t *tupregs, int nargs, dtrace_mstate_t *mstate, dtrace_state_t *state) { volatile uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags; volatile uintptr_t *illval = &cpu_core[curcpu].cpuc_dtrace_illval; dtrace_vstate_t *vstate = &state->dts_vstate; #ifdef illumos union { mutex_impl_t mi; uint64_t mx; } m; union { krwlock_t ri; uintptr_t rw; } r; #else struct thread *lowner; union { struct lock_object *li; uintptr_t lx; } l; #endif switch (subr) { case DIF_SUBR_RAND: - regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875; + regs[rd] = dtrace_xoroshiro128_plus_next( + state->dts_rstate[curcpu]); break; #ifdef illumos case DIF_SUBR_MUTEX_OWNED: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), mstate, vstate)) { regs[rd] = 0; break; } m.mx = dtrace_load64(tupregs[0].dttk_value); if (MUTEX_TYPE_ADAPTIVE(&m.mi)) regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER; else regs[rd] = LOCK_HELD(&m.mi.m_spin.m_spinlock); break; case DIF_SUBR_MUTEX_OWNER: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), mstate, vstate)) { regs[rd] = 0; break; } m.mx = dtrace_load64(tupregs[0].dttk_value); if (MUTEX_TYPE_ADAPTIVE(&m.mi) && MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER) regs[rd] = (uintptr_t)MUTEX_OWNER(&m.mi); else regs[rd] = 0; break; case DIF_SUBR_MUTEX_TYPE_ADAPTIVE: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), mstate, vstate)) { regs[rd] = 0; break; } m.mx = dtrace_load64(tupregs[0].dttk_value); regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi); break; case DIF_SUBR_MUTEX_TYPE_SPIN: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), mstate, vstate)) { regs[rd] = 0; break; } m.mx = dtrace_load64(tupregs[0].dttk_value); regs[rd] = MUTEX_TYPE_SPIN(&m.mi); break; case DIF_SUBR_RW_READ_HELD: { uintptr_t tmp; if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), mstate, vstate)) { regs[rd] = 0; break; } r.rw = dtrace_loadptr(tupregs[0].dttk_value); regs[rd] = _RW_READ_HELD(&r.ri, tmp); break; } case DIF_SUBR_RW_WRITE_HELD: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), mstate, vstate)) { regs[rd] = 0; break; } r.rw = dtrace_loadptr(tupregs[0].dttk_value); regs[rd] = _RW_WRITE_HELD(&r.ri); break; case DIF_SUBR_RW_ISWRITER: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), mstate, vstate)) { regs[rd] = 0; break; } r.rw = dtrace_loadptr(tupregs[0].dttk_value); regs[rd] = _RW_ISWRITER(&r.ri); break; #else /* !illumos */ case DIF_SUBR_MUTEX_OWNED: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (struct lock_object), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = LOCK_CLASS(l.li)->lc_owner(l.li, &lowner); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_SUBR_MUTEX_OWNER: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (struct lock_object), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); LOCK_CLASS(l.li)->lc_owner(l.li, &lowner); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); regs[rd] = (uintptr_t)lowner; break; case DIF_SUBR_MUTEX_TYPE_ADAPTIVE: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (struct mtx), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = (LOCK_CLASS(l.li)->lc_flags & LC_SLEEPLOCK) != 0; DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case 
DIF_SUBR_MUTEX_TYPE_SPIN: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (struct mtx), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = (LOCK_CLASS(l.li)->lc_flags & LC_SPINLOCK) != 0; DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_SUBR_RW_READ_HELD: case DIF_SUBR_SX_SHARED_HELD: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr((uintptr_t)&tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = LOCK_CLASS(l.li)->lc_owner(l.li, &lowner) && lowner == NULL; DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_SUBR_RW_WRITE_HELD: case DIF_SUBR_SX_EXCLUSIVE_HELD: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr(tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = LOCK_CLASS(l.li)->lc_owner(l.li, &lowner) && lowner != NULL; DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_SUBR_RW_ISWRITER: case DIF_SUBR_SX_ISEXCLUSIVE: if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), mstate, vstate)) { regs[rd] = 0; break; } l.lx = dtrace_loadptr(tupregs[0].dttk_value); DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); LOCK_CLASS(l.li)->lc_owner(l.li, &lowner); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); regs[rd] = (lowner == curthread); break; #endif /* illumos */ case DIF_SUBR_BCOPY: { /* * We need to be sure that the destination is in the scratch * region -- no other region is allowed. */ uintptr_t src = tupregs[0].dttk_value; uintptr_t dest = tupregs[1].dttk_value; size_t size = tupregs[2].dttk_value; if (!dtrace_inscratch(dest, size, mstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } if (!dtrace_canload(src, size, mstate, vstate)) { regs[rd] = 0; break; } dtrace_bcopy((void *)src, (void *)dest, size); break; } case DIF_SUBR_ALLOCA: case DIF_SUBR_COPYIN: { uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8); uint64_t size = tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value; size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size; /* * This action doesn't require any credential checks since * probes will not activate in user contexts to which the * enabling user does not have permissions. */ /* * Rounding up the user allocation size could have overflowed * a large, bogus allocation (like -1ULL) to 0. */ if (scratch_size < size || !DTRACE_INSCRATCH(mstate, scratch_size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } if (subr == DIF_SUBR_COPYIN) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } mstate->dtms_scratch_ptr += scratch_size; regs[rd] = dest; break; } case DIF_SUBR_COPYINTO: { uint64_t size = tupregs[1].dttk_value; uintptr_t dest = tupregs[2].dttk_value; /* * This action doesn't require any credential checks since * probes will not activate in user contexts to which the * enabling user does not have permissions. 
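 *
 * A typical (illustrative) D usage pairs this with alloca() --
 * "struct foo" and arg0 stand in for any user-space type and
 * address:
 *
 *	this->p = (struct foo *)alloca(sizeof (struct foo));
 *	copyinto(arg0, sizeof (struct foo), this->p);
 *
 * The only hard requirement enforced below is that the destination
 * lie within scratch space.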
*/ if (!dtrace_inscratch(dest, size, mstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; } case DIF_SUBR_COPYINSTR: { uintptr_t dest = mstate->dtms_scratch_ptr; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; if (nargs > 1 && tupregs[1].dttk_value < size) size = tupregs[1].dttk_value + 1; /* * This action doesn't require any credential checks since * probes will not activate in user contexts to which the * enabling user does not have permissions. */ if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); ((char *)dest)[size - 1] = '\0'; mstate->dtms_scratch_ptr += size; regs[rd] = dest; break; } #ifdef illumos case DIF_SUBR_MSGSIZE: case DIF_SUBR_MSGDSIZE: { uintptr_t baddr = tupregs[0].dttk_value, daddr; uintptr_t wptr, rptr; size_t count = 0; int cont = 0; while (baddr != 0 && !(*flags & CPU_DTRACE_FAULT)) { if (!dtrace_canload(baddr, sizeof (mblk_t), mstate, vstate)) { regs[rd] = 0; break; } wptr = dtrace_loadptr(baddr + offsetof(mblk_t, b_wptr)); rptr = dtrace_loadptr(baddr + offsetof(mblk_t, b_rptr)); if (wptr < rptr) { *flags |= CPU_DTRACE_BADADDR; *illval = tupregs[0].dttk_value; break; } daddr = dtrace_loadptr(baddr + offsetof(mblk_t, b_datap)); baddr = dtrace_loadptr(baddr + offsetof(mblk_t, b_cont)); /* * We want to prevent against denial-of-service here, * so we're only going to search the list for * dtrace_msgdsize_max mblks. */ if (cont++ > dtrace_msgdsize_max) { *flags |= CPU_DTRACE_ILLOP; break; } if (subr == DIF_SUBR_MSGDSIZE) { if (dtrace_load8(daddr + offsetof(dblk_t, db_type)) != M_DATA) continue; } count += wptr - rptr; } if (!(*flags & CPU_DTRACE_FAULT)) regs[rd] = count; break; } #endif case DIF_SUBR_PROGENYOF: { pid_t pid = tupregs[0].dttk_value; proc_t *p; int rval = 0; DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); for (p = curthread->t_procp; p != NULL; p = p->p_parent) { #ifdef illumos if (p->p_pidp->pid_id == pid) { #else if (p->p_pid == pid) { #endif rval = 1; break; } } DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); regs[rd] = rval; break; } case DIF_SUBR_SPECULATION: regs[rd] = dtrace_speculation(state); break; case DIF_SUBR_COPYOUT: { uintptr_t kaddr = tupregs[0].dttk_value; uintptr_t uaddr = tupregs[1].dttk_value; uint64_t size = tupregs[2].dttk_value; if (!dtrace_destructive_disallow && dtrace_priv_proc_control(state) && !dtrace_istoxic(kaddr, size) && dtrace_canload(kaddr, size, mstate, vstate)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyout(kaddr, uaddr, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } break; } case DIF_SUBR_COPYOUTSTR: { uintptr_t kaddr = tupregs[0].dttk_value; uintptr_t uaddr = tupregs[1].dttk_value; uint64_t size = tupregs[2].dttk_value; size_t lim; if (!dtrace_destructive_disallow && dtrace_priv_proc_control(state) && !dtrace_istoxic(kaddr, size) && dtrace_strcanload(kaddr, size, &lim, mstate, vstate)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyoutstr(kaddr, uaddr, lim, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } break; } case DIF_SUBR_STRLEN: { size_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t addr = (uintptr_t)tupregs[0].dttk_value; size_t lim; if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) { regs[rd] = 0; break; } regs[rd] = 
dtrace_strlen((char *)addr, lim); break; } case DIF_SUBR_STRCHR: case DIF_SUBR_STRRCHR: { /* * We're going to iterate over the string looking for the * specified character. We will iterate until we have reached * the string length or we have found the character. If this * is DIF_SUBR_STRRCHR, we will look for the last occurrence * of the specified character instead of the first. */ uintptr_t addr = tupregs[0].dttk_value; uintptr_t addr_limit; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; size_t lim; char c, target = (char)tupregs[1].dttk_value; if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) { regs[rd] = 0; break; } addr_limit = addr + lim; for (regs[rd] = 0; addr < addr_limit; addr++) { if ((c = dtrace_load8(addr)) == target) { regs[rd] = addr; if (subr == DIF_SUBR_STRCHR) break; } if (c == '\0') break; } break; } case DIF_SUBR_STRSTR: case DIF_SUBR_INDEX: case DIF_SUBR_RINDEX: { /* * We're going to iterate over the string looking for the * specified string. We will iterate until we have reached * the string length or we have found the string. (Yes, this * is done in the most naive way possible -- but considering * that the string we're searching for is likely to be * relatively short, the complexity of Rabin-Karp or similar * hardly seems merited.) */ char *addr = (char *)(uintptr_t)tupregs[0].dttk_value; char *substr = (char *)(uintptr_t)tupregs[1].dttk_value; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; size_t len = dtrace_strlen(addr, size); size_t sublen = dtrace_strlen(substr, size); char *limit = addr + len, *orig = addr; int notfound = subr == DIF_SUBR_STRSTR ? 0 : -1; int inc = 1; regs[rd] = notfound; if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) { regs[rd] = 0; break; } if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate, vstate)) { regs[rd] = 0; break; } /* * strstr() and index()/rindex() have similar semantics if * both strings are the empty string: strstr() returns a * pointer to the (empty) string, and index() and rindex() * both return index 0 (regardless of any position argument). */ if (sublen == 0 && len == 0) { if (subr == DIF_SUBR_STRSTR) regs[rd] = (uintptr_t)addr; else regs[rd] = 0; break; } if (subr != DIF_SUBR_STRSTR) { if (subr == DIF_SUBR_RINDEX) { limit = orig - 1; addr += len; inc = -1; } /* * Both index() and rindex() take an optional position * argument that denotes the starting position. */ if (nargs == 3) { int64_t pos = (int64_t)tupregs[2].dttk_value; /* * If the position argument to index() is * negative, Perl implicitly clamps it at * zero. This semantic is a little surprising * given the special meaning of negative * positions to similar Perl functions like * substr(), but it appears to reflect a * notion that index() can start from a * negative index and increment its way up to * the string. Given this notion, Perl's * rindex() is at least self-consistent in * that it implicitly clamps positions greater * than the string length to be the string * length. Where Perl completely loses * coherence, however, is when the specified * substring is the empty string (""). In * this case, even if the position is * negative, rindex() returns 0 -- and even if * the position is greater than the length, * index() returns the string length. These * semantics violate the notion that index() * should never return a value less than the * specified position and that rindex() should * never return a value greater than the * specified position. 
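 * To make that concrete (these follow directly from the
 * semantics just described): with s = "foobar",
 * rindex(s, "", -5) returns 0 despite the negative position,
 * and index(s, "", 10) returns strlen(s) despite the position
 * lying past the end of the string.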
(One assumes that * these semantics are artifacts of Perl's * implementation and not the results of * deliberate design -- it beggars belief that * even Larry Wall could desire such oddness.) * While in the abstract one would wish for * consistent position semantics across * substr(), index() and rindex() -- or at the * very least self-consistent position * semantics for index() and rindex() -- we * instead opt to keep with the extant Perl * semantics, in all their broken glory. (Do * we have more desire to maintain Perl's * semantics than Perl does? Probably.) */ if (subr == DIF_SUBR_RINDEX) { if (pos < 0) { if (sublen == 0) regs[rd] = 0; break; } if (pos > len) pos = len; } else { if (pos < 0) pos = 0; if (pos >= len) { if (sublen == 0) regs[rd] = len; break; } } addr = orig + pos; } } for (regs[rd] = notfound; addr != limit; addr += inc) { if (dtrace_strncmp(addr, substr, sublen) == 0) { if (subr != DIF_SUBR_STRSTR) { /* * As D index() and rindex() are * modeled on Perl (and not on awk), * we return a zero-based (and not a * one-based) index. (For you Perl * weenies: no, we're not going to add * $[ -- and shouldn't you be at a con * or something?) */ regs[rd] = (uintptr_t)(addr - orig); break; } ASSERT(subr == DIF_SUBR_STRSTR); regs[rd] = (uintptr_t)addr; break; } } break; } case DIF_SUBR_STRTOK: { uintptr_t addr = tupregs[0].dttk_value; uintptr_t tokaddr = tupregs[1].dttk_value; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t limit, toklimit; size_t clim; uint8_t c = 0, tokmap[32]; /* 256 / 8 */ char *dest = (char *)mstate->dtms_scratch_ptr; int i; /* * Check both the token buffer and (later) the input buffer, * since both could be non-scratch addresses. */ if (!dtrace_strcanload(tokaddr, size, &clim, mstate, vstate)) { regs[rd] = 0; break; } toklimit = tokaddr + clim; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } if (addr == 0) { /* * If the address specified is NULL, we use our saved * strtok pointer from the mstate. Note that this * means that the saved strtok pointer is _only_ * valid within multiple enablings of the same probe -- * it behaves like an implicit clause-local variable. */ addr = mstate->dtms_strtok; limit = mstate->dtms_strtok_limit; } else { /* * If the user-specified address is non-NULL we must * access check it. This is the only time we have * a chance to do so, since this address may reside * in the string table of this clause -- future calls * (when we fetch addr from mstate->dtms_strtok) * would fail this access check. */ if (!dtrace_strcanload(addr, size, &clim, mstate, vstate)) { regs[rd] = 0; break; } limit = addr + clim; } /* * First, zero the token map, and then process the token * string -- setting a bit in the map for every character * found in the token string. */ for (i = 0; i < sizeof (tokmap); i++) tokmap[i] = 0; for (; tokaddr < toklimit; tokaddr++) { if ((c = dtrace_load8(tokaddr)) == '\0') break; ASSERT((c >> 3) < sizeof (tokmap)); tokmap[c >> 3] |= (1 << (c & 0x7)); } for (; addr < limit; addr++) { /* * We're looking for a character that is _not_ * contained in the token string. */ if ((c = dtrace_load8(addr)) == '\0') break; if (!(tokmap[c >> 3] & (1 << (c & 0x7)))) break; } if (c == '\0') { /* * We reached the end of the string without finding * any character that was not in the token string. * We return NULL in this case, and we set the saved * address to NULL as well.
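 *
 * An illustrative call sequence: strtok("/a//b", "/") skips
 * the leading slash and returns "a"; a following
 * strtok(NULL, "/") resumes at the saved pointer, skips the
 * doubled slash and returns "b"; a third call finds only the
 * terminating NUL, lands here, returns NULL and clears the
 * saved state.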
*/ regs[rd] = 0; mstate->dtms_strtok = 0; mstate->dtms_strtok_limit = 0; break; } /* * From here on, we're copying into the destination string. */ for (i = 0; addr < limit && i < size - 1; addr++) { if ((c = dtrace_load8(addr)) == '\0') break; if (tokmap[c >> 3] & (1 << (c & 0x7))) break; ASSERT(i < size); dest[i++] = c; } ASSERT(i < size); dest[i] = '\0'; regs[rd] = (uintptr_t)dest; mstate->dtms_scratch_ptr += size; mstate->dtms_strtok = addr; mstate->dtms_strtok_limit = limit; break; } case DIF_SUBR_SUBSTR: { uintptr_t s = tupregs[0].dttk_value; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; char *d = (char *)mstate->dtms_scratch_ptr; int64_t index = (int64_t)tupregs[1].dttk_value; int64_t remaining = (int64_t)tupregs[2].dttk_value; size_t len = dtrace_strlen((char *)s, size); int64_t i; if (!dtrace_canload(s, len + 1, mstate, vstate)) { regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } if (nargs <= 2) remaining = (int64_t)size; if (index < 0) { index += len; if (index < 0 && index + remaining > 0) { remaining += index; index = 0; } } if (index >= len || index < 0) { remaining = 0; } else if (remaining < 0) { remaining += len - index; } else if (index + remaining > size) { remaining = size - index; } for (i = 0; i < remaining; i++) { if ((d[i] = dtrace_load8(s + index + i)) == '\0') break; } d[i] = '\0'; mstate->dtms_scratch_ptr += size; regs[rd] = (uintptr_t)d; break; } case DIF_SUBR_JSON: { uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t json = tupregs[0].dttk_value; size_t jsonlen = dtrace_strlen((char *)json, size); uintptr_t elem = tupregs[1].dttk_value; size_t elemlen = dtrace_strlen((char *)elem, size); char *dest = (char *)mstate->dtms_scratch_ptr; char *elemlist = (char *)mstate->dtms_scratch_ptr + jsonlen + 1; char *ee = elemlist; int nelems = 1; uintptr_t cur; if (!dtrace_canload(json, jsonlen + 1, mstate, vstate) || !dtrace_canload(elem, elemlen + 1, mstate, vstate)) { regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, jsonlen + 1 + elemlen + 1)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } /* * Read the element selector and split it up into a packed list * of strings. */ for (cur = elem; cur < elem + elemlen; cur++) { char cc = dtrace_load8(cur); if (cur == elem && cc == '[') { /* * If the first element selector key is * actually an array index then ignore the * bracket. */ continue; } if (cc == ']') continue; if (cc == '.' 
|| cc == '[') { nelems++; cc = '\0'; } *ee++ = cc; } *ee++ = '\0'; if ((regs[rd] = (uintptr_t)dtrace_json(size, json, elemlist, nelems, dest)) != 0) mstate->dtms_scratch_ptr += jsonlen + 1; break; } case DIF_SUBR_TOUPPER: case DIF_SUBR_TOLOWER: { uintptr_t s = tupregs[0].dttk_value; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; char *dest = (char *)mstate->dtms_scratch_ptr, c; size_t len = dtrace_strlen((char *)s, size); char lower, upper, convert; int64_t i; if (subr == DIF_SUBR_TOUPPER) { lower = 'a'; upper = 'z'; convert = 'A'; } else { lower = 'A'; upper = 'Z'; convert = 'a'; } if (!dtrace_canload(s, len + 1, mstate, vstate)) { regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } for (i = 0; i < size - 1; i++) { if ((c = dtrace_load8(s + i)) == '\0') break; if (c >= lower && c <= upper) c = convert + (c - lower); dest[i] = c; } ASSERT(i < size); dest[i] = '\0'; regs[rd] = (uintptr_t)dest; mstate->dtms_scratch_ptr += size; break; } #ifdef illumos case DIF_SUBR_GETMAJOR: #ifdef _LP64 regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64; #else regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ; #endif break; case DIF_SUBR_GETMINOR: #ifdef _LP64 regs[rd] = tupregs[0].dttk_value & MAXMIN64; #else regs[rd] = tupregs[0].dttk_value & MAXMIN; #endif break; case DIF_SUBR_DDI_PATHNAME: { /* * This one is a galactic mess. We are going to roughly * emulate ddi_pathname(), but it's made more complicated * by the fact that we (a) want to include the minor name and * (b) must proceed iteratively instead of recursively. */ uintptr_t dest = mstate->dtms_scratch_ptr; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; char *start = (char *)dest, *end = start + size - 1; uintptr_t daddr = tupregs[0].dttk_value; int64_t minor = (int64_t)tupregs[1].dttk_value; char *s; int i, len, depth = 0; /* * Due to all the pointer jumping we do and context we must * rely upon, we just mandate that the user must have kernel * read privileges to use this routine. */ if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) == 0) { *flags |= CPU_DTRACE_KPRIV; *illval = daddr; regs[rd] = 0; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } *end = '\0'; /* * We want to have a name for the minor. In order to do this, * we need to walk the minor list from the devinfo. We want * to be sure that we don't infinitely walk a circular list, * so we check for circularity by sending a scout pointer * ahead two elements for every element that we iterate over; * if the list is circular, these will ultimately point to the * same element. You may recognize this little trick as the * answer to a stupid interview question -- one that always * seems to be asked by those who had to have it laboriously * explained to them, and who can't even concisely describe * the conditions under which one would be forced to resort to * this technique. Needless to say, those conditions are * found here -- and probably only here. Is this the only use * of this infamous trick in shipping, production code? If it * isn't, it probably should be... 
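 *
 * In outline (illustrative pseudo-code; "next" stands for the
 * dtrace_loadptr() of a minor node's next link):
 *
 *	scout = next(maddr);
 *	while (maddr != NULL) {
 *		if (minor of maddr matches)
 *			break;
 *		maddr = next(maddr);
 *		scout = next(next(scout));
 *		if (scout == maddr)
 *			fail with CPU_DTRACE_ILLOP;	(cycle!)
 *	}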
*/ if (minor != -1) { uintptr_t maddr = dtrace_loadptr(daddr + offsetof(struct dev_info, devi_minor)); uintptr_t next = offsetof(struct ddi_minor_data, next); uintptr_t name = offsetof(struct ddi_minor_data, d_minor) + offsetof(struct ddi_minor, name); uintptr_t dev = offsetof(struct ddi_minor_data, d_minor) + offsetof(struct ddi_minor, dev); uintptr_t scout; if (maddr != NULL) scout = dtrace_loadptr(maddr + next); while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { uint64_t m; #ifdef _LP64 m = dtrace_load64(maddr + dev) & MAXMIN64; #else m = dtrace_load32(maddr + dev) & MAXMIN; #endif if (m != minor) { maddr = dtrace_loadptr(maddr + next); if (scout == NULL) continue; scout = dtrace_loadptr(scout + next); if (scout == NULL) continue; scout = dtrace_loadptr(scout + next); if (scout == NULL) continue; if (scout == maddr) { *flags |= CPU_DTRACE_ILLOP; break; } continue; } /* * We have the minor data. Now we need to * copy the minor's name into the end of the * pathname. */ s = (char *)dtrace_loadptr(maddr + name); len = dtrace_strlen(s, size); if (*flags & CPU_DTRACE_FAULT) break; if (len != 0) { if ((end -= (len + 1)) < start) break; *end = ':'; } for (i = 1; i <= len; i++) end[i] = dtrace_load8((uintptr_t)s++); break; } } while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { ddi_node_state_t devi_state; devi_state = dtrace_load32(daddr + offsetof(struct dev_info, devi_node_state)); if (*flags & CPU_DTRACE_FAULT) break; if (devi_state >= DS_INITIALIZED) { s = (char *)dtrace_loadptr(daddr + offsetof(struct dev_info, devi_addr)); len = dtrace_strlen(s, size); if (*flags & CPU_DTRACE_FAULT) break; if (len != 0) { if ((end -= (len + 1)) < start) break; *end = '@'; } for (i = 1; i <= len; i++) end[i] = dtrace_load8((uintptr_t)s++); } /* * Now for the node name... */ s = (char *)dtrace_loadptr(daddr + offsetof(struct dev_info, devi_node_name)); daddr = dtrace_loadptr(daddr + offsetof(struct dev_info, devi_parent)); /* * If our parent is NULL (that is, if we're the root * node), we're going to use the special path * "devices". */ if (daddr == 0) s = "devices"; len = dtrace_strlen(s, size); if (*flags & CPU_DTRACE_FAULT) break; if ((end -= (len + 1)) < start) break; for (i = 1; i <= len; i++) end[i] = dtrace_load8((uintptr_t)s++); *end = '/'; if (depth++ > dtrace_devdepth_max) { *flags |= CPU_DTRACE_ILLOP; break; } } if (end < start) DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); if (daddr == 0) { regs[rd] = (uintptr_t)end; mstate->dtms_scratch_ptr += size; } break; } #endif case DIF_SUBR_STRJOIN: { char *d = (char *)mstate->dtms_scratch_ptr; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t s1 = tupregs[0].dttk_value; uintptr_t s2 = tupregs[1].dttk_value; int i = 0, j = 0; size_t lim1, lim2; char c; if (!dtrace_strcanload(s1, size, &lim1, mstate, vstate) || !dtrace_strcanload(s2, size, &lim2, mstate, vstate)) { regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } for (;;) { if (i >= size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } c = (i >= lim1) ? '\0' : dtrace_load8(s1++); if ((d[i++] = c) == '\0') { i--; break; } } for (;;) { if (i >= size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } c = (j++ >= lim2) ? 
'\0' : dtrace_load8(s2++); if ((d[i++] = c) == '\0') break; } if (i < size) { mstate->dtms_scratch_ptr += i; regs[rd] = (uintptr_t)d; } break; } case DIF_SUBR_STRTOLL: { uintptr_t s = tupregs[0].dttk_value; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; size_t lim; int base = 10; if (nargs > 1) { if ((base = tupregs[1].dttk_value) <= 1 || base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { *flags |= CPU_DTRACE_ILLOP; break; } } if (!dtrace_strcanload(s, size, &lim, mstate, vstate)) { regs[rd] = INT64_MIN; break; } regs[rd] = dtrace_strtoll((char *)s, base, lim); break; } case DIF_SUBR_LLTOSTR: { int64_t i = (int64_t)tupregs[0].dttk_value; uint64_t val, digit; uint64_t size = 65; /* enough room for 2^64 in binary */ char *end = (char *)mstate->dtms_scratch_ptr + size - 1; int base = 10; if (nargs > 1) { if ((base = tupregs[1].dttk_value) <= 1 || base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { *flags |= CPU_DTRACE_ILLOP; break; } } val = (base == 10 && i < 0) ? i * -1 : i; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } for (*end-- = '\0'; val; val /= base) { if ((digit = val % base) <= '9' - '0') { *end-- = '0' + digit; } else { *end-- = 'a' + (digit - ('9' - '0') - 1); } } if (i == 0 && base == 16) *end-- = '0'; if (base == 16) *end-- = 'x'; if (i == 0 || base == 8 || base == 16) *end-- = '0'; if (i < 0 && base == 10) *end-- = '-'; regs[rd] = (uintptr_t)end + 1; mstate->dtms_scratch_ptr += size; break; } case DIF_SUBR_HTONS: case DIF_SUBR_NTOHS: #if BYTE_ORDER == BIG_ENDIAN regs[rd] = (uint16_t)tupregs[0].dttk_value; #else regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value); #endif break; case DIF_SUBR_HTONL: case DIF_SUBR_NTOHL: #if BYTE_ORDER == BIG_ENDIAN regs[rd] = (uint32_t)tupregs[0].dttk_value; #else regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value); #endif break; case DIF_SUBR_HTONLL: case DIF_SUBR_NTOHLL: #if BYTE_ORDER == BIG_ENDIAN regs[rd] = (uint64_t)tupregs[0].dttk_value; #else regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value); #endif break; case DIF_SUBR_DIRNAME: case DIF_SUBR_BASENAME: { char *dest = (char *)mstate->dtms_scratch_ptr; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t src = tupregs[0].dttk_value; int i, j, len = dtrace_strlen((char *)src, size); int lastbase = -1, firstbase = -1, lastdir = -1; int start, end; if (!dtrace_canload(src, len + 1, mstate, vstate)) { regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } /* * The basename and dirname for a zero-length string is * defined to be "." */ if (len == 0) { len = 1; src = (uintptr_t)"."; } /* * Start from the back of the string, moving back toward the * front until we see a character that isn't a slash. That * character is the last character in the basename. */ for (i = len - 1; i >= 0; i--) { if (dtrace_load8(src + i) != '/') break; } if (i >= 0) lastbase = i; /* * Starting from the last character in the basename, move * towards the front until we find a slash. The character * that we processed immediately before that is the first * character in the basename. */ for (; i >= 0; i--) { if (dtrace_load8(src + i) == '/') break; } if (i >= 0) firstbase = i + 1; /* * Now keep going until we find a non-slash character. That * character is the last character in the dirname. 
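 *
 * For example, with src = "/usr/lib/" these scans leave lastbase
 * at the final 'b', firstbase at the 'l' and lastdir at the 'r',
 * yielding basename "lib" and dirname "/usr" -- consistent with
 * basename(1) and dirname(1).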
*/ for (; i >= 0; i--) { if (dtrace_load8(src + i) != '/') break; } if (i >= 0) lastdir = i; ASSERT(!(lastbase == -1 && firstbase != -1)); ASSERT(!(firstbase == -1 && lastdir != -1)); if (lastbase == -1) { /* * We didn't find a non-slash character. We know that * the length is non-zero, so the whole string must be * slashes. In either the dirname or the basename * case, we return '/'. */ ASSERT(firstbase == -1); firstbase = lastbase = lastdir = 0; } if (firstbase == -1) { /* * The entire string consists only of a basename * component. If we're looking for dirname, we need * to change our string to be just "."; if we're * looking for a basename, we'll just set the first * character of the basename to be 0. */ if (subr == DIF_SUBR_DIRNAME) { ASSERT(lastdir == -1); src = (uintptr_t)"."; lastdir = 0; } else { firstbase = 0; } } if (subr == DIF_SUBR_DIRNAME) { if (lastdir == -1) { /* * We know that we have a slash in the name -- * or lastdir would be set to 0, above. And * because lastdir is -1, we know that this * slash must be the first character. (That * is, the full string must be of the form * "/basename".) In this case, the last * character of the directory name is 0. */ lastdir = 0; } start = 0; end = lastdir; } else { ASSERT(subr == DIF_SUBR_BASENAME); ASSERT(firstbase != -1 && lastbase != -1); start = firstbase; end = lastbase; } for (i = start, j = 0; i <= end && j < size - 1; i++, j++) dest[j] = dtrace_load8(src + i); dest[j] = '\0'; regs[rd] = (uintptr_t)dest; mstate->dtms_scratch_ptr += size; break; } case DIF_SUBR_GETF: { uintptr_t fd = tupregs[0].dttk_value; struct filedesc *fdp; file_t *fp; if (!dtrace_priv_proc(state)) { regs[rd] = 0; break; } fdp = curproc->p_fd; FILEDESC_SLOCK(fdp); fp = fget_locked(fdp, fd); mstate->dtms_getf = fp; regs[rd] = (uintptr_t)fp; FILEDESC_SUNLOCK(fdp); break; } case DIF_SUBR_CLEANPATH: { char *dest = (char *)mstate->dtms_scratch_ptr, c; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t src = tupregs[0].dttk_value; size_t lim; int i = 0, j = 0; #ifdef illumos zone_t *z; #endif if (!dtrace_strcanload(src, size, &lim, mstate, vstate)) { regs[rd] = 0; break; } if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } /* * Move forward, loading each character. */ do { c = (i >= lim) ? '\0' : dtrace_load8(src + i++); next: if (j + 5 >= size) /* 5 = strlen("/..c\0") */ break; if (c != '/') { dest[j++] = c; continue; } c = (i >= lim) ? '\0' : dtrace_load8(src + i++); if (c == '/') { /* * We have two slashes -- we can just advance * to the next character. */ goto next; } if (c != '.') { /* * This is not "." and it's not ".." -- we can * just store the "/" and this character and * drive on. */ dest[j++] = '/'; dest[j++] = c; continue; } c = (i >= lim) ? '\0' : dtrace_load8(src + i++); if (c == '/') { /* * This is a "/./" component. We're not going * to store anything in the destination buffer; * we're just going to go to the next component. */ goto next; } if (c != '.') { /* * This is not ".." -- we can just store the * "/." and this character and continue * processing. */ dest[j++] = '/'; dest[j++] = '.'; dest[j++] = c; continue; } c = (i >= lim) ? '\0' : dtrace_load8(src + i++); if (c != '/' && c != '\0') { /* * This is not ".." -- it's "..[mumble]". * We'll store the "/.." and this character * and continue processing. */ dest[j++] = '/'; dest[j++] = '.'; dest[j++] = '.'; dest[j++] = c; continue; } /* * This is "/../" or "/..\0". We need to back up * our destination pointer until we find a "/". 
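 *
 * Taken together, the cases above mean that, illustratively,
 * cleanpath("/foo/bar/../baz//./qux") yields "/foo/baz/qux":
 * "//" and "/./" are dropped outright, and "/../" rewinds the
 * output past the "bar" component.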
*/ i--; while (j != 0 && dest[--j] != '/') continue; if (c == '\0') dest[++j] = '/'; } while (c != '\0'); dest[j] = '\0'; #ifdef illumos if (mstate->dtms_getf != NULL && !(mstate->dtms_access & DTRACE_ACCESS_KERNEL) && (z = state->dts_cred.dcr_cred->cr_zone) != kcred->cr_zone) { /* * If we've done a getf() as a part of this ECB and we * don't have kernel access (and we're not in the global * zone), check if the path we cleaned up begins with * the zone's root path, and trim it off if so. Note * that this is an output cleanliness issue, not a * security issue: knowing one's zone root path does * not enable privilege escalation. */ if (strstr(dest, z->zone_rootpath) == dest) dest += strlen(z->zone_rootpath) - 1; } #endif regs[rd] = (uintptr_t)dest; mstate->dtms_scratch_ptr += size; break; } case DIF_SUBR_INET_NTOA: case DIF_SUBR_INET_NTOA6: case DIF_SUBR_INET_NTOP: { size_t size; int af, argi, i; char *base, *end; if (subr == DIF_SUBR_INET_NTOP) { af = (int)tupregs[0].dttk_value; argi = 1; } else { af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6; argi = 0; } if (af == AF_INET) { ipaddr_t ip4; uint8_t *ptr8, val; if (!dtrace_canload(tupregs[argi].dttk_value, sizeof (ipaddr_t), mstate, vstate)) { regs[rd] = 0; break; } /* * Safely load the IPv4 address. */ ip4 = dtrace_load32(tupregs[argi].dttk_value); /* * Check an IPv4 string will fit in scratch. */ size = INET_ADDRSTRLEN; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } base = (char *)mstate->dtms_scratch_ptr; end = (char *)mstate->dtms_scratch_ptr + size - 1; /* * Stringify as a dotted decimal quad. */ *end-- = '\0'; ptr8 = (uint8_t *)&ip4; for (i = 3; i >= 0; i--) { val = ptr8[i]; if (val == 0) { *end-- = '0'; } else { for (; val; val /= 10) { *end-- = '0' + (val % 10); } } if (i > 0) *end-- = '.'; } ASSERT(end + 1 >= base); } else if (af == AF_INET6) { struct in6_addr ip6; int firstzero, tryzero, numzero, v6end; uint16_t val; const char digits[] = "0123456789abcdef"; /* * Stringify using RFC 1884 convention 2 - 16 bit * hexadecimal values with a zero-run compression. * Lower case hexadecimal digits are used. * eg, fe80::214:4fff:fe0b:76c8. * The IPv4 embedded form is returned for inet_ntop, * just the IPv4 string is returned for inet_ntoa6. */ if (!dtrace_canload(tupregs[argi].dttk_value, sizeof (struct in6_addr), mstate, vstate)) { regs[rd] = 0; break; } /* * Safely load the IPv6 address. */ dtrace_bcopy( (void *)(uintptr_t)tupregs[argi].dttk_value, (void *)(uintptr_t)&ip6, sizeof (struct in6_addr)); /* * Check an IPv6 string will fit in scratch. */ size = INET6_ADDRSTRLEN; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } base = (char *)mstate->dtms_scratch_ptr; end = (char *)mstate->dtms_scratch_ptr + size - 1; *end-- = '\0'; /* * Find the longest run of 16 bit zero values * for the single allowed zero compression - "::". 
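 *
 * For example, 2001:0db8:0000:0000:0000:0000:0000:0001 is
 * rendered as "2001:db8::1" -- the longest run of zero groups
 * collapses to "::" and leading zeroes within each group are
 * dropped.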
*/ firstzero = -1; tryzero = -1; numzero = 1; for (i = 0; i < sizeof (struct in6_addr); i++) { #ifdef illumos if (ip6._S6_un._S6_u8[i] == 0 && #else if (ip6.__u6_addr.__u6_addr8[i] == 0 && #endif tryzero == -1 && i % 2 == 0) { tryzero = i; continue; } if (tryzero != -1 && #ifdef illumos (ip6._S6_un._S6_u8[i] != 0 || #else (ip6.__u6_addr.__u6_addr8[i] != 0 || #endif i == sizeof (struct in6_addr) - 1)) { if (i - tryzero <= numzero) { tryzero = -1; continue; } firstzero = tryzero; numzero = i - i % 2 - tryzero; tryzero = -1; #ifdef illumos if (ip6._S6_un._S6_u8[i] == 0 && #else if (ip6.__u6_addr.__u6_addr8[i] == 0 && #endif i == sizeof (struct in6_addr) - 1) numzero += 2; } } ASSERT(firstzero + numzero <= sizeof (struct in6_addr)); /* * Check for an IPv4 embedded address. */ v6end = sizeof (struct in6_addr) - 2; if (IN6_IS_ADDR_V4MAPPED(&ip6) || IN6_IS_ADDR_V4COMPAT(&ip6)) { for (i = sizeof (struct in6_addr) - 1; i >= DTRACE_V4MAPPED_OFFSET; i--) { ASSERT(end >= base); #ifdef illumos val = ip6._S6_un._S6_u8[i]; #else val = ip6.__u6_addr.__u6_addr8[i]; #endif if (val == 0) { *end-- = '0'; } else { for (; val; val /= 10) { *end-- = '0' + val % 10; } } if (i > DTRACE_V4MAPPED_OFFSET) *end-- = '.'; } if (subr == DIF_SUBR_INET_NTOA6) goto inetout; /* * Set v6end to skip the IPv4 address that * we have already stringified. */ v6end = 10; } /* * Build the IPv6 string by working through the * address in reverse. */ for (i = v6end; i >= 0; i -= 2) { ASSERT(end >= base); if (i == firstzero + numzero - 2) { *end-- = ':'; *end-- = ':'; i -= numzero - 2; continue; } if (i < 14 && i != firstzero - 2) *end-- = ':'; #ifdef illumos val = (ip6._S6_un._S6_u8[i] << 8) + ip6._S6_un._S6_u8[i + 1]; #else val = (ip6.__u6_addr.__u6_addr8[i] << 8) + ip6.__u6_addr.__u6_addr8[i + 1]; #endif if (val == 0) { *end-- = '0'; } else { for (; val; val /= 16) { *end-- = digits[val % 16]; } } } ASSERT(end + 1 >= base); } else { /* * The user didn't use AF_INET or AF_INET6. */ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); regs[rd] = 0; break; } inetout: regs[rd] = (uintptr_t)end + 1; mstate->dtms_scratch_ptr += size; break; } case DIF_SUBR_MEMREF: { uintptr_t size = 2 * sizeof(uintptr_t); uintptr_t *memref = (uintptr_t *) P2ROUNDUP(mstate->dtms_scratch_ptr, sizeof(uintptr_t)); size_t scratch_size = ((uintptr_t) memref - mstate->dtms_scratch_ptr) + size; /* address and length */ memref[0] = tupregs[0].dttk_value; memref[1] = tupregs[1].dttk_value; regs[rd] = (uintptr_t) memref; mstate->dtms_scratch_ptr += scratch_size; break; } #ifndef illumos case DIF_SUBR_MEMSTR: { char *str = (char *)mstate->dtms_scratch_ptr; uintptr_t mem = tupregs[0].dttk_value; char c = tupregs[1].dttk_value; size_t size = tupregs[2].dttk_value; uint8_t n; int i; regs[rd] = 0; if (size == 0) break; if (!dtrace_canload(mem, size - 1, mstate, vstate)) break; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); break; } if (dtrace_memstr_max != 0 && size > dtrace_memstr_max) { *flags |= CPU_DTRACE_ILLOP; break; } for (i = 0; i < size - 1; i++) { n = dtrace_load8(mem++); str[i] = (n == 0) ? c : n; } str[size - 1] = 0; regs[rd] = (uintptr_t)str; mstate->dtms_scratch_ptr += size; break; } #endif } } /* * Emulate the execution of DTrace IR instructions specified by the given * DIF object. This function is deliberately void of assertions as all of * the necessary checks are handled by a call to dtrace_difo_validate().
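 *
 * The emulator models SPARC-like condition codes: DIF_OP_CMP sets
 * cc_n/cc_z/cc_v/cc_c, and the conditional branches test them. For
 * example, an unsigned "a < b" test is simply the carry bit
 * (illustrative DIF):
 *
 *	cmp	%r1, %r2	(sets cc_c = regs[r1] < regs[r2])
 *	blu	label		(taken when cc_c is set)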
*/ static uint64_t dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, dtrace_state_t *state) { const dif_instr_t *text = difo->dtdo_buf; const uint_t textlen = difo->dtdo_len; const char *strtab = difo->dtdo_strtab; const uint64_t *inttab = difo->dtdo_inttab; uint64_t rval = 0; dtrace_statvar_t *svar; dtrace_dstate_t *dstate = &vstate->dtvs_dynvars; dtrace_difv_t *v; volatile uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags; volatile uintptr_t *illval = &cpu_core[curcpu].cpuc_dtrace_illval; dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */ uint64_t regs[DIF_DIR_NREGS]; uint64_t *tmp; uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0; int64_t cc_r; uint_t pc = 0, id, opc = 0; uint8_t ttop = 0; dif_instr_t instr; uint_t r1, r2, rd; /* * We stash the current DIF object into the machine state: we need it * for subsequent access checking. */ mstate->dtms_difo = difo; regs[DIF_REG_R0] = 0; /* %r0 is fixed at zero */ while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) { opc = pc; instr = text[pc++]; r1 = DIF_INSTR_R1(instr); r2 = DIF_INSTR_R2(instr); rd = DIF_INSTR_RD(instr); switch (DIF_INSTR_OP(instr)) { case DIF_OP_OR: regs[rd] = regs[r1] | regs[r2]; break; case DIF_OP_XOR: regs[rd] = regs[r1] ^ regs[r2]; break; case DIF_OP_AND: regs[rd] = regs[r1] & regs[r2]; break; case DIF_OP_SLL: regs[rd] = regs[r1] << regs[r2]; break; case DIF_OP_SRL: regs[rd] = regs[r1] >> regs[r2]; break; case DIF_OP_SUB: regs[rd] = regs[r1] - regs[r2]; break; case DIF_OP_ADD: regs[rd] = regs[r1] + regs[r2]; break; case DIF_OP_MUL: regs[rd] = regs[r1] * regs[r2]; break; case DIF_OP_SDIV: if (regs[r2] == 0) { regs[rd] = 0; *flags |= CPU_DTRACE_DIVZERO; } else { regs[rd] = (int64_t)regs[r1] / (int64_t)regs[r2]; } break; case DIF_OP_UDIV: if (regs[r2] == 0) { regs[rd] = 0; *flags |= CPU_DTRACE_DIVZERO; } else { regs[rd] = regs[r1] / regs[r2]; } break; case DIF_OP_SREM: if (regs[r2] == 0) { regs[rd] = 0; *flags |= CPU_DTRACE_DIVZERO; } else { regs[rd] = (int64_t)regs[r1] % (int64_t)regs[r2]; } break; case DIF_OP_UREM: if (regs[r2] == 0) { regs[rd] = 0; *flags |= CPU_DTRACE_DIVZERO; } else { regs[rd] = regs[r1] % regs[r2]; } break; case DIF_OP_NOT: regs[rd] = ~regs[r1]; break; case DIF_OP_MOV: regs[rd] = regs[r1]; break; case DIF_OP_CMP: cc_r = regs[r1] - regs[r2]; cc_n = cc_r < 0; cc_z = cc_r == 0; cc_v = 0; cc_c = regs[r1] < regs[r2]; break; case DIF_OP_TST: cc_n = cc_v = cc_c = 0; cc_z = regs[r1] == 0; break; case DIF_OP_BA: pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BE: if (cc_z) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BNE: if (cc_z == 0) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BG: if ((cc_z | (cc_n ^ cc_v)) == 0) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BGU: if ((cc_c | cc_z) == 0) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BGE: if ((cc_n ^ cc_v) == 0) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BGEU: if (cc_c == 0) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BL: if (cc_n ^ cc_v) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BLU: if (cc_c) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BLE: if (cc_z | (cc_n ^ cc_v)) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_BLEU: if (cc_c | cc_z) pc = DIF_INSTR_LABEL(instr); break; case DIF_OP_RLDSB: if (!dtrace_canload(regs[r1], 1, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDSB: regs[rd] = (int8_t)dtrace_load8(regs[r1]); break; case DIF_OP_RLDSH: if (!dtrace_canload(regs[r1], 2, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDSH: regs[rd] = 
(int16_t)dtrace_load16(regs[r1]); break; case DIF_OP_RLDSW: if (!dtrace_canload(regs[r1], 4, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDSW: regs[rd] = (int32_t)dtrace_load32(regs[r1]); break; case DIF_OP_RLDUB: if (!dtrace_canload(regs[r1], 1, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDUB: regs[rd] = dtrace_load8(regs[r1]); break; case DIF_OP_RLDUH: if (!dtrace_canload(regs[r1], 2, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDUH: regs[rd] = dtrace_load16(regs[r1]); break; case DIF_OP_RLDUW: if (!dtrace_canload(regs[r1], 4, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDUW: regs[rd] = dtrace_load32(regs[r1]); break; case DIF_OP_RLDX: if (!dtrace_canload(regs[r1], 8, mstate, vstate)) break; /*FALLTHROUGH*/ case DIF_OP_LDX: regs[rd] = dtrace_load64(regs[r1]); break; case DIF_OP_ULDSB: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = (int8_t) dtrace_fuword8((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_ULDSH: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = (int16_t) dtrace_fuword16((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_ULDSW: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = (int32_t) dtrace_fuword32((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_ULDUB: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = dtrace_fuword8((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_ULDUH: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = dtrace_fuword16((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_ULDUW: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = dtrace_fuword32((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_ULDX: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); regs[rd] = dtrace_fuword64((void *)(uintptr_t)regs[r1]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; case DIF_OP_RET: rval = regs[rd]; pc = textlen; break; case DIF_OP_NOP: break; case DIF_OP_SETX: regs[rd] = inttab[DIF_INSTR_INTEGER(instr)]; break; case DIF_OP_SETS: regs[rd] = (uint64_t)(uintptr_t) (strtab + DIF_INSTR_STRING(instr)); break; case DIF_OP_SCMP: { size_t sz = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t s1 = regs[r1]; uintptr_t s2 = regs[r2]; size_t lim1, lim2; if (s1 != 0 && !dtrace_strcanload(s1, sz, &lim1, mstate, vstate)) break; if (s2 != 0 && !dtrace_strcanload(s2, sz, &lim2, mstate, vstate)) break; cc_r = dtrace_strncmp((char *)s1, (char *)s2, MIN(lim1, lim2)); cc_n = cc_r < 0; cc_z = cc_r == 0; cc_v = cc_c = 0; break; } case DIF_OP_LDGA: regs[rd] = dtrace_dif_variable(mstate, state, r1, regs[r2]); break; case DIF_OP_LDGS: id = DIF_INSTR_VAR(instr); if (id >= DIF_VAR_OTHER_UBASE) { uintptr_t a; id -= DIF_VAR_OTHER_UBASE; svar = vstate->dtvs_globals[id]; ASSERT(svar != NULL); v = &svar->dtsv_var; if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) { regs[rd] = svar->dtsv_data; break; } a = (uintptr_t)svar->dtsv_data; if (*(uint8_t *)a == UINT8_MAX) { /* * If the 0th byte is set to UINT8_MAX * then this is to be treated as a * reference to a NULL variable. 
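/*
 * A minimal userland sketch of the tag-byte convention used just above
 * for by-ref variables: storage is one uint64_t of header followed by
 * the payload, and a first byte of UINT8_MAX marks a NULL reference.
 * The helper names are hypothetical, not from the DTrace sources.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t
byref_load(const uint8_t *storage)
{
	if (storage[0] == UINT8_MAX)
		return (0);		/* NULL reference */
	return ((uint64_t)(uintptr_t)(storage + sizeof (uint64_t)));
}

static void
byref_store(uint8_t *storage, const char *src, size_t len)
{
	if (src == NULL) {
		*storage = UINT8_MAX;	/* mark the variable NULL */
		return;
	}
	*storage = 0;
	memcpy(storage + sizeof (uint64_t), src, len);
}

int
main(void)
{
	uint8_t slot[64];

	byref_store(slot, "hello", 6);
	printf("%s\n", (char *)(uintptr_t)byref_load(slot));
	byref_store(slot, NULL, 0);
	printf("%p\n", (void *)(uintptr_t)byref_load(slot));
	return (0);
}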
*/ regs[rd] = 0; } else { regs[rd] = a + sizeof (uint64_t); } break; } regs[rd] = dtrace_dif_variable(mstate, state, id, 0); break; case DIF_OP_STGS: id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; VERIFY(id < vstate->dtvs_nglobals); svar = vstate->dtvs_globals[id]; ASSERT(svar != NULL); v = &svar->dtsv_var; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { uintptr_t a = (uintptr_t)svar->dtsv_data; size_t lim; ASSERT(a != 0); ASSERT(svar->dtsv_size != 0); if (regs[rd] == 0) { *(uint8_t *)a = UINT8_MAX; break; } else { *(uint8_t *)a = 0; a += sizeof (uint64_t); } if (!dtrace_vcanload( (void *)(uintptr_t)regs[rd], &v->dtdv_type, &lim, mstate, vstate)) break; dtrace_vcopy((void *)(uintptr_t)regs[rd], (void *)a, &v->dtdv_type, lim); break; } svar->dtsv_data = regs[rd]; break; case DIF_OP_LDTA: /* * There are no DTrace built-in thread-local arrays at * present. This opcode is saved for future work. */ *flags |= CPU_DTRACE_ILLOP; regs[rd] = 0; break; case DIF_OP_LDLS: id = DIF_INSTR_VAR(instr); if (id < DIF_VAR_OTHER_UBASE) { /* * For now, this has no meaning. */ regs[rd] = 0; break; } id -= DIF_VAR_OTHER_UBASE; ASSERT(id < vstate->dtvs_nlocals); ASSERT(vstate->dtvs_locals != NULL); svar = vstate->dtvs_locals[id]; ASSERT(svar != NULL); v = &svar->dtsv_var; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { uintptr_t a = (uintptr_t)svar->dtsv_data; size_t sz = v->dtdv_type.dtdt_size; size_t lim; sz += sizeof (uint64_t); ASSERT(svar->dtsv_size == NCPU * sz); a += curcpu * sz; if (*(uint8_t *)a == UINT8_MAX) { /* * If the 0th byte is set to UINT8_MAX * then this is to be treated as a * reference to a NULL variable. */ regs[rd] = 0; } else { regs[rd] = a + sizeof (uint64_t); } break; } ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t)); tmp = (uint64_t *)(uintptr_t)svar->dtsv_data; regs[rd] = tmp[curcpu]; break; case DIF_OP_STLS: id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; VERIFY(id < vstate->dtvs_nlocals); ASSERT(vstate->dtvs_locals != NULL); svar = vstate->dtvs_locals[id]; ASSERT(svar != NULL); v = &svar->dtsv_var; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { uintptr_t a = (uintptr_t)svar->dtsv_data; size_t sz = v->dtdv_type.dtdt_size; size_t lim; sz += sizeof (uint64_t); ASSERT(svar->dtsv_size == NCPU * sz); a += curcpu * sz; if (regs[rd] == 0) { *(uint8_t *)a = UINT8_MAX; break; } else { *(uint8_t *)a = 0; a += sizeof (uint64_t); } if (!dtrace_vcanload( (void *)(uintptr_t)regs[rd], &v->dtdv_type, &lim, mstate, vstate)) break; dtrace_vcopy((void *)(uintptr_t)regs[rd], (void *)a, &v->dtdv_type, lim); break; } ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t)); tmp = (uint64_t *)(uintptr_t)svar->dtsv_data; tmp[curcpu] = regs[rd]; break; case DIF_OP_LDTS: { dtrace_dynvar_t *dvar; dtrace_key_t *key; id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; v = &vstate->dtvs_tlocals[id]; key = &tupregs[DIF_DTR_NREGS]; key[0].dttk_value = (uint64_t)id; key[0].dttk_size = 0; DTRACE_TLS_THRKEY(key[1].dttk_value); key[1].dttk_size = 0; dvar = dtrace_dynvar(dstate, 2, key, sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC, mstate, vstate); if (dvar == NULL) { regs[rd] = 0; break; } if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data; } else { regs[rd] = *((uint64_t *)dvar->dtdv_data); } break; } case DIF_OP_STTS: { dtrace_dynvar_t *dvar; dtrace_key_t *key; id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; VERIFY(id < 
vstate->dtvs_ntlocals); key = &tupregs[DIF_DTR_NREGS]; key[0].dttk_value = (uint64_t)id; key[0].dttk_size = 0; DTRACE_TLS_THRKEY(key[1].dttk_value); key[1].dttk_size = 0; v = &vstate->dtvs_tlocals[id]; dvar = dtrace_dynvar(dstate, 2, key, v->dtdv_type.dtdt_size > sizeof (uint64_t) ? v->dtdv_type.dtdt_size : sizeof (uint64_t), regs[rd] ? DTRACE_DYNVAR_ALLOC : DTRACE_DYNVAR_DEALLOC, mstate, vstate); /* * Given that we're storing to thread-local data, * we need to flush our predicate cache. */ curthread->t_predcache = 0; if (dvar == NULL) break; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { size_t lim; if (!dtrace_vcanload( (void *)(uintptr_t)regs[rd], &v->dtdv_type, &lim, mstate, vstate)) break; dtrace_vcopy((void *)(uintptr_t)regs[rd], dvar->dtdv_data, &v->dtdv_type, lim); } else { *((uint64_t *)dvar->dtdv_data) = regs[rd]; } break; } case DIF_OP_SRA: regs[rd] = (int64_t)regs[r1] >> regs[r2]; break; case DIF_OP_CALL: dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd, regs, tupregs, ttop, mstate, state); break; case DIF_OP_PUSHTR: if (ttop == DIF_DTR_NREGS) { *flags |= CPU_DTRACE_TUPOFLOW; break; } if (r1 == DIF_TYPE_STRING) { /* * If this is a string type and the size is 0, * we'll use the system-wide default string * size. Note that we are _not_ looking at * the value of the DTRACEOPT_STRSIZE option; * had this been set, we would expect to have * a non-zero size value in the "pushtr". */ tupregs[ttop].dttk_size = dtrace_strlen((char *)(uintptr_t)regs[rd], regs[r2] ? regs[r2] : dtrace_strsize_default) + 1; } else { if (regs[r2] > LONG_MAX) { *flags |= CPU_DTRACE_ILLOP; break; } tupregs[ttop].dttk_size = regs[r2]; } tupregs[ttop++].dttk_value = regs[rd]; break; case DIF_OP_PUSHTV: if (ttop == DIF_DTR_NREGS) { *flags |= CPU_DTRACE_TUPOFLOW; break; } tupregs[ttop].dttk_value = regs[rd]; tupregs[ttop++].dttk_size = 0; break; case DIF_OP_POPTS: if (ttop != 0) ttop--; break; case DIF_OP_FLUSHTS: ttop = 0; break; case DIF_OP_LDGAA: case DIF_OP_LDTAA: { dtrace_dynvar_t *dvar; dtrace_key_t *key = tupregs; uint_t nkeys = ttop; id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; key[nkeys].dttk_value = (uint64_t)id; key[nkeys++].dttk_size = 0; if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) { DTRACE_TLS_THRKEY(key[nkeys].dttk_value); key[nkeys++].dttk_size = 0; VERIFY(id < vstate->dtvs_ntlocals); v = &vstate->dtvs_tlocals[id]; } else { VERIFY(id < vstate->dtvs_nglobals); v = &vstate->dtvs_globals[id]->dtsv_var; } dvar = dtrace_dynvar(dstate, nkeys, key, v->dtdv_type.dtdt_size > sizeof (uint64_t) ? v->dtdv_type.dtdt_size : sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC, mstate, vstate); if (dvar == NULL) { regs[rd] = 0; break; } if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data; } else { regs[rd] = *((uint64_t *)dvar->dtdv_data); } break; } case DIF_OP_STGAA: case DIF_OP_STTAA: { dtrace_dynvar_t *dvar; dtrace_key_t *key = tupregs; uint_t nkeys = ttop; id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; key[nkeys].dttk_value = (uint64_t)id; key[nkeys++].dttk_size = 0; if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) { DTRACE_TLS_THRKEY(key[nkeys].dttk_value); key[nkeys++].dttk_size = 0; VERIFY(id < vstate->dtvs_ntlocals); v = &vstate->dtvs_tlocals[id]; } else { VERIFY(id < vstate->dtvs_nglobals); v = &vstate->dtvs_globals[id]->dtsv_var; } dvar = dtrace_dynvar(dstate, nkeys, key, v->dtdv_type.dtdt_size > sizeof (uint64_t) ? v->dtdv_type.dtdt_size : sizeof (uint64_t), regs[rd] ? 
DTRACE_DYNVAR_ALLOC : DTRACE_DYNVAR_DEALLOC, mstate, vstate); if (dvar == NULL) break; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { size_t lim; if (!dtrace_vcanload( (void *)(uintptr_t)regs[rd], &v->dtdv_type, &lim, mstate, vstate)) break; dtrace_vcopy((void *)(uintptr_t)regs[rd], dvar->dtdv_data, &v->dtdv_type, lim); } else { *((uint64_t *)dvar->dtdv_data) = regs[rd]; } break; } case DIF_OP_ALLOCS: { uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8); size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1]; /* * Rounding up the user allocation size could have * overflowed large, bogus allocations (like -1ULL) to * 0. */ if (size < regs[r1] || !DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); regs[rd] = 0; break; } dtrace_bzero((void *) mstate->dtms_scratch_ptr, size); mstate->dtms_scratch_ptr += size; regs[rd] = ptr; break; } case DIF_OP_COPYS: if (!dtrace_canstore(regs[rd], regs[r2], mstate, vstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate)) break; dtrace_bcopy((void *)(uintptr_t)regs[r1], (void *)(uintptr_t)regs[rd], (size_t)regs[r2]); break; case DIF_OP_STB: if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } *((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1]; break; case DIF_OP_STH: if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } if (regs[rd] & 1) { *flags |= CPU_DTRACE_BADALIGN; *illval = regs[rd]; break; } *((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1]; break; case DIF_OP_STW: if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } if (regs[rd] & 3) { *flags |= CPU_DTRACE_BADALIGN; *illval = regs[rd]; break; } *((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1]; break; case DIF_OP_STX: if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) { *flags |= CPU_DTRACE_BADADDR; *illval = regs[rd]; break; } if (regs[rd] & 7) { *flags |= CPU_DTRACE_BADALIGN; *illval = regs[rd]; break; } *((uint64_t *)(uintptr_t)regs[rd]) = regs[r1]; break; } } if (!(*flags & CPU_DTRACE_FAULT)) return (rval); mstate->dtms_fltoffs = opc * sizeof (dif_instr_t); mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS; return (0); } static void dtrace_action_breakpoint(dtrace_ecb_t *ecb) { dtrace_probe_t *probe = ecb->dte_probe; dtrace_provider_t *prov = probe->dtpr_provider; char c[DTRACE_FULLNAMELEN + 80], *str; char *msg = "dtrace: breakpoint action at probe "; char *ecbmsg = " (ecb "; uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4)); uintptr_t val = (uintptr_t)ecb; int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0; if (dtrace_destructive_disallow) return; /* * It's impossible to be taking action on the NULL probe. */ ASSERT(probe != NULL); /* * This is a poor man's (destitute man's?) sprintf(): we want to * print the provider name, module name, function name and name of * the probe, along with the hex address of the ECB with the breakpoint * action -- all of which we must place in the character buffer by * hand. 
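/*
 * The breakpoint-action code below formats the ECB pointer by hand, one
 * nibble at a time, skipping leading zeros.  A stand-alone sketch of
 * that formatter (hypothetical name; 8 stands in for NBBY):
 */
#include <stdint.h>
#include <stdio.h>

static int
fmt_hex(uintptr_t val, char *buf)
{
	int shift = (sizeof (uintptr_t) * 8) - 4, i = 0;

	for (; shift >= 0; shift -= 4) {
		uintptr_t mask = (uintptr_t)0xf << shift;

		/* Emit nothing until the first nonzero nibble. */
		if (val >= ((uintptr_t)1 << shift))
			buf[i++] = "0123456789abcdef"[(val & mask) >> shift];
	}
	buf[i] = '\0';
	return (i);	/* 0 for val == 0, matching the non-NULL ECB assumption */
}

int
main(void)
{
	char buf[2 * sizeof (uintptr_t) + 1];

	(void) fmt_hex((uintptr_t)0xdeadbeef, buf);
	printf("%s\n", buf);		/* prints "deadbeef" */
	return (0);
}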
*/ while (*msg != '\0') c[i++] = *msg++; for (str = prov->dtpv_name; *str != '\0'; str++) c[i++] = *str; c[i++] = ':'; for (str = probe->dtpr_mod; *str != '\0'; str++) c[i++] = *str; c[i++] = ':'; for (str = probe->dtpr_func; *str != '\0'; str++) c[i++] = *str; c[i++] = ':'; for (str = probe->dtpr_name; *str != '\0'; str++) c[i++] = *str; while (*ecbmsg != '\0') c[i++] = *ecbmsg++; while (shift >= 0) { mask = (uintptr_t)0xf << shift; if (val >= ((uintptr_t)1 << shift)) c[i++] = "0123456789abcdef"[(val & mask) >> shift]; shift -= 4; } c[i++] = ')'; c[i] = '\0'; #ifdef illumos debug_enter(c); #else kdb_enter(KDB_WHY_DTRACE, "breakpoint action"); #endif } static void dtrace_action_panic(dtrace_ecb_t *ecb) { dtrace_probe_t *probe = ecb->dte_probe; /* * It's impossible to be taking action on the NULL probe. */ ASSERT(probe != NULL); if (dtrace_destructive_disallow) return; if (dtrace_panicked != NULL) return; if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL) return; /* * We won the right to panic. (We want to be sure that only one * thread calls panic() from dtrace_probe(), and that panic() is * called exactly once.) */ dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)", probe->dtpr_provider->dtpv_name, probe->dtpr_mod, probe->dtpr_func, probe->dtpr_name, (void *)ecb); } static void dtrace_action_raise(uint64_t sig) { if (dtrace_destructive_disallow) return; if (sig >= NSIG) { DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return; } #ifdef illumos /* * raise() has a queue depth of 1 -- we ignore all subsequent * invocations of the raise() action. */ if (curthread->t_dtrace_sig == 0) curthread->t_dtrace_sig = (uint8_t)sig; curthread->t_sig_check = 1; aston(curthread); #else struct proc *p = curproc; PROC_LOCK(p); kern_psignal(p, sig); PROC_UNLOCK(p); #endif } static void dtrace_action_stop(void) { if (dtrace_destructive_disallow) return; #ifdef illumos if (!curthread->t_dtrace_stop) { curthread->t_dtrace_stop = 1; curthread->t_sig_check = 1; aston(curthread); } #else struct proc *p = curproc; PROC_LOCK(p); kern_psignal(p, SIGSTOP); PROC_UNLOCK(p); #endif } static void dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val) { hrtime_t now; volatile uint16_t *flags; #ifdef illumos cpu_t *cpu = CPU; #else cpu_t *cpu = &solaris_cpu[curcpu]; #endif if (dtrace_destructive_disallow) return; flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; now = dtrace_gethrtime(); if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) { /* * We need to advance the mark to the current time. */ cpu->cpu_dtrace_chillmark = now; cpu->cpu_dtrace_chilled = 0; } /* * Now check to see if the requested chill time would take us over * the maximum amount of time allowed in the chill interval. (Or * worse, if the calculation itself induces overflow.) */ if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max || cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) { *flags |= CPU_DTRACE_ILLOP; return; } while (dtrace_gethrtime() - now < val) continue; /* * Normally, we assure that the value of the variable "timestamp" does * not change within an ECB. The presence of chill() represents an * exception to this rule, however. 
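/*
 * dtrace_action_chill() above spins only after checking a per-interval
 * budget.  A hedged userland sketch of that bookkeeping, with a
 * monotonic clock standing in for dtrace_gethrtime() and hypothetical
 * parameter names:
 */
#include <stdint.h>
#include <time.h>

typedef int64_t hrtime_t;

static hrtime_t
gethrtime_mono(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((hrtime_t)ts.tv_sec * 1000000000 + ts.tv_nsec);
}

static int
chill(hrtime_t *mark, hrtime_t *chilled, hrtime_t interval,
    hrtime_t max, hrtime_t val)
{
	hrtime_t now = gethrtime_mono();

	if (now - *mark > interval) {
		*mark = now;		/* a new interval begins */
		*chilled = 0;
	}
	/* Refuse requests that exceed the budget or overflow the sum. */
	if (*chilled + val > max || *chilled + val < *chilled)
		return (-1);
	while (gethrtime_mono() - now < val)
		continue;		/* the actual chill: a busy wait */
	*chilled += val;
	return (0);
}

int
main(void)
{
	hrtime_t mark = 0, chilled = 0;

	return (chill(&mark, &chilled, 1000000000, 500000000, 1000) != 0);
}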
*/ mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP; cpu->cpu_dtrace_chilled += val; } static void dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t *buf, uint64_t arg) { int nframes = DTRACE_USTACK_NFRAMES(arg); int strsize = DTRACE_USTACK_STRSIZE(arg); uint64_t *pcs = &buf[1], *fps; char *str = (char *)&pcs[nframes]; int size, offs = 0, i, j; size_t rem; uintptr_t old = mstate->dtms_scratch_ptr, saved; uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags; char *sym; /* * Should be taking a faster path if string space has not been * allocated. */ ASSERT(strsize != 0); /* * We will first allocate some temporary space for the frame pointers. */ fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8); size = (uintptr_t)fps - mstate->dtms_scratch_ptr + (nframes * sizeof (uint64_t)); if (!DTRACE_INSCRATCH(mstate, size)) { /* * Not enough room for our frame pointers -- need to indicate * that we ran out of scratch space. */ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); return; } mstate->dtms_scratch_ptr += size; saved = mstate->dtms_scratch_ptr; /* * Now get a stack with both program counters and frame pointers. */ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_getufpstack(buf, fps, nframes + 1); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); /* * If that faulted, we're cooked. */ if (*flags & CPU_DTRACE_FAULT) goto out; /* * Now we want to walk up the stack, calling the USTACK helper. For * each iteration, we restore the scratch pointer. */ for (i = 0; i < nframes; i++) { mstate->dtms_scratch_ptr = saved; if (offs >= strsize) break; sym = (char *)(uintptr_t)dtrace_helper( DTRACE_HELPER_ACTION_USTACK, mstate, state, pcs[i], fps[i]); /* * If we faulted while running the helper, we're going to * clear the fault and null out the corresponding string. */ if (*flags & CPU_DTRACE_FAULT) { *flags &= ~CPU_DTRACE_FAULT; str[offs++] = '\0'; continue; } if (sym == NULL) { str[offs++] = '\0'; continue; } if (!dtrace_strcanload((uintptr_t)sym, strsize, &rem, mstate, &(state->dts_vstate))) { str[offs++] = '\0'; continue; } DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); /* * Now copy in the string that the helper returned to us. */ for (j = 0; offs + j < strsize && j < rem; j++) { if ((str[offs + j] = sym[j]) == '\0') break; } DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); offs += j + 1; } if (offs >= strsize) { /* * If we didn't have room for all of the strings, we don't * abort processing -- this needn't be a fatal error -- but we * still want to increment a counter (dts_stkstroverflows) to * allow this condition to be warned about. (If this is from * a jstack() action, it is easily tuned via jstackstrsize.) */ dtrace_error(&state->dts_stkstroverflows); } while (offs < strsize) str[offs++] = '\0'; out: mstate->dtms_scratch_ptr = old; } static void dtrace_store_by_ref(dtrace_difo_t *dp, caddr_t tomax, size_t size, size_t *valoffsp, uint64_t *valp, uint64_t end, int intuple, int dtkind) { volatile uint16_t *flags; uint64_t val = *valp; size_t valoffs = *valoffsp; flags = (volatile uint16_t *)&cpu_core[curcpu].cpuc_dtrace_flags; ASSERT(dtkind == DIF_TF_BYREF || dtkind == DIF_TF_BYUREF); /* * If this is a string, we're going to only load until we find the zero * byte -- after which we'll store zero bytes. 
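/*
 * A userland sketch of the string-store discipline described above:
 * copy bytes from the source up to and including its NUL, then store
 * zeros for the rest of the record (the non-tuple case).  Names here
 * are hypothetical.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static void
store_string(uint8_t *dst, const char *src, size_t size)
{
	char c = '\0' + 1;	/* any nonzero value primes the loop */
	size_t s;

	for (s = 0; s < size; s++) {
		if (c != '\0')
			c = *src++;	/* still inside the string */
		dst[s] = (uint8_t)c;	/* zero padding once c hits NUL */
	}
}

int
main(void)
{
	uint8_t buf[8];

	store_string(buf, "hi", sizeof (buf));
	printf("%s (stored in %zu zero-padded bytes)\n",
	    (char *)buf, sizeof (buf));
	return (0);
}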
*/ if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) { char c = '\0' + 1; size_t s; for (s = 0; s < size; s++) { if (c != '\0' && dtkind == DIF_TF_BYREF) { c = dtrace_load8(val++); } else if (c != '\0' && dtkind == DIF_TF_BYUREF) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); c = dtrace_fuword8((void *)(uintptr_t)val++); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); if (*flags & CPU_DTRACE_FAULT) break; } DTRACE_STORE(uint8_t, tomax, valoffs++, c); if (c == '\0' && intuple) break; } } else { uint8_t c; while (valoffs < end) { if (dtkind == DIF_TF_BYREF) { c = dtrace_load8(val++); } else if (dtkind == DIF_TF_BYUREF) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); c = dtrace_fuword8((void *)(uintptr_t)val++); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); if (*flags & CPU_DTRACE_FAULT) break; } DTRACE_STORE(uint8_t, tomax, valoffs++, c); } } *valp = val; *valoffsp = valoffs; } /* * If you're looking for the epicenter of DTrace, you just found it. This * is the function called by the provider to fire a probe -- from which all * subsequent probe-context DTrace activity emanates. */ void dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t arg4) { processorid_t cpuid; dtrace_icookie_t cookie; dtrace_probe_t *probe; dtrace_mstate_t mstate; dtrace_ecb_t *ecb; dtrace_action_t *act; intptr_t offs; size_t size; int vtime, onintr; volatile uint16_t *flags; hrtime_t now; if (panicstr != NULL) return; #ifdef illumos /* * Kick out immediately if this CPU is still being born (in which case * curthread will be set to -1) or the current thread can't allow * probes in its current context. */ if (((uintptr_t)curthread & 1) || (curthread->t_flag & T_DONTDTRACE)) return; #endif cookie = dtrace_interrupt_disable(); probe = dtrace_probes[id - 1]; cpuid = curcpu; onintr = CPU_ON_INTR(CPU); if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE && probe->dtpr_predcache == curthread->t_predcache) { /* * We have hit in the predicate cache; we know that * this predicate would evaluate to be false. */ dtrace_interrupt_enable(cookie); return; } #ifdef illumos if (panic_quiesce) { #else if (panicstr != NULL) { #endif /* * We don't trace anything if we're panicking. */ dtrace_interrupt_enable(cookie); return; } now = mstate.dtms_timestamp = dtrace_gethrtime(); mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP; vtime = dtrace_vtime_references != 0; if (vtime && curthread->t_dtrace_start) curthread->t_dtrace_vtime += now - curthread->t_dtrace_start; mstate.dtms_difo = NULL; mstate.dtms_probe = probe; mstate.dtms_strtok = 0; mstate.dtms_arg[0] = arg0; mstate.dtms_arg[1] = arg1; mstate.dtms_arg[2] = arg2; mstate.dtms_arg[3] = arg3; mstate.dtms_arg[4] = arg4; flags = (volatile uint16_t *)&cpu_core[cpuid].cpuc_dtrace_flags; for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) { dtrace_predicate_t *pred = ecb->dte_predicate; dtrace_state_t *state = ecb->dte_state; dtrace_buffer_t *buf = &state->dts_buffer[cpuid]; dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid]; dtrace_vstate_t *vstate = &state->dts_vstate; dtrace_provider_t *prov = probe->dtpr_provider; uint64_t tracememsize = 0; int committed = 0; caddr_t tomax; /* * A little subtlety with the following (seemingly innocuous) * declaration of the automatic 'val': by looking at the * code, you might think that it could be declared in the * action processing loop, below. (That is, it's only used in * the action processing loop.) 
However, it must be declared * out of that scope because in the case of DIF expression * arguments to aggregating actions, one iteration of the * action loop will use the last iteration's value. */ uint64_t val = 0; mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE; mstate.dtms_getf = NULL; *flags &= ~CPU_DTRACE_ERROR; if (prov == dtrace_provider) { /* * If dtrace itself is the provider of this probe, * we're only going to continue processing the ECB if * arg0 (the dtrace_state_t) is equal to the ECB's * creating state. (This prevents disjoint consumers * from seeing one another's metaprobes.) */ if (arg0 != (uint64_t)(uintptr_t)state) continue; } if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) { /* * We're not currently active. If our provider isn't * the dtrace pseudo provider, we're not interested. */ if (prov != dtrace_provider) continue; /* * Now we must further check if we are in the BEGIN * probe. If we are, we will only continue processing * if we're still in WARMUP -- if one BEGIN enabling * has invoked the exit() action, we don't want to * evaluate subsequent BEGIN enablings. */ if (probe->dtpr_id == dtrace_probeid_begin && state->dts_activity != DTRACE_ACTIVITY_WARMUP) { ASSERT(state->dts_activity == DTRACE_ACTIVITY_DRAINING); continue; } } if (ecb->dte_cond) { /* * If the dte_cond bits indicate that this * consumer is only allowed to see user-mode firings * of this probe, call the provider's dtps_usermode() * entry point to check that the probe was fired * while in a user context. Skip this ECB if that's * not the case. */ if ((ecb->dte_cond & DTRACE_COND_USERMODE) && prov->dtpv_pops.dtps_usermode(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg) == 0) continue; #ifdef illumos /* * This is more subtle than it looks. We have to be * absolutely certain that CRED() isn't going to * change out from under us so it's only legit to * examine that structure if we're in constrained * situations. Currently, the only times we'll this * check is if a non-super-user has enabled the * profile or syscall providers -- providers that * allow visibility of all processes. For the * profile case, the check above will ensure that * we're examining a user context. */ if (ecb->dte_cond & DTRACE_COND_OWNER) { cred_t *cr; cred_t *s_cr = ecb->dte_state->dts_cred.dcr_cred; proc_t *proc; ASSERT(s_cr != NULL); if ((cr = CRED()) == NULL || s_cr->cr_uid != cr->cr_uid || s_cr->cr_uid != cr->cr_ruid || s_cr->cr_uid != cr->cr_suid || s_cr->cr_gid != cr->cr_gid || s_cr->cr_gid != cr->cr_rgid || s_cr->cr_gid != cr->cr_sgid || (proc = ttoproc(curthread)) == NULL || (proc->p_flag & SNOCD)) continue; } if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) { cred_t *cr; cred_t *s_cr = ecb->dte_state->dts_cred.dcr_cred; ASSERT(s_cr != NULL); if ((cr = CRED()) == NULL || s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) continue; } #endif } if (now - state->dts_alive > dtrace_deadman_timeout) { /* * We seem to be dead. Unless we (a) have kernel * destructive permissions (b) have explicitly enabled * destructive actions and (c) destructive actions have * not been disabled, we're going to transition into * the KILLED state, from which no further processing * on this state will be performed. 
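/*
 * The deadman transition below forces the activity state to KILLED with
 * a compare-and-swap loop.  The same shape in portable C11 atomics
 * (the state values here are hypothetical):
 */
#include <stdatomic.h>
#include <stdint.h>

enum { ACT_ACTIVE, ACT_DRAINING, ACT_KILLED };

static void
kill_state(_Atomic uint32_t *activity)
{
	uint32_t current;

	/* Re-read and retry until the CAS lands, as dtrace_cas32() does. */
	do {
		current = atomic_load(activity);
	} while (!atomic_compare_exchange_strong(activity, &current,
	    (uint32_t)ACT_KILLED));
}

int
main(void)
{
	_Atomic uint32_t st = ACT_ACTIVE;

	kill_state(&st);
	return (atomic_load(&st) != ACT_KILLED);
}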
*/ if (!dtrace_priv_kernel_destructive(state) || !state->dts_cred.dcr_destructive || dtrace_destructive_disallow) { void *activity = &state->dts_activity; dtrace_activity_t current; do { current = state->dts_activity; } while (dtrace_cas32(activity, current, DTRACE_ACTIVITY_KILLED) != current); continue; } } if ((offs = dtrace_buffer_reserve(buf, ecb->dte_needed, ecb->dte_alignment, state, &mstate)) < 0) continue; tomax = buf->dtb_tomax; ASSERT(tomax != NULL); if (ecb->dte_size != 0) { dtrace_rechdr_t dtrh; if (!(mstate.dtms_present & DTRACE_MSTATE_TIMESTAMP)) { mstate.dtms_timestamp = dtrace_gethrtime(); mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP; } ASSERT3U(ecb->dte_size, >=, sizeof (dtrace_rechdr_t)); dtrh.dtrh_epid = ecb->dte_epid; DTRACE_RECORD_STORE_TIMESTAMP(&dtrh, mstate.dtms_timestamp); *((dtrace_rechdr_t *)(tomax + offs)) = dtrh; } mstate.dtms_epid = ecb->dte_epid; mstate.dtms_present |= DTRACE_MSTATE_EPID; if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) mstate.dtms_access = DTRACE_ACCESS_KERNEL; else mstate.dtms_access = 0; if (pred != NULL) { dtrace_difo_t *dp = pred->dtp_difo; uint64_t rval; rval = dtrace_dif_emulate(dp, &mstate, vstate, state); if (!(*flags & CPU_DTRACE_ERROR) && !rval) { dtrace_cacheid_t cid = probe->dtpr_predcache; if (cid != DTRACE_CACHEIDNONE && !onintr) { /* * Update the predicate cache... */ ASSERT(cid == pred->dtp_cacheid); curthread->t_predcache = cid; } continue; } } for (act = ecb->dte_action; !(*flags & CPU_DTRACE_ERROR) && act != NULL; act = act->dta_next) { size_t valoffs; dtrace_difo_t *dp; dtrace_recdesc_t *rec = &act->dta_rec; size = rec->dtrd_size; valoffs = offs + rec->dtrd_offset; if (DTRACEACT_ISAGG(act->dta_kind)) { uint64_t v = 0xbad; dtrace_aggregation_t *agg; agg = (dtrace_aggregation_t *)act; if ((dp = act->dta_difo) != NULL) v = dtrace_dif_emulate(dp, &mstate, vstate, state); if (*flags & CPU_DTRACE_ERROR) continue; /* * Note that we always pass the expression * value from the previous iteration of the * action loop. This value will only be used * if there is an expression argument to the * aggregating action, denoted by the * dtag_hasarg field. */ dtrace_aggregate(agg, buf, offs, aggbuf, v, val); continue; } switch (act->dta_kind) { case DTRACEACT_STOP: if (dtrace_priv_proc_destructive(state)) dtrace_action_stop(); continue; case DTRACEACT_BREAKPOINT: if (dtrace_priv_kernel_destructive(state)) dtrace_action_breakpoint(ecb); continue; case DTRACEACT_PANIC: if (dtrace_priv_kernel_destructive(state)) dtrace_action_panic(ecb); continue; case DTRACEACT_STACK: if (!dtrace_priv_kernel(state)) continue; dtrace_getpcstack((pc_t *)(tomax + valoffs), size / sizeof (pc_t), probe->dtpr_aframes, DTRACE_ANCHORED(probe) ? NULL : (uint32_t *)arg0); continue; case DTRACEACT_JSTACK: case DTRACEACT_USTACK: if (!dtrace_priv_proc(state)) continue; /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate.dtms_probe) && CPU_ON_INTR(CPU)) { int depth = DTRACE_USTACK_NFRAMES( rec->dtrd_arg) + 1; dtrace_bzero((void *)(tomax + valoffs), DTRACE_USTACK_STRSIZE(rec->dtrd_arg) + depth * sizeof (uint64_t)); continue; } if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 && curproc->p_dtrace_helpers != NULL) { /* * This is the slow path -- we have * allocated string space, and we're * getting the stack of a process that * has helpers. Call into a separate * routine to perform this processing. 
*/ dtrace_action_ustack(&mstate, state, (uint64_t *)(tomax + valoffs), rec->dtrd_arg); continue; } DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_getupcstack((uint64_t *) (tomax + valoffs), DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); continue; default: break; } dp = act->dta_difo; ASSERT(dp != NULL); val = dtrace_dif_emulate(dp, &mstate, vstate, state); if (*flags & CPU_DTRACE_ERROR) continue; switch (act->dta_kind) { case DTRACEACT_SPECULATE: { dtrace_rechdr_t *dtrh; ASSERT(buf == &state->dts_buffer[cpuid]); buf = dtrace_speculation_buffer(state, cpuid, val); if (buf == NULL) { *flags |= CPU_DTRACE_DROP; continue; } offs = dtrace_buffer_reserve(buf, ecb->dte_needed, ecb->dte_alignment, state, NULL); if (offs < 0) { *flags |= CPU_DTRACE_DROP; continue; } tomax = buf->dtb_tomax; ASSERT(tomax != NULL); if (ecb->dte_size == 0) continue; ASSERT3U(ecb->dte_size, >=, sizeof (dtrace_rechdr_t)); dtrh = ((void *)(tomax + offs)); dtrh->dtrh_epid = ecb->dte_epid; /* * When the speculation is committed, all of * the records in the speculative buffer will * have their timestamps set to the commit * time. Until then, it is set to a sentinel * value, for debugability. */ DTRACE_RECORD_STORE_TIMESTAMP(dtrh, UINT64_MAX); continue; } case DTRACEACT_PRINTM: { /* The DIF returns a 'memref'. */ uintptr_t *memref = (uintptr_t *)(uintptr_t) val; /* Get the size from the memref. */ size = memref[1]; /* * Check if the size exceeds the allocated * buffer size. */ if (size + sizeof(uintptr_t) > dp->dtdo_rtype.dtdt_size) { /* Flag a drop! */ *flags |= CPU_DTRACE_DROP; continue; } /* Store the size in the buffer first. */ DTRACE_STORE(uintptr_t, tomax, valoffs, size); /* * Offset the buffer address to the start * of the data. */ valoffs += sizeof(uintptr_t); /* * Reset to the memory address rather than * the memref array, then let the BYREF * code below do the work to store the * memory data in the buffer. */ val = memref[0]; break; } case DTRACEACT_CHILL: if (dtrace_priv_kernel_destructive(state)) dtrace_action_chill(&mstate, val); continue; case DTRACEACT_RAISE: if (dtrace_priv_proc_destructive(state)) dtrace_action_raise(val); continue; case DTRACEACT_COMMIT: ASSERT(!committed); /* * We need to commit our buffer state. */ if (ecb->dte_size) buf->dtb_offset = offs + ecb->dte_size; buf = &state->dts_buffer[cpuid]; dtrace_speculation_commit(state, cpuid, val); committed = 1; continue; case DTRACEACT_DISCARD: dtrace_speculation_discard(state, cpuid, val); continue; case DTRACEACT_DIFEXPR: case DTRACEACT_LIBACT: case DTRACEACT_PRINTF: case DTRACEACT_PRINTA: case DTRACEACT_SYSTEM: case DTRACEACT_FREOPEN: case DTRACEACT_TRACEMEM: break; case DTRACEACT_TRACEMEM_DYNSIZE: tracememsize = val; break; case DTRACEACT_SYM: case DTRACEACT_MOD: if (!dtrace_priv_kernel(state)) continue; break; case DTRACEACT_USYM: case DTRACEACT_UMOD: case DTRACEACT_UADDR: { #ifdef illumos struct pid *pid = curthread->t_procp->p_pidp; #endif if (!dtrace_priv_proc(state)) continue; DTRACE_STORE(uint64_t, tomax, #ifdef illumos valoffs, (uint64_t)pid->pid_id); #else valoffs, (uint64_t) curproc->p_pid); #endif DTRACE_STORE(uint64_t, tomax, valoffs + sizeof (uint64_t), val); continue; } case DTRACEACT_EXIT: { /* * For the exit action, we are going to attempt * to atomically set our activity to be * draining. If this fails (either because * another CPU has beat us to the exit action, * or because our current activity is something * other than ACTIVE or WARMUP), we will * continue. 
This assures that the exit action * can be successfully recorded at most once * when we're in the ACTIVE state. If we're * encountering the exit() action while in * COOLDOWN, however, we want to honor the new * status code. (We know that we're the only * thread in COOLDOWN, so there is no race.) */ void *activity = &state->dts_activity; dtrace_activity_t current = state->dts_activity; if (current == DTRACE_ACTIVITY_COOLDOWN) break; if (current != DTRACE_ACTIVITY_WARMUP) current = DTRACE_ACTIVITY_ACTIVE; if (dtrace_cas32(activity, current, DTRACE_ACTIVITY_DRAINING) != current) { *flags |= CPU_DTRACE_DROP; continue; } break; } default: ASSERT(0); } if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF || dp->dtdo_rtype.dtdt_flags & DIF_TF_BYUREF) { uintptr_t end = valoffs + size; if (tracememsize != 0 && valoffs + tracememsize < end) { end = valoffs + tracememsize; tracememsize = 0; } if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF && !dtrace_vcanload((void *)(uintptr_t)val, &dp->dtdo_rtype, NULL, &mstate, vstate)) continue; dtrace_store_by_ref(dp, tomax, size, &valoffs, &val, end, act->dta_intuple, dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF ? DIF_TF_BYREF: DIF_TF_BYUREF); continue; } switch (size) { case 0: break; case sizeof (uint8_t): DTRACE_STORE(uint8_t, tomax, valoffs, val); break; case sizeof (uint16_t): DTRACE_STORE(uint16_t, tomax, valoffs, val); break; case sizeof (uint32_t): DTRACE_STORE(uint32_t, tomax, valoffs, val); break; case sizeof (uint64_t): DTRACE_STORE(uint64_t, tomax, valoffs, val); break; default: /* * Any other size should have been returned by * reference, not by value. */ ASSERT(0); break; } } if (*flags & CPU_DTRACE_DROP) continue; if (*flags & CPU_DTRACE_FAULT) { int ndx; dtrace_action_t *err; buf->dtb_errors++; if (probe->dtpr_id == dtrace_probeid_error) { /* * There's nothing we can do -- we had an * error on the error probe. We bump an * error counter to at least indicate that * this condition happened. */ dtrace_error(&state->dts_dblerrors); continue; } if (vtime) { /* * Before recursing on dtrace_probe(), we * need to explicitly clear out our start * time to prevent it from being accumulated * into t_dtrace_vtime. */ curthread->t_dtrace_start = 0; } /* * Iterate over the actions to figure out which action * we were processing when we experienced the error. * Note that act points _past_ the faulting action; if * act is ecb->dte_action, the fault was in the * predicate, if it's ecb->dte_action->dta_next it's * in action #1, and so on. */ for (err = ecb->dte_action, ndx = 0; err != act; err = err->dta_next, ndx++) continue; dtrace_probe_error(state, ecb->dte_epid, ndx, (mstate.dtms_present & DTRACE_MSTATE_FLTOFFS) ? mstate.dtms_fltoffs : -1, DTRACE_FLAGS2FLT(*flags), cpu_core[cpuid].cpuc_dtrace_illval); continue; } if (!committed) buf->dtb_offset = offs + ecb->dte_size; } if (vtime) curthread->t_dtrace_start = dtrace_gethrtime(); dtrace_interrupt_enable(cookie); } /* * DTrace Probe Hashing Functions * * The functions in this section (and indeed, the functions in remaining * sections) are not _called_ from probe context. (Any exceptions to this are * marked with a "Note:".) Rather, they are called from elsewhere in the * DTrace framework to look-up probes in, add probes to and remove probes from * the DTrace probe hashes. (Each probe is hashed by each element of the * probe tuple -- allowing for fast lookups, regardless of what was * specified.) 
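/*
 * The per-element string hash that follows is a variant of the classic
 * ELF/PJW hash.  A stand-alone copy for reference:
 */
#include <stdio.h>

static unsigned int
hash_str(const char *p)
{
	unsigned int g, hval = 0;

	while (*p) {
		hval = (hval << 4) + *p++;	/* shift in the next byte */
		if ((g = (hval & 0xf0000000)) != 0)
			hval ^= g >> 24;	/* fold the high nibble back */
		hval &= ~g;
	}
	return (hval);
}

int
main(void)
{
	printf("%u %u\n", hash_str("fbt"), hash_str("syscall"));
	return (0);
}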
*/ static uint_t dtrace_hash_str(const char *p) { unsigned int g; uint_t hval = 0; while (*p) { hval = (hval << 4) + *p++; if ((g = (hval & 0xf0000000)) != 0) hval ^= g >> 24; hval &= ~g; } return (hval); } static dtrace_hash_t * dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs) { dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP); hash->dth_stroffs = stroffs; hash->dth_nextoffs = nextoffs; hash->dth_prevoffs = prevoffs; hash->dth_size = 1; hash->dth_mask = hash->dth_size - 1; hash->dth_tab = kmem_zalloc(hash->dth_size * sizeof (dtrace_hashbucket_t *), KM_SLEEP); return (hash); } static void dtrace_hash_destroy(dtrace_hash_t *hash) { #ifdef DEBUG int i; for (i = 0; i < hash->dth_size; i++) ASSERT(hash->dth_tab[i] == NULL); #endif kmem_free(hash->dth_tab, hash->dth_size * sizeof (dtrace_hashbucket_t *)); kmem_free(hash, sizeof (dtrace_hash_t)); } static void dtrace_hash_resize(dtrace_hash_t *hash) { int size = hash->dth_size, i, ndx; int new_size = hash->dth_size << 1; int new_mask = new_size - 1; dtrace_hashbucket_t **new_tab, *bucket, *next; ASSERT((new_size & new_mask) == 0); new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP); for (i = 0; i < size; i++) { for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) { dtrace_probe_t *probe = bucket->dthb_chain; ASSERT(probe != NULL); ndx = DTRACE_HASHSTR(hash, probe) & new_mask; next = bucket->dthb_next; bucket->dthb_next = new_tab[ndx]; new_tab[ndx] = bucket; } } kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *)); hash->dth_tab = new_tab; hash->dth_size = new_size; hash->dth_mask = new_mask; } static void dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new) { int hashval = DTRACE_HASHSTR(hash, new); int ndx = hashval & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; dtrace_probe_t **nextp, **prevp; for (; bucket != NULL; bucket = bucket->dthb_next) { if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new)) goto add; } if ((hash->dth_nbuckets >> 1) > hash->dth_size) { dtrace_hash_resize(hash); dtrace_hash_add(hash, new); return; } bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP); bucket->dthb_next = hash->dth_tab[ndx]; hash->dth_tab[ndx] = bucket; hash->dth_nbuckets++; add: nextp = DTRACE_HASHNEXT(hash, new); ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL); *nextp = bucket->dthb_chain; if (bucket->dthb_chain != NULL) { prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain); ASSERT(*prevp == NULL); *prevp = new; } bucket->dthb_chain = new; bucket->dthb_len++; } static dtrace_probe_t * dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template) { int hashval = DTRACE_HASHSTR(hash, template); int ndx = hashval & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; for (; bucket != NULL; bucket = bucket->dthb_next) { if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template)) return (bucket->dthb_chain); } return (NULL); } static int dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template) { int hashval = DTRACE_HASHSTR(hash, template); int ndx = hashval & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; for (; bucket != NULL; bucket = bucket->dthb_next) { if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template)) return (bucket->dthb_len); } return (0); } static void dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe) { int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe); 
dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe); /* * Find the bucket that we're removing this probe from. */ for (; bucket != NULL; bucket = bucket->dthb_next) { if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe)) break; } ASSERT(bucket != NULL); if (*prevp == NULL) { if (*nextp == NULL) { /* * The removed probe was the only probe on this * bucket; we need to remove the bucket. */ dtrace_hashbucket_t *b = hash->dth_tab[ndx]; ASSERT(bucket->dthb_chain == probe); ASSERT(b != NULL); if (b == bucket) { hash->dth_tab[ndx] = bucket->dthb_next; } else { while (b->dthb_next != bucket) b = b->dthb_next; b->dthb_next = bucket->dthb_next; } ASSERT(hash->dth_nbuckets > 0); hash->dth_nbuckets--; kmem_free(bucket, sizeof (dtrace_hashbucket_t)); return; } bucket->dthb_chain = *nextp; } else { *(DTRACE_HASHNEXT(hash, *prevp)) = *nextp; } if (*nextp != NULL) *(DTRACE_HASHPREV(hash, *nextp)) = *prevp; } /* * DTrace Utility Functions * * These are random utility functions that are _not_ called from probe context. */ static int dtrace_badattr(const dtrace_attribute_t *a) { return (a->dtat_name > DTRACE_STABILITY_MAX || a->dtat_data > DTRACE_STABILITY_MAX || a->dtat_class > DTRACE_CLASS_MAX); } /* * Return a duplicate copy of a string. If the specified string is NULL, * this function returns a zero-length string. */ static char * dtrace_strdup(const char *str) { char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP); if (str != NULL) (void) strcpy(new, str); return (new); } #define DTRACE_ISALPHA(c) \ (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z')) static int dtrace_badname(const char *s) { char c; if (s == NULL || (c = *s++) == '\0') return (0); if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.') return (1); while ((c = *s++) != '\0') { if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') && c != '-' && c != '_' && c != '.' && c != '`') return (1); } return (0); } static void dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp) { uint32_t priv; #ifdef illumos if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { /* * For DTRACE_PRIV_ALL, the uid and zoneid don't matter. */ priv = DTRACE_PRIV_ALL; } else { *uidp = crgetuid(cr); *zoneidp = crgetzoneid(cr); priv = 0; if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER; else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) priv |= DTRACE_PRIV_USER; if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) priv |= DTRACE_PRIV_PROC; if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) priv |= DTRACE_PRIV_OWNER; if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) priv |= DTRACE_PRIV_ZONEOWNER; } #else priv = DTRACE_PRIV_ALL; #endif *privp = priv; } #ifdef DTRACE_ERRDEBUG static void dtrace_errdebug(const char *str) { int hval = dtrace_hash_str(str) % DTRACE_ERRHASHSZ; int occupied = 0; mutex_enter(&dtrace_errlock); dtrace_errlast = str; dtrace_errthread = curthread; while (occupied++ < DTRACE_ERRHASHSZ) { if (dtrace_errhash[hval].dter_msg == str) { dtrace_errhash[hval].dter_count++; goto out; } if (dtrace_errhash[hval].dter_msg != NULL) { hval = (hval + 1) % DTRACE_ERRHASHSZ; continue; } dtrace_errhash[hval].dter_msg = str; dtrace_errhash[hval].dter_count = 1; goto out; } panic("dtrace: undersized error hash"); out: mutex_exit(&dtrace_errlock); } #endif /* * DTrace Matching Functions * * These functions are used to match groups of probes, given some elements of * a probe tuple, or some globbed expressions for elements of a probe tuple. 
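/*
 * dtrace_errdebug() above counts repeated error messages in a small
 * open-addressed table with linear probing.  A userland sketch of that
 * insert path (size and names are hypothetical; the kernel hashes the
 * string's characters but, like this sketch, compares by pointer):
 */
#include <stdint.h>
#include <stdio.h>

#define	ERRHASHSZ	64

struct errent {
	const char	*msg;
	unsigned	count;
};

static struct errent errhash[ERRHASHSZ];

static unsigned
hash_ptr(const char *s)
{
	return ((unsigned)(((uintptr_t)s >> 4) % ERRHASHSZ));
}

static int
err_record(const char *msg)
{
	unsigned hval = hash_ptr(msg);
	int occupied = 0;

	while (occupied++ < ERRHASHSZ) {
		if (errhash[hval].msg == msg) {
			errhash[hval].count++;	/* seen before: just count */
			return ((int)hval);
		}
		if (errhash[hval].msg == NULL) {
			errhash[hval].msg = msg;	/* claim the hole */
			errhash[hval].count = 1;
			return ((int)hval);
		}
		hval = (hval + 1) % ERRHASHSZ;	/* linear probe */
	}
	return (-1);	/* table full; the kernel panics here */
}

int
main(void)
{
	const char *m = "dtrace: example error";
	int slot = err_record(m);

	(void) err_record(m);
	printf("%u\n", errhash[slot].count);	/* prints 2 */
	return (0);
}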
*/ static int dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid, zoneid_t zoneid) { if (priv != DTRACE_PRIV_ALL) { uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags; uint32_t match = priv & ppriv; /* * No PRIV_DTRACE_* privileges... */ if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER | DTRACE_PRIV_KERNEL)) == 0) return (0); /* * No matching bits, but there were bits to match... */ if (match == 0 && ppriv != 0) return (0); /* * Need to have permissions to the process, but don't... */ if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 && uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) { return (0); } /* * Need to be in the same zone unless we possess the * privilege to examine all zones. */ if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 && zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) { return (0); } } return (1); } /* * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which * consists of input pattern strings and an ops-vector to evaluate them. * This function returns >0 for match, 0 for no match, and <0 for error. */ static int dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, zoneid_t zoneid) { dtrace_provider_t *pvp = prp->dtpr_provider; int rv; if (pvp->dtpv_defunct) return (0); if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0) return (rv); if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0) return (rv); if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0) return (rv); if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0) return (rv); if (dtrace_match_priv(prp, priv, uid, zoneid) == 0) return (0); return (rv); } /* * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN) * interface for matching a glob pattern 'p' to an input string 's'. Unlike * libc's version, the kernel version only applies to 8-bit ASCII strings. * In addition, all of the recursion cases except for '*' matching have been * unwound. For '*', we still implement recursive evaluation, but a depth * counter is maintained and matching is aborted if we recurse too deep. * The function returns 0 if no match, >0 if match, and <0 if recursion error. 
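/*
 * Only the '*' case of the matcher below stays recursive, and that
 * recursion is bounded by a depth counter.  A minimal sketch of just
 * that case (literals plus '*', same 0/>0/<0 return convention;
 * MAXDEPTH is an assumed stand-in for DTRACE_PROBEKEY_MAXDEPTH):
 */
#include <stdio.h>

#define	MAXDEPTH	32

static int
glob_star(const char *s, const char *p, int depth)
{
	if (depth > MAXDEPTH)
		return (-1);			/* recursion error */
	if (*p == '\0')
		return (*s == '\0');
	if (*p == '*') {
		while (*++p == '*')
			continue;		/* runs of '*' collapse to one */
		if (*p == '\0')
			return (1);		/* trailing '*' matches all */
		for (; *s != '\0'; s++) {
			int r = glob_star(s, p, depth + 1);

			if (r != 0)
				return (r);	/* match or depth error */
		}
		return (0);
	}
	return (*p == *s ? glob_star(s + 1, p + 1, depth) : 0);
}

int
main(void)
{
	printf("%d %d\n", glob_star("fbt", "f*t", 0),
	    glob_star("fbt", "s*", 0));		/* prints "1 0" */
	return (0);
}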
*/ static int dtrace_match_glob(const char *s, const char *p, int depth) { const char *olds; char s1, c; int gs; if (depth > DTRACE_PROBEKEY_MAXDEPTH) return (-1); if (s == NULL) s = ""; /* treat NULL as empty string */ top: olds = s; s1 = *s++; if (p == NULL) return (0); if ((c = *p++) == '\0') return (s1 == '\0'); switch (c) { case '[': { int ok = 0, notflag = 0; char lc = '\0'; if (s1 == '\0') return (0); if (*p == '!') { notflag = 1; p++; } if ((c = *p++) == '\0') return (0); do { if (c == '-' && lc != '\0' && *p != ']') { if ((c = *p++) == '\0') return (0); if (c == '\\' && (c = *p++) == '\0') return (0); if (notflag) { if (s1 < lc || s1 > c) ok++; else return (0); } else if (lc <= s1 && s1 <= c) ok++; } else if (c == '\\' && (c = *p++) == '\0') return (0); lc = c; /* save left-hand 'c' for next iteration */ if (notflag) { if (s1 != c) ok++; else return (0); } else if (s1 == c) ok++; if ((c = *p++) == '\0') return (0); } while (c != ']'); if (ok) goto top; return (0); } case '\\': if ((c = *p++) == '\0') return (0); /*FALLTHRU*/ default: if (c != s1) return (0); /*FALLTHRU*/ case '?': if (s1 != '\0') goto top; return (0); case '*': while (*p == '*') p++; /* consecutive *'s are identical to a single one */ if (*p == '\0') return (1); for (s = olds; *s != '\0'; s++) { if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0) return (gs); } return (0); } } /*ARGSUSED*/ static int dtrace_match_string(const char *s, const char *p, int depth) { return (s != NULL && strcmp(s, p) == 0); } /*ARGSUSED*/ static int dtrace_match_nul(const char *s, const char *p, int depth) { return (1); /* always match the empty pattern */ } /*ARGSUSED*/ static int dtrace_match_nonzero(const char *s, const char *p, int depth) { return (s != NULL && s[0] != '\0'); } static int dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg) { dtrace_probe_t template, *probe; dtrace_hash_t *hash = NULL; int len, best = INT_MAX, nmatched = 0; dtrace_id_t i; ASSERT(MUTEX_HELD(&dtrace_lock)); /* * If the probe ID is specified in the key, just lookup by ID and * invoke the match callback once if a matching probe is found. */ if (pkp->dtpk_id != DTRACE_IDNONE) { if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL && dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) { (void) (*matched)(probe, arg); nmatched++; } return (nmatched); } template.dtpr_mod = (char *)pkp->dtpk_mod; template.dtpr_func = (char *)pkp->dtpk_func; template.dtpr_name = (char *)pkp->dtpk_name; /* * We want to find the most distinct of the module name, function * name, and name. So for each one that is not a glob pattern or * empty string, we perform a lookup in the corresponding hash and * use the hash table with the fewest collisions to do our search. */ if (pkp->dtpk_mmatch == &dtrace_match_string && (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) { best = len; hash = dtrace_bymod; } if (pkp->dtpk_fmatch == &dtrace_match_string && (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) { best = len; hash = dtrace_byfunc; } if (pkp->dtpk_nmatch == &dtrace_match_string && (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) { best = len; hash = dtrace_byname; } /* * If we did not select a hash table, iterate over every probe and * invoke our callback for each one that matches our input probe key. 
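/*
 * The hash selection above is a best-of-N scan: of the probe-tuple
 * fields that are plain strings, use the hash whose chain for that key
 * is shortest.  Schematically, over precomputed collision counts
 * (negative meaning the field was a glob or empty and cannot be hashed):
 */
#include <limits.h>
#include <stdio.h>

static int
pick_best(const int *collisions, int n)
{
	int i, best = INT_MAX, which = -1;

	for (i = 0; i < n; i++) {
		if (collisions[i] < 0)
			continue;		/* not eligible for hashing */
		if (collisions[i] < best) {
			best = collisions[i];
			which = i;
		}
	}
	return (which);		/* -1 => no usable hash: scan every probe */
}

int
main(void)
{
	int coll[3] = { -1, 7, 2 };	/* mod globbed; func 7; name 2 */

	printf("%d\n", pick_best(coll, 3));	/* prints 2: use by-name */
	return (0);
}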
*/ if (hash == NULL) { for (i = 0; i < dtrace_nprobes; i++) { if ((probe = dtrace_probes[i]) == NULL || dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0) continue; nmatched++; if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT) break; } return (nmatched); } /* * If we selected a hash table, iterate over each probe of the same key * name and invoke the callback for every probe that matches the other * attributes of our input probe key. */ for (probe = dtrace_hash_lookup(hash, &template); probe != NULL; probe = *(DTRACE_HASHNEXT(hash, probe))) { if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0) continue; nmatched++; if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT) break; } return (nmatched); } /* * Return the function pointer dtrace_probecmp() should use to compare the * specified pattern with a string. For NULL or empty patterns, we select * dtrace_match_nul(). For glob pattern strings, we use dtrace_match_glob(). * For non-empty non-glob strings, we use dtrace_match_string(). */ static dtrace_probekey_f * dtrace_probekey_func(const char *p) { char c; if (p == NULL || *p == '\0') return (&dtrace_match_nul); while ((c = *p++) != '\0') { if (c == '[' || c == '?' || c == '*' || c == '\\') return (&dtrace_match_glob); } return (&dtrace_match_string); } /* * Build a probe comparison key for use with dtrace_match_probe() from the * given probe description. By convention, a null key only matches anchored * probes: if each field is the empty string, reset dtpk_fmatch to * dtrace_match_nonzero(). */ static void dtrace_probekey(dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp) { pkp->dtpk_prov = pdp->dtpd_provider; pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider); pkp->dtpk_mod = pdp->dtpd_mod; pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod); pkp->dtpk_func = pdp->dtpd_func; pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func); pkp->dtpk_name = pdp->dtpd_name; pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name); pkp->dtpk_id = pdp->dtpd_id; if (pkp->dtpk_id == DTRACE_IDNONE && pkp->dtpk_pmatch == &dtrace_match_nul && pkp->dtpk_mmatch == &dtrace_match_nul && pkp->dtpk_fmatch == &dtrace_match_nul && pkp->dtpk_nmatch == &dtrace_match_nul) pkp->dtpk_fmatch = &dtrace_match_nonzero; } /* * DTrace Provider-to-Framework API Functions * * These functions implement much of the Provider-to-Framework API, as * described in . The parts of the API not in this section are * the functions in the API for probe management (found below), and * dtrace_probe() itself (found above). */ /* * Register the calling provider with the DTrace framework. This should * generally be called by DTrace providers in their attach(9E) entry point. */ int dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp) { dtrace_provider_t *provider; if (name == NULL || pap == NULL || pops == NULL || idp == NULL) { cmn_err(CE_WARN, "failed to register provider '%s': invalid " "arguments", name ? 
name : ""); return (EINVAL); } if (name[0] == '\0' || dtrace_badname(name)) { cmn_err(CE_WARN, "failed to register provider '%s': invalid " "provider name", name); return (EINVAL); } if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) || pops->dtps_enable == NULL || pops->dtps_disable == NULL || pops->dtps_destroy == NULL || ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) { cmn_err(CE_WARN, "failed to register provider '%s': invalid " "provider ops", name); return (EINVAL); } if (dtrace_badattr(&pap->dtpa_provider) || dtrace_badattr(&pap->dtpa_mod) || dtrace_badattr(&pap->dtpa_func) || dtrace_badattr(&pap->dtpa_name) || dtrace_badattr(&pap->dtpa_args)) { cmn_err(CE_WARN, "failed to register provider '%s': invalid " "provider attributes", name); return (EINVAL); } if (priv & ~DTRACE_PRIV_ALL) { cmn_err(CE_WARN, "failed to register provider '%s': invalid " "privilege attributes", name); return (EINVAL); } if ((priv & DTRACE_PRIV_KERNEL) && (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) && pops->dtps_usermode == NULL) { cmn_err(CE_WARN, "failed to register provider '%s': need " "dtps_usermode() op for given privilege attributes", name); return (EINVAL); } provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP); provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP); (void) strcpy(provider->dtpv_name, name); provider->dtpv_attr = *pap; provider->dtpv_priv.dtpp_flags = priv; if (cr != NULL) { provider->dtpv_priv.dtpp_uid = crgetuid(cr); provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr); } provider->dtpv_pops = *pops; if (pops->dtps_provide == NULL) { ASSERT(pops->dtps_provide_module != NULL); provider->dtpv_pops.dtps_provide = (void (*)(void *, dtrace_probedesc_t *))dtrace_nullop; } if (pops->dtps_provide_module == NULL) { ASSERT(pops->dtps_provide != NULL); provider->dtpv_pops.dtps_provide_module = (void (*)(void *, modctl_t *))dtrace_nullop; } if (pops->dtps_suspend == NULL) { ASSERT(pops->dtps_resume == NULL); provider->dtpv_pops.dtps_suspend = (void (*)(void *, dtrace_id_t, void *))dtrace_nullop; provider->dtpv_pops.dtps_resume = (void (*)(void *, dtrace_id_t, void *))dtrace_nullop; } provider->dtpv_arg = arg; *idp = (dtrace_provider_id_t)provider; if (pops == &dtrace_provider_ops) { ASSERT(MUTEX_HELD(&dtrace_provider_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dtrace_anon.dta_enabling == NULL); /* * We make sure that the DTrace provider is at the head of * the provider chain. */ provider->dtpv_next = dtrace_provider; dtrace_provider = provider; return (0); } mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); /* * If there is at least one provider registered, we'll add this * provider after the first provider. */ if (dtrace_provider != NULL) { provider->dtpv_next = dtrace_provider->dtpv_next; dtrace_provider->dtpv_next = provider; } else { dtrace_provider = provider; } if (dtrace_retained != NULL) { dtrace_enabling_provide(provider); /* * Now we need to call dtrace_enabling_matchall() -- which * will acquire cpu_lock and dtrace_lock. We therefore need * to drop all of our locks before calling into it... */ mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); dtrace_enabling_matchall(); return (0); } mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); return (0); } /* * Unregister the specified provider from the DTrace framework. This should * generally be called by DTrace providers in their detach(9E) entry point. 
*/ int dtrace_unregister(dtrace_provider_id_t id) { dtrace_provider_t *old = (dtrace_provider_t *)id; dtrace_provider_t *prev = NULL; int i, self = 0, noreap = 0; dtrace_probe_t *probe, *first = NULL; if (old->dtpv_pops.dtps_enable == (void (*)(void *, dtrace_id_t, void *))dtrace_nullop) { /* * If DTrace itself is the provider, we're called with locks * already held. */ ASSERT(old == dtrace_provider); #ifdef illumos ASSERT(dtrace_devi != NULL); #endif ASSERT(MUTEX_HELD(&dtrace_provider_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); self = 1; if (dtrace_provider->dtpv_next != NULL) { /* * There's another provider here; return failure. */ return (EBUSY); } } else { mutex_enter(&dtrace_provider_lock); #ifdef illumos mutex_enter(&mod_lock); #endif mutex_enter(&dtrace_lock); } /* * If anyone has /dev/dtrace open, or if there are anonymous enabled * probes, we refuse to let providers slither away, unless this * provider has already been explicitly invalidated. */ if (!old->dtpv_defunct && (dtrace_opens || (dtrace_anon.dta_state != NULL && dtrace_anon.dta_state->dts_necbs > 0))) { if (!self) { mutex_exit(&dtrace_lock); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_provider_lock); } return (EBUSY); } /* * Attempt to destroy the probes associated with this provider. */ for (i = 0; i < dtrace_nprobes; i++) { if ((probe = dtrace_probes[i]) == NULL) continue; if (probe->dtpr_provider != old) continue; if (probe->dtpr_ecb == NULL) continue; /* * If we are trying to unregister a defunct provider, and the * provider was made defunct within the interval dictated by * dtrace_unregister_defunct_reap, we'll (asynchronously) * attempt to reap our enablings. To denote that the provider * should reattempt to unregister itself at some point in the * future, we will return a differentiable error code (EAGAIN * instead of EBUSY) in this case. */ if (dtrace_gethrtime() - old->dtpv_defunct > dtrace_unregister_defunct_reap) noreap = 1; if (!self) { mutex_exit(&dtrace_lock); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_provider_lock); } if (noreap) return (EBUSY); (void) taskq_dispatch(dtrace_taskq, (task_func_t *)dtrace_enabling_reap, NULL, TQ_SLEEP); return (EAGAIN); } /* * All of the probes for this provider are disabled; we can safely * remove all of them from their hash chains and from the probe array. */ for (i = 0; i < dtrace_nprobes; i++) { if ((probe = dtrace_probes[i]) == NULL) continue; if (probe->dtpr_provider != old) continue; dtrace_probes[i] = NULL; dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); dtrace_hash_remove(dtrace_byname, probe); if (first == NULL) { first = probe; probe->dtpr_nextmod = NULL; } else { probe->dtpr_nextmod = first; first = probe; } } /* * The provider's probes have been removed from the hash chains and * from the probe array. Now issue a dtrace_sync() to be sure that * everyone has cleared out from any probe array processing. 
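/*
 * The sequence below -- unlink everything, dtrace_sync(), then destroy
 * and free -- is the classic publish-then-quiesce (RCU-like) pattern.
 * A single-threaded sketch, with an empty stand-in for the quiesce step:
 */
#include <stdlib.h>

struct probe {
	struct probe	*next;
};

static void
quiesce(void)
{
	/*
	 * Stand-in for dtrace_sync(): after this returns, no concurrent
	 * reader can still hold a pointer into the unlinked nodes.
	 */
}

static void
remove_all(struct probe **list)
{
	struct probe *p, *dead = *list;

	*list = NULL;			/* step 1: unpublish */
	quiesce();			/* step 2: wait out readers */
	while ((p = dead) != NULL) {	/* step 3: reclaim */
		dead = p->next;
		free(p);
	}
}

int
main(void)
{
	struct probe *list = calloc(1, sizeof (*list));

	list->next = calloc(1, sizeof (*list));
	remove_all(&list);
	return (list != NULL);
}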
*/ dtrace_sync(); for (probe = first; probe != NULL; probe = first) { first = probe->dtpr_nextmod; old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); #ifdef illumos vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1); #else free_unr(dtrace_arena, probe->dtpr_id); #endif kmem_free(probe, sizeof (dtrace_probe_t)); } if ((prev = dtrace_provider) == old) { #ifdef illumos ASSERT(self || dtrace_devi == NULL); ASSERT(old->dtpv_next == NULL || dtrace_devi == NULL); #endif dtrace_provider = old->dtpv_next; } else { while (prev != NULL && prev->dtpv_next != old) prev = prev->dtpv_next; if (prev == NULL) { panic("attempt to unregister non-existent " "dtrace provider %p\n", (void *)id); } prev->dtpv_next = old->dtpv_next; } if (!self) { mutex_exit(&dtrace_lock); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_provider_lock); } kmem_free(old->dtpv_name, strlen(old->dtpv_name) + 1); kmem_free(old, sizeof (dtrace_provider_t)); return (0); } /* * Invalidate the specified provider. All subsequent probe lookups for the * specified provider will fail, but its probes will not be removed. */ void dtrace_invalidate(dtrace_provider_id_t id) { dtrace_provider_t *pvp = (dtrace_provider_t *)id; ASSERT(pvp->dtpv_pops.dtps_enable != (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); pvp->dtpv_defunct = dtrace_gethrtime(); mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); } /* * Indicate whether or not DTrace has attached. */ int dtrace_attached(void) { /* * dtrace_provider will be non-NULL iff the DTrace driver has * attached. (It's non-NULL because DTrace is always itself a * provider.) */ return (dtrace_provider != NULL); } /* * Remove all the unenabled probes for the given provider. This function is * not unlike dtrace_unregister(), except that it doesn't remove the provider * -- just as many of its associated probes as it can. */ int dtrace_condense(dtrace_provider_id_t id) { dtrace_provider_t *prov = (dtrace_provider_t *)id; int i; dtrace_probe_t *probe; /* * Make sure this isn't the dtrace provider itself. */ ASSERT(prov->dtpv_pops.dtps_enable != (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); /* * Attempt to destroy the probes associated with this provider. 
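 * Unlike the similar loop in dtrace_unregister(), a probe that still
 * has an ECB attached is simply skipped here rather than treated as a
 * reason to fail.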
*/ for (i = 0; i < dtrace_nprobes; i++) { if ((probe = dtrace_probes[i]) == NULL) continue; if (probe->dtpr_provider != prov) continue; if (probe->dtpr_ecb != NULL) continue; dtrace_probes[i] = NULL; dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); dtrace_hash_remove(dtrace_byname, probe); prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1, probe->dtpr_arg); kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); kmem_free(probe, sizeof (dtrace_probe_t)); #ifdef illumos vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1); #else free_unr(dtrace_arena, i + 1); #endif } mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); return (0); } /* * DTrace Probe Management Functions * * The functions in this section perform the DTrace probe management, * including functions to create probes, look-up probes, and call into the * providers to request that probes be provided. Some of these functions are * in the Provider-to-Framework API; these functions can be identified by the * fact that they are not declared "static". */ /* * Create a probe with the specified module name, function name, and name. */ dtrace_id_t dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, const char *func, const char *name, int aframes, void *arg) { dtrace_probe_t *probe, **probes; dtrace_provider_t *provider = (dtrace_provider_t *)prov; dtrace_id_t id; if (provider == dtrace_provider) { ASSERT(MUTEX_HELD(&dtrace_lock)); } else { mutex_enter(&dtrace_lock); } #ifdef illumos id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1, VM_BESTFIT | VM_SLEEP); #else id = alloc_unr(dtrace_arena); #endif probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP); probe->dtpr_id = id; probe->dtpr_gen = dtrace_probegen++; probe->dtpr_mod = dtrace_strdup(mod); probe->dtpr_func = dtrace_strdup(func); probe->dtpr_name = dtrace_strdup(name); probe->dtpr_arg = arg; probe->dtpr_aframes = aframes; probe->dtpr_provider = provider; dtrace_hash_add(dtrace_bymod, probe); dtrace_hash_add(dtrace_byfunc, probe); dtrace_hash_add(dtrace_byname, probe); if (id - 1 >= dtrace_nprobes) { size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *); size_t nsize = osize << 1; if (nsize == 0) { ASSERT(osize == 0); ASSERT(dtrace_probes == NULL); nsize = sizeof (dtrace_probe_t *); } probes = kmem_zalloc(nsize, KM_SLEEP); if (dtrace_probes == NULL) { ASSERT(osize == 0); dtrace_probes = probes; dtrace_nprobes = 1; } else { dtrace_probe_t **oprobes = dtrace_probes; bcopy(oprobes, probes, osize); dtrace_membar_producer(); dtrace_probes = probes; dtrace_sync(); /* * All CPUs are now seeing the new probes array; we can * safely free the old array. */ kmem_free(oprobes, osize); dtrace_nprobes <<= 1; } ASSERT(id - 1 < dtrace_nprobes); } ASSERT(dtrace_probes[id - 1] == NULL); dtrace_probes[id - 1] = probe; if (provider != dtrace_provider) mutex_exit(&dtrace_lock); return (id); } static dtrace_probe_t * dtrace_probe_lookup_id(dtrace_id_t id) { ASSERT(MUTEX_HELD(&dtrace_lock)); if (id == 0 || id > dtrace_nprobes) return (NULL); return (dtrace_probes[id - 1]); } static int dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg) { *((dtrace_id_t *)arg) = probe->dtpr_id; return (DTRACE_MATCH_DONE); } /* * Look up a probe based on provider and one or more of module name, function * name and probe name. 
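 *
 * As a minimal sketch of the usual idiom in a provider's dtps_provide
 * entry point (the probe names here are hypothetical):
 *
 *	if (dtrace_probe_lookup(foo_id, NULL, "open", "entry") == 0)
 *		(void) dtrace_probe_create(foo_id, NULL, "open", "entry",
 *		    0, NULL);
 *
 * i.e. the probe is created only if it does not already exist.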
*/ dtrace_id_t dtrace_probe_lookup(dtrace_provider_id_t prid, char *mod, char *func, char *name) { dtrace_probekey_t pkey; dtrace_id_t id; int match; pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name; pkey.dtpk_pmatch = &dtrace_match_string; pkey.dtpk_mod = mod; pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul; pkey.dtpk_func = func; pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul; pkey.dtpk_name = name; pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul; pkey.dtpk_id = DTRACE_IDNONE; mutex_enter(&dtrace_lock); match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0, dtrace_probe_lookup_match, &id); mutex_exit(&dtrace_lock); ASSERT(match == 1 || match == 0); return (match ? id : 0); } /* * Returns the probe argument associated with the specified probe. */ void * dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid) { dtrace_probe_t *probe; void *rval = NULL; mutex_enter(&dtrace_lock); if ((probe = dtrace_probe_lookup_id(pid)) != NULL && probe->dtpr_provider == (dtrace_provider_t *)id) rval = probe->dtpr_arg; mutex_exit(&dtrace_lock); return (rval); } /* * Copy a probe into a probe description. */ static void dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp) { bzero(pdp, sizeof (dtrace_probedesc_t)); pdp->dtpd_id = prp->dtpr_id; (void) strncpy(pdp->dtpd_provider, prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN - 1); (void) strncpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN - 1); (void) strncpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN - 1); (void) strncpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN - 1); } /* * Called to indicate that a probe -- or probes -- should be provided by a * specified provider. If the specified description is NULL, the provider will * be told to provide all of its probes. (This is done whenever a new * consumer comes along, or whenever a retained enabling is to be matched.) If * the specified description is non-NULL, the provider is given the * opportunity to dynamically provide the specified probe, allowing providers * to support the creation of probes on-the-fly. (So-called _autocreated_ * probes.) If the provider is NULL, the operations will be applied to all * providers; if the provider is non-NULL the operations will only be applied * to the specified provider. The dtrace_provider_lock must be held, and the * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation * will need to grab the dtrace_lock when it reenters the framework through * dtrace_probe_lookup(), dtrace_probe_create(), etc. */ static void dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv) { #ifdef illumos modctl_t *ctl; #endif int all = 0; ASSERT(MUTEX_HELD(&dtrace_provider_lock)); if (prv == NULL) { all = 1; prv = dtrace_provider; } do { /* * First, call the blanket provide operation. */ prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc); #ifdef illumos /* * Now call the per-module provide operation. We will grab * mod_lock to prevent the list from being modified. Note * that this also prevents the mod_busy bits from changing. * (mod_busy can only be changed with mod_lock held.)
*/ mutex_enter(&mod_lock); ctl = &modules; do { if (ctl->mod_busy || ctl->mod_mp == NULL) continue; prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); } while ((ctl = ctl->mod_next) != &modules); mutex_exit(&mod_lock); #endif } while (all && (prv = prv->dtpv_next) != NULL); } #ifdef illumos /* * Iterate over each probe, and call the Framework-to-Provider API function * denoted by offs. */ static void dtrace_probe_foreach(uintptr_t offs) { dtrace_provider_t *prov; void (*func)(void *, dtrace_id_t, void *); dtrace_probe_t *probe; dtrace_icookie_t cookie; int i; /* * We disable interrupts to walk through the probe array. This is * safe -- the dtrace_sync() in dtrace_unregister() assures that we * won't see stale data. */ cookie = dtrace_interrupt_disable(); for (i = 0; i < dtrace_nprobes; i++) { if ((probe = dtrace_probes[i]) == NULL) continue; if (probe->dtpr_ecb == NULL) { /* * This probe isn't enabled -- don't call the function. */ continue; } prov = probe->dtpr_provider; func = *((void(**)(void *, dtrace_id_t, void *)) ((uintptr_t)&prov->dtpv_pops + offs)); func(prov->dtpv_arg, i + 1, probe->dtpr_arg); } dtrace_interrupt_enable(cookie); } #endif static int dtrace_probe_enable(dtrace_probedesc_t *desc, dtrace_enabling_t *enab) { dtrace_probekey_t pkey; uint32_t priv; uid_t uid; zoneid_t zoneid; ASSERT(MUTEX_HELD(&dtrace_lock)); dtrace_ecb_create_cache = NULL; if (desc == NULL) { /* * If we're passed a NULL description, we're being asked to * create an ECB with a NULL probe. */ (void) dtrace_ecb_create_enable(NULL, enab); return (0); } dtrace_probekey(desc, &pkey); dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred, &priv, &uid, &zoneid); return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable, enab)); } /* * DTrace Helper Provider Functions */ static void dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr) { attr->dtat_name = DOF_ATTR_NAME(dofattr); attr->dtat_data = DOF_ATTR_DATA(dofattr); attr->dtat_class = DOF_ATTR_CLASS(dofattr); } static void dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov, const dof_provider_t *dofprov, char *strtab) { hprov->dthpv_provname = strtab + dofprov->dofpv_name; dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider, dofprov->dofpv_provattr); dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod, dofprov->dofpv_modattr); dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func, dofprov->dofpv_funcattr); dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name, dofprov->dofpv_nameattr); dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args, dofprov->dofpv_argsattr); } static void dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec; dof_provider_t *provider; dof_probe_t *probe; uint32_t *off, *enoff; uint8_t *arg; char *strtab; uint_t i, nprobes; dtrace_helper_provdesc_t dhpv; dtrace_helper_probedesc_t dhpb; dtrace_meta_t *meta = dtrace_meta_pid; dtrace_mops_t *mops = &meta->dtm_mops; void *parg; provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset); str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + provider->dofpv_strtab * dof->dofh_secsize); prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + provider->dofpv_probes * dof->dofh_secsize); arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + provider->dofpv_prargs * dof->dofh_secsize); off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + provider->dofpv_proffs 
* dof->dofh_secsize); strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset); arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset); enoff = NULL; /* * See dtrace_helper_provider_validate(). */ if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && provider->dofpv_prenoffs != DOF_SECT_NONE) { enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + provider->dofpv_prenoffs * dof->dofh_secsize); enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset); } nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize; /* * Create the provider. */ dtrace_dofprov2hprov(&dhpv, provider, strtab); if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL) return; meta->dtm_count++; /* * Create the probes. */ for (i = 0; i < nprobes; i++) { probe = (dof_probe_t *)(uintptr_t)(daddr + prb_sec->dofs_offset + i * prb_sec->dofs_entsize); /* See the check in dtrace_helper_provider_validate(). */ if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) continue; dhpb.dthpb_mod = dhp->dofhp_mod; dhpb.dthpb_func = strtab + probe->dofpr_func; dhpb.dthpb_name = strtab + probe->dofpr_name; dhpb.dthpb_base = probe->dofpr_addr; dhpb.dthpb_offs = off + probe->dofpr_offidx; dhpb.dthpb_noffs = probe->dofpr_noffs; if (enoff != NULL) { dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx; dhpb.dthpb_nenoffs = probe->dofpr_nenoffs; } else { dhpb.dthpb_enoffs = NULL; dhpb.dthpb_nenoffs = 0; } dhpb.dthpb_args = arg + probe->dofpr_argidx; dhpb.dthpb_nargc = probe->dofpr_nargc; dhpb.dthpb_xargc = probe->dofpr_xargc; dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv; dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv; mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb); } } static void dtrace_helper_provide(dof_helper_t *dhp, pid_t pid) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; int i; ASSERT(MUTEX_HELD(&dtrace_meta_lock)); for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + i * dof->dofh_secsize); if (sec->dofs_type != DOF_SECT_PROVIDER) continue; dtrace_helper_provide_one(dhp, sec, pid); } /* * We may have just created probes, so we must now rematch against * any retained enablings. Note that this call will acquire both * cpu_lock and dtrace_lock; the fact that we are holding * dtrace_meta_lock now is what defines the ordering with respect to * these three locks. */ dtrace_enabling_matchall(); } static void dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; dof_sec_t *str_sec; dof_provider_t *provider; char *strtab; dtrace_helper_provdesc_t dhpv; dtrace_meta_t *meta = dtrace_meta_pid; dtrace_mops_t *mops = &meta->dtm_mops; provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset); str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + provider->dofpv_strtab * dof->dofh_secsize); strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); /* * Build the provider description so the meta-provider can remove it.
*/ dtrace_dofprov2hprov(&dhpv, provider, strtab); mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid); meta->dtm_count--; } static void dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; int i; ASSERT(MUTEX_HELD(&dtrace_meta_lock)); for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + i * dof->dofh_secsize); if (sec->dofs_type != DOF_SECT_PROVIDER) continue; dtrace_helper_provider_remove_one(dhp, sec, pid); } } /* * DTrace Meta Provider-to-Framework API Functions * * These functions implement the Meta Provider-to-Framework API, as described * in <sys/dtrace.h>. */ int dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, dtrace_meta_provider_id_t *idp) { dtrace_meta_t *meta; dtrace_helpers_t *help, *next; int i; *idp = DTRACE_METAPROVNONE; /* * We strictly don't need the name, but we hold onto it for * debuggability. All hail error queues! */ if (name == NULL) { cmn_err(CE_WARN, "failed to register meta-provider: " "invalid name"); return (EINVAL); } if (mops == NULL || mops->dtms_create_probe == NULL || mops->dtms_provide_pid == NULL || mops->dtms_remove_pid == NULL) { cmn_err(CE_WARN, "failed to register meta-provider %s: " "invalid ops", name); return (EINVAL); } meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP); meta->dtm_mops = *mops; meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP); (void) strcpy(meta->dtm_name, name); meta->dtm_arg = arg; mutex_enter(&dtrace_meta_lock); mutex_enter(&dtrace_lock); if (dtrace_meta_pid != NULL) { mutex_exit(&dtrace_lock); mutex_exit(&dtrace_meta_lock); cmn_err(CE_WARN, "failed to register meta-provider %s: " "user-land meta-provider exists", name); kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1); kmem_free(meta, sizeof (dtrace_meta_t)); return (EINVAL); } dtrace_meta_pid = meta; *idp = (dtrace_meta_provider_id_t)meta; /* * If there are providers and probes ready to go, pass them * off to the new meta provider now. */ help = dtrace_deferred_pid; dtrace_deferred_pid = NULL; mutex_exit(&dtrace_lock); while (help != NULL) { for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, help->dthps_pid); } next = help->dthps_next; help->dthps_next = NULL; help->dthps_prev = NULL; help->dthps_deferred = 0; help = next; } mutex_exit(&dtrace_meta_lock); return (0); } int dtrace_meta_unregister(dtrace_meta_provider_id_t id) { dtrace_meta_t **pp, *old = (dtrace_meta_t *)id; mutex_enter(&dtrace_meta_lock); mutex_enter(&dtrace_lock); if (old == dtrace_meta_pid) { pp = &dtrace_meta_pid; } else { panic("attempt to unregister non-existent " "dtrace meta-provider %p\n", (void *)old); } if (old->dtm_count != 0) { mutex_exit(&dtrace_lock); mutex_exit(&dtrace_meta_lock); return (EBUSY); } *pp = NULL; mutex_exit(&dtrace_lock); mutex_exit(&dtrace_meta_lock); kmem_free(old->dtm_name, strlen(old->dtm_name) + 1); kmem_free(old, sizeof (dtrace_meta_t)); return (0); } /* * DTrace DIF Object Functions */ static int dtrace_difo_err(uint_t pc, const char *format, ...) { if (dtrace_err_verbose) { va_list alist; (void) uprintf("dtrace DIF object error: [%u]: ", pc); va_start(alist, format); (void) vuprintf(format, alist); va_end(alist); } #ifdef DTRACE_ERRDEBUG dtrace_errdebug(format); #endif return (1); } /* * Validate a DTrace DIF object by checking the IR instructions. The following * rules are currently enforced by dtrace_difo_validate(): * * 1.
Each instruction must have a valid opcode * 2. Each register, string, variable, or subroutine reference must be valid * 3. No instruction can modify register %r0 (must be zero) * 4. All instruction reserved bits must be set to zero * 5. The last instruction must be a "ret" instruction * 6. All branch targets must reference a valid instruction _after_ the branch */ static int dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, cred_t *cr) { int err = 0, i; int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err; int kcheckload; uint_t pc; int maxglobal = -1, maxlocal = -1, maxtlocal = -1; kcheckload = cr == NULL || (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0; dp->dtdo_destructive = 0; for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) { dif_instr_t instr = dp->dtdo_buf[pc]; uint_t r1 = DIF_INSTR_R1(instr); uint_t r2 = DIF_INSTR_R2(instr); uint_t rd = DIF_INSTR_RD(instr); uint_t rs = DIF_INSTR_RS(instr); uint_t label = DIF_INSTR_LABEL(instr); uint_t v = DIF_INSTR_VAR(instr); uint_t subr = DIF_INSTR_SUBR(instr); uint_t type = DIF_INSTR_TYPE(instr); uint_t op = DIF_INSTR_OP(instr); switch (op) { case DIF_OP_OR: case DIF_OP_XOR: case DIF_OP_AND: case DIF_OP_SLL: case DIF_OP_SRL: case DIF_OP_SRA: case DIF_OP_SUB: case DIF_OP_ADD: case DIF_OP_MUL: case DIF_OP_SDIV: case DIF_OP_UDIV: case DIF_OP_SREM: case DIF_OP_UREM: case DIF_OP_COPYS: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 >= nregs) err += efunc(pc, "invalid register %u\n", r2); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_NOT: case DIF_OP_MOV: case DIF_OP_ALLOCS: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 != 0) err += efunc(pc, "non-zero reserved bits\n"); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_LDSB: case DIF_OP_LDSH: case DIF_OP_LDSW: case DIF_OP_LDUB: case DIF_OP_LDUH: case DIF_OP_LDUW: case DIF_OP_LDX: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 != 0) err += efunc(pc, "non-zero reserved bits\n"); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); if (kcheckload) dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op + DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd); break; case DIF_OP_RLDSB: case DIF_OP_RLDSH: case DIF_OP_RLDSW: case DIF_OP_RLDUB: case DIF_OP_RLDUH: case DIF_OP_RLDUW: case DIF_OP_RLDX: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 != 0) err += efunc(pc, "non-zero reserved bits\n"); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_ULDSB: case DIF_OP_ULDSH: case DIF_OP_ULDSW: case DIF_OP_ULDUB: case DIF_OP_ULDUH: case DIF_OP_ULDUW: case DIF_OP_ULDX: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 != 0) err += efunc(pc, "non-zero reserved bits\n"); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_STB: case DIF_OP_STH: case DIF_OP_STW: case DIF_OP_STX: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 != 0) err += efunc(pc, "non-zero reserved bits\n"); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to 0 address\n"); break; case DIF_OP_CMP: case 
DIF_OP_SCMP: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 >= nregs) err += efunc(pc, "invalid register %u\n", r2); if (rd != 0) err += efunc(pc, "non-zero reserved bits\n"); break; case DIF_OP_TST: if (r1 >= nregs) err += efunc(pc, "invalid register %u\n", r1); if (r2 != 0 || rd != 0) err += efunc(pc, "non-zero reserved bits\n"); break; case DIF_OP_BA: case DIF_OP_BE: case DIF_OP_BNE: case DIF_OP_BG: case DIF_OP_BGU: case DIF_OP_BGE: case DIF_OP_BGEU: case DIF_OP_BL: case DIF_OP_BLU: case DIF_OP_BLE: case DIF_OP_BLEU: if (label >= dp->dtdo_len) { err += efunc(pc, "invalid branch target %u\n", label); } if (label <= pc) { err += efunc(pc, "backward branch to %u\n", label); } break; case DIF_OP_RET: if (r1 != 0 || r2 != 0) err += efunc(pc, "non-zero reserved bits\n"); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); break; case DIF_OP_NOP: case DIF_OP_POPTS: case DIF_OP_FLUSHTS: if (r1 != 0 || r2 != 0 || rd != 0) err += efunc(pc, "non-zero reserved bits\n"); break; case DIF_OP_SETX: if (DIF_INSTR_INTEGER(instr) >= dp->dtdo_intlen) { err += efunc(pc, "invalid integer ref %u\n", DIF_INSTR_INTEGER(instr)); } if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_SETS: if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) { err += efunc(pc, "invalid string ref %u\n", DIF_INSTR_STRING(instr)); } if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_LDGA: case DIF_OP_LDTA: if (r1 > DIF_VAR_ARRAY_MAX) err += efunc(pc, "invalid array %u\n", r1); if (r2 >= nregs) err += efunc(pc, "invalid register %u\n", r2); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_LDGS: case DIF_OP_LDTS: case DIF_OP_LDLS: case DIF_OP_LDGAA: case DIF_OP_LDTAA: if (v < DIF_VAR_OTHER_MIN || v > DIF_VAR_OTHER_MAX) err += efunc(pc, "invalid variable %u\n", v); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); break; case DIF_OP_STGS: case DIF_OP_STTS: case DIF_OP_STLS: case DIF_OP_STGAA: case DIF_OP_STTAA: if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX) err += efunc(pc, "invalid variable %u\n", v); if (rs >= nregs) err += efunc(pc, "invalid register %u\n", rs); break; case DIF_OP_CALL: if (subr > DIF_SUBR_MAX) err += efunc(pc, "invalid subr %u\n", subr); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); if (subr == DIF_SUBR_COPYOUT || subr == DIF_SUBR_COPYOUTSTR) { dp->dtdo_destructive = 1; } if (subr == DIF_SUBR_GETF) { /* * If we have a getf() we need to record that * in our state. Note that our state can be * NULL if this is a helper -- but in that * case, the call to getf() is itself illegal, * and will be caught (slightly later) when * the helper is validated.
*/ if (vstate->dtvs_state != NULL) vstate->dtvs_state->dts_getf++; } break; case DIF_OP_PUSHTR: if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF) err += efunc(pc, "invalid ref type %u\n", type); if (r2 >= nregs) err += efunc(pc, "invalid register %u\n", r2); if (rs >= nregs) err += efunc(pc, "invalid register %u\n", rs); break; case DIF_OP_PUSHTV: if (type != DIF_TYPE_CTF) err += efunc(pc, "invalid val type %u\n", type); if (r2 >= nregs) err += efunc(pc, "invalid register %u\n", r2); if (rs >= nregs) err += efunc(pc, "invalid register %u\n", rs); break; default: err += efunc(pc, "invalid opcode %u\n", DIF_INSTR_OP(instr)); } } if (dp->dtdo_len != 0 && DIF_INSTR_OP(dp->dtdo_buf[dp->dtdo_len - 1]) != DIF_OP_RET) { err += efunc(dp->dtdo_len - 1, "expected 'ret' as last DIF instruction\n"); } if (!(dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF))) { /* * If we're not returning by reference, the size must be either * 0 or the size of one of the base types. */ switch (dp->dtdo_rtype.dtdt_size) { case 0: case sizeof (uint8_t): case sizeof (uint16_t): case sizeof (uint32_t): case sizeof (uint64_t): break; default: err += efunc(dp->dtdo_len - 1, "bad return size\n"); } } for (i = 0; i < dp->dtdo_varlen && err == 0; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL; dtrace_diftype_t *vt, *et; uint_t id, ndx; if (v->dtdv_scope != DIFV_SCOPE_GLOBAL && v->dtdv_scope != DIFV_SCOPE_THREAD && v->dtdv_scope != DIFV_SCOPE_LOCAL) { err += efunc(i, "unrecognized variable scope %d\n", v->dtdv_scope); break; } if (v->dtdv_kind != DIFV_KIND_ARRAY && v->dtdv_kind != DIFV_KIND_SCALAR) { err += efunc(i, "unrecognized variable type %d\n", v->dtdv_kind); break; } if ((id = v->dtdv_id) > DIF_VARIABLE_MAX) { err += efunc(i, "%d exceeds variable id limit\n", id); break; } if (id < DIF_VAR_OTHER_UBASE) continue; /* * For user-defined variables, we need to check that this * definition is identical to any previous definition that we * encountered. 
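 * For example (illustratively), if one clause declared a global by-ref
 * string of one size and a later clause declared the same variable by
 * value or with a different size, the dtdt_flags/dtdt_size comparisons
 * below would reject the mismatch.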
ndx = id - DIF_VAR_OTHER_UBASE; switch (v->dtdv_scope) { case DIFV_SCOPE_GLOBAL: if (maxglobal == -1 || ndx > maxglobal) maxglobal = ndx; if (ndx < vstate->dtvs_nglobals) { dtrace_statvar_t *svar; if ((svar = vstate->dtvs_globals[ndx]) != NULL) existing = &svar->dtsv_var; } break; case DIFV_SCOPE_THREAD: if (maxtlocal == -1 || ndx > maxtlocal) maxtlocal = ndx; if (ndx < vstate->dtvs_ntlocals) existing = &vstate->dtvs_tlocals[ndx]; break; case DIFV_SCOPE_LOCAL: if (maxlocal == -1 || ndx > maxlocal) maxlocal = ndx; if (ndx < vstate->dtvs_nlocals) { dtrace_statvar_t *svar; if ((svar = vstate->dtvs_locals[ndx]) != NULL) existing = &svar->dtsv_var; } break; } vt = &v->dtdv_type; if (vt->dtdt_flags & DIF_TF_BYREF) { if (vt->dtdt_size == 0) { err += efunc(i, "zero-sized variable\n"); break; } if ((v->dtdv_scope == DIFV_SCOPE_GLOBAL || v->dtdv_scope == DIFV_SCOPE_LOCAL) && vt->dtdt_size > dtrace_statvar_maxsize) { err += efunc(i, "oversized by-ref static\n"); break; } } if (existing == NULL || existing->dtdv_id == 0) continue; ASSERT(existing->dtdv_id == v->dtdv_id); ASSERT(existing->dtdv_scope == v->dtdv_scope); if (existing->dtdv_kind != v->dtdv_kind) err += efunc(i, "%d changed variable kind\n", id); et = &existing->dtdv_type; if (vt->dtdt_flags != et->dtdt_flags) { err += efunc(i, "%d changed variable type flags\n", id); break; } if (vt->dtdt_size != 0 && vt->dtdt_size != et->dtdt_size) { err += efunc(i, "%d changed variable type size\n", id); break; } } for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) { dif_instr_t instr = dp->dtdo_buf[pc]; uint_t v = DIF_INSTR_VAR(instr); uint_t op = DIF_INSTR_OP(instr); switch (op) { case DIF_OP_LDGS: case DIF_OP_LDGAA: case DIF_OP_STGS: case DIF_OP_STGAA: if (v > DIF_VAR_OTHER_UBASE + maxglobal) err += efunc(pc, "invalid variable %u\n", v); break; case DIF_OP_LDTS: case DIF_OP_LDTAA: case DIF_OP_STTS: case DIF_OP_STTAA: if (v > DIF_VAR_OTHER_UBASE + maxtlocal) err += efunc(pc, "invalid variable %u\n", v); break; case DIF_OP_LDLS: case DIF_OP_STLS: if (v > DIF_VAR_OTHER_UBASE + maxlocal) err += efunc(pc, "invalid variable %u\n", v); break; default: break; } } return (err); } /* * Validate a DTrace DIF object that is to be used as a helper. Helpers * are much more constrained than normal DIFOs. Specifically, they may * not: * * 1. Make calls to subroutines other than copyin(), copyinstr() or * miscellaneous string routines * 2. Access DTrace variables other than the args[] array, and the * curthread, pid, ppid, tid, execname, zonename, uid and gid variables. * 3. Have thread-local variables. * 4. Have dynamic variables. */ static int dtrace_difo_validate_helper(dtrace_difo_t *dp) { int (*efunc)(uint_t pc, const char *, ...)
= dtrace_difo_err; int err = 0; uint_t pc; for (pc = 0; pc < dp->dtdo_len; pc++) { dif_instr_t instr = dp->dtdo_buf[pc]; uint_t v = DIF_INSTR_VAR(instr); uint_t subr = DIF_INSTR_SUBR(instr); uint_t op = DIF_INSTR_OP(instr); switch (op) { case DIF_OP_OR: case DIF_OP_XOR: case DIF_OP_AND: case DIF_OP_SLL: case DIF_OP_SRL: case DIF_OP_SRA: case DIF_OP_SUB: case DIF_OP_ADD: case DIF_OP_MUL: case DIF_OP_SDIV: case DIF_OP_UDIV: case DIF_OP_SREM: case DIF_OP_UREM: case DIF_OP_COPYS: case DIF_OP_NOT: case DIF_OP_MOV: case DIF_OP_RLDSB: case DIF_OP_RLDSH: case DIF_OP_RLDSW: case DIF_OP_RLDUB: case DIF_OP_RLDUH: case DIF_OP_RLDUW: case DIF_OP_RLDX: case DIF_OP_ULDSB: case DIF_OP_ULDSH: case DIF_OP_ULDSW: case DIF_OP_ULDUB: case DIF_OP_ULDUH: case DIF_OP_ULDUW: case DIF_OP_ULDX: case DIF_OP_STB: case DIF_OP_STH: case DIF_OP_STW: case DIF_OP_STX: case DIF_OP_ALLOCS: case DIF_OP_CMP: case DIF_OP_SCMP: case DIF_OP_TST: case DIF_OP_BA: case DIF_OP_BE: case DIF_OP_BNE: case DIF_OP_BG: case DIF_OP_BGU: case DIF_OP_BGE: case DIF_OP_BGEU: case DIF_OP_BL: case DIF_OP_BLU: case DIF_OP_BLE: case DIF_OP_BLEU: case DIF_OP_RET: case DIF_OP_NOP: case DIF_OP_POPTS: case DIF_OP_FLUSHTS: case DIF_OP_SETX: case DIF_OP_SETS: case DIF_OP_LDGA: case DIF_OP_LDLS: case DIF_OP_STGS: case DIF_OP_STLS: case DIF_OP_PUSHTR: case DIF_OP_PUSHTV: break; case DIF_OP_LDGS: if (v >= DIF_VAR_OTHER_UBASE) break; if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) break; if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID || v == DIF_VAR_PPID || v == DIF_VAR_TID || v == DIF_VAR_EXECARGS || v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME || v == DIF_VAR_UID || v == DIF_VAR_GID) break; err += efunc(pc, "illegal variable %u\n", v); break; case DIF_OP_LDTA: case DIF_OP_LDTS: case DIF_OP_LDGAA: case DIF_OP_LDTAA: err += efunc(pc, "illegal dynamic variable load\n"); break; case DIF_OP_STTS: case DIF_OP_STGAA: case DIF_OP_STTAA: err += efunc(pc, "illegal dynamic variable store\n"); break; case DIF_OP_CALL: if (subr == DIF_SUBR_ALLOCA || subr == DIF_SUBR_BCOPY || subr == DIF_SUBR_COPYIN || subr == DIF_SUBR_COPYINTO || subr == DIF_SUBR_COPYINSTR || subr == DIF_SUBR_INDEX || subr == DIF_SUBR_INET_NTOA || subr == DIF_SUBR_INET_NTOA6 || subr == DIF_SUBR_INET_NTOP || subr == DIF_SUBR_JSON || subr == DIF_SUBR_LLTOSTR || subr == DIF_SUBR_STRTOLL || subr == DIF_SUBR_RINDEX || subr == DIF_SUBR_STRCHR || subr == DIF_SUBR_STRJOIN || subr == DIF_SUBR_STRRCHR || subr == DIF_SUBR_STRSTR || subr == DIF_SUBR_HTONS || subr == DIF_SUBR_HTONL || subr == DIF_SUBR_HTONLL || subr == DIF_SUBR_NTOHS || subr == DIF_SUBR_NTOHL || subr == DIF_SUBR_NTOHLL || subr == DIF_SUBR_MEMREF) break; #ifdef __FreeBSD__ if (subr == DIF_SUBR_MEMSTR) break; #endif err += efunc(pc, "invalid subr %u\n", subr); break; default: err += efunc(pc, "invalid opcode %u\n", DIF_INSTR_OP(instr)); } } return (err); } /* * Returns 1 if the expression in the DIF object can be cached on a per-thread * basis; 0 if not. */ static int dtrace_difo_cacheable(dtrace_difo_t *dp) { int i; if (dp == NULL) return (0); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; if (v->dtdv_scope != DIFV_SCOPE_GLOBAL) continue; switch (v->dtdv_id) { case DIF_VAR_CURTHREAD: case DIF_VAR_PID: case DIF_VAR_TID: case DIF_VAR_EXECARGS: case DIF_VAR_EXECNAME: case DIF_VAR_ZONENAME: break; default: return (0); } } /* * This DIF object may be cacheable. Now we need to look for any * array loading instructions, any memory loading instructions, or * any stores to thread-local variables. 
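 * (Illustratively, the DIF for a predicate such as pid == 1234 refers
 * only to DIF_VAR_PID and performs none of these operations, so it
 * remains cacheable; anything that chases pointers through loads does
 * not.)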
*/ for (i = 0; i < dp->dtdo_len; i++) { uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]); if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) || (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) || (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) || op == DIF_OP_LDGA || op == DIF_OP_STTS) return (0); } return (1); } static void dtrace_difo_hold(dtrace_difo_t *dp) { int i; ASSERT(MUTEX_HELD(&dtrace_lock)); dp->dtdo_refcnt++; ASSERT(dp->dtdo_refcnt != 0); /* * We need to check this DIF object for references to the variable * DIF_VAR_VTIMESTAMP. */ for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; if (v->dtdv_id != DIF_VAR_VTIMESTAMP) continue; if (dtrace_vtime_references++ == 0) dtrace_vtime_enable(); } } /* * This routine calculates the dynamic variable chunksize for a given DIF * object. The calculation is not fool-proof, and can probably be tricked by * malicious DIF -- but it works for all compiler-generated DIF. Because this * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail * if a dynamic variable size exceeds the chunksize. */ static void dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { uint64_t sval = 0; dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */ const dif_instr_t *text = dp->dtdo_buf; uint_t pc, srd = 0; uint_t ttop = 0; size_t size, ksize; uint_t id, i; for (pc = 0; pc < dp->dtdo_len; pc++) { dif_instr_t instr = text[pc]; uint_t op = DIF_INSTR_OP(instr); uint_t rd = DIF_INSTR_RD(instr); uint_t r1 = DIF_INSTR_R1(instr); uint_t nkeys = 0; uchar_t scope = 0; dtrace_key_t *key = tupregs; switch (op) { case DIF_OP_SETX: sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)]; srd = rd; continue; case DIF_OP_STTS: key = &tupregs[DIF_DTR_NREGS]; key[0].dttk_size = 0; key[1].dttk_size = 0; nkeys = 2; scope = DIFV_SCOPE_THREAD; break; case DIF_OP_STGAA: case DIF_OP_STTAA: nkeys = ttop; if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) key[nkeys++].dttk_size = 0; key[nkeys++].dttk_size = 0; if (op == DIF_OP_STTAA) { scope = DIFV_SCOPE_THREAD; } else { scope = DIFV_SCOPE_GLOBAL; } break; case DIF_OP_PUSHTR: if (ttop == DIF_DTR_NREGS) return; if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) { /* * If the register for the size of the "pushtr" * is %r0 (or the value is 0) and the type is * a string, we'll use the system-wide default * string size. */ tupregs[ttop++].dttk_size = dtrace_strsize_default; } else { if (srd == 0) return; if (sval > LONG_MAX) return; tupregs[ttop++].dttk_size = sval; } break; case DIF_OP_PUSHTV: if (ttop == DIF_DTR_NREGS) return; tupregs[ttop++].dttk_size = 0; break; case DIF_OP_FLUSHTS: ttop = 0; break; case DIF_OP_POPTS: if (ttop != 0) ttop--; break; } sval = 0; srd = 0; if (nkeys == 0) continue; /* * We have a dynamic variable allocation; calculate its size. */ for (ksize = 0, i = 0; i < nkeys; i++) ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t)); size = sizeof (dtrace_dynvar_t); size += sizeof (dtrace_key_t) * (nkeys - 1); size += ksize; /* * Now we need to determine the size of the stored data. */ id = DIF_INSTR_VAR(instr); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; if (v->dtdv_id == id && v->dtdv_scope == scope) { size += v->dtdv_type.dtdt_size; break; } } if (i == dp->dtdo_varlen) return; /* * We have the size. If this is larger than the chunk size * for our dynamic variable state, reset the chunk size. */ size = P2ROUNDUP(size, sizeof (uint64_t)); /* * Before setting the chunk size, check that we're not going * to set it to a negative value... 
*/ if (size > LONG_MAX) return; /* * ...and make certain that we didn't badly overflow. */ if (size < ksize || size < sizeof (dtrace_dynvar_t)) return; if (size > vstate->dtvs_dynvars.dtds_chunksize) vstate->dtvs_dynvars.dtds_chunksize = size; } } static void dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { int i, oldsvars, osz, nsz, otlocals, ntlocals; uint_t id; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; dtrace_statvar_t *svar, ***svarp = NULL; size_t dsize = 0; uint8_t scope = v->dtdv_scope; int *np = NULL; if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE) continue; id -= DIF_VAR_OTHER_UBASE; switch (scope) { case DIFV_SCOPE_THREAD: while (id >= (otlocals = vstate->dtvs_ntlocals)) { dtrace_difv_t *tlocals; if ((ntlocals = (otlocals << 1)) == 0) ntlocals = 1; osz = otlocals * sizeof (dtrace_difv_t); nsz = ntlocals * sizeof (dtrace_difv_t); tlocals = kmem_zalloc(nsz, KM_SLEEP); if (osz != 0) { bcopy(vstate->dtvs_tlocals, tlocals, osz); kmem_free(vstate->dtvs_tlocals, osz); } vstate->dtvs_tlocals = tlocals; vstate->dtvs_ntlocals = ntlocals; } vstate->dtvs_tlocals[id] = *v; continue; case DIFV_SCOPE_LOCAL: np = &vstate->dtvs_nlocals; svarp = &vstate->dtvs_locals; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) dsize = NCPU * (v->dtdv_type.dtdt_size + sizeof (uint64_t)); else dsize = NCPU * sizeof (uint64_t); break; case DIFV_SCOPE_GLOBAL: np = &vstate->dtvs_nglobals; svarp = &vstate->dtvs_globals; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) dsize = v->dtdv_type.dtdt_size + sizeof (uint64_t); break; default: ASSERT(0); } while (id >= (oldsvars = *np)) { dtrace_statvar_t **statics; int newsvars, oldsize, newsize; if ((newsvars = (oldsvars << 1)) == 0) newsvars = 1; oldsize = oldsvars * sizeof (dtrace_statvar_t *); newsize = newsvars * sizeof (dtrace_statvar_t *); statics = kmem_zalloc(newsize, KM_SLEEP); if (oldsize != 0) { bcopy(*svarp, statics, oldsize); kmem_free(*svarp, oldsize); } *svarp = statics; *np = newsvars; } if ((svar = (*svarp)[id]) == NULL) { svar = kmem_zalloc(sizeof (dtrace_statvar_t), KM_SLEEP); svar->dtsv_var = *v; if ((svar->dtsv_size = dsize) != 0) { svar->dtsv_data = (uint64_t)(uintptr_t) kmem_zalloc(dsize, KM_SLEEP); } (*svarp)[id] = svar; } svar->dtsv_refcnt++; } dtrace_difo_chunksize(dp, vstate); dtrace_difo_hold(dp); } static dtrace_difo_t * dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { dtrace_difo_t *new; size_t sz; ASSERT(dp->dtdo_buf != NULL); ASSERT(dp->dtdo_refcnt != 0); new = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP); ASSERT(dp->dtdo_buf != NULL); sz = dp->dtdo_len * sizeof (dif_instr_t); new->dtdo_buf = kmem_alloc(sz, KM_SLEEP); bcopy(dp->dtdo_buf, new->dtdo_buf, sz); new->dtdo_len = dp->dtdo_len; if (dp->dtdo_strtab != NULL) { ASSERT(dp->dtdo_strlen != 0); new->dtdo_strtab = kmem_alloc(dp->dtdo_strlen, KM_SLEEP); bcopy(dp->dtdo_strtab, new->dtdo_strtab, dp->dtdo_strlen); new->dtdo_strlen = dp->dtdo_strlen; } if (dp->dtdo_inttab != NULL) { ASSERT(dp->dtdo_intlen != 0); sz = dp->dtdo_intlen * sizeof (uint64_t); new->dtdo_inttab = kmem_alloc(sz, KM_SLEEP); bcopy(dp->dtdo_inttab, new->dtdo_inttab, sz); new->dtdo_intlen = dp->dtdo_intlen; } if (dp->dtdo_vartab != NULL) { ASSERT(dp->dtdo_varlen != 0); sz = dp->dtdo_varlen * sizeof (dtrace_difv_t); new->dtdo_vartab = kmem_alloc(sz, KM_SLEEP); bcopy(dp->dtdo_vartab, new->dtdo_vartab, sz); new->dtdo_varlen = dp->dtdo_varlen; } dtrace_difo_init(new, vstate); return 
(new); } static void dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { int i; ASSERT(dp->dtdo_refcnt == 0); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; dtrace_statvar_t *svar, **svarp = NULL; uint_t id; uint8_t scope = v->dtdv_scope; int *np = NULL; switch (scope) { case DIFV_SCOPE_THREAD: continue; case DIFV_SCOPE_LOCAL: np = &vstate->dtvs_nlocals; svarp = vstate->dtvs_locals; break; case DIFV_SCOPE_GLOBAL: np = &vstate->dtvs_nglobals; svarp = vstate->dtvs_globals; break; default: ASSERT(0); } if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE) continue; id -= DIF_VAR_OTHER_UBASE; ASSERT(id < *np); svar = svarp[id]; ASSERT(svar != NULL); ASSERT(svar->dtsv_refcnt > 0); if (--svar->dtsv_refcnt > 0) continue; if (svar->dtsv_size != 0) { ASSERT(svar->dtsv_data != 0); kmem_free((void *)(uintptr_t)svar->dtsv_data, svar->dtsv_size); } kmem_free(svar, sizeof (dtrace_statvar_t)); svarp[id] = NULL; } if (dp->dtdo_buf != NULL) kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t)); if (dp->dtdo_inttab != NULL) kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t)); if (dp->dtdo_strtab != NULL) kmem_free(dp->dtdo_strtab, dp->dtdo_strlen); if (dp->dtdo_vartab != NULL) kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t)); kmem_free(dp, sizeof (dtrace_difo_t)); } static void dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { int i; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dp->dtdo_refcnt != 0); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; if (v->dtdv_id != DIF_VAR_VTIMESTAMP) continue; ASSERT(dtrace_vtime_references > 0); if (--dtrace_vtime_references == 0) dtrace_vtime_disable(); } if (--dp->dtdo_refcnt == 0) dtrace_difo_destroy(dp, vstate); } /* * DTrace Format Functions */ static uint16_t dtrace_format_add(dtrace_state_t *state, char *str) { char *fmt, **new; uint16_t ndx, len = strlen(str) + 1; fmt = kmem_zalloc(len, KM_SLEEP); bcopy(str, fmt, len); for (ndx = 0; ndx < state->dts_nformats; ndx++) { if (state->dts_formats[ndx] == NULL) { state->dts_formats[ndx] = fmt; return (ndx + 1); } } if (state->dts_nformats == USHRT_MAX) { /* * This is only likely if a denial-of-service attack is being * attempted. As such, it's okay to fail silently here. */ kmem_free(fmt, len); return (0); } /* * For simplicity, we always resize the formats array to be exactly the * number of formats. 
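 * (Note that the value handed back to consumers is ndx + 1: format
 * indices are 1-based, with 0 reserved to mean "no format".)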
*/ ndx = state->dts_nformats++; new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP); if (state->dts_formats != NULL) { ASSERT(ndx != 0); bcopy(state->dts_formats, new, ndx * sizeof (char *)); kmem_free(state->dts_formats, ndx * sizeof (char *)); } state->dts_formats = new; state->dts_formats[ndx] = fmt; return (ndx + 1); } static void dtrace_format_remove(dtrace_state_t *state, uint16_t format) { char *fmt; ASSERT(state->dts_formats != NULL); ASSERT(format <= state->dts_nformats); ASSERT(state->dts_formats[format - 1] != NULL); fmt = state->dts_formats[format - 1]; kmem_free(fmt, strlen(fmt) + 1); state->dts_formats[format - 1] = NULL; } static void dtrace_format_destroy(dtrace_state_t *state) { int i; if (state->dts_nformats == 0) { ASSERT(state->dts_formats == NULL); return; } ASSERT(state->dts_formats != NULL); for (i = 0; i < state->dts_nformats; i++) { char *fmt = state->dts_formats[i]; if (fmt == NULL) continue; kmem_free(fmt, strlen(fmt) + 1); } kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *)); state->dts_nformats = 0; state->dts_formats = NULL; } /* * DTrace Predicate Functions */ static dtrace_predicate_t * dtrace_predicate_create(dtrace_difo_t *dp) { dtrace_predicate_t *pred; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dp->dtdo_refcnt != 0); pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP); pred->dtp_difo = dp; pred->dtp_refcnt = 1; if (!dtrace_difo_cacheable(dp)) return (pred); if (dtrace_predcache_id == DTRACE_CACHEIDNONE) { /* * This is only theoretically possible -- we have had 2^32 * cacheable predicates on this machine. We cannot allow any * more predicates to become cacheable: as unlikely as it is, * there may be a thread caching a (now stale) predicate cache * ID. (N.B.: the temptation is being successfully resisted to * have this cmn_err() "Holy shit -- we executed this code!") */ return (pred); } pred->dtp_cacheid = dtrace_predcache_id++; return (pred); } static void dtrace_predicate_hold(dtrace_predicate_t *pred) { ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0); ASSERT(pred->dtp_refcnt > 0); pred->dtp_refcnt++; } static void dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate) { dtrace_difo_t *dp = pred->dtp_difo; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dp != NULL && dp->dtdo_refcnt != 0); ASSERT(pred->dtp_refcnt > 0); if (--pred->dtp_refcnt == 0) { dtrace_difo_release(pred->dtp_difo, vstate); kmem_free(pred, sizeof (dtrace_predicate_t)); } } /* * DTrace Action Description Functions */ static dtrace_actdesc_t * dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple, uint64_t uarg, uint64_t arg) { dtrace_actdesc_t *act; #ifdef illumos ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL && arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA)); #endif act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP); act->dtad_kind = kind; act->dtad_ntuple = ntuple; act->dtad_uarg = uarg; act->dtad_arg = arg; act->dtad_refcnt = 1; return (act); } static void dtrace_actdesc_hold(dtrace_actdesc_t *act) { ASSERT(act->dtad_refcnt >= 1); act->dtad_refcnt++; } static void dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate) { dtrace_actkind_t kind = act->dtad_kind; dtrace_difo_t *dp; ASSERT(act->dtad_refcnt >= 1); if (--act->dtad_refcnt != 0) return; if ((dp = act->dtad_difo) != NULL) dtrace_difo_release(dp, vstate); if (DTRACEACT_ISPRINTFLIKE(kind)) { char *str = (char *)(uintptr_t)act->dtad_arg; #ifdef illumos ASSERT((str != NULL && 
(uintptr_t)str >= KERNELBASE) || (str == NULL && act->dtad_kind == DTRACEACT_PRINTA)); #endif if (str != NULL) kmem_free(str, strlen(str) + 1); } kmem_free(act, sizeof (dtrace_actdesc_t)); } /* * DTrace ECB Functions */ static dtrace_ecb_t * dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe) { dtrace_ecb_t *ecb; dtrace_epid_t epid; ASSERT(MUTEX_HELD(&dtrace_lock)); ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP); ecb->dte_predicate = NULL; ecb->dte_probe = probe; /* * The default size is the size of the default action: recording * the header. */ ecb->dte_size = ecb->dte_needed = sizeof (dtrace_rechdr_t); ecb->dte_alignment = sizeof (dtrace_epid_t); epid = state->dts_epid++; if (epid - 1 >= state->dts_necbs) { dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs; int necbs = state->dts_necbs << 1; ASSERT(epid == state->dts_necbs + 1); if (necbs == 0) { ASSERT(oecbs == NULL); necbs = 1; } ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP); if (oecbs != NULL) bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs)); dtrace_membar_producer(); state->dts_ecbs = ecbs; if (oecbs != NULL) { /* * If this state is active, we must dtrace_sync() * before we can free the old dts_ecbs array: we're * coming in hot, and there may be active ring * buffer processing (which indexes into the dts_ecbs * array) on another CPU. */ if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) dtrace_sync(); kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs)); } dtrace_membar_producer(); state->dts_necbs = necbs; } ecb->dte_state = state; ASSERT(state->dts_ecbs[epid - 1] == NULL); dtrace_membar_producer(); state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb; return (ecb); } static void dtrace_ecb_enable(dtrace_ecb_t *ecb) { dtrace_probe_t *probe = ecb->dte_probe; ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(ecb->dte_next == NULL); if (probe == NULL) { /* * This is the NULL probe -- there's nothing to do. */ return; } if (probe->dtpr_ecb == NULL) { dtrace_provider_t *prov = probe->dtpr_provider; /* * We're the first ECB on this probe. */ probe->dtpr_ecb = probe->dtpr_ecb_last = ecb; if (ecb->dte_predicate != NULL) probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid; prov->dtpv_pops.dtps_enable(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); } else { /* * This probe is already active. Swing the last pointer to * point to the new ECB, and issue a dtrace_sync() to assure * that all CPUs have seen the change. */ ASSERT(probe->dtpr_ecb_last != NULL); probe->dtpr_ecb_last->dte_next = ecb; probe->dtpr_ecb_last = ecb; probe->dtpr_predcache = 0; dtrace_sync(); } } static int dtrace_ecb_resize(dtrace_ecb_t *ecb) { dtrace_action_t *act; uint32_t curneeded = UINT32_MAX; uint32_t aggbase = UINT32_MAX; /* * If we record anything, we always record the dtrace_rechdr_t. (And * we always record it first.) 
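 * (The dtrace_rechdr_t carries the enabled probe ID and the record
 * timestamp, which is what lets consumers demultiplex the buffer.)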
*/ ecb->dte_size = sizeof (dtrace_rechdr_t); ecb->dte_alignment = sizeof (dtrace_epid_t); for (act = ecb->dte_action; act != NULL; act = act->dta_next) { dtrace_recdesc_t *rec = &act->dta_rec; ASSERT(rec->dtrd_size > 0 || rec->dtrd_alignment == 1); ecb->dte_alignment = MAX(ecb->dte_alignment, rec->dtrd_alignment); if (DTRACEACT_ISAGG(act->dta_kind)) { dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act; ASSERT(rec->dtrd_size != 0); ASSERT(agg->dtag_first != NULL); ASSERT(act->dta_prev->dta_intuple); ASSERT(aggbase != UINT32_MAX); ASSERT(curneeded != UINT32_MAX); agg->dtag_base = aggbase; curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment); rec->dtrd_offset = curneeded; if (curneeded + rec->dtrd_size < curneeded) return (EINVAL); curneeded += rec->dtrd_size; ecb->dte_needed = MAX(ecb->dte_needed, curneeded); aggbase = UINT32_MAX; curneeded = UINT32_MAX; } else if (act->dta_intuple) { if (curneeded == UINT32_MAX) { /* * This is the first record in a tuple. Align * curneeded to be at offset 4 in an 8-byte * aligned block. */ ASSERT(act->dta_prev == NULL || !act->dta_prev->dta_intuple); ASSERT3U(aggbase, ==, UINT32_MAX); curneeded = P2PHASEUP(ecb->dte_size, sizeof (uint64_t), sizeof (dtrace_aggid_t)); aggbase = curneeded - sizeof (dtrace_aggid_t); ASSERT(IS_P2ALIGNED(aggbase, sizeof (uint64_t))); } curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment); rec->dtrd_offset = curneeded; if (curneeded + rec->dtrd_size < curneeded) return (EINVAL); curneeded += rec->dtrd_size; } else { /* tuples must be followed by an aggregation */ ASSERT(act->dta_prev == NULL || !act->dta_prev->dta_intuple); ecb->dte_size = P2ROUNDUP(ecb->dte_size, rec->dtrd_alignment); rec->dtrd_offset = ecb->dte_size; if (ecb->dte_size + rec->dtrd_size < ecb->dte_size) return (EINVAL); ecb->dte_size += rec->dtrd_size; ecb->dte_needed = MAX(ecb->dte_needed, ecb->dte_size); } } if ((act = ecb->dte_action) != NULL && !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) && ecb->dte_size == sizeof (dtrace_rechdr_t)) { /* * If the size is still sizeof (dtrace_rechdr_t), then all * actions store no data; set the size to 0. 
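 * (Note the exception carved out in the condition above: an ECB whose
 * sole action is a speculate keeps the header-sized record.)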
*/ ecb->dte_size = 0; } ecb->dte_size = P2ROUNDUP(ecb->dte_size, sizeof (dtrace_epid_t)); ecb->dte_needed = P2ROUNDUP(ecb->dte_needed, (sizeof (dtrace_epid_t))); ecb->dte_state->dts_needed = MAX(ecb->dte_state->dts_needed, ecb->dte_needed); return (0); } static dtrace_action_t * dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) { dtrace_aggregation_t *agg; size_t size = sizeof (uint64_t); int ntuple = desc->dtad_ntuple; dtrace_action_t *act; dtrace_recdesc_t *frec; dtrace_aggid_t aggid; dtrace_state_t *state = ecb->dte_state; agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP); agg->dtag_ecb = ecb; ASSERT(DTRACEACT_ISAGG(desc->dtad_kind)); switch (desc->dtad_kind) { case DTRACEAGG_MIN: agg->dtag_initial = INT64_MAX; agg->dtag_aggregate = dtrace_aggregate_min; break; case DTRACEAGG_MAX: agg->dtag_initial = INT64_MIN; agg->dtag_aggregate = dtrace_aggregate_max; break; case DTRACEAGG_COUNT: agg->dtag_aggregate = dtrace_aggregate_count; break; case DTRACEAGG_QUANTIZE: agg->dtag_aggregate = dtrace_aggregate_quantize; size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) * sizeof (uint64_t); break; case DTRACEAGG_LQUANTIZE: { uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg); uint16_t levels = DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg); agg->dtag_initial = desc->dtad_arg; agg->dtag_aggregate = dtrace_aggregate_lquantize; if (step == 0 || levels == 0) goto err; size = levels * sizeof (uint64_t) + 3 * sizeof (uint64_t); break; } case DTRACEAGG_LLQUANTIZE: { uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg); uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg); uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg); uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg); int64_t v; agg->dtag_initial = desc->dtad_arg; agg->dtag_aggregate = dtrace_aggregate_llquantize; if (factor < 2 || low >= high || nsteps < factor) goto err; /* * Now check that the number of steps evenly divides a power * of the factor. (This assures both integer bucket size and * linearity within each magnitude.) */ for (v = factor; v < nsteps; v *= factor) continue; if ((v % nsteps) || (nsteps % factor)) goto err; size = (dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t); break; } case DTRACEAGG_AVG: agg->dtag_aggregate = dtrace_aggregate_avg; size = sizeof (uint64_t) * 2; break; case DTRACEAGG_STDDEV: agg->dtag_aggregate = dtrace_aggregate_stddev; size = sizeof (uint64_t) * 4; break; case DTRACEAGG_SUM: agg->dtag_aggregate = dtrace_aggregate_sum; break; default: goto err; } agg->dtag_action.dta_rec.dtrd_size = size; if (ntuple == 0) goto err; /* * We must make sure that we have enough actions for the n-tuple. */ for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) { if (DTRACEACT_ISAGG(act->dta_kind)) break; if (--ntuple == 0) { /* * This is the action with which our n-tuple begins. */ agg->dtag_first = act; goto success; } } /* * This n-tuple is short by ntuple elements. Return failure. */ ASSERT(ntuple != 0); err: kmem_free(agg, sizeof (dtrace_aggregation_t)); return (NULL); success: /* * If the last action in the tuple has a size of zero, it's actually * an expression argument for the aggregating action. */ ASSERT(ecb->dte_action_last != NULL); act = ecb->dte_action_last; if (act->dta_kind == DTRACEACT_DIFEXPR) { ASSERT(act->dta_difo != NULL); if (act->dta_difo->dtdo_rtype.dtdt_size == 0) agg->dtag_hasarg = 1; } /* * We need to allocate an id for this aggregation. 
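 * As with probe IDs, the allocator differs by platform -- a vmem arena
 * on illumos, a unit-number allocator on FreeBSD -- but both hand out
 * small 1-based integers, which is why aggid - 1 indexes the
 * dts_aggregations array below.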
*/ #ifdef illumos aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1, VM_BESTFIT | VM_SLEEP); #else aggid = alloc_unr(state->dts_aggid_arena); #endif if (aggid - 1 >= state->dts_naggregations) { dtrace_aggregation_t **oaggs = state->dts_aggregations; dtrace_aggregation_t **aggs; int naggs = state->dts_naggregations << 1; int onaggs = state->dts_naggregations; ASSERT(aggid == state->dts_naggregations + 1); if (naggs == 0) { ASSERT(oaggs == NULL); naggs = 1; } aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP); if (oaggs != NULL) { bcopy(oaggs, aggs, onaggs * sizeof (*aggs)); kmem_free(oaggs, onaggs * sizeof (*aggs)); } state->dts_aggregations = aggs; state->dts_naggregations = naggs; } ASSERT(state->dts_aggregations[aggid - 1] == NULL); state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg; frec = &agg->dtag_first->dta_rec; if (frec->dtrd_alignment < sizeof (dtrace_aggid_t)) frec->dtrd_alignment = sizeof (dtrace_aggid_t); for (act = agg->dtag_first; act != NULL; act = act->dta_next) { ASSERT(!act->dta_intuple); act->dta_intuple = 1; } return (&agg->dtag_action); } static void dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act) { dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act; dtrace_state_t *state = ecb->dte_state; dtrace_aggid_t aggid = agg->dtag_id; ASSERT(DTRACEACT_ISAGG(act->dta_kind)); #ifdef illumos vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1); #else free_unr(state->dts_aggid_arena, aggid); #endif ASSERT(state->dts_aggregations[aggid - 1] == agg); state->dts_aggregations[aggid - 1] = NULL; kmem_free(agg, sizeof (dtrace_aggregation_t)); } static int dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) { dtrace_action_t *action, *last; dtrace_difo_t *dp = desc->dtad_difo; uint32_t size = 0, align = sizeof (uint8_t), mask; uint16_t format = 0; dtrace_recdesc_t *rec; dtrace_state_t *state = ecb->dte_state; dtrace_optval_t *opt = state->dts_options, nframes = 0, strsize; uint64_t arg = desc->dtad_arg; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1); if (DTRACEACT_ISAGG(desc->dtad_kind)) { /* * If this is an aggregating action, there must be neither * a speculate nor a commit on the action chain. */ dtrace_action_t *act; for (act = ecb->dte_action; act != NULL; act = act->dta_next) { if (act->dta_kind == DTRACEACT_COMMIT) return (EINVAL); if (act->dta_kind == DTRACEACT_SPECULATE) return (EINVAL); } action = dtrace_ecb_aggregation_create(ecb, desc); if (action == NULL) return (EINVAL); } else { if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) || (desc->dtad_kind == DTRACEACT_DIFEXPR && dp != NULL && dp->dtdo_destructive)) { state->dts_destructive = 1; } switch (desc->dtad_kind) { case DTRACEACT_PRINTF: case DTRACEACT_PRINTA: case DTRACEACT_SYSTEM: case DTRACEACT_FREOPEN: case DTRACEACT_DIFEXPR: /* * We know that our arg is a string -- turn it into a * format. 
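 * (The string itself was copied into the kernel earlier, when the
 * action description was constructed from DOF; dtrace_format_add()
 * duplicates it into this state's format table and returns its
 * 1-based index.)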
*/ if (arg == 0) { ASSERT(desc->dtad_kind == DTRACEACT_PRINTA || desc->dtad_kind == DTRACEACT_DIFEXPR); format = 0; } else { ASSERT(arg != 0); #ifdef illumos ASSERT(arg > KERNELBASE); #endif format = dtrace_format_add(state, (char *)(uintptr_t)arg); } /*FALLTHROUGH*/ case DTRACEACT_LIBACT: case DTRACEACT_TRACEMEM: case DTRACEACT_TRACEMEM_DYNSIZE: if (dp == NULL) return (EINVAL); if ((size = dp->dtdo_rtype.dtdt_size) != 0) break; if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) { if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) return (EINVAL); size = opt[DTRACEOPT_STRSIZE]; } break; case DTRACEACT_STACK: if ((nframes = arg) == 0) { nframes = opt[DTRACEOPT_STACKFRAMES]; ASSERT(nframes > 0); arg = nframes; } size = nframes * sizeof (pc_t); break; case DTRACEACT_JSTACK: if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0) strsize = opt[DTRACEOPT_JSTACKSTRSIZE]; if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) nframes = opt[DTRACEOPT_JSTACKFRAMES]; arg = DTRACE_USTACK_ARG(nframes, strsize); /*FALLTHROUGH*/ case DTRACEACT_USTACK: if (desc->dtad_kind != DTRACEACT_JSTACK && (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) { strsize = DTRACE_USTACK_STRSIZE(arg); nframes = opt[DTRACEOPT_USTACKFRAMES]; ASSERT(nframes > 0); arg = DTRACE_USTACK_ARG(nframes, strsize); } /* * Save a slot for the pid. */ size = (nframes + 1) * sizeof (uint64_t); size += DTRACE_USTACK_STRSIZE(arg); size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t))); break; case DTRACEACT_SYM: case DTRACEACT_MOD: if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) != sizeof (uint64_t)) || (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) return (EINVAL); break; case DTRACEACT_USYM: case DTRACEACT_UMOD: case DTRACEACT_UADDR: if (dp == NULL || (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) || (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) return (EINVAL); /* * We have a slot for the pid, plus a slot for the * argument. To keep things simple (aligned with * bitness-neutral sizing), we store each as a 64-bit * quantity. */ size = 2 * sizeof (uint64_t); break; case DTRACEACT_STOP: case DTRACEACT_BREAKPOINT: case DTRACEACT_PANIC: break; case DTRACEACT_CHILL: case DTRACEACT_DISCARD: case DTRACEACT_RAISE: if (dp == NULL) return (EINVAL); break; case DTRACEACT_EXIT: if (dp == NULL || (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) || (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) return (EINVAL); break; case DTRACEACT_SPECULATE: if (ecb->dte_size > sizeof (dtrace_rechdr_t)) return (EINVAL); if (dp == NULL) return (EINVAL); state->dts_speculates = 1; break; case DTRACEACT_PRINTM: size = dp->dtdo_rtype.dtdt_size; break; case DTRACEACT_COMMIT: { dtrace_action_t *act = ecb->dte_action; for (; act != NULL; act = act->dta_next) { if (act->dta_kind == DTRACEACT_COMMIT) return (EINVAL); } if (dp == NULL) return (EINVAL); break; } default: return (EINVAL); } if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) { /* * If this is a data-storing action or a speculate, * we must be sure that there isn't a commit on the * action chain. 
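 * (Equivalently: within one ECB, commit() must come after every
 * action that stores data; the symmetric check in the
 * DTRACEACT_COMMIT case above likewise rejects a second commit.)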
*/ dtrace_action_t *act = ecb->dte_action; for (; act != NULL; act = act->dta_next) { if (act->dta_kind == DTRACEACT_COMMIT) return (EINVAL); } } action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP); action->dta_rec.dtrd_size = size; } action->dta_refcnt = 1; rec = &action->dta_rec; size = rec->dtrd_size; for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) { if (!(size & mask)) { align = mask + 1; break; } } action->dta_kind = desc->dtad_kind; if ((action->dta_difo = dp) != NULL) dtrace_difo_hold(dp); rec->dtrd_action = action->dta_kind; rec->dtrd_arg = arg; rec->dtrd_uarg = desc->dtad_uarg; rec->dtrd_alignment = (uint16_t)align; rec->dtrd_format = format; if ((last = ecb->dte_action_last) != NULL) { ASSERT(ecb->dte_action != NULL); action->dta_prev = last; last->dta_next = action; } else { ASSERT(ecb->dte_action == NULL); ecb->dte_action = action; } ecb->dte_action_last = action; return (0); } static void dtrace_ecb_action_remove(dtrace_ecb_t *ecb) { dtrace_action_t *act = ecb->dte_action, *next; dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate; dtrace_difo_t *dp; uint16_t format; if (act != NULL && act->dta_refcnt > 1) { ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1); act->dta_refcnt--; } else { for (; act != NULL; act = next) { next = act->dta_next; ASSERT(next != NULL || act == ecb->dte_action_last); ASSERT(act->dta_refcnt == 1); if ((format = act->dta_rec.dtrd_format) != 0) dtrace_format_remove(ecb->dte_state, format); if ((dp = act->dta_difo) != NULL) dtrace_difo_release(dp, vstate); if (DTRACEACT_ISAGG(act->dta_kind)) { dtrace_ecb_aggregation_destroy(ecb, act); } else { kmem_free(act, sizeof (dtrace_action_t)); } } } ecb->dte_action = NULL; ecb->dte_action_last = NULL; ecb->dte_size = 0; } static void dtrace_ecb_disable(dtrace_ecb_t *ecb) { /* * We disable the ECB by removing it from its probe. */ dtrace_ecb_t *pecb, *prev = NULL; dtrace_probe_t *probe = ecb->dte_probe; ASSERT(MUTEX_HELD(&dtrace_lock)); if (probe == NULL) { /* * This is the NULL probe; there is nothing to disable. */ return; } for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) { if (pecb == ecb) break; prev = pecb; } ASSERT(pecb != NULL); if (prev == NULL) { probe->dtpr_ecb = ecb->dte_next; } else { prev->dte_next = ecb->dte_next; } if (ecb == probe->dtpr_ecb_last) { ASSERT(ecb->dte_next == NULL); probe->dtpr_ecb_last = prev; } /* * The ECB has been disconnected from the probe; now sync to assure * that all CPUs have seen the change before returning. */ dtrace_sync(); if (probe->dtpr_ecb == NULL) { /* * That was the last ECB on the probe; clear the predicate * cache ID for the probe, disable it and sync one more time * to assure that we'll never hit it again. */ dtrace_provider_t *prov = probe->dtpr_provider; ASSERT(ecb->dte_next == NULL); ASSERT(probe->dtpr_ecb_last == NULL); probe->dtpr_predcache = DTRACE_CACHEIDNONE; prov->dtpv_pops.dtps_disable(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); dtrace_sync(); } else { /* * There is at least one ECB remaining on the probe. If there * is _exactly_ one, set the probe's predicate cache ID to be * the predicate cache ID of the remaining ECB. 
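 * (In effect the predicate cache is a fast path: with a single
 * remaining ECB, dtrace_probe() can compare dtpr_predcache against
 * the firing thread's cached predicate id and skip the probe
 * entirely when the predicate already failed for that thread.)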
*/ ASSERT(probe->dtpr_ecb_last != NULL); ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE); if (probe->dtpr_ecb == probe->dtpr_ecb_last) { dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate; ASSERT(probe->dtpr_ecb->dte_next == NULL); if (p != NULL) probe->dtpr_predcache = p->dtp_cacheid; } ecb->dte_next = NULL; } } static void dtrace_ecb_destroy(dtrace_ecb_t *ecb) { dtrace_state_t *state = ecb->dte_state; dtrace_vstate_t *vstate = &state->dts_vstate; dtrace_predicate_t *pred; dtrace_epid_t epid = ecb->dte_epid; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(ecb->dte_next == NULL); ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb); if ((pred = ecb->dte_predicate) != NULL) dtrace_predicate_release(pred, vstate); dtrace_ecb_action_remove(ecb); ASSERT(state->dts_ecbs[epid - 1] == ecb); state->dts_ecbs[epid - 1] = NULL; kmem_free(ecb, sizeof (dtrace_ecb_t)); } static dtrace_ecb_t * dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe, dtrace_enabling_t *enab) { dtrace_ecb_t *ecb; dtrace_predicate_t *pred; dtrace_actdesc_t *act; dtrace_provider_t *prov; dtrace_ecbdesc_t *desc = enab->dten_current; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(state != NULL); ecb = dtrace_ecb_add(state, probe); ecb->dte_uarg = desc->dted_uarg; if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) { dtrace_predicate_hold(pred); ecb->dte_predicate = pred; } if (probe != NULL) { /* * If the provider shows more leg than the consumer is old * enough to see, we need to enable the appropriate implicit * predicate bits to prevent the ecb from activating at * revealing times. * * Providers specifying DTRACE_PRIV_USER at register time * are stating that they need the /proc-style privilege * model to be enforced, and this is what DTRACE_COND_OWNER * and DTRACE_COND_ZONEOWNER will then do at probe time. */ prov = probe->dtpr_provider; if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) && (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER)) ecb->dte_cond |= DTRACE_COND_OWNER; if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) && (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER)) ecb->dte_cond |= DTRACE_COND_ZONEOWNER; /* * If the provider shows us kernel innards and the user * is lacking sufficient privilege, enable the * DTRACE_COND_USERMODE implicit predicate. */ if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) && (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL)) ecb->dte_cond |= DTRACE_COND_USERMODE; } if (dtrace_ecb_create_cache != NULL) { /* * If we have a cached ecb, we'll use its action list instead * of creating our own (saving both time and space). 
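 * A sketch of the sharing that follows -- the cached chain is not
 * copied, only referenced:
 *
 *	act->dta_refcnt++;
 *	ecb->dte_action = act;
 *
 * This is why dtrace_ecb_action_remove() treats a chain with
 * dta_refcnt > 1 as shared and merely drops the reference.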
*/ dtrace_ecb_t *cached = dtrace_ecb_create_cache; dtrace_action_t *act = cached->dte_action; if (act != NULL) { ASSERT(act->dta_refcnt > 0); act->dta_refcnt++; ecb->dte_action = act; ecb->dte_action_last = cached->dte_action_last; ecb->dte_needed = cached->dte_needed; ecb->dte_size = cached->dte_size; ecb->dte_alignment = cached->dte_alignment; } return (ecb); } for (act = desc->dted_action; act != NULL; act = act->dtad_next) { if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) { dtrace_ecb_destroy(ecb); return (NULL); } } if ((enab->dten_error = dtrace_ecb_resize(ecb)) != 0) { dtrace_ecb_destroy(ecb); return (NULL); } return (dtrace_ecb_create_cache = ecb); } static int dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg) { dtrace_ecb_t *ecb; dtrace_enabling_t *enab = arg; dtrace_state_t *state = enab->dten_vstate->dtvs_state; ASSERT(state != NULL); if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) { /* * This probe was created in a generation for which this * enabling has previously created ECBs; we don't want to * enable it again, so just kick out. */ return (DTRACE_MATCH_NEXT); } if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL) return (DTRACE_MATCH_DONE); dtrace_ecb_enable(ecb); return (DTRACE_MATCH_NEXT); } static dtrace_ecb_t * dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id) { dtrace_ecb_t *ecb; ASSERT(MUTEX_HELD(&dtrace_lock)); if (id == 0 || id > state->dts_necbs) return (NULL); ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL); ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id); return (state->dts_ecbs[id - 1]); } static dtrace_aggregation_t * dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id) { dtrace_aggregation_t *agg; ASSERT(MUTEX_HELD(&dtrace_lock)); if (id == 0 || id > state->dts_naggregations) return (NULL); ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL); ASSERT((agg = state->dts_aggregations[id - 1]) == NULL || agg->dtag_id == id); return (state->dts_aggregations[id - 1]); } /* * DTrace Buffer Functions * * The following functions manipulate DTrace buffers. Most of these functions * are called in the context of establishing or processing consumer state; * exceptions are explicitly noted. */ /* * Note: called from cross call context. This function switches the two * buffers on a given CPU. The atomicity of this operation is assured by * disabling interrupts while the actual switch takes place; the disabling of * interrupts serializes the execution with any execution of dtrace_probe() on * the same CPU. */ static void dtrace_buffer_switch(dtrace_buffer_t *buf) { caddr_t tomax = buf->dtb_tomax; caddr_t xamot = buf->dtb_xamot; dtrace_icookie_t cookie; hrtime_t now; ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); ASSERT(!(buf->dtb_flags & DTRACEBUF_RING)); cookie = dtrace_interrupt_disable(); now = dtrace_gethrtime(); buf->dtb_tomax = xamot; buf->dtb_xamot = tomax; buf->dtb_xamot_drops = buf->dtb_drops; buf->dtb_xamot_offset = buf->dtb_offset; buf->dtb_xamot_errors = buf->dtb_errors; buf->dtb_xamot_flags = buf->dtb_flags; buf->dtb_offset = 0; buf->dtb_drops = 0; buf->dtb_errors = 0; buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED); buf->dtb_interval = now - buf->dtb_switched; buf->dtb_switched = now; dtrace_interrupt_enable(cookie); } /* * Note: called from cross call context. This function activates a buffer * on a CPU. As with dtrace_buffer_switch(), the atomicity of the operation * is guaranteed by the disabling of interrupts. 
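 * As in dtrace_buffer_switch() above, the pattern is simply:
 *
 *	cookie = dtrace_interrupt_disable();
 *	... manipulate the per-CPU buffer ...
 *	dtrace_interrupt_enable(cookie);
 *
 * which serializes against any dtrace_probe() processing on the
 * same CPU.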
*/ static void dtrace_buffer_activate(dtrace_state_t *state) { dtrace_buffer_t *buf; dtrace_icookie_t cookie = dtrace_interrupt_disable(); buf = &state->dts_buffer[curcpu]; if (buf->dtb_tomax != NULL) { /* * We might like to assert that the buffer is marked inactive, * but this isn't necessarily true: the buffer for the CPU * that processes the BEGIN probe has its buffer activated * manually. In this case, we take the (harmless) action * re-clearing the bit INACTIVE bit. */ buf->dtb_flags &= ~DTRACEBUF_INACTIVE; } dtrace_interrupt_enable(cookie); } #ifdef __FreeBSD__ /* * Activate the specified per-CPU buffer. This is used instead of * dtrace_buffer_activate() when APs have not yet started, i.e. when * activating anonymous state. */ static void dtrace_buffer_activate_cpu(dtrace_state_t *state, int cpu) { if (state->dts_buffer[cpu].dtb_tomax != NULL) state->dts_buffer[cpu].dtb_flags &= ~DTRACEBUF_INACTIVE; } #endif static int dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, processorid_t cpu, int *factor) { #ifdef illumos cpu_t *cp; #endif dtrace_buffer_t *buf; int allocated = 0, desired = 0; #ifdef illumos ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); *factor = 1; if (size > dtrace_nonroot_maxsize && !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE)) return (EFBIG); cp = cpu_list; do { if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id) continue; buf = &bufs[cp->cpu_id]; /* * If there is already a buffer allocated for this CPU, it * is only possible that this is a DR event. In this case, */ if (buf->dtb_tomax != NULL) { ASSERT(buf->dtb_size == size); continue; } ASSERT(buf->dtb_xamot == NULL); if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) goto err; buf->dtb_size = size; buf->dtb_flags = flags; buf->dtb_offset = 0; buf->dtb_drops = 0; if (flags & DTRACEBUF_NOSWITCH) continue; if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) goto err; } while ((cp = cp->cpu_next) != cpu_list); return (0); err: cp = cpu_list; do { if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id) continue; buf = &bufs[cp->cpu_id]; desired += 2; if (buf->dtb_xamot != NULL) { ASSERT(buf->dtb_tomax != NULL); ASSERT(buf->dtb_size == size); kmem_free(buf->dtb_xamot, size); allocated++; } if (buf->dtb_tomax != NULL) { ASSERT(buf->dtb_size == size); kmem_free(buf->dtb_tomax, size); allocated++; } buf->dtb_tomax = NULL; buf->dtb_xamot = NULL; buf->dtb_size = 0; } while ((cp = cp->cpu_next) != cpu_list); #else int i; *factor = 1; #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \ defined(__mips__) || defined(__powerpc__) || defined(__riscv__) /* * FreeBSD isn't good at limiting the amount of memory we * ask to malloc, so let's place a limit here before trying * to do something that might well end in tears at bedtime. */ if (size > physmem * PAGE_SIZE / (128 * (mp_maxid + 1))) return (ENOMEM); #endif ASSERT(MUTEX_HELD(&dtrace_lock)); CPU_FOREACH(i) { if (cpu != DTRACE_CPUALL && cpu != i) continue; buf = &bufs[i]; /* * If there is already a buffer allocated for this CPU, it * is only possible that this is a DR event. In this case, * the buffer size must match our specified size. 
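 * ("DR" is dynamic reconfiguration -- that is, a CPU arriving after
 * the buffers were first allocated.)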
*/ if (buf->dtb_tomax != NULL) { ASSERT(buf->dtb_size == size); continue; } ASSERT(buf->dtb_xamot == NULL); if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) goto err; buf->dtb_size = size; buf->dtb_flags = flags; buf->dtb_offset = 0; buf->dtb_drops = 0; if (flags & DTRACEBUF_NOSWITCH) continue; if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) goto err; } return (0); err: /* * Error allocating memory, so free the buffers that were * allocated before the failed allocation. */ CPU_FOREACH(i) { if (cpu != DTRACE_CPUALL && cpu != i) continue; buf = &bufs[i]; desired += 2; if (buf->dtb_xamot != NULL) { ASSERT(buf->dtb_tomax != NULL); ASSERT(buf->dtb_size == size); kmem_free(buf->dtb_xamot, size); allocated++; } if (buf->dtb_tomax != NULL) { ASSERT(buf->dtb_size == size); kmem_free(buf->dtb_tomax, size); allocated++; } buf->dtb_tomax = NULL; buf->dtb_xamot = NULL; buf->dtb_size = 0; } #endif *factor = desired / (allocated > 0 ? allocated : 1); return (ENOMEM); } /* * Note: called from probe context. This function just increments the drop * count on a buffer. It has been made a function to allow for the * possibility of understanding the source of mysterious drop counts. (A * problem for which one may be particularly disappointed that DTrace cannot * be used to understand DTrace.) */ static void dtrace_buffer_drop(dtrace_buffer_t *buf) { buf->dtb_drops++; } /* * Note: called from probe context. This function is called to reserve space * in a buffer. If mstate is non-NULL, sets the scratch base and size in the * mstate. Returns the new offset in the buffer, or a negative value if an * error has occurred. */ static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, dtrace_state_t *state, dtrace_mstate_t *mstate) { intptr_t offs = buf->dtb_offset, soffs; intptr_t woffs; caddr_t tomax; size_t total; if (buf->dtb_flags & DTRACEBUF_INACTIVE) return (-1); if ((tomax = buf->dtb_tomax) == NULL) { dtrace_buffer_drop(buf); return (-1); } if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) { while (offs & (align - 1)) { /* * Assert that our alignment is off by a number which * is itself sizeof (uint32_t) aligned. */ ASSERT(!((align - (offs & (align - 1))) & (sizeof (uint32_t) - 1))); DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE); offs += sizeof (uint32_t); } if ((soffs = offs + needed) > buf->dtb_size) { dtrace_buffer_drop(buf); return (-1); } if (mstate == NULL) return (offs); mstate->dtms_scratch_base = (uintptr_t)tomax + soffs; mstate->dtms_scratch_size = buf->dtb_size - soffs; mstate->dtms_scratch_ptr = mstate->dtms_scratch_base; return (offs); } if (buf->dtb_flags & DTRACEBUF_FILL) { if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN && (buf->dtb_flags & DTRACEBUF_FULL)) return (-1); goto out; } total = needed + (offs & (align - 1)); /* * For a ring buffer, life is quite a bit more complicated. Before * we can store any padding, we need to adjust our wrapping offset. * (If we've never before wrapped or we're not about to, no adjustment * is required.) */ if ((buf->dtb_flags & DTRACEBUF_WRAPPED) || offs + total > buf->dtb_size) { woffs = buf->dtb_xamot_offset; if (offs + total > buf->dtb_size) { /* * We can't fit in the end of the buffer. First, a * sanity check that we can fit in the buffer at all. */ if (total > buf->dtb_size) { dtrace_buffer_drop(buf); return (-1); } /* * We're going to be storing at the top of the buffer, * so now we need to deal with the wrapped offset. 
We * only reset our wrapped offset to 0 if it is * currently greater than the current offset. If it * is less than the current offset, it is because a * previous allocation induced a wrap -- but the * allocation didn't subsequently take the space due * to an error or false predicate evaluation. In this * case, we'll just leave the wrapped offset alone: if * the wrapped offset hasn't been advanced far enough * for this allocation, it will be adjusted in the * lower loop. */ if (buf->dtb_flags & DTRACEBUF_WRAPPED) { if (woffs >= offs) woffs = 0; } else { woffs = 0; } /* * Now we know that we're going to be storing to the * top of the buffer and that there is room for us * there. We need to clear the buffer from the current * offset to the end (there may be old gunk there). */ while (offs < buf->dtb_size) tomax[offs++] = 0; /* * We need to set our offset to zero. And because we * are wrapping, we need to set the bit indicating as * much. We can also adjust our needed space back * down to the space required by the ECB -- we know * that the top of the buffer is aligned. */ offs = 0; total = needed; buf->dtb_flags |= DTRACEBUF_WRAPPED; } else { /* * There is room for us in the buffer, so we simply * need to check the wrapped offset. */ if (woffs < offs) { /* * The wrapped offset is less than the offset. * This can happen if we allocated buffer space * that induced a wrap, but then we didn't * subsequently take the space due to an error * or false predicate evaluation. This is * okay; we know that _this_ allocation isn't * going to induce a wrap. We still can't * reset the wrapped offset to be zero, * however: the space may have been trashed in * the previous failed probe attempt. But at * least the wrapped offset doesn't need to * be adjusted at all... */ goto out; } } while (offs + total > woffs) { dtrace_epid_t epid = *(uint32_t *)(tomax + woffs); size_t size; if (epid == DTRACE_EPIDNONE) { size = sizeof (uint32_t); } else { ASSERT3U(epid, <=, state->dts_necbs); ASSERT(state->dts_ecbs[epid - 1] != NULL); size = state->dts_ecbs[epid - 1]->dte_size; } ASSERT(woffs + size <= buf->dtb_size); ASSERT(size != 0); if (woffs + size == buf->dtb_size) { /* * We've reached the end of the buffer; we want * to set the wrapped offset to 0 and break * out. However, if the offs is 0, then we're * in a strange edge-condition: the amount of * space that we want to reserve plus the size * of the record that we're overwriting is * greater than the size of the buffer. This * is problematic because if we reserve the * space but subsequently don't consume it (due * to a failed predicate or error) the wrapped * offset will be 0 -- yet the EPID at offset 0 * will not be committed. This situation is * relatively easy to deal with: if we're in * this case, the buffer is indistinguishable * from one that hasn't wrapped; we need only * finish the job by clearing the wrapped bit, * explicitly setting the offset to be 0, and * zero'ing out the old data in the buffer. */ if (offs == 0) { buf->dtb_flags &= ~DTRACEBUF_WRAPPED; buf->dtb_offset = 0; woffs = total; while (woffs < buf->dtb_size) tomax[woffs++] = 0; } woffs = 0; break; } woffs += size; } /* * We have a wrapped offset. It may be that the wrapped offset * has become zero -- that's okay. */ buf->dtb_xamot_offset = woffs; } out: /* * Now we can plow the buffer with any necessary padding. */ while (offs & (align - 1)) { /* * Assert that our alignment is off by a number which * is itself sizeof (uint32_t) aligned. 
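 * For example, with offs == 12 and align == 8 the gap is 4 bytes,
 * filled by a single DTRACE_EPIDNONE word that leaves offs == 16;
 * because record alignments are powers of two and offs advances in
 * four-byte units, the gap is always a multiple of sizeof (uint32_t).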
*/ ASSERT(!((align - (offs & (align - 1))) & (sizeof (uint32_t) - 1))); DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE); offs += sizeof (uint32_t); } if (buf->dtb_flags & DTRACEBUF_FILL) { if (offs + needed > buf->dtb_size - state->dts_reserve) { buf->dtb_flags |= DTRACEBUF_FULL; return (-1); } } if (mstate == NULL) return (offs); /* * For ring buffers and fill buffers, the scratch space is always * the inactive buffer. */ mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot; mstate->dtms_scratch_size = buf->dtb_size; mstate->dtms_scratch_ptr = mstate->dtms_scratch_base; return (offs); } static void dtrace_buffer_polish(dtrace_buffer_t *buf) { ASSERT(buf->dtb_flags & DTRACEBUF_RING); ASSERT(MUTEX_HELD(&dtrace_lock)); if (!(buf->dtb_flags & DTRACEBUF_WRAPPED)) return; /* * We need to polish the ring buffer. There are three cases: * * - The first (and presumably most common) is that there is no gap * between the buffer offset and the wrapped offset. In this case, * there is nothing in the buffer that isn't valid data; we can * mark the buffer as polished and return. * * - The second (less common than the first but still more common * than the third) is that there is a gap between the buffer offset * and the wrapped offset, and the wrapped offset is larger than the * buffer offset. This can happen because of an alignment issue, or * can happen because of a call to dtrace_buffer_reserve() that * didn't subsequently consume the buffer space. In this case, * we need to zero the data from the buffer offset to the wrapped * offset. * * - The third (and least common) is that there is a gap between the * buffer offset and the wrapped offset, but the wrapped offset is * _less_ than the buffer offset. This can only happen because a * call to dtrace_buffer_reserve() induced a wrap, but the space * was not subsequently consumed. In this case, we need to zero the * space from the offset to the end of the buffer _and_ from the * top of the buffer to the wrapped offset. */ if (buf->dtb_offset < buf->dtb_xamot_offset) { bzero(buf->dtb_tomax + buf->dtb_offset, buf->dtb_xamot_offset - buf->dtb_offset); } if (buf->dtb_offset > buf->dtb_xamot_offset) { bzero(buf->dtb_tomax + buf->dtb_offset, buf->dtb_size - buf->dtb_offset); bzero(buf->dtb_tomax, buf->dtb_xamot_offset); } } /* * This routine determines if data generated at the specified time has likely * been entirely consumed at user-level. This routine is called to determine * if an ECB on a defunct probe (but for an active enabling) can be safely * disabled and destroyed. 
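 * ("Likely" because the test below is conservative: ring buffers are
 * never reported consumed, a never-switched buffer with pending data
 * is not consumed, and otherwise the buffer must have been switched
 * twice since the time in question.)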
*/ static int dtrace_buffer_consumed(dtrace_buffer_t *bufs, hrtime_t when) { int i; for (i = 0; i < NCPU; i++) { dtrace_buffer_t *buf = &bufs[i]; if (buf->dtb_size == 0) continue; if (buf->dtb_flags & DTRACEBUF_RING) return (0); if (!buf->dtb_switched && buf->dtb_offset != 0) return (0); if (buf->dtb_switched - buf->dtb_interval < when) return (0); } return (1); } static void dtrace_buffer_free(dtrace_buffer_t *bufs) { int i; for (i = 0; i < NCPU; i++) { dtrace_buffer_t *buf = &bufs[i]; if (buf->dtb_tomax == NULL) { ASSERT(buf->dtb_xamot == NULL); ASSERT(buf->dtb_size == 0); continue; } if (buf->dtb_xamot != NULL) { ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); kmem_free(buf->dtb_xamot, buf->dtb_size); } kmem_free(buf->dtb_tomax, buf->dtb_size); buf->dtb_size = 0; buf->dtb_tomax = NULL; buf->dtb_xamot = NULL; } } /* * DTrace Enabling Functions */ static dtrace_enabling_t * dtrace_enabling_create(dtrace_vstate_t *vstate) { dtrace_enabling_t *enab; enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP); enab->dten_vstate = vstate; return (enab); } static void dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb) { dtrace_ecbdesc_t **ndesc; size_t osize, nsize; /* * We can't add to enablings after we've enabled them, or after we've * retained them. */ ASSERT(enab->dten_probegen == 0); ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); if (enab->dten_ndesc < enab->dten_maxdesc) { enab->dten_desc[enab->dten_ndesc++] = ecb; return; } osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *); if (enab->dten_maxdesc == 0) { enab->dten_maxdesc = 1; } else { enab->dten_maxdesc <<= 1; } ASSERT(enab->dten_ndesc < enab->dten_maxdesc); nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *); ndesc = kmem_zalloc(nsize, KM_SLEEP); bcopy(enab->dten_desc, ndesc, osize); if (enab->dten_desc != NULL) kmem_free(enab->dten_desc, osize); enab->dten_desc = ndesc; enab->dten_desc[enab->dten_ndesc++] = ecb; } static void dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb, dtrace_probedesc_t *pd) { dtrace_ecbdesc_t *new; dtrace_predicate_t *pred; dtrace_actdesc_t *act; /* * We're going to create a new ECB description that matches the * specified ECB in every way, but has the specified probe description. 
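 * (Matching "in every way" is cheap: the predicate and the action
 * descriptions are shared by taking holds on them rather than by
 * copying them.)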
*/ new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP); if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL) dtrace_predicate_hold(pred); for (act = ecb->dted_action; act != NULL; act = act->dtad_next) dtrace_actdesc_hold(act); new->dted_action = ecb->dted_action; new->dted_pred = ecb->dted_pred; new->dted_probe = *pd; new->dted_uarg = ecb->dted_uarg; dtrace_enabling_add(enab, new); } static void dtrace_enabling_dump(dtrace_enabling_t *enab) { int i; for (i = 0; i < enab->dten_ndesc; i++) { dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe; #ifdef __FreeBSD__ printf("dtrace: enabling probe %d (%s:%s:%s:%s)\n", i, desc->dtpd_provider, desc->dtpd_mod, desc->dtpd_func, desc->dtpd_name); #else cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i, desc->dtpd_provider, desc->dtpd_mod, desc->dtpd_func, desc->dtpd_name); #endif } } static void dtrace_enabling_destroy(dtrace_enabling_t *enab) { int i; dtrace_ecbdesc_t *ep; dtrace_vstate_t *vstate = enab->dten_vstate; ASSERT(MUTEX_HELD(&dtrace_lock)); for (i = 0; i < enab->dten_ndesc; i++) { dtrace_actdesc_t *act, *next; dtrace_predicate_t *pred; ep = enab->dten_desc[i]; if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) dtrace_predicate_release(pred, vstate); for (act = ep->dted_action; act != NULL; act = next) { next = act->dtad_next; dtrace_actdesc_release(act, vstate); } kmem_free(ep, sizeof (dtrace_ecbdesc_t)); } if (enab->dten_desc != NULL) kmem_free(enab->dten_desc, enab->dten_maxdesc * sizeof (dtrace_enabling_t *)); /* * If this was a retained enabling, decrement the dts_nretained count * and take it off of the dtrace_retained list. */ if (enab->dten_prev != NULL || enab->dten_next != NULL || dtrace_retained == enab) { ASSERT(enab->dten_vstate->dtvs_state != NULL); ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0); enab->dten_vstate->dtvs_state->dts_nretained--; dtrace_retained_gen++; } if (enab->dten_prev == NULL) { if (dtrace_retained == enab) { dtrace_retained = enab->dten_next; if (dtrace_retained != NULL) dtrace_retained->dten_prev = NULL; } } else { ASSERT(enab != dtrace_retained); ASSERT(dtrace_retained != NULL); enab->dten_prev->dten_next = enab->dten_next; } if (enab->dten_next != NULL) { ASSERT(dtrace_retained != NULL); enab->dten_next->dten_prev = enab->dten_prev; } kmem_free(enab, sizeof (dtrace_enabling_t)); } static int dtrace_enabling_retain(dtrace_enabling_t *enab) { dtrace_state_t *state; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); ASSERT(enab->dten_vstate != NULL); state = enab->dten_vstate->dtvs_state; ASSERT(state != NULL); /* * We only allow each state to retain dtrace_retain_max enablings. */ if (state->dts_nretained >= dtrace_retain_max) return (ENOSPC); state->dts_nretained++; dtrace_retained_gen++; if (dtrace_retained == NULL) { dtrace_retained = enab; return (0); } enab->dten_next = dtrace_retained; dtrace_retained->dten_prev = enab; dtrace_retained = enab; return (0); } static int dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match, dtrace_probedesc_t *create) { dtrace_enabling_t *new, *enab; int found = 0, err = ENOENT; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN); ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN); ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN); ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN); new = dtrace_enabling_create(&state->dts_vstate); /* * Iterate over all retained enablings, looking for enablings that * match the specified state. 
*/ for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { int i; /* * dtvs_state can only be NULL for helper enablings -- and * helper enablings can't be retained. */ ASSERT(enab->dten_vstate->dtvs_state != NULL); if (enab->dten_vstate->dtvs_state != state) continue; /* * Now iterate over each probe description; we're looking for * an exact match to the specified probe description. */ for (i = 0; i < enab->dten_ndesc; i++) { dtrace_ecbdesc_t *ep = enab->dten_desc[i]; dtrace_probedesc_t *pd = &ep->dted_probe; if (strcmp(pd->dtpd_provider, match->dtpd_provider)) continue; if (strcmp(pd->dtpd_mod, match->dtpd_mod)) continue; if (strcmp(pd->dtpd_func, match->dtpd_func)) continue; if (strcmp(pd->dtpd_name, match->dtpd_name)) continue; /* * We have a winning probe! Add it to our growing * enabling. */ found = 1; dtrace_enabling_addlike(new, ep, create); } } if (!found || (err = dtrace_enabling_retain(new)) != 0) { dtrace_enabling_destroy(new); return (err); } return (0); } static void dtrace_enabling_retract(dtrace_state_t *state) { dtrace_enabling_t *enab, *next; ASSERT(MUTEX_HELD(&dtrace_lock)); /* * Iterate over all retained enablings, destroy the enablings retained * for the specified state. */ for (enab = dtrace_retained; enab != NULL; enab = next) { next = enab->dten_next; /* * dtvs_state can only be NULL for helper enablings -- and * helper enablings can't be retained. */ ASSERT(enab->dten_vstate->dtvs_state != NULL); if (enab->dten_vstate->dtvs_state == state) { ASSERT(state->dts_nretained > 0); dtrace_enabling_destroy(enab); } } ASSERT(state->dts_nretained == 0); } static int dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) { int i = 0; int matched = 0; ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(MUTEX_HELD(&dtrace_lock)); for (i = 0; i < enab->dten_ndesc; i++) { dtrace_ecbdesc_t *ep = enab->dten_desc[i]; enab->dten_current = ep; enab->dten_error = 0; matched += dtrace_probe_enable(&ep->dted_probe, enab); if (enab->dten_error != 0) { /* * If we get an error half-way through enabling the * probes, we kick out -- perhaps with some number of * them enabled. Leaving enabled probes enabled may * be slightly confusing for user-level, but we expect * that no one will attempt to actually drive on in * the face of such errors. If this is an anonymous * enabling (indicated with a NULL nmatched pointer), * we cmn_err() a message. We aren't expecting to * get such an error -- such as it can exist at all, * it would be a result of corrupted DOF in the driver * properties. */ if (nmatched == NULL) { cmn_err(CE_WARN, "dtrace_enabling_match() " "error on %p: %d", (void *)ep, enab->dten_error); } return (enab->dten_error); } } enab->dten_probegen = dtrace_probegen; if (nmatched != NULL) *nmatched = matched; return (0); } static void dtrace_enabling_matchall(void) { dtrace_enabling_t *enab; mutex_enter(&cpu_lock); mutex_enter(&dtrace_lock); /* * Iterate over all retained enablings to see if any probes match * against them. We only perform this operation on enablings for which * we have sufficient permissions by virtue of being in the global zone * or in the same zone as the DTrace client. Because we can be called * after dtrace_detach() has been called, we cannot assert that there * are retained enablings. We can safely load from dtrace_retained, * however: the taskq_destroy() at the end of dtrace_detach() will * block pending our completion. 
*/ for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { #ifdef illumos cred_t *cr = enab->dten_vstate->dtvs_state->dts_cred.dcr_cred; if (INGLOBALZONE(curproc) || cr != NULL && getzoneid() == crgetzoneid(cr)) #endif (void) dtrace_enabling_match(enab, NULL); } mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); } /* * If an enabling is to be enabled without having matched probes (that is, if * dtrace_state_go() is to be called on the underlying dtrace_state_t), the * enabling must be _primed_ by creating an ECB for every ECB description. * This must be done to assure that we know the number of speculations, the * number of aggregations, the minimum buffer size needed, etc. before we * transition out of DTRACE_ACTIVITY_INACTIVE. To do this without actually * enabling any probes, we create ECBs for every ECB decription, but with a * NULL probe -- which is exactly what this function does. */ static void dtrace_enabling_prime(dtrace_state_t *state) { dtrace_enabling_t *enab; int i; for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { ASSERT(enab->dten_vstate->dtvs_state != NULL); if (enab->dten_vstate->dtvs_state != state) continue; /* * We don't want to prime an enabling more than once, lest * we allow a malicious user to induce resource exhaustion. * (The ECBs that result from priming an enabling aren't * leaked -- but they also aren't deallocated until the * consumer state is destroyed.) */ if (enab->dten_primed) continue; for (i = 0; i < enab->dten_ndesc; i++) { enab->dten_current = enab->dten_desc[i]; (void) dtrace_probe_enable(NULL, enab); } enab->dten_primed = 1; } } /* * Called to indicate that probes should be provided due to retained * enablings. This is implemented in terms of dtrace_probe_provide(), but it * must take an initial lap through the enabling calling the dtps_provide() * entry point explicitly to allow for autocreated probes. */ static void dtrace_enabling_provide(dtrace_provider_t *prv) { int i, all = 0; dtrace_probedesc_t desc; dtrace_genid_t gen; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&dtrace_provider_lock)); if (prv == NULL) { all = 1; prv = dtrace_provider; } do { dtrace_enabling_t *enab; void *parg = prv->dtpv_arg; retry: gen = dtrace_retained_gen; for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { for (i = 0; i < enab->dten_ndesc; i++) { desc = enab->dten_desc[i]->dted_probe; mutex_exit(&dtrace_lock); prv->dtpv_pops.dtps_provide(parg, &desc); mutex_enter(&dtrace_lock); /* * Process the retained enablings again if * they have changed while we weren't holding * dtrace_lock. */ if (gen != dtrace_retained_gen) goto retry; } } } while (all && (prv = prv->dtpv_next) != NULL); mutex_exit(&dtrace_lock); dtrace_probe_provide(NULL, all ? NULL : prv); mutex_enter(&dtrace_lock); } /* * Called to reap ECBs that are attached to probes from defunct providers. */ static void dtrace_enabling_reap(void) { dtrace_provider_t *prov; dtrace_probe_t *probe; dtrace_ecb_t *ecb; hrtime_t when; int i; mutex_enter(&cpu_lock); mutex_enter(&dtrace_lock); for (i = 0; i < dtrace_nprobes; i++) { if ((probe = dtrace_probes[i]) == NULL) continue; if (probe->dtpr_ecb == NULL) continue; prov = probe->dtpr_provider; if ((when = prov->dtpv_defunct) == 0) continue; /* * We have ECBs on a defunct provider: we want to reap these * ECBs to allow the provider to unregister. 
The destruction * of these ECBs must be done carefully: if we destroy the ECB * and the consumer later wishes to consume an EPID that * corresponds to the destroyed ECB (and if the EPID metadata * has not been previously consumed), the consumer will abort * processing on the unknown EPID. To reduce (but not, sadly, * eliminate) the possibility of this, we will only destroy an * ECB for a defunct provider if, for the state that * corresponds to the ECB: * * (a) There is no speculative tracing (which can effectively * cache an EPID for an arbitrary amount of time). * * (b) The principal buffers have been switched twice since the * provider became defunct. * * (c) The aggregation buffers are of zero size or have been * switched twice since the provider became defunct. * * We use dts_speculates to determine (a) and call a function * (dtrace_buffer_consumed()) to determine (b) and (c). Note * that as soon as we've been unable to destroy one of the ECBs * associated with the probe, we quit trying -- reaping is only * fruitful in as much as we can destroy all ECBs associated * with the defunct provider's probes. */ while ((ecb = probe->dtpr_ecb) != NULL) { dtrace_state_t *state = ecb->dte_state; dtrace_buffer_t *buf = state->dts_buffer; dtrace_buffer_t *aggbuf = state->dts_aggbuffer; if (state->dts_speculates) break; if (!dtrace_buffer_consumed(buf, when)) break; if (!dtrace_buffer_consumed(aggbuf, when)) break; dtrace_ecb_disable(ecb); ASSERT(probe->dtpr_ecb != ecb); dtrace_ecb_destroy(ecb); } } mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); } /* * DTrace DOF Functions */ /*ARGSUSED*/ static void dtrace_dof_error(dof_hdr_t *dof, const char *str) { if (dtrace_err_verbose) cmn_err(CE_WARN, "failed to process DOF: %s", str); #ifdef DTRACE_ERRDEBUG dtrace_errdebug(str); #endif } /* * Create DOF out of a currently enabled state. Right now, we only create * DOF containing the run-time options -- but this could be expanded to create * complete DOF representing the enabled state. */ static dof_hdr_t * dtrace_dof_create(dtrace_state_t *state) { dof_hdr_t *dof; dof_sec_t *sec; dof_optdesc_t *opt; int i, len = sizeof (dof_hdr_t) + roundup(sizeof (dof_sec_t), sizeof (uint64_t)) + sizeof (dof_optdesc_t) * DTRACEOPT_MAX; ASSERT(MUTEX_HELD(&dtrace_lock)); dof = kmem_zalloc(len, KM_SLEEP); dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0; dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1; dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2; dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3; dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE; dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE; dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION; dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION; dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS; dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS; dof->dofh_flags = 0; dof->dofh_hdrsize = sizeof (dof_hdr_t); dof->dofh_secsize = sizeof (dof_sec_t); dof->dofh_secnum = 1; /* only DOF_SECT_OPTDESC */ dof->dofh_secoff = sizeof (dof_hdr_t); dof->dofh_loadsz = len; dof->dofh_filesz = len; dof->dofh_pad = 0; /* * Fill in the option section header... 
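 * The generated DOF is laid out exactly as sized above:
 *
 *	+-----------+----------------------+--------------------------+
 *	| dof_hdr_t | dof_sec_t (OPTDESC), | DTRACEOPT_MAX            |
 *	|           | padded to 8 bytes    | dof_optdesc_t entries    |
 *	+-----------+----------------------+--------------------------+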
*/ sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t)); sec->dofs_type = DOF_SECT_OPTDESC; sec->dofs_align = sizeof (uint64_t); sec->dofs_flags = DOF_SECF_LOAD; sec->dofs_entsize = sizeof (dof_optdesc_t); opt = (dof_optdesc_t *)((uintptr_t)sec + roundup(sizeof (dof_sec_t), sizeof (uint64_t))); sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof; sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX; for (i = 0; i < DTRACEOPT_MAX; i++) { opt[i].dofo_option = i; opt[i].dofo_strtab = DOF_SECIDX_NONE; opt[i].dofo_value = state->dts_options[i]; } return (dof); } static dof_hdr_t * dtrace_dof_copyin(uintptr_t uarg, int *errp) { dof_hdr_t hdr, *dof; ASSERT(!MUTEX_HELD(&dtrace_lock)); /* * First, we're going to copyin() the sizeof (dof_hdr_t). */ if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) { dtrace_dof_error(NULL, "failed to copyin DOF header"); *errp = EFAULT; return (NULL); } /* * Now we'll allocate the entire DOF and copy it in -- provided * that the length isn't outrageous. */ if (hdr.dofh_loadsz >= dtrace_dof_maxsize) { dtrace_dof_error(&hdr, "load size exceeds maximum"); *errp = E2BIG; return (NULL); } if (hdr.dofh_loadsz < sizeof (hdr)) { dtrace_dof_error(&hdr, "invalid load size"); *errp = EINVAL; return (NULL); } dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP); if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 || dof->dofh_loadsz != hdr.dofh_loadsz) { kmem_free(dof, hdr.dofh_loadsz); *errp = EFAULT; return (NULL); } return (dof); } #ifdef __FreeBSD__ static dof_hdr_t * dtrace_dof_copyin_proc(struct proc *p, uintptr_t uarg, int *errp) { dof_hdr_t hdr, *dof; struct thread *td; size_t loadsz; ASSERT(!MUTEX_HELD(&dtrace_lock)); td = curthread; /* * First, we're going to copyin() the sizeof (dof_hdr_t). */ if (proc_readmem(td, p, uarg, &hdr, sizeof(hdr)) != sizeof(hdr)) { dtrace_dof_error(NULL, "failed to copyin DOF header"); *errp = EFAULT; return (NULL); } /* * Now we'll allocate the entire DOF and copy it in -- provided * that the length isn't outrageous. */ if (hdr.dofh_loadsz >= dtrace_dof_maxsize) { dtrace_dof_error(&hdr, "load size exceeds maximum"); *errp = E2BIG; return (NULL); } loadsz = (size_t)hdr.dofh_loadsz; if (loadsz < sizeof (hdr)) { dtrace_dof_error(&hdr, "invalid load size"); *errp = EINVAL; return (NULL); } dof = kmem_alloc(loadsz, KM_SLEEP); if (proc_readmem(td, p, uarg, dof, loadsz) != loadsz || dof->dofh_loadsz != loadsz) { kmem_free(dof, hdr.dofh_loadsz); *errp = EFAULT; return (NULL); } return (dof); } static __inline uchar_t dtrace_dof_char(char c) { switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return (c - '0'); case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': return (c - 'A' + 10); case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': return (c - 'a' + 10); } /* Should not reach here. */ return (UCHAR_MAX); } #endif /* __FreeBSD__ */ static dof_hdr_t * dtrace_dof_property(const char *name) { #ifdef __FreeBSD__ uint8_t *dofbuf; u_char *data, *eol; caddr_t doffile; size_t bytes, len, i; dof_hdr_t *dof; u_char c1, c2; dof = NULL; doffile = preload_search_by_type("dtrace_dof"); if (doffile == NULL) return (NULL); data = preload_fetch_addr(doffile); len = preload_fetch_size(doffile); for (;;) { /* Look for the end of the line. All lines end in a newline. 
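 * Each line of the preloaded file has the form "name=hexbytes", with
 * two ASCII hex digits per byte of DOF; dtrace_dof_char() above maps
 * each digit, so e.g. "4a" decodes as c1 * 16 + c2 == 0x4a.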
*/ eol = memchr(data, '\n', len); if (eol == NULL) return (NULL); if (strncmp(name, data, strlen(name)) == 0) break; eol++; /* skip past the newline */ len -= eol - data; data = eol; } /* We've found the data corresponding to the specified key. */ data += strlen(name) + 1; /* skip past the '=' */ len = eol - data; bytes = len / 2; if (bytes < sizeof(dof_hdr_t)) { dtrace_dof_error(NULL, "truncated header"); goto doferr; } /* * Each byte is represented by the two ASCII characters in its hex * representation. */ dofbuf = malloc(bytes, M_SOLARIS, M_WAITOK); for (i = 0; i < bytes; i++) { c1 = dtrace_dof_char(data[i * 2]); c2 = dtrace_dof_char(data[i * 2 + 1]); if (c1 == UCHAR_MAX || c2 == UCHAR_MAX) { dtrace_dof_error(NULL, "invalid hex char in DOF"); goto doferr; } dofbuf[i] = c1 * 16 + c2; } dof = (dof_hdr_t *)dofbuf; if (bytes < dof->dofh_loadsz) { dtrace_dof_error(NULL, "truncated DOF"); goto doferr; } if (dof->dofh_loadsz >= dtrace_dof_maxsize) { dtrace_dof_error(NULL, "oversized DOF"); goto doferr; } return (dof); doferr: free(dof, M_SOLARIS); return (NULL); #else /* __FreeBSD__ */ uchar_t *buf; uint64_t loadsz; unsigned int len, i; dof_hdr_t *dof; /* * Unfortunately, array of values in .conf files are always (and * only) interpreted to be integer arrays. We must read our DOF * as an integer array, and then squeeze it into a byte array. */ if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0, (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS) return (NULL); for (i = 0; i < len; i++) buf[i] = (uchar_t)(((int *)buf)[i]); if (len < sizeof (dof_hdr_t)) { ddi_prop_free(buf); dtrace_dof_error(NULL, "truncated header"); return (NULL); } if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) { ddi_prop_free(buf); dtrace_dof_error(NULL, "truncated DOF"); return (NULL); } if (loadsz >= dtrace_dof_maxsize) { ddi_prop_free(buf); dtrace_dof_error(NULL, "oversized DOF"); return (NULL); } dof = kmem_alloc(loadsz, KM_SLEEP); bcopy(buf, dof, loadsz); ddi_prop_free(buf); return (dof); #endif /* !__FreeBSD__ */ } static void dtrace_dof_destroy(dof_hdr_t *dof) { kmem_free(dof, dof->dofh_loadsz); } /* * Return the dof_sec_t pointer corresponding to a given section index. If the * index is not valid, dtrace_dof_error() is called and NULL is returned. If * a type other than DOF_SECT_NONE is specified, the header is checked against * this type and NULL is returned if the types do not match. 
*/ static dof_sec_t * dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i) { dof_sec_t *sec = (dof_sec_t *)(uintptr_t) ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize); if (i >= dof->dofh_secnum) { dtrace_dof_error(dof, "referenced section index is invalid"); return (NULL); } if (!(sec->dofs_flags & DOF_SECF_LOAD)) { dtrace_dof_error(dof, "referenced section is not loadable"); return (NULL); } if (type != DOF_SECT_NONE && type != sec->dofs_type) { dtrace_dof_error(dof, "referenced section is the wrong type"); return (NULL); } return (sec); } static dtrace_probedesc_t * dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) { dof_probedesc_t *probe; dof_sec_t *strtab; uintptr_t daddr = (uintptr_t)dof; uintptr_t str; size_t size; if (sec->dofs_type != DOF_SECT_PROBEDESC) { dtrace_dof_error(dof, "invalid probe section"); return (NULL); } if (sec->dofs_align != sizeof (dof_secidx_t)) { dtrace_dof_error(dof, "bad alignment in probe description"); return (NULL); } if (sec->dofs_offset + sizeof (dof_probedesc_t) > dof->dofh_loadsz) { dtrace_dof_error(dof, "truncated probe description"); return (NULL); } probe = (dof_probedesc_t *)(uintptr_t)(daddr + sec->dofs_offset); strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, probe->dofp_strtab); if (strtab == NULL) return (NULL); str = daddr + strtab->dofs_offset; size = strtab->dofs_size; if (probe->dofp_provider >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe provider"); return (NULL); } (void) strncpy(desc->dtpd_provider, (char *)(str + probe->dofp_provider), MIN(DTRACE_PROVNAMELEN - 1, size - probe->dofp_provider)); if (probe->dofp_mod >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe module"); return (NULL); } (void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod), MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod)); if (probe->dofp_func >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe function"); return (NULL); } (void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func), MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func)); if (probe->dofp_name >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe name"); return (NULL); } (void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name), MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name)); return (desc); } static dtrace_difo_t * dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, cred_t *cr) { dtrace_difo_t *dp; size_t ttl = 0; dof_difohdr_t *dofd; uintptr_t daddr = (uintptr_t)dof; size_t max = dtrace_difo_maxsize; int i, l, n; static const struct { int section; int bufoffs; int lenoffs; int entsize; int align; const char *msg; } difo[] = { { DOF_SECT_DIF, offsetof(dtrace_difo_t, dtdo_buf), offsetof(dtrace_difo_t, dtdo_len), sizeof (dif_instr_t), sizeof (dif_instr_t), "multiple DIF sections" }, { DOF_SECT_INTTAB, offsetof(dtrace_difo_t, dtdo_inttab), offsetof(dtrace_difo_t, dtdo_intlen), sizeof (uint64_t), sizeof (uint64_t), "multiple integer tables" }, { DOF_SECT_STRTAB, offsetof(dtrace_difo_t, dtdo_strtab), offsetof(dtrace_difo_t, dtdo_strlen), 0, sizeof (char), "multiple string tables" }, { DOF_SECT_VARTAB, offsetof(dtrace_difo_t, dtdo_vartab), offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t), sizeof (uint_t), "multiple variable tables" }, { DOF_SECT_NONE, 0, 0, 0, 0, NULL } }; if (sec->dofs_type != DOF_SECT_DIFOHDR) { dtrace_dof_error(dof, "invalid DIFO header section"); return (NULL); } if (sec->dofs_align != sizeof (dof_secidx_t)) { dtrace_dof_error(dof, "bad 
alignment in DIFO header"); return (NULL); } if (sec->dofs_size < sizeof (dof_difohdr_t) || sec->dofs_size % sizeof (dof_secidx_t)) { dtrace_dof_error(dof, "bad size in DIFO header"); return (NULL); } dofd = (dof_difohdr_t *)(uintptr_t)(daddr + sec->dofs_offset); n = (sec->dofs_size - sizeof (*dofd)) / sizeof (dof_secidx_t) + 1; dp = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP); dp->dtdo_rtype = dofd->dofd_rtype; for (l = 0; l < n; l++) { dof_sec_t *subsec; void **bufp; uint32_t *lenp; if ((subsec = dtrace_dof_sect(dof, DOF_SECT_NONE, dofd->dofd_links[l])) == NULL) goto err; /* invalid section link */ if (ttl + subsec->dofs_size > max) { dtrace_dof_error(dof, "exceeds maximum size"); goto err; } ttl += subsec->dofs_size; for (i = 0; difo[i].section != DOF_SECT_NONE; i++) { if (subsec->dofs_type != difo[i].section) continue; if (!(subsec->dofs_flags & DOF_SECF_LOAD)) { dtrace_dof_error(dof, "section not loaded"); goto err; } if (subsec->dofs_align != difo[i].align) { dtrace_dof_error(dof, "bad alignment"); goto err; } bufp = (void **)((uintptr_t)dp + difo[i].bufoffs); lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs); if (*bufp != NULL) { dtrace_dof_error(dof, difo[i].msg); goto err; } if (difo[i].entsize != subsec->dofs_entsize) { dtrace_dof_error(dof, "entry size mismatch"); goto err; } if (subsec->dofs_entsize != 0 && (subsec->dofs_size % subsec->dofs_entsize) != 0) { dtrace_dof_error(dof, "corrupt entry size"); goto err; } *lenp = subsec->dofs_size; *bufp = kmem_alloc(subsec->dofs_size, KM_SLEEP); bcopy((char *)(uintptr_t)(daddr + subsec->dofs_offset), *bufp, subsec->dofs_size); if (subsec->dofs_entsize != 0) *lenp /= subsec->dofs_entsize; break; } /* * If we encounter a loadable DIFO sub-section that is not * known to us, assume this is a broken program and fail. */ if (difo[i].section == DOF_SECT_NONE && (subsec->dofs_flags & DOF_SECF_LOAD)) { dtrace_dof_error(dof, "unrecognized DIFO subsection"); goto err; } } if (dp->dtdo_buf == NULL) { /* * We can't have a DIF object without DIF text. */ dtrace_dof_error(dof, "missing DIF text"); goto err; } /* * Before we validate the DIF object, run through the variable table * looking for the strings -- if any of their size are under, we'll set * their size to be the system-wide default string size. Note that * this should _not_ happen if the "strsize" option has been set -- * in this case, the compiler should have set the size to reflect the * setting of the option. 
*/ for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; dtrace_diftype_t *t = &v->dtdv_type; if (v->dtdv_id < DIF_VAR_OTHER_UBASE) continue; if (t->dtdt_kind == DIF_TYPE_STRING && t->dtdt_size == 0) t->dtdt_size = dtrace_strsize_default; } if (dtrace_difo_validate(dp, vstate, DIF_DIR_NREGS, cr) != 0) goto err; dtrace_difo_init(dp, vstate); return (dp); err: kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t)); kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t)); kmem_free(dp->dtdo_strtab, dp->dtdo_strlen); kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t)); kmem_free(dp, sizeof (dtrace_difo_t)); return (NULL); } static dtrace_predicate_t * dtrace_dof_predicate(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, cred_t *cr) { dtrace_difo_t *dp; if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL) return (NULL); return (dtrace_predicate_create(dp)); } static dtrace_actdesc_t * dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, cred_t *cr) { dtrace_actdesc_t *act, *first = NULL, *last = NULL, *next; dof_actdesc_t *desc; dof_sec_t *difosec; size_t offs; uintptr_t daddr = (uintptr_t)dof; uint64_t arg; dtrace_actkind_t kind; if (sec->dofs_type != DOF_SECT_ACTDESC) { dtrace_dof_error(dof, "invalid action section"); return (NULL); } if (sec->dofs_offset + sizeof (dof_actdesc_t) > dof->dofh_loadsz) { dtrace_dof_error(dof, "truncated action description"); return (NULL); } if (sec->dofs_align != sizeof (uint64_t)) { dtrace_dof_error(dof, "bad alignment in action description"); return (NULL); } if (sec->dofs_size < sec->dofs_entsize) { dtrace_dof_error(dof, "section entry size exceeds total size"); return (NULL); } if (sec->dofs_entsize != sizeof (dof_actdesc_t)) { dtrace_dof_error(dof, "bad entry size in action description"); return (NULL); } if (sec->dofs_size / sec->dofs_entsize > dtrace_actions_max) { dtrace_dof_error(dof, "actions exceed dtrace_actions_max"); return (NULL); } for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) { desc = (dof_actdesc_t *)(daddr + (uintptr_t)sec->dofs_offset + offs); kind = (dtrace_actkind_t)desc->dofa_kind; if ((DTRACEACT_ISPRINTFLIKE(kind) && (kind != DTRACEACT_PRINTA || desc->dofa_strtab != DOF_SECIDX_NONE)) || (kind == DTRACEACT_DIFEXPR && desc->dofa_strtab != DOF_SECIDX_NONE)) { dof_sec_t *strtab; char *str, *fmt; uint64_t i; /* * The argument to these actions is an index into the * DOF string table. For printf()-like actions, this * is the format string. For print(), this is the * CTF type of the expression result. 
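 * The validation below therefore checks that the index lands inside
 * the string table, that the string terminates before the table
 * ends, and that it is non-empty, before copying it out for the
 * action to own.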
*/ if ((strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL) goto err; str = (char *)((uintptr_t)dof + (uintptr_t)strtab->dofs_offset); for (i = desc->dofa_arg; i < strtab->dofs_size; i++) { if (str[i] == '\0') break; } if (i >= strtab->dofs_size) { dtrace_dof_error(dof, "bogus format string"); goto err; } if (i == desc->dofa_arg) { dtrace_dof_error(dof, "empty format string"); goto err; } i -= desc->dofa_arg; fmt = kmem_alloc(i + 1, KM_SLEEP); bcopy(&str[desc->dofa_arg], fmt, i + 1); arg = (uint64_t)(uintptr_t)fmt; } else { if (kind == DTRACEACT_PRINTA) { ASSERT(desc->dofa_strtab == DOF_SECIDX_NONE); arg = 0; } else { arg = desc->dofa_arg; } } act = dtrace_actdesc_create(kind, desc->dofa_ntuple, desc->dofa_uarg, arg); if (last != NULL) { last->dtad_next = act; } else { first = act; } last = act; if (desc->dofa_difo == DOF_SECIDX_NONE) continue; if ((difosec = dtrace_dof_sect(dof, DOF_SECT_DIFOHDR, desc->dofa_difo)) == NULL) goto err; act->dtad_difo = dtrace_dof_difo(dof, difosec, vstate, cr); if (act->dtad_difo == NULL) goto err; } ASSERT(first != NULL); return (first); err: for (act = first; act != NULL; act = next) { next = act->dtad_next; dtrace_actdesc_release(act, vstate); } return (NULL); } static dtrace_ecbdesc_t * dtrace_dof_ecbdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, cred_t *cr) { dtrace_ecbdesc_t *ep; dof_ecbdesc_t *ecb; dtrace_probedesc_t *desc; dtrace_predicate_t *pred = NULL; if (sec->dofs_size < sizeof (dof_ecbdesc_t)) { dtrace_dof_error(dof, "truncated ECB description"); return (NULL); } if (sec->dofs_align != sizeof (uint64_t)) { dtrace_dof_error(dof, "bad alignment in ECB description"); return (NULL); } ecb = (dof_ecbdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset); sec = dtrace_dof_sect(dof, DOF_SECT_PROBEDESC, ecb->dofe_probes); if (sec == NULL) return (NULL); ep = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP); ep->dted_uarg = ecb->dofe_uarg; desc = &ep->dted_probe; if (dtrace_dof_probedesc(dof, sec, desc) == NULL) goto err; if (ecb->dofe_pred != DOF_SECIDX_NONE) { if ((sec = dtrace_dof_sect(dof, DOF_SECT_DIFOHDR, ecb->dofe_pred)) == NULL) goto err; if ((pred = dtrace_dof_predicate(dof, sec, vstate, cr)) == NULL) goto err; ep->dted_pred.dtpdd_predicate = pred; } if (ecb->dofe_actions != DOF_SECIDX_NONE) { if ((sec = dtrace_dof_sect(dof, DOF_SECT_ACTDESC, ecb->dofe_actions)) == NULL) goto err; ep->dted_action = dtrace_dof_actdesc(dof, sec, vstate, cr); if (ep->dted_action == NULL) goto err; } return (ep); err: if (pred != NULL) dtrace_predicate_release(pred, vstate); kmem_free(ep, sizeof (dtrace_ecbdesc_t)); return (NULL); } /* * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the * specified DOF. At present, this amounts to simply adding 'ubase' to the * site of any user SETX relocations to account for load object base address. * In the future, if we need other relocations, this function can be extended. 
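 * For example, if a SETX site holds the link-time value 0x1234 and
 * the object is loaded at base 0x800000000000, the site is rewritten
 * to 0x800000001234.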
*/ static int dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase) { uintptr_t daddr = (uintptr_t)dof; dof_relohdr_t *dofr = (dof_relohdr_t *)(uintptr_t)(daddr + sec->dofs_offset); dof_sec_t *ss, *rs, *ts; dof_relodesc_t *r; uint_t i, n; if (sec->dofs_size < sizeof (dof_relohdr_t) || sec->dofs_align != sizeof (dof_secidx_t)) { dtrace_dof_error(dof, "invalid relocation header"); return (-1); } ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab); rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec); ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec); if (ss == NULL || rs == NULL || ts == NULL) return (-1); /* dtrace_dof_error() has been called already */ if (rs->dofs_entsize < sizeof (dof_relodesc_t) || rs->dofs_align != sizeof (uint64_t)) { dtrace_dof_error(dof, "invalid relocation section"); return (-1); } r = (dof_relodesc_t *)(uintptr_t)(daddr + rs->dofs_offset); n = rs->dofs_size / rs->dofs_entsize; for (i = 0; i < n; i++) { uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset; switch (r->dofr_type) { case DOF_RELO_NONE: break; case DOF_RELO_SETX: if (r->dofr_offset >= ts->dofs_size || r->dofr_offset + sizeof (uint64_t) > ts->dofs_size) { dtrace_dof_error(dof, "bad relocation offset"); return (-1); } if (!IS_P2ALIGNED(taddr, sizeof (uint64_t))) { dtrace_dof_error(dof, "misaligned setx relo"); return (-1); } *(uint64_t *)taddr += ubase; break; default: dtrace_dof_error(dof, "invalid relocation type"); return (-1); } r = (dof_relodesc_t *)((uintptr_t)r + rs->dofs_entsize); } return (0); } /* * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated * header: it should be at the front of a memory region that is at least * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in * size. It need not be validated in any other way. */ static int dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, dtrace_enabling_t **enabp, uint64_t ubase, int noprobes) { uint64_t len = dof->dofh_loadsz, seclen; uintptr_t daddr = (uintptr_t)dof; dtrace_ecbdesc_t *ep; dtrace_enabling_t *enab; uint_t i; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t)); /* * Check the DOF header identification bytes. In addition to checking * valid settings, we also verify that unused bits/bytes are zeroed so * we can use them later without fear of regressing existing binaries. 
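 * (Concretely, the checks below cover, in order: the magic string,
 * the data model, the byte encoding, the DOF and DIF versions, the
 * integer and tuple register counts, the pad bytes and the flag
 * bits.)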
*/ if (bcmp(&dof->dofh_ident[DOF_ID_MAG0], DOF_MAG_STRING, DOF_MAG_STRLEN) != 0) { dtrace_dof_error(dof, "DOF magic string mismatch"); return (-1); } if (dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_ILP32 && dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_LP64) { dtrace_dof_error(dof, "DOF has invalid data model"); return (-1); } if (dof->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE) { dtrace_dof_error(dof, "DOF encoding mismatch"); return (-1); } if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_2) { dtrace_dof_error(dof, "DOF version mismatch"); return (-1); } if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) { dtrace_dof_error(dof, "DOF uses unsupported instruction set"); return (-1); } if (dof->dofh_ident[DOF_ID_DIFIREG] > DIF_DIR_NREGS) { dtrace_dof_error(dof, "DOF uses too many integer registers"); return (-1); } if (dof->dofh_ident[DOF_ID_DIFTREG] > DIF_DTR_NREGS) { dtrace_dof_error(dof, "DOF uses too many tuple registers"); return (-1); } for (i = DOF_ID_PAD; i < DOF_ID_SIZE; i++) { if (dof->dofh_ident[i] != 0) { dtrace_dof_error(dof, "DOF has invalid ident byte set"); return (-1); } } if (dof->dofh_flags & ~DOF_FL_VALID) { dtrace_dof_error(dof, "DOF has invalid flag bits set"); return (-1); } if (dof->dofh_secsize == 0) { dtrace_dof_error(dof, "zero section header size"); return (-1); } /* * Check that the section headers don't exceed the amount of DOF * data. Note that we cast the section size and number of sections * to uint64_t's to prevent possible overflow in the multiplication. */ seclen = (uint64_t)dof->dofh_secnum * (uint64_t)dof->dofh_secsize; if (dof->dofh_secoff > len || seclen > len || dof->dofh_secoff + seclen > len) { dtrace_dof_error(dof, "truncated section headers"); return (-1); } if (!IS_P2ALIGNED(dof->dofh_secoff, sizeof (uint64_t))) { dtrace_dof_error(dof, "misaligned section headers"); return (-1); } if (!IS_P2ALIGNED(dof->dofh_secsize, sizeof (uint64_t))) { dtrace_dof_error(dof, "misaligned section size"); return (-1); } /* * Take an initial pass through the section headers to be sure that * the headers don't have stray offsets. If the 'noprobes' flag is * set, do not permit sections relating to providers, probes, or args. */ for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(daddr + (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); if (noprobes) { switch (sec->dofs_type) { case DOF_SECT_PROVIDER: case DOF_SECT_PROBES: case DOF_SECT_PRARGS: case DOF_SECT_PROFFS: dtrace_dof_error(dof, "illegal sections " "for enabling"); return (-1); } } if (DOF_SEC_ISLOADABLE(sec->dofs_type) && !(sec->dofs_flags & DOF_SECF_LOAD)) { dtrace_dof_error(dof, "loadable section with load " "flag unset"); return (-1); } if (!(sec->dofs_flags & DOF_SECF_LOAD)) continue; /* just ignore non-loadable sections */ if (!ISP2(sec->dofs_align)) { dtrace_dof_error(dof, "bad section alignment"); return (-1); } if (sec->dofs_offset & (sec->dofs_align - 1)) { dtrace_dof_error(dof, "misaligned section"); return (-1); } if (sec->dofs_offset > len || sec->dofs_size > len || sec->dofs_offset + sec->dofs_size > len) { dtrace_dof_error(dof, "corrupt section header"); return (-1); } if (sec->dofs_type == DOF_SECT_STRTAB && *((char *)daddr + sec->dofs_offset + sec->dofs_size - 1) != '\0') { dtrace_dof_error(dof, "non-terminating string table"); return (-1); } } /* * Take a second pass through the sections and locate and perform any * relocations that are present. 
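 * (At present only DOF_SECT_URELHDR sections are acted on here; each
 * one is handed to dtrace_dof_relocate() above.)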
We do this after the first pass to * be sure that all sections have had their headers validated. */ for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(daddr + (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); if (!(sec->dofs_flags & DOF_SECF_LOAD)) continue; /* skip sections that are not loadable */ switch (sec->dofs_type) { case DOF_SECT_URELHDR: if (dtrace_dof_relocate(dof, sec, ubase) != 0) return (-1); break; } } if ((enab = *enabp) == NULL) enab = *enabp = dtrace_enabling_create(vstate); for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(daddr + (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); if (sec->dofs_type != DOF_SECT_ECBDESC) continue; if ((ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr)) == NULL) { dtrace_enabling_destroy(enab); *enabp = NULL; return (-1); } dtrace_enabling_add(enab, ep); } return (0); } /* * Process DOF for any options. This routine assumes that the DOF has been * at least processed by dtrace_dof_slurp(). */ static int dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state) { int i, rval; uint32_t entsize; size_t offs; dof_optdesc_t *desc; for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof + (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); if (sec->dofs_type != DOF_SECT_OPTDESC) continue; if (sec->dofs_align != sizeof (uint64_t)) { dtrace_dof_error(dof, "bad alignment in " "option description"); return (EINVAL); } if ((entsize = sec->dofs_entsize) == 0) { dtrace_dof_error(dof, "zeroed option entry size"); return (EINVAL); } if (entsize < sizeof (dof_optdesc_t)) { dtrace_dof_error(dof, "bad option entry size"); return (EINVAL); } for (offs = 0; offs < sec->dofs_size; offs += entsize) { desc = (dof_optdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset + offs); if (desc->dofo_strtab != DOF_SECIDX_NONE) { dtrace_dof_error(dof, "non-zero option string"); return (EINVAL); } if (desc->dofo_value == DTRACEOPT_UNSET) { dtrace_dof_error(dof, "unset option"); return (EINVAL); } if ((rval = dtrace_state_option(state, desc->dofo_option, desc->dofo_value)) != 0) { dtrace_dof_error(dof, "rejected option"); return (rval); } } } return (0); } /* * DTrace Consumer State Functions */ static int dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) { size_t hashsize, maxper, min, chunksize = dstate->dtds_chunksize; void *base; uintptr_t limit; dtrace_dynvar_t *dvar, *next, *start; int i; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL); bzero(dstate, sizeof (dtrace_dstate_t)); if ((dstate->dtds_chunksize = chunksize) == 0) dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE; VERIFY(dstate->dtds_chunksize < LONG_MAX); if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t))) size = min; if ((base = kmem_zalloc(size, KM_NOSLEEP | KM_NORMALPRI)) == NULL) return (ENOMEM); dstate->dtds_size = size; dstate->dtds_base = base; dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP); bzero(dstate->dtds_percpu, NCPU * sizeof (dtrace_dstate_percpu_t)); hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)); if (hashsize != 1 && (hashsize & 1)) hashsize--; dstate->dtds_hashsize = hashsize; dstate->dtds_hash = dstate->dtds_base; /* * Set all of our hash buckets to point to the single sink, and (if * it hasn't already been set), set the sink's hash value to be the * sink sentinel value. The sink is needed for dynamic variable * lookups to know that they have iterated over an entire, valid hash * chain. 
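 * (A lookup that reaches a variable whose hash value is the
 * DTRACE_DYNHASH_SINK sentinel knows it has walked a complete,
 * valid chain.)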
*/ for (i = 0; i < hashsize; i++) dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink; if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK) dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK; /* * Determine number of active CPUs. Divide free list evenly among * active CPUs. */ start = (dtrace_dynvar_t *) ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t)); limit = (uintptr_t)base + size; VERIFY((uintptr_t)start < limit); VERIFY((uintptr_t)start >= (uintptr_t)base); maxper = (limit - (uintptr_t)start) / NCPU; maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize; #ifndef illumos CPU_FOREACH(i) { #else for (i = 0; i < NCPU; i++) { #endif dstate->dtds_percpu[i].dtdsc_free = dvar = start; /* * If we don't even have enough chunks to make it once through * NCPUs, we're just going to allocate everything to the first * CPU. And if we're on the last CPU, we're going to allocate * whatever is left over. In either case, we set the limit to * be the limit of the dynamic variable space. */ if (maxper == 0 || i == NCPU - 1) { limit = (uintptr_t)base + size; start = NULL; } else { limit = (uintptr_t)start + maxper; start = (dtrace_dynvar_t *)limit; } VERIFY(limit <= (uintptr_t)base + size); for (;;) { next = (dtrace_dynvar_t *)((uintptr_t)dvar + dstate->dtds_chunksize); if ((uintptr_t)next + dstate->dtds_chunksize >= limit) break; VERIFY((uintptr_t)dvar >= (uintptr_t)base && (uintptr_t)dvar <= (uintptr_t)base + size); dvar->dtdv_next = next; dvar = next; } if (maxper == 0) break; } return (0); } static void dtrace_dstate_fini(dtrace_dstate_t *dstate) { ASSERT(MUTEX_HELD(&cpu_lock)); if (dstate->dtds_base == NULL) return; kmem_free(dstate->dtds_base, dstate->dtds_size); kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu); } static void dtrace_vstate_fini(dtrace_vstate_t *vstate) { /* * Logical XOR, where are you? */ ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL)); if (vstate->dtvs_nglobals > 0) { kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals * sizeof (dtrace_statvar_t *)); } if (vstate->dtvs_ntlocals > 0) { kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals * sizeof (dtrace_difv_t)); } ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL)); if (vstate->dtvs_nlocals > 0) { kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals * sizeof (dtrace_statvar_t *)); } } #ifdef illumos static void dtrace_state_clean(dtrace_state_t *state) { if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) return; dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars); dtrace_speculation_clean(state); } static void dtrace_state_deadman(dtrace_state_t *state) { hrtime_t now; dtrace_sync(); now = dtrace_gethrtime(); if (state != dtrace_anon.dta_state && now - state->dts_laststatus >= dtrace_deadman_user) return; /* * We must be sure that dts_alive never appears to be less than the * value upon entry to dtrace_state_deadman(), and because we lack a * dtrace_cas64(), we cannot store to it atomically. We thus instead * store INT64_MAX to it, followed by a memory barrier, followed by * the new value. This assures that dts_alive never appears to be * less than its true value, regardless of the order in which the * stores to the underlying storage are issued. 
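 * (A racing reader can therefore observe the old timestamp, the new
 * one, or INT64_MAX; every one of these is at least the true
 * last-alive time, which is all the deadman check requires.)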
*/ state->dts_alive = INT64_MAX; dtrace_membar_producer(); state->dts_alive = now; } #else /* !illumos */ static void dtrace_state_clean(void *arg) { dtrace_state_t *state = arg; dtrace_optval_t *opt = state->dts_options; if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) return; dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars); dtrace_speculation_clean(state); callout_reset(&state->dts_cleaner, hz * opt[DTRACEOPT_CLEANRATE] / NANOSEC, dtrace_state_clean, state); } static void dtrace_state_deadman(void *arg) { dtrace_state_t *state = arg; hrtime_t now; dtrace_sync(); dtrace_debug_output(); now = dtrace_gethrtime(); if (state != dtrace_anon.dta_state && now - state->dts_laststatus >= dtrace_deadman_user) return; /* * We must be sure that dts_alive never appears to be less than the * value upon entry to dtrace_state_deadman(), and because we lack a * dtrace_cas64(), we cannot store to it atomically. We thus instead * store INT64_MAX to it, followed by a memory barrier, followed by * the new value. This assures that dts_alive never appears to be * less than its true value, regardless of the order in which the * stores to the underlying storage are issued. */ state->dts_alive = INT64_MAX; dtrace_membar_producer(); state->dts_alive = now; callout_reset(&state->dts_deadman, hz * dtrace_deadman_interval / NANOSEC, dtrace_state_deadman, state); } #endif /* illumos */ static dtrace_state_t * #ifdef illumos dtrace_state_create(dev_t *devp, cred_t *cr) #else dtrace_state_create(struct cdev *dev, struct ucred *cred __unused) #endif { #ifdef illumos minor_t minor; major_t major; #else cred_t *cr = NULL; int m = 0; #endif char c[30]; dtrace_state_t *state; dtrace_optval_t *opt; int bufsize = NCPU * sizeof (dtrace_buffer_t), i; + int cpu_it; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&cpu_lock)); #ifdef illumos minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1, VM_BESTFIT | VM_SLEEP); if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) { vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); return (NULL); } state = ddi_get_soft_state(dtrace_softstate, minor); #else if (dev != NULL) { cr = dev->si_cred; m = dev2unit(dev); } /* Allocate memory for the state. */ state = kmem_zalloc(sizeof(dtrace_state_t), KM_SLEEP); #endif state->dts_epid = DTRACE_EPIDNONE + 1; (void) snprintf(c, sizeof (c), "dtrace_aggid_%d", m); #ifdef illumos state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); if (devp != NULL) { major = getemajor(*devp); } else { major = ddi_driver_major(dtrace_devi); } state->dts_dev = makedevice(major, minor); if (devp != NULL) *devp = state->dts_dev; #else state->dts_aggid_arena = new_unrhdr(1, INT_MAX, &dtrace_unr_mtx); state->dts_dev = dev; #endif /* * We allocate NCPU buffers. On the one hand, this can be quite * a bit of memory per instance (nearly 36K on a Starcat). On the * other hand, it saves an additional memory reference in the probe * path. */ state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP); state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP); + + /* + * Allocate and initialise the per-process per-CPU random state. + * SI_SUB_RANDOM < SI_SUB_DTRACE_ANON, therefore the entropy device + * is assumed to be seeded at this point (if from the Fortuna seed + * file). + */ + (void) read_random(&state->dts_rstate[0], 2 * sizeof(uint64_t)); + for (cpu_it = 1; cpu_it < NCPU; cpu_it++) { + /* + * Each CPU is assigned a 2^64 period, non-overlapping + * subsequence.
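+	 * (The jump is equivalent to 2^64 calls of the generator's
+	 * next function, so CPU n effectively starts 2^64 * n draws
+	 * into a single global xoroshiro128+ sequence, and the per-CPU
+	 * streams cannot collide within 2^64 outputs each.)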
+ */ + dtrace_xoroshiro128_plus_jump(state->dts_rstate[cpu_it-1], + state->dts_rstate[cpu_it]); + } #ifdef illumos state->dts_cleaner = CYCLIC_NONE; state->dts_deadman = CYCLIC_NONE; #else callout_init(&state->dts_cleaner, 1); callout_init(&state->dts_deadman, 1); #endif state->dts_vstate.dtvs_state = state; for (i = 0; i < DTRACEOPT_MAX; i++) state->dts_options[i] = DTRACEOPT_UNSET; /* * Set the default options. */ opt = state->dts_options; opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH; opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO; opt[DTRACEOPT_NSPEC] = dtrace_nspec_default; opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default; opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL; opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default; opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default; opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default; opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default; opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default; opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default; opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default; opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default; opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default; state->dts_activity = DTRACE_ACTIVITY_INACTIVE; /* * Depending on the user credentials, we set flag bits which alter probe * visibility or the amount of destructiveness allowed. In the case of * actual anonymous tracing, or the possession of all privileges, all of * the normal checks are bypassed. */ if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { state->dts_cred.dcr_visible = DTRACE_CRV_ALL; state->dts_cred.dcr_action = DTRACE_CRA_ALL; } else { /* * Set up the credentials for this instantiation. We take a * hold on the credential to prevent it from disappearing on * us; this in turn prevents the zone_t referenced by this * credential from disappearing. This means that we can * examine the credential and the zone from probe context. */ crhold(cr); state->dts_cred.dcr_cred = cr; /* * CRA_PROC means "we have *some* privilege for dtrace" and * unlocks the use of variables like pid, zonename, etc. */ if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) || PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) { state->dts_cred.dcr_action |= DTRACE_CRA_PROC; } /* * dtrace_user allows use of syscall and profile providers. * If the user also has proc_owner and/or proc_zone, we * extend the scope to include additional visibility and * destructive power. */ if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) { if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) { state->dts_cred.dcr_visible |= DTRACE_CRV_ALLPROC; state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; } if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) { state->dts_cred.dcr_visible |= DTRACE_CRV_ALLZONE; state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE; } /* * If we have all privs in whatever zone this is, * we can do destructive things to processes which * have altered credentials. */ #ifdef illumos if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE), cr->cr_zone->zone_privset)) { state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; } #endif } /* * Holding the dtrace_kernel privilege also implies that * the user has the dtrace_user privilege from a visibility * perspective. But without further privileges, some * destructive actions are not available. */ if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) { /* * Make all probes in all zones visible. 
However, * this doesn't mean that all actions become available * to all zones. */ state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL | DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE; state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL | DTRACE_CRA_PROC; /* * Holding proc_owner means that destructive actions * for *this* zone are allowed. */ if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; /* * Holding proc_zone means that destructive actions * for this user/group ID in all zones are allowed. */ if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE; #ifdef illumos /* * If we have all privs in whatever zone this is, * we can do destructive things to processes which * have altered credentials. */ if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE), cr->cr_zone->zone_privset)) { state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; } #endif } /* * Holding the dtrace_proc privilege gives control over fasttrap * and pid providers. We need to grant wider destructive * privileges in the event that the user has proc_owner and/or * proc_zone. */ if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) { if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE; } } return (state); } static int dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) { dtrace_optval_t *opt = state->dts_options, size; processorid_t cpu = 0; int flags = 0, rval, factor, divisor = 1; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(which < DTRACEOPT_MAX); ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE || (state == dtrace_anon.dta_state && state->dts_activity == DTRACE_ACTIVITY_ACTIVE)); if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0) return (0); if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET) cpu = opt[DTRACEOPT_CPU]; if (which == DTRACEOPT_SPECSIZE) flags |= DTRACEBUF_NOSWITCH; if (which == DTRACEOPT_BUFSIZE) { if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING) flags |= DTRACEBUF_RING; if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL) flags |= DTRACEBUF_FILL; if (state != dtrace_anon.dta_state || state->dts_activity != DTRACE_ACTIVITY_ACTIVE) flags |= DTRACEBUF_INACTIVE; } for (size = opt[which]; size >= sizeof (uint64_t); size /= divisor) { /* * The size must be 8-byte aligned. If the size is not 8-byte * aligned, drop it down by the difference. */ if (size & (sizeof (uint64_t) - 1)) size -= size & (sizeof (uint64_t) - 1); if (size < state->dts_reserve) { /* * Buffers must always be large enough to accommodate * their prereserved space. We return E2BIG instead * of ENOMEM in this case to allow for user-level * software to differentiate the cases.
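 * (An ENOMEM from dtrace_buffer_alloc() is retried below at smaller
 * sizes unless the resize policy is manual; E2BIG is final, since no
 * smaller size could ever satisfy the reserve.)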
*/ return (E2BIG); } rval = dtrace_buffer_alloc(buf, size, flags, cpu, &factor); if (rval != ENOMEM) { opt[which] = size; return (rval); } if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL) return (rval); for (divisor = 2; divisor < factor; divisor <<= 1) continue; } return (ENOMEM); } static int dtrace_state_buffers(dtrace_state_t *state) { dtrace_speculation_t *spec = state->dts_speculations; int rval, i; if ((rval = dtrace_state_buffer(state, state->dts_buffer, DTRACEOPT_BUFSIZE)) != 0) return (rval); if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer, DTRACEOPT_AGGSIZE)) != 0) return (rval); for (i = 0; i < state->dts_nspeculations; i++) { if ((rval = dtrace_state_buffer(state, spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0) return (rval); } return (0); } static void dtrace_state_prereserve(dtrace_state_t *state) { dtrace_ecb_t *ecb; dtrace_probe_t *probe; state->dts_reserve = 0; if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL) return; /* * If our buffer policy is a "fill" buffer policy, we need to set the * prereserved space to be the space required by the END probes. */ probe = dtrace_probes[dtrace_probeid_end - 1]; ASSERT(probe != NULL); for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) { if (ecb->dte_state != state) continue; state->dts_reserve += ecb->dte_needed + ecb->dte_alignment; } } static int dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) { dtrace_optval_t *opt = state->dts_options, sz, nspec; dtrace_speculation_t *spec; dtrace_buffer_t *buf; #ifdef illumos cyc_handler_t hdlr; cyc_time_t when; #endif int rval = 0, i, bufsize = NCPU * sizeof (dtrace_buffer_t); dtrace_icookie_t cookie; mutex_enter(&cpu_lock); mutex_enter(&dtrace_lock); if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { rval = EBUSY; goto out; } /* * Before we can perform any checks, we must prime all of the * retained enablings that correspond to this state. */ dtrace_enabling_prime(state); if (state->dts_destructive && !state->dts_cred.dcr_destructive) { rval = EACCES; goto out; } dtrace_state_prereserve(state); /* * What we want to do now is try to allocate our speculations. * We do not automatically resize the number of speculations; if * this fails, we will fail the operation. */ nspec = opt[DTRACEOPT_NSPEC]; ASSERT(nspec != DTRACEOPT_UNSET); if (nspec > INT_MAX) { rval = ENOMEM; goto out; } spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), KM_NOSLEEP | KM_NORMALPRI); if (spec == NULL) { rval = ENOMEM; goto out; } state->dts_speculations = spec; state->dts_nspeculations = (int)nspec; for (i = 0; i < nspec; i++) { if ((buf = kmem_zalloc(bufsize, KM_NOSLEEP | KM_NORMALPRI)) == NULL) { rval = ENOMEM; goto err; } spec[i].dtsp_buffer = buf; } if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) { if (dtrace_anon.dta_state == NULL) { rval = ENOENT; goto out; } if (state->dts_necbs != 0) { rval = EALREADY; goto out; } state->dts_anon = dtrace_anon_grab(); ASSERT(state->dts_anon != NULL); state = state->dts_anon; /* * We want "grabanon" to be set in the grabbed state, so we'll * copy that option value from the grabbing state into the * grabbed state. */ state->dts_options[DTRACEOPT_GRABANON] = opt[DTRACEOPT_GRABANON]; *cpu = dtrace_anon.dta_beganon; /* * If the anonymous state is active (as it almost certainly * is if the anonymous enabling ultimately matched anything), * we don't allow any further option processing -- but we * don't return failure.
*/ if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) goto out; } if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET && opt[DTRACEOPT_AGGSIZE] != 0) { if (state->dts_aggregations == NULL) { /* * We're not going to create an aggregation buffer * because we don't have any ECBs that contain * aggregations -- set this option to 0. */ opt[DTRACEOPT_AGGSIZE] = 0; } else { /* * If we have an aggregation buffer, we must also have * a buffer to use as scratch. */ if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET || opt[DTRACEOPT_BUFSIZE] < state->dts_needed) { opt[DTRACEOPT_BUFSIZE] = state->dts_needed; } } } if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET && opt[DTRACEOPT_SPECSIZE] != 0) { if (!state->dts_speculates) { /* * We're not going to create speculation buffers * because we don't have any ECBs that actually * speculate -- set the speculation size to 0. */ opt[DTRACEOPT_SPECSIZE] = 0; } } /* * The bare minimum size for any buffer that we're actually going to * do anything to is sizeof (uint64_t). */ sz = sizeof (uint64_t); if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) || (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) || (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) { /* * A buffer size has been explicitly set to 0 (or to a size * that will be adjusted to 0) and we need the space -- we * need to return failure. We return ENOSPC to differentiate * it from failing to allocate a buffer due to failure to meet * the reserve (for which we return E2BIG). */ rval = ENOSPC; goto out; } if ((rval = dtrace_state_buffers(state)) != 0) goto err; if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET) sz = dtrace_dstate_defsize; do { rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz); if (rval == 0) break; if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL) goto err; } while (sz >>= 1); opt[DTRACEOPT_DYNVARSIZE] = sz; if (rval != 0) goto err; if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max) opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max; if (opt[DTRACEOPT_CLEANRATE] == 0) opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max; if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min) opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min; if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max) opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max; state->dts_alive = state->dts_laststatus = dtrace_gethrtime(); #ifdef illumos hdlr.cyh_func = (cyc_func_t)dtrace_state_clean; hdlr.cyh_arg = state; hdlr.cyh_level = CY_LOW_LEVEL; when.cyt_when = 0; when.cyt_interval = opt[DTRACEOPT_CLEANRATE]; state->dts_cleaner = cyclic_add(&hdlr, &when); hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman; hdlr.cyh_arg = state; hdlr.cyh_level = CY_LOW_LEVEL; when.cyt_when = 0; when.cyt_interval = dtrace_deadman_interval; state->dts_deadman = cyclic_add(&hdlr, &when); #else callout_reset(&state->dts_cleaner, hz * opt[DTRACEOPT_CLEANRATE] / NANOSEC, dtrace_state_clean, state); callout_reset(&state->dts_deadman, hz * dtrace_deadman_interval / NANOSEC, dtrace_state_deadman, state); #endif state->dts_activity = DTRACE_ACTIVITY_WARMUP; #ifdef illumos if (state->dts_getf != 0 && !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) { /* * We don't have kernel privs but we have at least one call * to getf(); we need to bump our zone's count, and (if * this is the first enabling to have an unprivileged call * to getf()) we need to hook into closef(). 
*/ state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf++; if (dtrace_getf++ == 0) { ASSERT(dtrace_closef == NULL); dtrace_closef = dtrace_getf_barrier; } } #endif /* * Now it's time to actually fire the BEGIN probe. We need to disable * interrupts here both to record the CPU on which we fired the BEGIN * probe (the data from this CPU will be processed first at user * level) and to manually activate the buffer for this CPU. */ cookie = dtrace_interrupt_disable(); *cpu = curcpu; ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE); state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE; dtrace_probe(dtrace_probeid_begin, (uint64_t)(uintptr_t)state, 0, 0, 0, 0); dtrace_interrupt_enable(cookie); /* * We may have had an exit action from a BEGIN probe; only change our * state to ACTIVE if we're still in WARMUP. */ ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP || state->dts_activity == DTRACE_ACTIVITY_DRAINING); if (state->dts_activity == DTRACE_ACTIVITY_WARMUP) state->dts_activity = DTRACE_ACTIVITY_ACTIVE; #ifdef __FreeBSD__ /* * We enable anonymous tracing before APs are started, so we must * activate buffers using the current CPU. */ if (state == dtrace_anon.dta_state) for (int i = 0; i < NCPU; i++) dtrace_buffer_activate_cpu(state, i); else dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_buffer_activate, state); #else /* * Regardless of whether we're now in ACTIVE or DRAINING, we * want each CPU to transition its principal buffer out of the * INACTIVE state. Doing this assures that no CPU will suddenly begin * processing an ECB halfway down a probe's ECB chain; all CPUs will * atomically transition from processing none of a state's ECBs to * processing all of them. */ dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_buffer_activate, state); #endif goto out; err: dtrace_buffer_free(state->dts_buffer); dtrace_buffer_free(state->dts_aggbuffer); if ((nspec = state->dts_nspeculations) == 0) { ASSERT(state->dts_speculations == NULL); goto out; } spec = state->dts_speculations; ASSERT(spec != NULL); for (i = 0; i < state->dts_nspeculations; i++) { if ((buf = spec[i].dtsp_buffer) == NULL) break; dtrace_buffer_free(buf); kmem_free(buf, bufsize); } kmem_free(spec, nspec * sizeof (dtrace_speculation_t)); state->dts_nspeculations = 0; state->dts_speculations = NULL; out: mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); return (rval); } static int dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu) { dtrace_icookie_t cookie; ASSERT(MUTEX_HELD(&dtrace_lock)); if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE && state->dts_activity != DTRACE_ACTIVITY_DRAINING) return (EINVAL); /* * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync * to be sure that every CPU has seen it. See below for the details * on why this is done. */ state->dts_activity = DTRACE_ACTIVITY_DRAINING; dtrace_sync(); /* * By this point, it is impossible for any CPU to be still processing * with DTRACE_ACTIVITY_ACTIVE. We can thus set our activity to * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any * other CPU in dtrace_buffer_reserve(). This allows dtrace_probe() * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN * iff we're in the END probe. */ state->dts_activity = DTRACE_ACTIVITY_COOLDOWN; dtrace_sync(); ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN); /* * Finally, we can release the reserve and call the END probe.
We * disable interrupts across calling the END probe to allow us to * return the CPU on which we actually called the END probe. This * allows user-land to be sure that this CPU's principal buffer is * processed last. */ state->dts_reserve = 0; cookie = dtrace_interrupt_disable(); *cpu = curcpu; dtrace_probe(dtrace_probeid_end, (uint64_t)(uintptr_t)state, 0, 0, 0, 0); dtrace_interrupt_enable(cookie); state->dts_activity = DTRACE_ACTIVITY_STOPPED; dtrace_sync(); #ifdef illumos if (state->dts_getf != 0 && !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) { /* * We don't have kernel privs but we have at least one call * to getf(); we need to lower our zone's count, and (if * this is the last enabling to have an unprivileged call * to getf()) we need to clear the closef() hook. */ ASSERT(state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf > 0); ASSERT(dtrace_closef == dtrace_getf_barrier); ASSERT(dtrace_getf > 0); state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf--; if (--dtrace_getf == 0) dtrace_closef = NULL; } #endif return (0); } static int dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option, dtrace_optval_t val) { ASSERT(MUTEX_HELD(&dtrace_lock)); if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) return (EBUSY); if (option >= DTRACEOPT_MAX) return (EINVAL); if (option != DTRACEOPT_CPU && val < 0) return (EINVAL); switch (option) { case DTRACEOPT_DESTRUCTIVE: if (dtrace_destructive_disallow) return (EACCES); state->dts_cred.dcr_destructive = 1; break; case DTRACEOPT_BUFSIZE: case DTRACEOPT_DYNVARSIZE: case DTRACEOPT_AGGSIZE: case DTRACEOPT_SPECSIZE: case DTRACEOPT_STRSIZE: if (val < 0) return (EINVAL); if (val >= LONG_MAX) { /* * If this is an otherwise negative value, set it to * the highest multiple of 128m less than LONG_MAX. * Technically, we're adjusting the size without * regard to the buffer resizing policy, but in fact, * this has no effect -- if we set the buffer size to * ~LONG_MAX and the buffer policy is ultimately set to * be "manual", the buffer allocation is guaranteed to * fail, if only because the allocation requires two * buffers. (We set the size to the highest * multiple of 128m because it ensures that the size * will remain a multiple of a megabyte when * repeatedly halved -- all the way down to 15m.) */ val = LONG_MAX - (1 << 27) + 1; } } state->dts_options[option] = val; return (0); } static void dtrace_state_destroy(dtrace_state_t *state) { dtrace_ecb_t *ecb; dtrace_vstate_t *vstate = &state->dts_vstate; #ifdef illumos minor_t minor = getminor(state->dts_dev); #endif int i, bufsize = NCPU * sizeof (dtrace_buffer_t); dtrace_speculation_t *spec = state->dts_speculations; int nspec = state->dts_nspeculations; uint32_t match; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&cpu_lock)); /* * First, retract any retained enablings for this state. */ dtrace_enabling_retract(state); ASSERT(state->dts_nretained == 0); if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE || state->dts_activity == DTRACE_ACTIVITY_DRAINING) { /* * We have managed to come into dtrace_state_destroy() on a * hot enabling -- almost certainly because of a disorderly * shutdown of a consumer. (That is, a consumer that is * exiting without having called dtrace_stop().) In this case, * we're going to set our activity to be KILLED, and then * issue a sync to be sure that everyone is out of probe * context before we start blowing away ECBs. */ state->dts_activity = DTRACE_ACTIVITY_KILLED; dtrace_sync(); } /* * Release the credential hold we took in dtrace_state_create().
*/ if (state->dts_cred.dcr_cred != NULL) crfree(state->dts_cred.dcr_cred); /* * Now we can safely disable and destroy any enabled probes. Because * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress * (especially if they're all enabled), we take two passes through the * ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and * in the second we disable whatever is left over. */ for (match = DTRACE_PRIV_KERNEL; ; match = 0) { for (i = 0; i < state->dts_necbs; i++) { if ((ecb = state->dts_ecbs[i]) == NULL) continue; if (match && ecb->dte_probe != NULL) { dtrace_probe_t *probe = ecb->dte_probe; dtrace_provider_t *prov = probe->dtpr_provider; if (!(prov->dtpv_priv.dtpp_flags & match)) continue; } dtrace_ecb_disable(ecb); dtrace_ecb_destroy(ecb); } if (!match) break; } /* * Before we free the buffers, perform one more sync to assure that * every CPU is out of probe context. */ dtrace_sync(); dtrace_buffer_free(state->dts_buffer); dtrace_buffer_free(state->dts_aggbuffer); for (i = 0; i < nspec; i++) dtrace_buffer_free(spec[i].dtsp_buffer); #ifdef illumos if (state->dts_cleaner != CYCLIC_NONE) cyclic_remove(state->dts_cleaner); if (state->dts_deadman != CYCLIC_NONE) cyclic_remove(state->dts_deadman); #else callout_stop(&state->dts_cleaner); callout_drain(&state->dts_cleaner); callout_stop(&state->dts_deadman); callout_drain(&state->dts_deadman); #endif dtrace_dstate_fini(&vstate->dtvs_dynvars); dtrace_vstate_fini(vstate); if (state->dts_ecbs != NULL) kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *)); if (state->dts_aggregations != NULL) { #ifdef DEBUG for (i = 0; i < state->dts_naggregations; i++) ASSERT(state->dts_aggregations[i] == NULL); #endif ASSERT(state->dts_naggregations > 0); kmem_free(state->dts_aggregations, state->dts_naggregations * sizeof (dtrace_aggregation_t *)); } kmem_free(state->dts_buffer, bufsize); kmem_free(state->dts_aggbuffer, bufsize); for (i = 0; i < nspec; i++) kmem_free(spec[i].dtsp_buffer, bufsize); if (spec != NULL) kmem_free(spec, nspec * sizeof (dtrace_speculation_t)); dtrace_format_destroy(state); if (state->dts_aggid_arena != NULL) { #ifdef illumos vmem_destroy(state->dts_aggid_arena); #else delete_unrhdr(state->dts_aggid_arena); #endif state->dts_aggid_arena = NULL; } #ifdef illumos ddi_soft_state_free(dtrace_softstate, minor); vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); #endif } /* * DTrace Anonymous Enabling Functions */ static dtrace_state_t * dtrace_anon_grab(void) { dtrace_state_t *state; ASSERT(MUTEX_HELD(&dtrace_lock)); if ((state = dtrace_anon.dta_state) == NULL) { ASSERT(dtrace_anon.dta_enabling == NULL); return (NULL); } ASSERT(dtrace_anon.dta_enabling != NULL); ASSERT(dtrace_retained != NULL); dtrace_enabling_destroy(dtrace_anon.dta_enabling); dtrace_anon.dta_enabling = NULL; dtrace_anon.dta_state = NULL; return (state); } static void dtrace_anon_property(void) { int i, rv; dtrace_state_t *state; dof_hdr_t *dof; char c[32]; /* enough for "dof-data-" + digits */ ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(MUTEX_HELD(&cpu_lock)); for (i = 0; ; i++) { (void) snprintf(c, sizeof (c), "dof-data-%d", i); dtrace_err_verbose = 1; if ((dof = dtrace_dof_property(c)) == NULL) { dtrace_err_verbose = 0; break; } #ifdef illumos /* * We want to create anonymous state, so we need to transition * the kernel debugger to indicate that DTrace is active. If * this fails (e.g. because the debugger has modified text in * some way), we won't continue with the processing. 
*/ if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) { cmn_err(CE_NOTE, "kernel debugger active; anonymous " "enabling ignored."); dtrace_dof_destroy(dof); break; } #endif /* * If we haven't allocated an anonymous state, we'll do so now. */ if ((state = dtrace_anon.dta_state) == NULL) { state = dtrace_state_create(NULL, NULL); dtrace_anon.dta_state = state; if (state == NULL) { /* * This basically shouldn't happen: the only * failure mode from dtrace_state_create() is a * failure of ddi_soft_state_zalloc() that * itself should never happen. Still, the * interface allows for a failure mode, and * we want to fail as gracefully as possible: * we'll emit an error message and cease * processing anonymous state in this case. */ cmn_err(CE_WARN, "failed to create " "anonymous state"); dtrace_dof_destroy(dof); break; } } rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(), &dtrace_anon.dta_enabling, 0, B_TRUE); if (rv == 0) rv = dtrace_dof_options(dof, state); dtrace_err_verbose = 0; dtrace_dof_destroy(dof); if (rv != 0) { /* * This is malformed DOF; chuck any anonymous state * that we created. */ ASSERT(dtrace_anon.dta_enabling == NULL); dtrace_state_destroy(state); dtrace_anon.dta_state = NULL; break; } ASSERT(dtrace_anon.dta_enabling != NULL); } if (dtrace_anon.dta_enabling != NULL) { int rval; /* * dtrace_enabling_retain() can only fail because we are * trying to retain more enablings than are allowed -- but * we only have one anonymous enabling, and we are guaranteed * to be allowed at least one retained enabling; we assert * that dtrace_enabling_retain() returns success. */ rval = dtrace_enabling_retain(dtrace_anon.dta_enabling); ASSERT(rval == 0); dtrace_enabling_dump(dtrace_anon.dta_enabling); } } /* * DTrace Helper Functions */ static void dtrace_helper_trace(dtrace_helper_action_t *helper, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where) { uint32_t size, next, nnext, i; dtrace_helptrace_t *ent, *buffer; uint16_t flags = cpu_core[curcpu].cpuc_dtrace_flags; if ((buffer = dtrace_helptrace_buffer) == NULL) return; ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals); /* * What would a tracing framework be without its own tracing * framework? (Well, a hell of a lot simpler, for starters...) */ size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals * sizeof (uint64_t) - sizeof (uint64_t); /* * Iterate until we can allocate a slot in the trace buffer. */ do { next = dtrace_helptrace_next; if (next + size < dtrace_helptrace_bufsize) { nnext = next + size; } else { nnext = size; } } while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next); /* * We have our slot; fill it in. */ if (nnext == size) { dtrace_helptrace_wrapped++; next = 0; } ent = (dtrace_helptrace_t *)((uintptr_t)buffer + next); ent->dtht_helper = helper; ent->dtht_where = where; ent->dtht_nlocals = vstate->dtvs_nlocals; ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ? 
mstate->dtms_fltoffs : -1; ent->dtht_fault = DTRACE_FLAGS2FLT(flags); ent->dtht_illval = cpu_core[curcpu].cpuc_dtrace_illval; for (i = 0; i < vstate->dtvs_nlocals; i++) { dtrace_statvar_t *svar; if ((svar = vstate->dtvs_locals[i]) == NULL) continue; ASSERT(svar->dtsv_size >= NCPU * sizeof (uint64_t)); ent->dtht_locals[i] = ((uint64_t *)(uintptr_t)svar->dtsv_data)[curcpu]; } } static uint64_t dtrace_helper(int which, dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t arg0, uint64_t arg1) { uint16_t *flags = &cpu_core[curcpu].cpuc_dtrace_flags; uint64_t sarg0 = mstate->dtms_arg[0]; uint64_t sarg1 = mstate->dtms_arg[1]; uint64_t rval = 0; dtrace_helpers_t *helpers = curproc->p_dtrace_helpers; dtrace_helper_action_t *helper; dtrace_vstate_t *vstate; dtrace_difo_t *pred; int i, trace = dtrace_helptrace_buffer != NULL; ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS); if (helpers == NULL) return (0); if ((helper = helpers->dthps_actions[which]) == NULL) return (0); vstate = &helpers->dthps_vstate; mstate->dtms_arg[0] = arg0; mstate->dtms_arg[1] = arg1; /* * Now iterate over each helper. If its predicate evaluates to 'true', * we'll call the corresponding actions. Note that the below calls * to dtrace_dif_emulate() may set faults in machine state. This is * okay: our caller (the outer dtrace_dif_emulate()) will simply plow * the stored DIF offset with its own (which is the desired behavior). * Also, note the calls to dtrace_dif_emulate() may allocate scratch * from machine state; this is okay, too. */ for (; helper != NULL; helper = helper->dtha_next) { if ((pred = helper->dtha_predicate) != NULL) { if (trace) dtrace_helper_trace(helper, mstate, vstate, 0); if (!dtrace_dif_emulate(pred, mstate, vstate, state)) goto next; if (*flags & CPU_DTRACE_FAULT) goto err; } for (i = 0; i < helper->dtha_nactions; i++) { if (trace) dtrace_helper_trace(helper, mstate, vstate, i + 1); rval = dtrace_dif_emulate(helper->dtha_actions[i], mstate, vstate, state); if (*flags & CPU_DTRACE_FAULT) goto err; } next: if (trace) dtrace_helper_trace(helper, mstate, vstate, DTRACE_HELPTRACE_NEXT); } if (trace) dtrace_helper_trace(helper, mstate, vstate, DTRACE_HELPTRACE_DONE); /* * Restore the arg0 that we saved upon entry. */ mstate->dtms_arg[0] = sarg0; mstate->dtms_arg[1] = sarg1; return (rval); err: if (trace) dtrace_helper_trace(helper, mstate, vstate, DTRACE_HELPTRACE_ERR); /* * Restore the arg0 that we saved upon entry. 
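 * (arg1 is restored alongside it.)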
*/ mstate->dtms_arg[0] = sarg0; mstate->dtms_arg[1] = sarg1; return (0); } static void dtrace_helper_action_destroy(dtrace_helper_action_t *helper, dtrace_vstate_t *vstate) { int i; if (helper->dtha_predicate != NULL) dtrace_difo_release(helper->dtha_predicate, vstate); for (i = 0; i < helper->dtha_nactions; i++) { ASSERT(helper->dtha_actions[i] != NULL); dtrace_difo_release(helper->dtha_actions[i], vstate); } kmem_free(helper->dtha_actions, helper->dtha_nactions * sizeof (dtrace_difo_t *)); kmem_free(helper, sizeof (dtrace_helper_action_t)); } static int dtrace_helper_destroygen(dtrace_helpers_t *help, int gen) { proc_t *p = curproc; dtrace_vstate_t *vstate; int i; if (help == NULL) help = p->p_dtrace_helpers; ASSERT(MUTEX_HELD(&dtrace_lock)); if (help == NULL || gen > help->dthps_generation) return (EINVAL); vstate = &help->dthps_vstate; for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { dtrace_helper_action_t *last = NULL, *h, *next; for (h = help->dthps_actions[i]; h != NULL; h = next) { next = h->dtha_next; if (h->dtha_generation == gen) { if (last != NULL) { last->dtha_next = next; } else { help->dthps_actions[i] = next; } dtrace_helper_action_destroy(h, vstate); } else { last = h; } } } /* * Iterate until we've cleared out all helper providers with the * given generation number. */ for (;;) { dtrace_helper_provider_t *prov; /* * Look for a helper provider with the right generation. We * have to start back at the beginning of the list each time * because we drop dtrace_lock. It's unlikely that we'll make * more than two passes. */ for (i = 0; i < help->dthps_nprovs; i++) { prov = help->dthps_provs[i]; if (prov->dthp_generation == gen) break; } /* * If there were no matches, we're done. */ if (i == help->dthps_nprovs) break; /* * Move the last helper provider into this slot. */ help->dthps_nprovs--; help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs]; help->dthps_provs[help->dthps_nprovs] = NULL; mutex_exit(&dtrace_lock); /* * If we have a meta provider, remove this helper provider. */ mutex_enter(&dtrace_meta_lock); if (dtrace_meta_pid != NULL) { ASSERT(dtrace_deferred_pid == NULL); dtrace_helper_provider_remove(&prov->dthp_prov, p->p_pid); } mutex_exit(&dtrace_meta_lock); dtrace_helper_provider_destroy(prov); mutex_enter(&dtrace_lock); } return (0); } static int dtrace_helper_validate(dtrace_helper_action_t *helper) { int err = 0, i; dtrace_difo_t *dp; if ((dp = helper->dtha_predicate) != NULL) err += dtrace_difo_validate_helper(dp); for (i = 0; i < helper->dtha_nactions; i++) err += dtrace_difo_validate_helper(helper->dtha_actions[i]); return (err == 0); } static int dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep, dtrace_helpers_t *help) { dtrace_helper_action_t *helper, *last; dtrace_actdesc_t *act; dtrace_vstate_t *vstate; dtrace_predicate_t *pred; int count = 0, nactions = 0, i; if (which < 0 || which >= DTRACE_NHELPER_ACTIONS) return (EINVAL); last = help->dthps_actions[which]; vstate = &help->dthps_vstate; for (count = 0; last != NULL; last = last->dtha_next) { count++; if (last->dtha_next == NULL) break; } /* * If we already have dtrace_helper_actions_max helper actions for this * helper action type, we'll refuse to add a new one.
*/ if (count >= dtrace_helper_actions_max) return (ENOSPC); helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP); helper->dtha_generation = help->dthps_generation; if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) { ASSERT(pred->dtp_difo != NULL); dtrace_difo_hold(pred->dtp_difo); helper->dtha_predicate = pred->dtp_difo; } for (act = ep->dted_action; act != NULL; act = act->dtad_next) { if (act->dtad_kind != DTRACEACT_DIFEXPR) goto err; if (act->dtad_difo == NULL) goto err; nactions++; } helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) * (helper->dtha_nactions = nactions), KM_SLEEP); for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) { dtrace_difo_hold(act->dtad_difo); helper->dtha_actions[i++] = act->dtad_difo; } if (!dtrace_helper_validate(helper)) goto err; if (last == NULL) { help->dthps_actions[which] = helper; } else { last->dtha_next = helper; } if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) { dtrace_helptrace_nlocals = vstate->dtvs_nlocals; dtrace_helptrace_next = 0; } return (0); err: dtrace_helper_action_destroy(helper, vstate); return (EINVAL); } static void dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help, dof_helper_t *dofhp) { ASSERT(MUTEX_NOT_HELD(&dtrace_lock)); mutex_enter(&dtrace_meta_lock); mutex_enter(&dtrace_lock); if (!dtrace_attached() || dtrace_meta_pid == NULL) { /* * If the dtrace module is loaded but not attached, or if * there isn't a meta provider registered to deal with * these provider descriptions, we need to postpone creating * the actual providers until later. */ if (help->dthps_next == NULL && help->dthps_prev == NULL && dtrace_deferred_pid != help) { help->dthps_deferred = 1; help->dthps_pid = p->p_pid; help->dthps_next = dtrace_deferred_pid; help->dthps_prev = NULL; if (dtrace_deferred_pid != NULL) dtrace_deferred_pid->dthps_prev = help; dtrace_deferred_pid = help; } mutex_exit(&dtrace_lock); } else if (dofhp != NULL) { /* * If the dtrace module is loaded and we have a particular * helper provider description, pass that off to the * meta provider. */ mutex_exit(&dtrace_lock); dtrace_helper_provide(dofhp, p->p_pid); } else { /* * Otherwise, just pass all the helper provider descriptions * off to the meta provider. */ int i; mutex_exit(&dtrace_lock); for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, p->p_pid); } } mutex_exit(&dtrace_meta_lock); } static int dtrace_helper_provider_add(dof_helper_t *dofhp, dtrace_helpers_t *help, int gen) { dtrace_helper_provider_t *hprov, **tmp_provs; uint_t tmp_maxprovs, i; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(help != NULL); /* * If we already have dtrace_helper_providers_max helper providers, * we'll refuse to add a new one. */ if (help->dthps_nprovs >= dtrace_helper_providers_max) return (ENOSPC); /* * Check to make sure this isn't a duplicate. */ for (i = 0; i < help->dthps_nprovs; i++) { if (dofhp->dofhp_addr == help->dthps_provs[i]->dthp_prov.dofhp_addr) return (EALREADY); } hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP); hprov->dthp_prov = *dofhp; hprov->dthp_ref = 1; hprov->dthp_generation = gen; /* * Allocate a bigger table for helper providers if it's already full.
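 * (The table doubles each time -- 0 becomes 2, then 4, 8, ... --
 * clamped to dtrace_helper_providers_max; existing entries are
 * copied across and the old table is freed.)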
*/ if (help->dthps_maxprovs == help->dthps_nprovs) { tmp_maxprovs = help->dthps_maxprovs; tmp_provs = help->dthps_provs; if (help->dthps_maxprovs == 0) help->dthps_maxprovs = 2; else help->dthps_maxprovs *= 2; if (help->dthps_maxprovs > dtrace_helper_providers_max) help->dthps_maxprovs = dtrace_helper_providers_max; ASSERT(tmp_maxprovs < help->dthps_maxprovs); help->dthps_provs = kmem_zalloc(help->dthps_maxprovs * sizeof (dtrace_helper_provider_t *), KM_SLEEP); if (tmp_provs != NULL) { bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs * sizeof (dtrace_helper_provider_t *)); kmem_free(tmp_provs, tmp_maxprovs * sizeof (dtrace_helper_provider_t *)); } } help->dthps_provs[help->dthps_nprovs] = hprov; help->dthps_nprovs++; return (0); } static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov) { mutex_enter(&dtrace_lock); if (--hprov->dthp_ref == 0) { dof_hdr_t *dof; mutex_exit(&dtrace_lock); dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof; dtrace_dof_destroy(dof); kmem_free(hprov, sizeof (dtrace_helper_provider_t)); } else { mutex_exit(&dtrace_lock); } } static int dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec) { uintptr_t daddr = (uintptr_t)dof; dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec; dof_provider_t *provider; dof_probe_t *probe; uint8_t *arg; char *strtab, *typestr; dof_stridx_t typeidx; size_t typesz; uint_t nprobes, j, k; ASSERT(sec->dofs_type == DOF_SECT_PROVIDER); if (sec->dofs_offset & (sizeof (uint_t) - 1)) { dtrace_dof_error(dof, "misaligned section offset"); return (-1); } /* * The section needs to be large enough to contain the DOF provider * structure appropriate for the given version. */ if (sec->dofs_size < ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ? offsetof(dof_provider_t, dofpv_prenoffs) : sizeof (dof_provider_t))) { dtrace_dof_error(dof, "provider section too small"); return (-1); } provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset); str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab); prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes); arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs); off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs); if (str_sec == NULL || prb_sec == NULL || arg_sec == NULL || off_sec == NULL) return (-1); enoff_sec = NULL; if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && provider->dofpv_prenoffs != DOF_SECT_NONE && (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS, provider->dofpv_prenoffs)) == NULL) return (-1); strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset); if (provider->dofpv_name >= str_sec->dofs_size || strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) { dtrace_dof_error(dof, "invalid provider name"); return (-1); } if (prb_sec->dofs_entsize == 0 || prb_sec->dofs_entsize > prb_sec->dofs_size) { dtrace_dof_error(dof, "invalid entry size"); return (-1); } if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) { dtrace_dof_error(dof, "misaligned entry size"); return (-1); } if (off_sec->dofs_entsize != sizeof (uint32_t)) { dtrace_dof_error(dof, "invalid entry size"); return (-1); } if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) { dtrace_dof_error(dof, "misaligned section offset"); return (-1); } if (arg_sec->dofs_entsize != sizeof (uint8_t)) { dtrace_dof_error(dof, "invalid entry size"); return (-1); } arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset); nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize; /* * Take a pass through the 
probes to check for errors. */ for (j = 0; j < nprobes; j++) { probe = (dof_probe_t *)(uintptr_t)(daddr + prb_sec->dofs_offset + j * prb_sec->dofs_entsize); if (probe->dofpr_func >= str_sec->dofs_size) { dtrace_dof_error(dof, "invalid function name"); return (-1); } if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) { dtrace_dof_error(dof, "function name too long"); /* * Keep going if the function name is too long. * Unlike provider and probe names, we cannot reasonably * impose restrictions on function names, since they're * a property of the code being instrumented. We will * skip this probe in dtrace_helper_provide_one(). */ } if (probe->dofpr_name >= str_sec->dofs_size || strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) { dtrace_dof_error(dof, "invalid probe name"); return (-1); } /* * The offset count must not wrap the index, and the offsets * must also not overflow the section's data. */ if (probe->dofpr_offidx + probe->dofpr_noffs < probe->dofpr_offidx || (probe->dofpr_offidx + probe->dofpr_noffs) * off_sec->dofs_entsize > off_sec->dofs_size) { dtrace_dof_error(dof, "invalid probe offset"); return (-1); } if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) { /* * If there's no is-enabled offset section, make sure * there aren't any is-enabled offsets. Otherwise * perform the same checks as for probe offsets * (immediately above). */ if (enoff_sec == NULL) { if (probe->dofpr_enoffidx != 0 || probe->dofpr_nenoffs != 0) { dtrace_dof_error(dof, "is-enabled " "offsets with null section"); return (-1); } } else if (probe->dofpr_enoffidx + probe->dofpr_nenoffs < probe->dofpr_enoffidx || (probe->dofpr_enoffidx + probe->dofpr_nenoffs) * enoff_sec->dofs_entsize > enoff_sec->dofs_size) { dtrace_dof_error(dof, "invalid is-enabled " "offset"); return (-1); } if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) { dtrace_dof_error(dof, "zero probe and " "is-enabled offsets"); return (-1); } } else if (probe->dofpr_noffs == 0) { dtrace_dof_error(dof, "zero probe offsets"); return (-1); } if (probe->dofpr_argidx + probe->dofpr_xargc < probe->dofpr_argidx || (probe->dofpr_argidx + probe->dofpr_xargc) * arg_sec->dofs_entsize > arg_sec->dofs_size) { dtrace_dof_error(dof, "invalid args"); return (-1); } typeidx = probe->dofpr_nargv; typestr = strtab + probe->dofpr_nargv; for (k = 0; k < probe->dofpr_nargc; k++) { if (typeidx >= str_sec->dofs_size) { dtrace_dof_error(dof, "bad " "native argument type"); return (-1); } typesz = strlen(typestr) + 1; if (typesz > DTRACE_ARGTYPELEN) { dtrace_dof_error(dof, "native " "argument type too long"); return (-1); } typeidx += typesz; typestr += typesz; } typeidx = probe->dofpr_xargv; typestr = strtab + probe->dofpr_xargv; for (k = 0; k < probe->dofpr_xargc; k++) { if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) { dtrace_dof_error(dof, "bad " "native argument index"); return (-1); } if (typeidx >= str_sec->dofs_size) { dtrace_dof_error(dof, "bad " "translated argument type"); return (-1); } typesz = strlen(typestr) + 1; if (typesz > DTRACE_ARGTYPELEN) { dtrace_dof_error(dof, "translated argument " "type too long"); return (-1); } typeidx += typesz; typestr += typesz; } } return (0); } static int dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp, struct proc *p) { dtrace_helpers_t *help; dtrace_vstate_t *vstate; dtrace_enabling_t *enab = NULL; int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1; uintptr_t daddr = (uintptr_t)dof; ASSERT(MUTEX_HELD(&dtrace_lock)); if ((help = p->p_dtrace_helpers) == NULL) help = dtrace_helpers_create(p); 
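/*
 * Below, the DOF is slurped into an enabling, any helper provider
 * sections are validated, and each dtrace:helper:ustack ECB
 * description is added as a helper action. Descriptions that do not
 * match are merely reported as "unmatched helpers", but a failure to
 * add a matching one rips out the entire generation.
 */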
vstate = &help->dthps_vstate; if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab, dhp->dofhp_addr, B_FALSE)) != 0) { dtrace_dof_destroy(dof); return (rv); } /* * Look for helper providers and validate their descriptions. */ for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + i * dof->dofh_secsize); if (sec->dofs_type != DOF_SECT_PROVIDER) continue; if (dtrace_helper_provider_validate(dof, sec) != 0) { dtrace_enabling_destroy(enab); dtrace_dof_destroy(dof); return (-1); } nprovs++; } /* * Now we need to walk through the ECB descriptions in the enabling. */ for (i = 0; i < enab->dten_ndesc; i++) { dtrace_ecbdesc_t *ep = enab->dten_desc[i]; dtrace_probedesc_t *desc = &ep->dted_probe; if (strcmp(desc->dtpd_provider, "dtrace") != 0) continue; if (strcmp(desc->dtpd_mod, "helper") != 0) continue; if (strcmp(desc->dtpd_func, "ustack") != 0) continue; if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK, ep, help)) != 0) { /* * Adding this helper action failed -- we are now going * to rip out the entire generation and return failure. */ (void) dtrace_helper_destroygen(help, help->dthps_generation); dtrace_enabling_destroy(enab); dtrace_dof_destroy(dof); return (-1); } nhelpers++; } if (nhelpers < enab->dten_ndesc) dtrace_dof_error(dof, "unmatched helpers"); gen = help->dthps_generation++; dtrace_enabling_destroy(enab); if (nprovs > 0) { /* * Now that this is in-kernel, we change the sense of the * members: dofhp_dof denotes the in-kernel copy of the DOF * and dofhp_addr denotes the address at user-level. */ dhp->dofhp_addr = dhp->dofhp_dof; dhp->dofhp_dof = (uint64_t)(uintptr_t)dof; if (dtrace_helper_provider_add(dhp, help, gen) == 0) { mutex_exit(&dtrace_lock); dtrace_helper_provider_register(p, help, dhp); mutex_enter(&dtrace_lock); destroy = 0; } } if (destroy) dtrace_dof_destroy(dof); return (gen); } static dtrace_helpers_t * dtrace_helpers_create(proc_t *p) { dtrace_helpers_t *help; ASSERT(MUTEX_HELD(&dtrace_lock)); ASSERT(p->p_dtrace_helpers == NULL); help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP); help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS, KM_SLEEP); p->p_dtrace_helpers = help; dtrace_helpers++; return (help); } #ifdef illumos static #endif void dtrace_helpers_destroy(proc_t *p) { dtrace_helpers_t *help; dtrace_vstate_t *vstate; #ifdef illumos proc_t *p = curproc; #endif int i; mutex_enter(&dtrace_lock); ASSERT(p->p_dtrace_helpers != NULL); ASSERT(dtrace_helpers > 0); help = p->p_dtrace_helpers; vstate = &help->dthps_vstate; /* * We're now going to lose the help from this process. */ p->p_dtrace_helpers = NULL; dtrace_sync(); /* * Destory the helper actions. */ for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { dtrace_helper_action_t *h, *next; for (h = help->dthps_actions[i]; h != NULL; h = next) { next = h->dtha_next; dtrace_helper_action_destroy(h, vstate); h = next; } } mutex_exit(&dtrace_lock); /* * Destroy the helper providers. */ if (help->dthps_maxprovs > 0) { mutex_enter(&dtrace_meta_lock); if (dtrace_meta_pid != NULL) { ASSERT(dtrace_deferred_pid == NULL); for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provider_remove( &help->dthps_provs[i]->dthp_prov, p->p_pid); } } else { mutex_enter(&dtrace_lock); ASSERT(help->dthps_deferred == 0 || help->dthps_next != NULL || help->dthps_prev != NULL || help == dtrace_deferred_pid); /* * Remove the helper from the deferred list. 
*/ if (help->dthps_next != NULL) help->dthps_next->dthps_prev = help->dthps_prev; if (help->dthps_prev != NULL) help->dthps_prev->dthps_next = help->dthps_next; if (dtrace_deferred_pid == help) { dtrace_deferred_pid = help->dthps_next; ASSERT(help->dthps_prev == NULL); } mutex_exit(&dtrace_lock); } mutex_exit(&dtrace_meta_lock); for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provider_destroy(help->dthps_provs[i]); } kmem_free(help->dthps_provs, help->dthps_maxprovs * sizeof (dtrace_helper_provider_t *)); } mutex_enter(&dtrace_lock); dtrace_vstate_fini(&help->dthps_vstate); kmem_free(help->dthps_actions, sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS); kmem_free(help, sizeof (dtrace_helpers_t)); --dtrace_helpers; mutex_exit(&dtrace_lock); } #ifdef illumos static #endif void dtrace_helpers_duplicate(proc_t *from, proc_t *to) { dtrace_helpers_t *help, *newhelp; dtrace_helper_action_t *helper, *new, *last; dtrace_difo_t *dp; dtrace_vstate_t *vstate; int i, j, sz, hasprovs = 0; mutex_enter(&dtrace_lock); ASSERT(from->p_dtrace_helpers != NULL); ASSERT(dtrace_helpers > 0); help = from->p_dtrace_helpers; newhelp = dtrace_helpers_create(to); ASSERT(to->p_dtrace_helpers != NULL); newhelp->dthps_generation = help->dthps_generation; vstate = &newhelp->dthps_vstate; /* * Duplicate the helper actions. */ for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) { if ((helper = help->dthps_actions[i]) == NULL) continue; for (last = NULL; helper != NULL; helper = helper->dtha_next) { new = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP); new->dtha_generation = helper->dtha_generation; if ((dp = helper->dtha_predicate) != NULL) { dp = dtrace_difo_duplicate(dp, vstate); new->dtha_predicate = dp; } new->dtha_nactions = helper->dtha_nactions; sz = sizeof (dtrace_difo_t *) * new->dtha_nactions; new->dtha_actions = kmem_alloc(sz, KM_SLEEP); for (j = 0; j < new->dtha_nactions; j++) { dtrace_difo_t *dp = helper->dtha_actions[j]; ASSERT(dp != NULL); dp = dtrace_difo_duplicate(dp, vstate); new->dtha_actions[j] = dp; } if (last != NULL) { last->dtha_next = new; } else { newhelp->dthps_actions[i] = new; } last = new; } } /* * Duplicate the helper providers and register them with the * DTrace framework. */ if (help->dthps_nprovs > 0) { newhelp->dthps_nprovs = help->dthps_nprovs; newhelp->dthps_maxprovs = help->dthps_nprovs; newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs * sizeof (dtrace_helper_provider_t *), KM_SLEEP); for (i = 0; i < newhelp->dthps_nprovs; i++) { newhelp->dthps_provs[i] = help->dthps_provs[i]; newhelp->dthps_provs[i]->dthp_ref++; } hasprovs = 1; } mutex_exit(&dtrace_lock); if (hasprovs) dtrace_helper_provider_register(to, newhelp, NULL); } /* * DTrace Hook Functions */ static void dtrace_module_loaded(modctl_t *ctl) { dtrace_provider_t *prv; mutex_enter(&dtrace_provider_lock); #ifdef illumos mutex_enter(&mod_lock); #endif #ifdef illumos ASSERT(ctl->mod_busy); #endif /* * We're going to call each providers per-module provide operation * specifying only this module. */ for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next) prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_provider_lock); /* * If we have any retained enablings, we need to match against them. * Enabling probes requires that cpu_lock be held, and we cannot hold * cpu_lock here -- it is legal for cpu_lock to be held when loading a * module. (In particular, this happens when loading scheduling * classes.) 
So if we have any retained enablings, we need to dispatch * our task queue to do the match for us. */ mutex_enter(&dtrace_lock); if (dtrace_retained == NULL) { mutex_exit(&dtrace_lock); return; } (void) taskq_dispatch(dtrace_taskq, (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP); mutex_exit(&dtrace_lock); /* * And now, for a little heuristic sleaze: in general, we want to * match modules as soon as they load. However, we cannot guarantee * this, because it would lead us to the lock ordering violation * outlined above. The common case, of course, is that cpu_lock is * _not_ held -- so we delay here for a clock tick, hoping that that's * long enough for the task queue to do its work. If it's not, it's * not a serious problem -- it just means that the module that we * just loaded may not be immediately instrumentable. */ delay(1); } static void #ifdef illumos dtrace_module_unloaded(modctl_t *ctl) #else dtrace_module_unloaded(modctl_t *ctl, int *error) #endif { dtrace_probe_t template, *probe, *first, *next; dtrace_provider_t *prov; #ifndef illumos char modname[DTRACE_MODNAMELEN]; size_t len; #endif #ifdef illumos template.dtpr_mod = ctl->mod_modname; #else /* Handle the fact that ctl->filename may end in ".ko". */ strlcpy(modname, ctl->filename, sizeof(modname)); len = strlen(ctl->filename); if (len > 3 && strcmp(modname + len - 3, ".ko") == 0) modname[len - 3] = '\0'; template.dtpr_mod = modname; #endif mutex_enter(&dtrace_provider_lock); #ifdef illumos mutex_enter(&mod_lock); #endif mutex_enter(&dtrace_lock); #ifndef illumos if (ctl->nenabled > 0) { /* Don't allow unloads if a probe is enabled. */ mutex_exit(&dtrace_provider_lock); mutex_exit(&dtrace_lock); *error = -1; printf( "kldunload: attempt to unload module that has DTrace probes enabled\n"); return; } #endif if (dtrace_bymod == NULL) { /* * The DTrace module is loaded (obviously) but not attached; * we don't have any work to do. */ mutex_exit(&dtrace_provider_lock); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_lock); return; } for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template); probe != NULL; probe = probe->dtpr_nextmod) { if (probe->dtpr_ecb != NULL) { mutex_exit(&dtrace_provider_lock); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_lock); /* * This shouldn't _actually_ be possible -- we're * unloading a module that has an enabled probe in it. * (It's normally up to the provider to make sure that * this can't happen.) However, because dtps_enable() * doesn't have a failure mode, there can be an * enable/unload race. Upshot: we don't want to * assert, but we're not going to disable the * probe, either. */ if (dtrace_err_verbose) { #ifdef illumos cmn_err(CE_WARN, "unloaded module '%s' had " "enabled probes", ctl->mod_modname); #else cmn_err(CE_WARN, "unloaded module '%s' had " "enabled probes", modname); #endif } return; } } probe = first; for (first = NULL; probe != NULL; probe = next) { ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe); dtrace_probes[probe->dtpr_id - 1] = NULL; next = probe->dtpr_nextmod; dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); dtrace_hash_remove(dtrace_byname, probe); if (first == NULL) { first = probe; probe->dtpr_nextmod = NULL; } else { probe->dtpr_nextmod = first; first = probe; } } /* * We've removed all of the module's probes from the hash chains and * from the probe array. Now issue a dtrace_sync() to be sure that * everyone has cleared out from any probe array processing. 
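 *
 * This is the classic unhook/sync/free discipline: the hash and array
 * removals above make the probes unreachable, the dtrace_sync() below
 * drains any dtrace_probe() call still executing against them, and only
 * then is the memory reclaimed.  Sketched with hypothetical names:
 *
 *	unhook_probes(mod);	(no new lookups can find them)
 *	dtrace_sync();		(wait out in-flight probe firings)
 *	free_probes(mod);	(now safe to reclaim)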
*/ dtrace_sync(); for (probe = first; probe != NULL; probe = first) { first = probe->dtpr_nextmod; prov = probe->dtpr_provider; prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); #ifdef illumos vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1); #else free_unr(dtrace_arena, probe->dtpr_id); #endif kmem_free(probe, sizeof (dtrace_probe_t)); } mutex_exit(&dtrace_lock); #ifdef illumos mutex_exit(&mod_lock); #endif mutex_exit(&dtrace_provider_lock); } #ifndef illumos static void dtrace_kld_load(void *arg __unused, linker_file_t lf) { dtrace_module_loaded(lf); } static void dtrace_kld_unload_try(void *arg __unused, linker_file_t lf, int *error) { if (*error != 0) /* We already have an error, so don't do anything. */ return; dtrace_module_unloaded(lf, error); } #endif #ifdef illumos static void dtrace_suspend(void) { dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend)); } static void dtrace_resume(void) { dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume)); } #endif static int dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu) { ASSERT(MUTEX_HELD(&cpu_lock)); mutex_enter(&dtrace_lock); switch (what) { case CPU_CONFIG: { dtrace_state_t *state; dtrace_optval_t *opt, rs, c; /* * For now, we only allocate a new buffer for anonymous state. */ if ((state = dtrace_anon.dta_state) == NULL) break; if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) break; opt = state->dts_options; c = opt[DTRACEOPT_CPU]; if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu) break; /* * Regardless of what the actual policy is, we're going to * temporarily set our resize policy to be manual. We're * also going to temporarily set our CPU option to denote * the newly configured CPU. */ rs = opt[DTRACEOPT_BUFRESIZE]; opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL; opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu; (void) dtrace_state_buffers(state); opt[DTRACEOPT_BUFRESIZE] = rs; opt[DTRACEOPT_CPU] = c; break; } case CPU_UNCONFIG: /* * We don't free the buffer in the CPU_UNCONFIG case. (The * buffer will be freed when the consumer exits.) 
*/ break; default: break; } mutex_exit(&dtrace_lock); return (0); } #ifdef illumos static void dtrace_cpu_setup_initial(processorid_t cpu) { (void) dtrace_cpu_setup(CPU_CONFIG, cpu); } #endif static void dtrace_toxrange_add(uintptr_t base, uintptr_t limit) { if (dtrace_toxranges >= dtrace_toxranges_max) { int osize, nsize; dtrace_toxrange_t *range; osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t); if (osize == 0) { ASSERT(dtrace_toxrange == NULL); ASSERT(dtrace_toxranges_max == 0); dtrace_toxranges_max = 1; } else { dtrace_toxranges_max <<= 1; } nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t); range = kmem_zalloc(nsize, KM_SLEEP); if (dtrace_toxrange != NULL) { ASSERT(osize != 0); bcopy(dtrace_toxrange, range, osize); kmem_free(dtrace_toxrange, osize); } dtrace_toxrange = range; } ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == 0); ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == 0); dtrace_toxrange[dtrace_toxranges].dtt_base = base; dtrace_toxrange[dtrace_toxranges].dtt_limit = limit; dtrace_toxranges++; } static void dtrace_getf_barrier() { #ifdef illumos /* * When we have unprivileged (that is, non-DTRACE_CRV_KERNEL) enablings * that contain calls to getf(), this routine will be called on every * closef() before either the underlying vnode is released or the * file_t itself is freed. By the time we are here, it is essential * that the file_t can no longer be accessed from a call to getf() * in probe context -- that assures that a dtrace_sync() can be used * to clear out any enablings referring to the old structures. */ if (curthread->t_procp->p_zone->zone_dtrace_getf != 0 || kcred->cr_zone->zone_dtrace_getf != 0) dtrace_sync(); #endif } /* * DTrace Driver Cookbook Functions */ #ifdef illumos /*ARGSUSED*/ static int dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) { dtrace_provider_id_t id; dtrace_state_t *state = NULL; dtrace_enabling_t *enab; mutex_enter(&cpu_lock); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); if (ddi_soft_state_init(&dtrace_softstate, sizeof (dtrace_state_t), 0) != 0) { cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state"); mutex_exit(&cpu_lock); mutex_exit(&dtrace_provider_lock); mutex_exit(&dtrace_lock); return (DDI_FAILURE); } if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR, DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE || ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR, DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) { cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes"); ddi_remove_minor_node(devi, NULL); ddi_soft_state_fini(&dtrace_softstate); mutex_exit(&cpu_lock); mutex_exit(&dtrace_provider_lock); mutex_exit(&dtrace_lock); return (DDI_FAILURE); } ddi_report_dev(devi); dtrace_devi = devi; dtrace_modload = dtrace_module_loaded; dtrace_modunload = dtrace_module_unloaded; dtrace_cpu_init = dtrace_cpu_setup_initial; dtrace_helpers_cleanup = dtrace_helpers_destroy; dtrace_helpers_fork = dtrace_helpers_duplicate; dtrace_cpustart_init = dtrace_suspend; dtrace_cpustart_fini = dtrace_resume; dtrace_debugger_init = dtrace_suspend; dtrace_debugger_fini = dtrace_resume; register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); ASSERT(MUTEX_HELD(&cpu_lock)); dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE, UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); dtrace_taskq = taskq_create("dtrace_taskq", 1, 
maxclsyspri, 1, INT_MAX, 0); dtrace_state_cache = kmem_cache_create("dtrace_state_cache", sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN, NULL, NULL, NULL, NULL, NULL, 0); ASSERT(MUTEX_HELD(&cpu_lock)); dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod), offsetof(dtrace_probe_t, dtpr_nextmod), offsetof(dtrace_probe_t, dtpr_prevmod)); dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func), offsetof(dtrace_probe_t, dtpr_nextfunc), offsetof(dtrace_probe_t, dtpr_prevfunc)); dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name), offsetof(dtrace_probe_t, dtpr_nextname), offsetof(dtrace_probe_t, dtpr_prevname)); if (dtrace_retain_max < 1) { cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; " "setting to 1", dtrace_retain_max); dtrace_retain_max = 1; } /* * Now discover our toxic ranges. */ dtrace_toxic_ranges(dtrace_toxrange_add); /* * Before we register ourselves as a provider to our own framework, * we would like to assert that dtrace_provider is NULL -- but that's * not true if we were loaded as a dependency of a DTrace provider. * Once we've registered, we can assert that dtrace_provider is our * pseudo provider. */ (void) dtrace_register("dtrace", &dtrace_provider_attr, DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id); ASSERT(dtrace_provider != NULL); ASSERT((dtrace_provider_id_t)dtrace_provider == id); dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "BEGIN", 0, NULL); dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "END", 0, NULL); dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "ERROR", 1, NULL); dtrace_anon_property(); mutex_exit(&cpu_lock); /* * If there are already providers, we must ask them to provide their * probes, and then match any anonymous enabling against them. Note * that there should be no other retained enablings at this time: * the only retained enablings at this time should be the anonymous * enabling. */ if (dtrace_anon.dta_enabling != NULL) { ASSERT(dtrace_retained == dtrace_anon.dta_enabling); dtrace_enabling_provide(NULL); state = dtrace_anon.dta_state; /* * We couldn't hold cpu_lock across the above call to * dtrace_enabling_provide(), but we must hold it to actually * enable the probes. We have to drop all of our locks, pick * up cpu_lock, and regain our locks before matching the * retained anonymous enabling. */ mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); mutex_enter(&cpu_lock); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); if ((enab = dtrace_anon.dta_enabling) != NULL) (void) dtrace_enabling_match(enab, NULL); mutex_exit(&cpu_lock); } mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); if (state != NULL) { /* * If we created any anonymous state, set it going now. */ (void) dtrace_state_go(state, &dtrace_anon.dta_beganon); } return (DDI_SUCCESS); } #endif /* illumos */ #ifndef illumos static void dtrace_dtr(void *); #endif /*ARGSUSED*/ static int #ifdef illumos dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) #else dtrace_open(struct cdev *dev, int oflags, int devtype, struct thread *td) #endif { dtrace_state_t *state; uint32_t priv; uid_t uid; zoneid_t zoneid; #ifdef illumos if (getminor(*devp) == DTRACEMNRN_HELPER) return (0); /* * If this wasn't an open with the "helper" minor, then it must be * the "dtrace" minor. 
*/ if (getminor(*devp) == DTRACEMNRN_DTRACE) return (ENXIO); #else cred_t *cred_p = NULL; cred_p = dev->si_cred; /* * If no DTRACE_PRIV_* bits are set in the credential, then the * caller lacks sufficient permission to do anything with DTrace. */ dtrace_cred2priv(cred_p, &priv, &uid, &zoneid); if (priv == DTRACE_PRIV_NONE) { #endif return (EACCES); } /* * Ask all providers to provide all their probes. */ mutex_enter(&dtrace_provider_lock); dtrace_probe_provide(NULL, NULL); mutex_exit(&dtrace_provider_lock); mutex_enter(&cpu_lock); mutex_enter(&dtrace_lock); dtrace_opens++; dtrace_membar_producer(); #ifdef illumos /* * If the kernel debugger is active (that is, if the kernel debugger * modified text in some way), we won't allow the open. */ if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) { dtrace_opens--; mutex_exit(&cpu_lock); mutex_exit(&dtrace_lock); return (EBUSY); } if (dtrace_helptrace_enable && dtrace_helptrace_buffer == NULL) { /* * If DTrace helper tracing is enabled, we need to allocate the * trace buffer and initialize the values. */ dtrace_helptrace_buffer = kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP); dtrace_helptrace_next = 0; dtrace_helptrace_wrapped = 0; dtrace_helptrace_enable = 0; } state = dtrace_state_create(devp, cred_p); #else state = dtrace_state_create(dev, NULL); devfs_set_cdevpriv(state, dtrace_dtr); #endif mutex_exit(&cpu_lock); if (state == NULL) { #ifdef illumos if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); #else --dtrace_opens; #endif mutex_exit(&dtrace_lock); return (EAGAIN); } mutex_exit(&dtrace_lock); return (0); } /*ARGSUSED*/ #ifdef illumos static int dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p) #else static void dtrace_dtr(void *data) #endif { #ifdef illumos minor_t minor = getminor(dev); dtrace_state_t *state; #endif dtrace_helptrace_t *buf = NULL; #ifdef illumos if (minor == DTRACEMNRN_HELPER) return (0); state = ddi_get_soft_state(dtrace_softstate, minor); #else dtrace_state_t *state = data; #endif mutex_enter(&cpu_lock); mutex_enter(&dtrace_lock); #ifdef illumos if (state->dts_anon) #else if (state != NULL && state->dts_anon) #endif { /* * There is anonymous state. Destroy that first. */ ASSERT(dtrace_anon.dta_state == NULL); dtrace_state_destroy(state->dts_anon); } if (dtrace_helptrace_disable) { /* * If we have been told to disable helper tracing, set the * buffer to NULL before calling into dtrace_state_destroy(); * we take advantage of its dtrace_sync() to know that no * CPU is in probe context with enabled helper tracing * after it returns. */ buf = dtrace_helptrace_buffer; dtrace_helptrace_buffer = NULL; } #ifdef illumos dtrace_state_destroy(state); #else if (state != NULL) { dtrace_state_destroy(state); kmem_free(state, 0); } #endif ASSERT(dtrace_opens > 0); #ifdef illumos /* * Only relinquish control of the kernel debugger interface when there * are no consumers and no anonymous enablings. 
*/ if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); #else --dtrace_opens; #endif if (buf != NULL) { kmem_free(buf, dtrace_helptrace_bufsize); dtrace_helptrace_disable = 0; } mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); #ifdef illumos return (0); #endif } #ifdef illumos /*ARGSUSED*/ static int dtrace_ioctl_helper(int cmd, intptr_t arg, int *rv) { int rval; dof_helper_t help, *dhp = NULL; switch (cmd) { case DTRACEHIOC_ADDDOF: if (copyin((void *)arg, &help, sizeof (help)) != 0) { dtrace_dof_error(NULL, "failed to copyin DOF helper"); return (EFAULT); } dhp = &help; arg = (intptr_t)help.dofhp_dof; /*FALLTHROUGH*/ case DTRACEHIOC_ADD: { dof_hdr_t *dof = dtrace_dof_copyin(arg, &rval); if (dof == NULL) return (rval); mutex_enter(&dtrace_lock); /* * dtrace_helper_slurp() takes responsibility for the dof -- * it may free it now or it may save it and free it later. */ if ((rval = dtrace_helper_slurp(dof, dhp)) != -1) { *rv = rval; rval = 0; } else { rval = EINVAL; } mutex_exit(&dtrace_lock); return (rval); } case DTRACEHIOC_REMOVE: { mutex_enter(&dtrace_lock); rval = dtrace_helper_destroygen(NULL, arg); mutex_exit(&dtrace_lock); return (rval); } default: break; } return (ENOTTY); } /*ARGSUSED*/ static int dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) { minor_t minor = getminor(dev); dtrace_state_t *state; int rval; if (minor == DTRACEMNRN_HELPER) return (dtrace_ioctl_helper(cmd, arg, rv)); state = ddi_get_soft_state(dtrace_softstate, minor); if (state->dts_anon) { ASSERT(dtrace_anon.dta_state == NULL); state = state->dts_anon; } switch (cmd) { case DTRACEIOC_PROVIDER: { dtrace_providerdesc_t pvd; dtrace_provider_t *pvp; if (copyin((void *)arg, &pvd, sizeof (pvd)) != 0) return (EFAULT); pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0'; mutex_enter(&dtrace_provider_lock); for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) { if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0) break; } mutex_exit(&dtrace_provider_lock); if (pvp == NULL) return (ESRCH); bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t)); bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t)); if (copyout(&pvd, (void *)arg, sizeof (pvd)) != 0) return (EFAULT); return (0); } case DTRACEIOC_EPROBE: { dtrace_eprobedesc_t epdesc; dtrace_ecb_t *ecb; dtrace_action_t *act; void *buf; size_t size; uintptr_t dest; int nrecs; if (copyin((void *)arg, &epdesc, sizeof (epdesc)) != 0) return (EFAULT); mutex_enter(&dtrace_lock); if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) { mutex_exit(&dtrace_lock); return (EINVAL); } if (ecb->dte_probe == NULL) { mutex_exit(&dtrace_lock); return (EINVAL); } epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id; epdesc.dtepd_uarg = ecb->dte_uarg; epdesc.dtepd_size = ecb->dte_size; nrecs = epdesc.dtepd_nrecs; epdesc.dtepd_nrecs = 0; for (act = ecb->dte_action; act != NULL; act = act->dta_next) { if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) continue; epdesc.dtepd_nrecs++; } /* * Now that we have the size, we need to allocate a temporary * buffer in which to store the complete description. We need * the temporary buffer to be able to drop dtrace_lock() * across the copyout(), below. 
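 *
 * copyout() can fault on the user address and sleep, so it must not run
 * with dtrace_lock held.  The shape of the idiom, with hypothetical
 * names:
 *
 *	buf = kmem_alloc(size, KM_SLEEP);
 *	snapshot_into(buf);			(still under the lock)
 *	mutex_exit(&dtrace_lock);
 *	error = copyout(buf, uaddr, size);	(lock already dropped)
 *	kmem_free(buf, size);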
*/ size = sizeof (dtrace_eprobedesc_t) + (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t)); buf = kmem_alloc(size, KM_SLEEP); dest = (uintptr_t)buf; bcopy(&epdesc, (void *)dest, sizeof (epdesc)); dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]); for (act = ecb->dte_action; act != NULL; act = act->dta_next) { if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple) continue; if (nrecs-- == 0) break; bcopy(&act->dta_rec, (void *)dest, sizeof (dtrace_recdesc_t)); dest += sizeof (dtrace_recdesc_t); } mutex_exit(&dtrace_lock); if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { kmem_free(buf, size); return (EFAULT); } kmem_free(buf, size); return (0); } case DTRACEIOC_AGGDESC: { dtrace_aggdesc_t aggdesc; dtrace_action_t *act; dtrace_aggregation_t *agg; int nrecs; uint32_t offs; dtrace_recdesc_t *lrec; void *buf; size_t size; uintptr_t dest; if (copyin((void *)arg, &aggdesc, sizeof (aggdesc)) != 0) return (EFAULT); mutex_enter(&dtrace_lock); if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) { mutex_exit(&dtrace_lock); return (EINVAL); } aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid; nrecs = aggdesc.dtagd_nrecs; aggdesc.dtagd_nrecs = 0; offs = agg->dtag_base; lrec = &agg->dtag_action.dta_rec; aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs; for (act = agg->dtag_first; ; act = act->dta_next) { ASSERT(act->dta_intuple || DTRACEACT_ISAGG(act->dta_kind)); /* * If this action has a record size of zero, it * denotes an argument to the aggregating action. * Because the presence of this record doesn't (or * shouldn't) affect the way the data is interpreted, * we don't copy it out to save user-level the * confusion of dealing with a zero-length record. */ if (act->dta_rec.dtrd_size == 0) { ASSERT(agg->dtag_hasarg); continue; } aggdesc.dtagd_nrecs++; if (act == &agg->dtag_action) break; } /* * Now that we have the size, we need to allocate a temporary * buffer in which to store the complete description. We need * the temporary buffer to be able to drop dtrace_lock() * across the copyout(), below. */ size = sizeof (dtrace_aggdesc_t) + (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t)); buf = kmem_alloc(size, KM_SLEEP); dest = (uintptr_t)buf; bcopy(&aggdesc, (void *)dest, sizeof (aggdesc)); dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]); for (act = agg->dtag_first; ; act = act->dta_next) { dtrace_recdesc_t rec = act->dta_rec; /* * See the comment in the above loop for why we pass * over zero-length records. */ if (rec.dtrd_size == 0) { ASSERT(agg->dtag_hasarg); continue; } if (nrecs-- == 0) break; rec.dtrd_offset -= offs; bcopy(&rec, (void *)dest, sizeof (rec)); dest += sizeof (dtrace_recdesc_t); if (act == &agg->dtag_action) break; } mutex_exit(&dtrace_lock); if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { kmem_free(buf, size); return (EFAULT); } kmem_free(buf, size); return (0); } case DTRACEIOC_ENABLE: { dof_hdr_t *dof; dtrace_enabling_t *enab = NULL; dtrace_vstate_t *vstate; int err = 0; *rv = 0; /* * If a NULL argument has been passed, we take this as our * cue to reevaluate our enablings. 
*/ if (arg == NULL) { dtrace_enabling_matchall(); return (0); } if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL) return (rval); mutex_enter(&cpu_lock); mutex_enter(&dtrace_lock); vstate = &state->dts_vstate; if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) { mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); dtrace_dof_destroy(dof); return (EBUSY); } if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) { mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); dtrace_dof_destroy(dof); return (EINVAL); } if ((rval = dtrace_dof_options(dof, state)) != 0) { dtrace_enabling_destroy(enab); mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); dtrace_dof_destroy(dof); return (rval); } if ((err = dtrace_enabling_match(enab, rv)) == 0) { err = dtrace_enabling_retain(enab); } else { dtrace_enabling_destroy(enab); } mutex_exit(&cpu_lock); mutex_exit(&dtrace_lock); dtrace_dof_destroy(dof); return (err); } case DTRACEIOC_REPLICATE: { dtrace_repldesc_t desc; dtrace_probedesc_t *match = &desc.dtrpd_match; dtrace_probedesc_t *create = &desc.dtrpd_create; int err; if (copyin((void *)arg, &desc, sizeof (desc)) != 0) return (EFAULT); match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; match->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; create->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; mutex_enter(&dtrace_lock); err = dtrace_enabling_replicate(state, match, create); mutex_exit(&dtrace_lock); return (err); } case DTRACEIOC_PROBEMATCH: case DTRACEIOC_PROBES: { dtrace_probe_t *probe = NULL; dtrace_probedesc_t desc; dtrace_probekey_t pkey; dtrace_id_t i; int m = 0; uint32_t priv; uid_t uid; zoneid_t zoneid; if (copyin((void *)arg, &desc, sizeof (desc)) != 0) return (EFAULT); desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0'; /* * Before we attempt to match this probe, we want to give * all providers the opportunity to provide it. 
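 *
 * Probe creation is lazy, so dtrace_probe_provide() below walks every
 * registered provider and invokes its provide entry point, roughly:
 *
 *	for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
 *		prv->dtpv_pops.dtps_provide(prv->dtpv_arg, &desc);
 *
 * so that a probe which has never yet been created can still be matched.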
*/ if (desc.dtpd_id == DTRACE_IDNONE) { mutex_enter(&dtrace_provider_lock); dtrace_probe_provide(&desc, NULL); mutex_exit(&dtrace_provider_lock); desc.dtpd_id++; } if (cmd == DTRACEIOC_PROBEMATCH) { dtrace_probekey(&desc, &pkey); pkey.dtpk_id = DTRACE_IDNONE; } dtrace_cred2priv(cr, &priv, &uid, &zoneid); mutex_enter(&dtrace_lock); if (cmd == DTRACEIOC_PROBEMATCH) { for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { if ((probe = dtrace_probes[i - 1]) != NULL && (m = dtrace_match_probe(probe, &pkey, priv, uid, zoneid)) != 0) break; } if (m < 0) { mutex_exit(&dtrace_lock); return (EINVAL); } } else { for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { if ((probe = dtrace_probes[i - 1]) != NULL && dtrace_match_priv(probe, priv, uid, zoneid)) break; } } if (probe == NULL) { mutex_exit(&dtrace_lock); return (ESRCH); } dtrace_probe_description(probe, &desc); mutex_exit(&dtrace_lock); if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) return (EFAULT); return (0); } case DTRACEIOC_PROBEARG: { dtrace_argdesc_t desc; dtrace_probe_t *probe; dtrace_provider_t *prov; if (copyin((void *)arg, &desc, sizeof (desc)) != 0) return (EFAULT); if (desc.dtargd_id == DTRACE_IDNONE) return (EINVAL); if (desc.dtargd_ndx == DTRACE_ARGNONE) return (EINVAL); mutex_enter(&dtrace_provider_lock); mutex_enter(&mod_lock); mutex_enter(&dtrace_lock); if (desc.dtargd_id > dtrace_nprobes) { mutex_exit(&dtrace_lock); mutex_exit(&mod_lock); mutex_exit(&dtrace_provider_lock); return (EINVAL); } if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) { mutex_exit(&dtrace_lock); mutex_exit(&mod_lock); mutex_exit(&dtrace_provider_lock); return (EINVAL); } mutex_exit(&dtrace_lock); prov = probe->dtpr_provider; if (prov->dtpv_pops.dtps_getargdesc == NULL) { /* * There isn't any typed information for this probe. * Set the argument number to DTRACE_ARGNONE. */ desc.dtargd_ndx = DTRACE_ARGNONE; } else { desc.dtargd_native[0] = '\0'; desc.dtargd_xlate[0] = '\0'; desc.dtargd_mapping = desc.dtargd_ndx; prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg, &desc); } mutex_exit(&mod_lock); mutex_exit(&dtrace_provider_lock); if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) return (EFAULT); return (0); } case DTRACEIOC_GO: { processorid_t cpuid; rval = dtrace_state_go(state, &cpuid); if (rval != 0) return (rval); if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) return (EFAULT); return (0); } case DTRACEIOC_STOP: { processorid_t cpuid; mutex_enter(&dtrace_lock); rval = dtrace_state_stop(state, &cpuid); mutex_exit(&dtrace_lock); if (rval != 0) return (rval); if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) return (EFAULT); return (0); } case DTRACEIOC_DOFGET: { dof_hdr_t hdr, *dof; uint64_t len; if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0) return (EFAULT); mutex_enter(&dtrace_lock); dof = dtrace_dof_create(state); mutex_exit(&dtrace_lock); len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz); rval = copyout(dof, (void *)arg, len); dtrace_dof_destroy(dof); return (rval == 0 ? 
0 : EFAULT); } case DTRACEIOC_AGGSNAP: case DTRACEIOC_BUFSNAP: { dtrace_bufdesc_t desc; caddr_t cached; dtrace_buffer_t *buf; if (copyin((void *)arg, &desc, sizeof (desc)) != 0) return (EFAULT); if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU) return (EINVAL); mutex_enter(&dtrace_lock); if (cmd == DTRACEIOC_BUFSNAP) { buf = &state->dts_buffer[desc.dtbd_cpu]; } else { buf = &state->dts_aggbuffer[desc.dtbd_cpu]; } if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) { size_t sz = buf->dtb_offset; if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) { mutex_exit(&dtrace_lock); return (EBUSY); } /* * If this buffer has already been consumed, we're * going to indicate that there's nothing left here * to consume. */ if (buf->dtb_flags & DTRACEBUF_CONSUMED) { mutex_exit(&dtrace_lock); desc.dtbd_size = 0; desc.dtbd_drops = 0; desc.dtbd_errors = 0; desc.dtbd_oldest = 0; sz = sizeof (desc); if (copyout(&desc, (void *)arg, sz) != 0) return (EFAULT); return (0); } /* * If this is a ring buffer that has wrapped, we want * to copy the whole thing out. */ if (buf->dtb_flags & DTRACEBUF_WRAPPED) { dtrace_buffer_polish(buf); sz = buf->dtb_size; } if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) { mutex_exit(&dtrace_lock); return (EFAULT); } desc.dtbd_size = sz; desc.dtbd_drops = buf->dtb_drops; desc.dtbd_errors = buf->dtb_errors; desc.dtbd_oldest = buf->dtb_xamot_offset; desc.dtbd_timestamp = dtrace_gethrtime(); mutex_exit(&dtrace_lock); if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) return (EFAULT); buf->dtb_flags |= DTRACEBUF_CONSUMED; return (0); } if (buf->dtb_tomax == NULL) { ASSERT(buf->dtb_xamot == NULL); mutex_exit(&dtrace_lock); return (ENOENT); } cached = buf->dtb_tomax; ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); dtrace_xcall(desc.dtbd_cpu, (dtrace_xcall_t)dtrace_buffer_switch, buf); state->dts_errors += buf->dtb_xamot_errors; /* * If the buffers did not actually switch, then the cross call * did not take place -- presumably because the given CPU is * not in the ready set. If this is the case, we'll return * ENOENT. */ if (buf->dtb_tomax == cached) { ASSERT(buf->dtb_xamot != cached); mutex_exit(&dtrace_lock); return (ENOENT); } ASSERT(cached == buf->dtb_xamot); /* * We have our snapshot; now copy it out. */ if (copyout(buf->dtb_xamot, desc.dtbd_data, buf->dtb_xamot_offset) != 0) { mutex_exit(&dtrace_lock); return (EFAULT); } desc.dtbd_size = buf->dtb_xamot_offset; desc.dtbd_drops = buf->dtb_xamot_drops; desc.dtbd_errors = buf->dtb_xamot_errors; desc.dtbd_oldest = 0; desc.dtbd_timestamp = buf->dtb_switched; mutex_exit(&dtrace_lock); /* * Finally, copy out the buffer description. */ if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) return (EFAULT); return (0); } case DTRACEIOC_CONF: { dtrace_conf_t conf; bzero(&conf, sizeof (conf)); conf.dtc_difversion = DIF_VERSION; conf.dtc_difintregs = DIF_DIR_NREGS; conf.dtc_diftupregs = DIF_DTR_NREGS; conf.dtc_ctfmodel = CTF_MODEL_NATIVE; if (copyout(&conf, (void *)arg, sizeof (conf)) != 0) return (EFAULT); return (0); } case DTRACEIOC_STATUS: { dtrace_status_t stat; dtrace_dstate_t *dstate; int i, j; uint64_t nerrs; /* * See the comment in dtrace_state_deadman() for the reason * for setting dts_laststatus to INT64_MAX before setting * it to the correct value. 
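 *
 * (The short version, per the dtrace_state_deadman() comment: INT64_MAX
 * acts as an "update in progress" sentinel, and dtrace_membar_producer()
 * keeps the two stores below ordered -- the usual producer-side publish
 * pattern -- so the deadman can never mistake a half-written timestamp
 * for a stale one.)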
*/ state->dts_laststatus = INT64_MAX; dtrace_membar_producer(); state->dts_laststatus = dtrace_gethrtime(); bzero(&stat, sizeof (stat)); mutex_enter(&dtrace_lock); if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) { mutex_exit(&dtrace_lock); return (ENOENT); } if (state->dts_activity == DTRACE_ACTIVITY_DRAINING) stat.dtst_exiting = 1; nerrs = state->dts_errors; dstate = &state->dts_vstate.dtvs_dynvars; for (i = 0; i < NCPU; i++) { dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i]; stat.dtst_dyndrops += dcpu->dtdsc_drops; stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops; stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops; if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL) stat.dtst_filled++; nerrs += state->dts_buffer[i].dtb_errors; for (j = 0; j < state->dts_nspeculations; j++) { dtrace_speculation_t *spec; dtrace_buffer_t *buf; spec = &state->dts_speculations[j]; buf = &spec->dtsp_buffer[i]; stat.dtst_specdrops += buf->dtb_xamot_drops; } } stat.dtst_specdrops_busy = state->dts_speculations_busy; stat.dtst_specdrops_unavail = state->dts_speculations_unavail; stat.dtst_stkstroverflows = state->dts_stkstroverflows; stat.dtst_dblerrors = state->dts_dblerrors; stat.dtst_killed = (state->dts_activity == DTRACE_ACTIVITY_KILLED); stat.dtst_errors = nerrs; mutex_exit(&dtrace_lock); if (copyout(&stat, (void *)arg, sizeof (stat)) != 0) return (EFAULT); return (0); } case DTRACEIOC_FORMAT: { dtrace_fmtdesc_t fmt; char *str; int len; if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0) return (EFAULT); mutex_enter(&dtrace_lock); if (fmt.dtfd_format == 0 || fmt.dtfd_format > state->dts_nformats) { mutex_exit(&dtrace_lock); return (EINVAL); } /* * Format strings are allocated contiguously and they are * never freed; if a format index is less than the number * of formats, we can assert that the format map is non-NULL * and that the format for the specified index is non-NULL. */ ASSERT(state->dts_formats != NULL); str = state->dts_formats[fmt.dtfd_format - 1]; ASSERT(str != NULL); len = strlen(str) + 1; if (len > fmt.dtfd_length) { fmt.dtfd_length = len; if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) { mutex_exit(&dtrace_lock); return (EINVAL); } } else { if (copyout(str, fmt.dtfd_string, len) != 0) { mutex_exit(&dtrace_lock); return (EINVAL); } } mutex_exit(&dtrace_lock); return (0); } default: break; } return (ENOTTY); } /*ARGSUSED*/ static int dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) { dtrace_state_t *state; switch (cmd) { case DDI_DETACH: break; case DDI_SUSPEND: return (DDI_SUCCESS); default: return (DDI_FAILURE); } mutex_enter(&cpu_lock); mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); ASSERT(dtrace_opens == 0); if (dtrace_helpers > 0) { mutex_exit(&dtrace_provider_lock); mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); return (DDI_FAILURE); } if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) { mutex_exit(&dtrace_provider_lock); mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); return (DDI_FAILURE); } dtrace_provider = NULL; if ((state = dtrace_anon_grab()) != NULL) { /* * If there were ECBs on this state, the provider should * have not been allowed to detach; assert that there is * none. */ ASSERT(state->dts_necbs == 0); dtrace_state_destroy(state); /* * If we're being detached with anonymous state, we need to * indicate to the kernel debugger that DTrace is now inactive. 
*/ (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); } bzero(&dtrace_anon, sizeof (dtrace_anon_t)); unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); dtrace_cpu_init = NULL; dtrace_helpers_cleanup = NULL; dtrace_helpers_fork = NULL; dtrace_cpustart_init = NULL; dtrace_cpustart_fini = NULL; dtrace_debugger_init = NULL; dtrace_debugger_fini = NULL; dtrace_modload = NULL; dtrace_modunload = NULL; ASSERT(dtrace_getf == 0); ASSERT(dtrace_closef == NULL); mutex_exit(&cpu_lock); kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *)); dtrace_probes = NULL; dtrace_nprobes = 0; dtrace_hash_destroy(dtrace_bymod); dtrace_hash_destroy(dtrace_byfunc); dtrace_hash_destroy(dtrace_byname); dtrace_bymod = NULL; dtrace_byfunc = NULL; dtrace_byname = NULL; kmem_cache_destroy(dtrace_state_cache); vmem_destroy(dtrace_minor); vmem_destroy(dtrace_arena); if (dtrace_toxrange != NULL) { kmem_free(dtrace_toxrange, dtrace_toxranges_max * sizeof (dtrace_toxrange_t)); dtrace_toxrange = NULL; dtrace_toxranges = 0; dtrace_toxranges_max = 0; } ddi_remove_minor_node(dtrace_devi, NULL); dtrace_devi = NULL; ddi_soft_state_fini(&dtrace_softstate); ASSERT(dtrace_vtime_references == 0); ASSERT(dtrace_opens == 0); ASSERT(dtrace_retained == NULL); mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); /* * We don't destroy the task queue until after we have dropped our * locks (taskq_destroy() may block on running tasks). To prevent * attempting to do work after we have effectively detached but before * the task queue has been destroyed, all tasks dispatched via the * task queue must check that DTrace is still attached before * performing any operation. */ taskq_destroy(dtrace_taskq); dtrace_taskq = NULL; return (DDI_SUCCESS); } #endif #ifdef illumos /*ARGSUSED*/ static int dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) { int error; switch (infocmd) { case DDI_INFO_DEVT2DEVINFO: *result = (void *)dtrace_devi; error = DDI_SUCCESS; break; case DDI_INFO_DEVT2INSTANCE: *result = (void *)0; error = DDI_SUCCESS; break; default: error = DDI_FAILURE; } return (error); } #endif #ifdef illumos static struct cb_ops dtrace_cb_ops = { dtrace_open, /* open */ dtrace_close, /* close */ nulldev, /* strategy */ nulldev, /* print */ nodev, /* dump */ nodev, /* read */ nodev, /* write */ dtrace_ioctl, /* ioctl */ nodev, /* devmap */ nodev, /* mmap */ nodev, /* segmap */ nochpoll, /* poll */ ddi_prop_op, /* cb_prop_op */ 0, /* streamtab */ D_NEW | D_MP /* Driver compatibility flag */ }; static struct dev_ops dtrace_ops = { DEVO_REV, /* devo_rev */ 0, /* refcnt */ dtrace_info, /* get_dev_info */ nulldev, /* identify */ nulldev, /* probe */ dtrace_attach, /* attach */ dtrace_detach, /* detach */ nodev, /* reset */ &dtrace_cb_ops, /* driver operations */ NULL, /* bus operations */ nodev /* dev power */ }; static struct modldrv modldrv = { &mod_driverops, /* module type (this is a pseudo driver) */ "Dynamic Tracing", /* name of module */ &dtrace_ops, /* driver ops */ }; static struct modlinkage modlinkage = { MODREV_1, (void *)&modldrv, NULL }; int _init(void) { return (mod_install(&modlinkage)); } int _info(struct modinfo *modinfop) { return (mod_info(&modlinkage, modinfop)); } int _fini(void) { return (mod_remove(&modlinkage)); } #else static d_ioctl_t dtrace_ioctl; static d_ioctl_t dtrace_ioctl_helper; static void dtrace_load(void *); static int dtrace_unload(void); static struct cdev *dtrace_dev; static struct cdev *helper_dev; void dtrace_invop_init(void); void 
dtrace_invop_uninit(void); static struct cdevsw dtrace_cdevsw = { .d_version = D_VERSION, .d_ioctl = dtrace_ioctl, .d_open = dtrace_open, .d_name = "dtrace", }; static struct cdevsw helper_cdevsw = { .d_version = D_VERSION, .d_ioctl = dtrace_ioctl_helper, .d_name = "helper", }; #include #include #include #include #include #include #include #include #include SYSINIT(dtrace_load, SI_SUB_DTRACE, SI_ORDER_FIRST, dtrace_load, NULL); SYSUNINIT(dtrace_unload, SI_SUB_DTRACE, SI_ORDER_FIRST, dtrace_unload, NULL); SYSINIT(dtrace_anon_init, SI_SUB_DTRACE_ANON, SI_ORDER_FIRST, dtrace_anon_init, NULL); DEV_MODULE(dtrace, dtrace_modevent, NULL); MODULE_VERSION(dtrace, 1); MODULE_DEPEND(dtrace, opensolaris, 1, 1, 1); #endif Index: projects/ipsec/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace_xoroshiro128_plus.c =================================================================== --- projects/ipsec/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace_xoroshiro128_plus.c (nonexistent) +++ projects/ipsec/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace_xoroshiro128_plus.c (revision 313187) @@ -0,0 +1,89 @@ +/*- + * Copyright (c) 2016 (Graeme Jenkinson) + * All rights reserved. + * + * This software was developed by BAE Systems, the University of Cambridge + * Computer Laboratory, and Memorial University under DARPA/AFRL contract + * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing + * (TC) research program. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include + +#include "dtrace_xoroshiro128_plus.h" + +static __inline uint64_t +rotl(const uint64_t x, int k) +{ + return (x << k) | (x >> (64 - k)); +} + +/* + * This is the jump function for the generator. It is equivalent to 2^64 calls + * to next(); it can be used to generate 2^64 non-overlapping subsequences for + * parallel computations. 
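+ *
+ * For example, non-overlapping per-CPU streams could be seeded from a
+ * single master seed (a sketch only; these callers are hypothetical):
+ *
+ *	uint64_t tmp[2], percpu[MAXCPU][2];
+ *
+ *	memcpy(percpu[0], seed, sizeof (tmp));
+ *	for (i = 1; i < ncpu; i++) {
+ *		memcpy(tmp, percpu[i - 1], sizeof (tmp));
+ *		dtrace_xoroshiro128_plus_jump(tmp, percpu[i]);
+ *	}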
+ */ +void +dtrace_xoroshiro128_plus_jump(uint64_t * const state, + uint64_t * const jump_state) +{ + static const uint64_t JUMP[] = { 0xbeac0467eba5facb, + 0xd86b048b86aa9922 }; + + uint64_t s0 = 0; + uint64_t s1 = 0; + int i = 0; + int b = 0; + for (i = 0; i < sizeof JUMP / sizeof *JUMP; i++) { + for (b = 0; b < 64; b++) { + if (JUMP[i] & 1ULL << b) { + s0 ^= state[0]; + s1 ^= state[1]; + } + dtrace_xoroshiro128_plus_next(state); + } + } + jump_state[0] = s0; + jump_state[1] = s1; +} + +/* + * xoroshiro128+ - XOR/rotate/shift/rotate + * xorshift.di.unimi.it + */ +uint64_t +dtrace_xoroshiro128_plus_next(uint64_t * const state) +{ + const uint64_t s0 = state[0]; + uint64_t s1 = state[1]; + uint64_t result; + result = s0 + s1; + + s1 ^= s0; + state[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14); + state[1] = rotl(s1, 36); + + return result; +} Property changes on: projects/ipsec/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace_xoroshiro128_plus.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/ipsec/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace_xoroshiro128_plus.h =================================================================== --- projects/ipsec/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace_xoroshiro128_plus.h (nonexistent) +++ projects/ipsec/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace_xoroshiro128_plus.h (revision 313187) @@ -0,0 +1,40 @@ +/*- + * Copyright (c) 2016 (Graeme Jenkinson) + * All rights reserved. + * + * This software was developed by BAE Systems, the University of Cambridge + * Computer Laboratory, and Memorial University under DARPA/AFRL contract + * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing + * (TC) research program. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ */
+
+#ifndef _DTRACE_XOROSHIRO128_PLUS_H
+#define	_DTRACE_XOROSHIRO128_PLUS_H
+
+#include <sys/types.h>
+
+extern void dtrace_xoroshiro128_plus_jump(uint64_t * const, uint64_t * const);
+extern uint64_t dtrace_xoroshiro128_plus_next(uint64_t * const);
+
+#endif /* _DTRACE_XOROSHIRO128_PLUS_H */

Property changes on: projects/ipsec/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace_xoroshiro128_plus.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: projects/ipsec/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace_impl.h
===================================================================
--- projects/ipsec/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace_impl.h	(revision 313186)
+++ projects/ipsec/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace_impl.h	(revision 313187)
@@ -1,1345 +1,1351 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * $FreeBSD$
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2016 Joyent, Inc.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

#ifndef _SYS_DTRACE_IMPL_H
#define	_SYS_DTRACE_IMPL_H

#ifdef	__cplusplus
extern "C" {
#endif

/*
 * DTrace Dynamic Tracing Software: Kernel Implementation Interfaces
 *
 * Note: The contents of this file are private to the implementation of the
 * Solaris system and DTrace subsystem and are subject to change at any time
 * without notice.  Applications and drivers using these interfaces will fail
 * to run on future releases.  These interfaces should not be used for any
 * purpose except those expressly outlined in dtrace(7D) and libdtrace(3LIB).
 * Please refer to the "Solaris Dynamic Tracing Guide" for more information.
 */

#include <sys/dtrace.h>
+
#ifndef illumos
#ifdef __sparcv9
typedef uint32_t pc_t;
#else
typedef uintptr_t pc_t;
#endif
typedef u_long greg_t;
#endif

/*
 * DTrace Implementation Constants and Typedefs
 */
#define	DTRACE_MAXPROPLEN		128
#define	DTRACE_DYNVAR_CHUNKSIZE		256

+#ifdef __FreeBSD__
+#define	NCPU		MAXCPU
+#endif /* __FreeBSD__ */
+
struct dtrace_probe;
struct dtrace_ecb;
struct dtrace_predicate;
struct dtrace_action;
struct dtrace_provider;
struct dtrace_state;

typedef struct dtrace_probe dtrace_probe_t;
typedef struct dtrace_ecb dtrace_ecb_t;
typedef struct dtrace_predicate dtrace_predicate_t;
typedef struct dtrace_action dtrace_action_t;
typedef struct dtrace_provider dtrace_provider_t;
typedef struct dtrace_meta dtrace_meta_t;
typedef struct dtrace_state dtrace_state_t;
typedef uint32_t dtrace_optid_t;
typedef uint32_t dtrace_specid_t;
typedef uint64_t dtrace_genid_t;

/*
 * DTrace Probes
 *
 * The probe is the fundamental unit of the DTrace architecture.  Probes are
 * created by DTrace providers, and managed by the DTrace framework.  A probe
 * is identified by a unique <provider, module, function, name> tuple, and has
 * a unique probe identifier assigned to it.  (Some probes are not associated
 * with a specific point in text; these are called _unanchored probes_ and have
 * no module or function associated with them.)  Probes are represented as a
 * dtrace_probe structure.  To allow quick lookups based on each element of the
 * probe tuple, probes are hashed by each of provider, module, function and
 * name.  (If a lookup is performed based on a regular expression, a
 * dtrace_probekey is prepared, and a linear search is performed.)  Each probe
 * is additionally pointed to by a linear array indexed by its identifier.  The
 * identifier is the provider's mechanism for indicating to the DTrace
 * framework that a probe has fired: the identifier is passed as the first
 * argument to dtrace_probe(), where it is then mapped into the corresponding
 * dtrace_probe structure.  From the dtrace_probe structure, dtrace_probe() can
 * iterate over the probe's list of enabling control blocks; see "DTrace
 * Enabling Control Blocks", below.
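 *
 * For example, module teardown looks probes up through the module hash
 * and then walks the per-module chain (a sketch modeled on
 * dtrace_module_unloaded() in dtrace.c):
 *
 *	template.dtpr_mod = modname;
 *	for (probe = dtrace_hash_lookup(dtrace_bymod, &template);
 *	    probe != NULL; probe = probe->dtpr_nextmod)
 *		(unhook probe from the bymod/byfunc/byname hashes)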
*/ struct dtrace_probe { dtrace_id_t dtpr_id; /* probe identifier */ dtrace_ecb_t *dtpr_ecb; /* ECB list; see below */ dtrace_ecb_t *dtpr_ecb_last; /* last ECB in list */ void *dtpr_arg; /* provider argument */ dtrace_cacheid_t dtpr_predcache; /* predicate cache ID */ int dtpr_aframes; /* artificial frames */ dtrace_provider_t *dtpr_provider; /* pointer to provider */ char *dtpr_mod; /* probe's module name */ char *dtpr_func; /* probe's function name */ char *dtpr_name; /* probe's name */ dtrace_probe_t *dtpr_nextmod; /* next in module hash */ dtrace_probe_t *dtpr_prevmod; /* previous in module hash */ dtrace_probe_t *dtpr_nextfunc; /* next in function hash */ dtrace_probe_t *dtpr_prevfunc; /* previous in function hash */ dtrace_probe_t *dtpr_nextname; /* next in name hash */ dtrace_probe_t *dtpr_prevname; /* previous in name hash */ dtrace_genid_t dtpr_gen; /* probe generation ID */ }; typedef int dtrace_probekey_f(const char *, const char *, int); typedef struct dtrace_probekey { char *dtpk_prov; /* provider name to match */ dtrace_probekey_f *dtpk_pmatch; /* provider matching function */ char *dtpk_mod; /* module name to match */ dtrace_probekey_f *dtpk_mmatch; /* module matching function */ char *dtpk_func; /* func name to match */ dtrace_probekey_f *dtpk_fmatch; /* func matching function */ char *dtpk_name; /* name to match */ dtrace_probekey_f *dtpk_nmatch; /* name matching function */ dtrace_id_t dtpk_id; /* identifier to match */ } dtrace_probekey_t; typedef struct dtrace_hashbucket { struct dtrace_hashbucket *dthb_next; /* next on hash chain */ dtrace_probe_t *dthb_chain; /* chain of probes */ int dthb_len; /* number of probes here */ } dtrace_hashbucket_t; typedef struct dtrace_hash { dtrace_hashbucket_t **dth_tab; /* hash table */ int dth_size; /* size of hash table */ int dth_mask; /* mask to index into table */ int dth_nbuckets; /* total number of buckets */ uintptr_t dth_nextoffs; /* offset of next in probe */ uintptr_t dth_prevoffs; /* offset of prev in probe */ uintptr_t dth_stroffs; /* offset of str in probe */ } dtrace_hash_t; /* * DTrace Enabling Control Blocks * * When a provider wishes to fire a probe, it calls into dtrace_probe(), * passing the probe identifier as the first argument. As described above, * dtrace_probe() maps the identifier into a pointer to a dtrace_probe_t * structure. This structure contains information about the probe, and a * pointer to the list of Enabling Control Blocks (ECBs). Each ECB points to * DTrace consumer state, and contains an optional predicate, and a list of * actions. (Shown schematically below.) The ECB abstraction allows a single * probe to be multiplexed across disjoint consumers, or across disjoint * enablings of a single probe within one consumer. * * Enabling Control Block * dtrace_ecb_t * +------------------------+ * | dtrace_epid_t ---------+--------------> Enabled Probe ID (EPID) * | dtrace_state_t * ------+--------------> State associated with this ECB * | dtrace_predicate_t * --+---------+ * | dtrace_action_t * -----+----+ | * | dtrace_ecb_t * ---+ | | | Predicate (if any) * +-------------------+----+ | | dtrace_predicate_t * | | +---> +--------------------+ * | | | dtrace_difo_t * ---+----> DIFO * | | +--------------------+ * | | * Next ECB | | Action * (if any) | | dtrace_action_t * : +--> +-------------------+ * : | dtrace_actkind_t -+------> kind * v | dtrace_difo_t * --+------> DIFO (if any) * | dtrace_recdesc_t -+------> record descr. 
* | dtrace_action_t * +------+ * +-------------------+ | * | Next action * +-------------------------------+ (if any) * | * | Action * | dtrace_action_t * +--> +-------------------+ * | dtrace_actkind_t -+------> kind * | dtrace_difo_t * --+------> DIFO (if any) * | dtrace_action_t * +------+ * +-------------------+ | * | Next action * +-------------------------------+ (if any) * | * : * v * * * dtrace_probe() iterates over the ECB list. If the ECB needs less space * than is available in the principal buffer, the ECB is processed: if the * predicate is non-NULL, the DIF object is executed. If the result is * non-zero, the action list is processed, with each action being executed * accordingly. When the action list has been completely executed, processing * advances to the next ECB. The ECB abstraction allows disjoint consumers * to multiplex on single probes. * * Execution of the ECB results in consuming dte_size bytes in the buffer * to record data. During execution, dte_needed bytes must be available in * the buffer. This space is used for both recorded data and tuple data. */ struct dtrace_ecb { dtrace_epid_t dte_epid; /* enabled probe ID */ uint32_t dte_alignment; /* required alignment */ size_t dte_needed; /* space needed for execution */ size_t dte_size; /* size of recorded payload */ dtrace_predicate_t *dte_predicate; /* predicate, if any */ dtrace_action_t *dte_action; /* actions, if any */ dtrace_ecb_t *dte_next; /* next ECB on probe */ dtrace_state_t *dte_state; /* pointer to state */ uint32_t dte_cond; /* security condition */ dtrace_probe_t *dte_probe; /* pointer to probe */ dtrace_action_t *dte_action_last; /* last action on ECB */ uint64_t dte_uarg; /* library argument */ }; struct dtrace_predicate { dtrace_difo_t *dtp_difo; /* DIF object */ dtrace_cacheid_t dtp_cacheid; /* cache identifier */ int dtp_refcnt; /* reference count */ }; struct dtrace_action { dtrace_actkind_t dta_kind; /* kind of action */ uint16_t dta_intuple; /* boolean: in aggregation */ uint32_t dta_refcnt; /* reference count */ dtrace_difo_t *dta_difo; /* pointer to DIFO */ dtrace_recdesc_t dta_rec; /* record description */ dtrace_action_t *dta_prev; /* previous action */ dtrace_action_t *dta_next; /* next action */ }; typedef struct dtrace_aggregation { dtrace_action_t dtag_action; /* action; must be first */ dtrace_aggid_t dtag_id; /* identifier */ dtrace_ecb_t *dtag_ecb; /* corresponding ECB */ dtrace_action_t *dtag_first; /* first action in tuple */ uint32_t dtag_base; /* base of aggregation */ uint8_t dtag_hasarg; /* boolean: has argument */ uint64_t dtag_initial; /* initial value */ void (*dtag_aggregate)(uint64_t *, uint64_t, uint64_t); } dtrace_aggregation_t; /* * DTrace Buffers * * Principal buffers, aggregation buffers, and speculative buffers are all * managed with the dtrace_buffer structure. By default, this structure * includes twin data buffers -- dtb_tomax and dtb_xamot -- that serve as the * active and passive buffers, respectively. For speculative buffers, * dtb_xamot will be NULL; for "ring" and "fill" buffers, dtb_xamot will point * to a scratch buffer. For all buffer types, the dtrace_buffer structure is * always allocated on a per-CPU basis; a single dtrace_buffer structure is * never shared among CPUs. (That is, there is never true sharing of the * dtrace_buffer structure; to prevent false sharing of the structure, it must * always be aligned to the coherence granularity -- generally 64 bytes.) 
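 *
 * Because the structure is strictly per-CPU, locating the buffer for a
 * given firing is just an index by CPU identifier. A minimal sketch,
 * assuming a hypothetical cpuid variable (dts_buffer is the real
 * per-consumer array declared in the dtrace_state structure below):
 *
 *	dtrace_buffer_t *buf = &state->dts_buffer[cpuid];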
* * One of the critical design decisions of DTrace is that a given ECB always * stores the same quantity and type of data. This is done to assure that the * only metadata required for an ECB's traced data is the EPID. That is, from * the EPID, the consumer can determine the data layout. (The data buffer * layout is shown schematically below.) By assuring that one can determine * data layout from the EPID, the metadata stream can be separated from the * data stream -- simplifying the data stream enormously. A record header * (dtrace_rechdr_t), which includes the EPID and a high-resolution timestamp * used for output ordering consistency, always precedes the recorded data. * * base of data buffer ---> +--------+--------------------+--------+ * | rechdr | data | rechdr | * +--------+------+--------+----+--------+ * | data | rechdr | data | * +---------------+--------+-------------+ * | data, cont. | * +--------+--------------------+--------+ * | rechdr | data | | * +--------+--------------------+ | * | || | * | || | * | \/ | * : : * . . * . . * . . * : : * | | * limit of data buffer ---> +--------------------------------------+ * * When evaluating an ECB, dtrace_probe() determines if the ECB's needs of the * principal buffer (both scratch and payload) exceed the available space. If * the ECB's needs exceed available space (and if the principal buffer policy * is the default "switch" policy), the ECB is dropped, the buffer's drop count * is incremented, and processing advances to the next ECB. If the ECB's needs * can be met with the available space, the ECB is processed, but the offset in * the principal buffer is only advanced if the ECB completes processing * without error. * * When a buffer is to be switched (either because the buffer is the principal * buffer with a "switch" policy or because it is an aggregation buffer), a * cross call is issued to the CPU associated with the buffer. In the cross * call context, interrupts are disabled, and the active and the inactive * buffers are atomically switched. This involves switching the data pointers, * copying the various state fields (offset, drops, errors, etc.) into their * inactive equivalents, and clearing the state fields. Because interrupts are * disabled during this procedure, the switch is guaranteed to appear atomic to * dtrace_probe(). * * DTrace Ring Buffering * * To process a ring buffer correctly, one must know the oldest valid record. * Processing starts at the oldest record in the buffer and continues until * the end of the buffer is reached. Processing then resumes starting with * the record stored at offset 0 in the buffer, and continues until the * youngest record is processed. If trace records are of a fixed length, * determining the oldest record is trivial: * * - If the ring buffer has not wrapped, the oldest record is the record * stored at offset 0. * * - If the ring buffer has wrapped, the oldest record is the record stored * at the current offset. * * With variable length records, however, just knowing the current offset * doesn't suffice for determining the oldest valid record: assuming that one * allows for arbitrary data, one has no way of searching forward from the * current offset to find the oldest valid record. (That is, one has no way * of separating data from metadata.) It would be possible to simply refuse to * process any data in the ring buffer between the current offset and the * limit, but this leaves (potentially) an enormous amount of otherwise valid * data unprocessed.
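 *
 * As a sketch, the fixed-length rule above is a single test (using the
 * DTRACEBUF_WRAPPED flag and dtrace_buffer fields defined below):
 *
 *	oldest = (buf->dtb_flags & DTRACEBUF_WRAPPED) ? buf->dtb_offset : 0;
 *
 * It is the variable-length case, described next, that requires the
 * wrapped-offset machinery.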
* * To effect ring buffering, we track two offsets in the buffer: the current * offset and the _wrapped_ offset. If a request is made to reserve some * amount of data, and the buffer has wrapped, the wrapped offset is * incremented until the wrapped offset minus the current offset is greater * than or equal to the reserve request. This is done by repeatedly looking * up the ECB corresponding to the EPID at the current wrapped offset, and * incrementing the wrapped offset by the size of the data payload * corresponding to that ECB. If this offset is greater than or equal to the * limit of the data buffer, the wrapped offset is set to 0. Thus, the * current offset effectively "chases" the wrapped offset around the buffer. * Schematically: * * base of data buffer ---> +------+--------------------+------+ * | EPID | data | EPID | * +------+--------+------+----+------+ * | data | EPID | data | * +---------------+------+-----------+ * | data, cont. | * +------+---------------------------+ * | EPID | data | * current offset ---> +------+---------------------------+ * | invalid data | * wrapped offset ---> +------+--------------------+------+ * | EPID | data | EPID | * +------+--------+------+----+------+ * | data | EPID | data | * +---------------+------+-----------+ * : : * . . * . ... valid data ... . * . . * : : * +------+-------------+------+------+ * | EPID | data | EPID | data | * +------+------------++------+------+ * | data, cont. | leftover | * limit of data buffer ---> +-------------------+--------------+ * * If the amount of requested buffer space exceeds the amount of space * available between the current offset and the end of the buffer: * * (1) all words in the data buffer between the current offset and the limit * of the data buffer (marked "leftover", above) are set to * DTRACE_EPIDNONE * * (2) the wrapped offset is set to zero * * (3) the iteration process described above occurs until the wrapped offset * is greater than the amount of desired space. * * The wrapped offset is implemented by (re-)using the inactive offset. * In a "switch" buffer policy, the inactive offset stores the offset in * the inactive buffer; in a "ring" buffer policy, it stores the wrapped * offset. * * DTrace Scratch Buffering * * Some ECBs may wish to allocate dynamically-sized temporary scratch memory. * To accommodate such requests easily, scratch memory may be allocated in * the buffer beyond the current offset plus the needed memory of the current * ECB. If there isn't sufficient room in the buffer for the requested amount * of scratch space, the allocation fails and an error is generated. Scratch * memory is tracked in the dtrace_mstate_t and is automatically freed when * the ECB ceases processing. Note that ring buffers cannot allocate their * scratch from the principal buffer -- lest they needlessly overwrite older, * valid data. Ring buffers therefore have their own dedicated scratch buffer * from which scratch is allocated. 
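 *
 * A minimal sketch of the scratch allocation test, in terms of the
 * dtms_scratch_* fields of the dtrace_mstate structure defined later in
 * this file (alignment and error flagging elided; illustrative only):
 *
 *	uintptr_t ptr = mstate->dtms_scratch_ptr;
 *
 *	if (ptr + size > mstate->dtms_scratch_base + mstate->dtms_scratch_size)
 *		return (NULL);			/* no room: fail the request */
 *	mstate->dtms_scratch_ptr = ptr + size;	/* freed when the ECB is done */
 *	return ((void *)ptr);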
*/ #define DTRACEBUF_RING 0x0001 /* bufpolicy set to "ring" */ #define DTRACEBUF_FILL 0x0002 /* bufpolicy set to "fill" */ #define DTRACEBUF_NOSWITCH 0x0004 /* do not switch buffer */ #define DTRACEBUF_WRAPPED 0x0008 /* ring buffer has wrapped */ #define DTRACEBUF_DROPPED 0x0010 /* drops occurred */ #define DTRACEBUF_ERROR 0x0020 /* errors occurred */ #define DTRACEBUF_FULL 0x0040 /* "fill" buffer is full */ #define DTRACEBUF_CONSUMED 0x0080 /* buffer has been consumed */ #define DTRACEBUF_INACTIVE 0x0100 /* buffer is not yet active */ typedef struct dtrace_buffer { uint64_t dtb_offset; /* current offset in buffer */ uint64_t dtb_size; /* size of buffer */ uint32_t dtb_flags; /* flags */ uint32_t dtb_drops; /* number of drops */ caddr_t dtb_tomax; /* active buffer */ caddr_t dtb_xamot; /* inactive buffer */ uint32_t dtb_xamot_flags; /* inactive flags */ uint32_t dtb_xamot_drops; /* drops in inactive buffer */ uint64_t dtb_xamot_offset; /* offset in inactive buffer */ uint32_t dtb_errors; /* number of errors */ uint32_t dtb_xamot_errors; /* errors in inactive buffer */ #ifndef _LP64 uint64_t dtb_pad1; /* pad out to 64 bytes */ #endif uint64_t dtb_switched; /* time of last switch */ uint64_t dtb_interval; /* observed switch interval */ uint64_t dtb_pad2[6]; /* pad to avoid false sharing */ } dtrace_buffer_t; /* * DTrace Aggregation Buffers * * Aggregation buffers use much of the same mechanism as described above * ("DTrace Buffers"). However, because an aggregation is fundamentally a * hash, there exists dynamic metadata associated with an aggregation buffer * that is not associated with other kinds of buffers. This aggregation * metadata is _only_ relevant for the in-kernel implementation of * aggregations; it is not actually relevant to user-level consumers. To do * this, we allocate dynamic aggregation data (hash keys and hash buckets) * starting below the _limit_ of the buffer, and we allocate data from the * _base_ of the buffer. When the aggregation buffer is copied out, _only_ the * data is copied out; the metadata is simply discarded. Schematically, * aggregation buffers look like: * * base of data buffer ---> +-------+------+-----------+-------+ * | aggid | key | value | aggid | * +-------+------+-----------+-------+ * | key | * +-------+-------+-----+------------+ * | value | aggid | key | value | * +-------+------++-----+------+-----+ * | aggid | key | value | | * +-------+------+-------------+ | * | || | * | || | * | \/ | * : : * . . * . . * . . * : : * | /\ | * | || +------------+ * | || | | * +---------------------+ | * | hash keys | * | (dtrace_aggkey structures) | * | | * +----------------------------------+ * | hash buckets | * | (dtrace_aggbuffer structure) | * | | * limit of data buffer ---> +----------------------------------+ * * * As implied above, just as we assure that ECBs always store a constant * amount of data, we assure that a given aggregation -- identified by its * aggregation ID -- always stores data of a constant quantity and type. * As with EPIDs, this allows the aggregation ID to serve as the metadata for a * given record. * * Note that the size of the dtrace_aggkey structure must be sizeof (uintptr_t) * aligned. (If the structure changes such that this becomes false, an * assertion will fail in dtrace_aggregate().)
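 *
 * A minimal sketch of the two-ended reservation implied by the layout
 * above (names hypothetical; the real code also aligns both regions and
 * records a drop when they would collide):
 *
 *	if (data_top + data_size > meta_bottom - meta_size)
 *		return (0);	/* regions would meet: aggregation drop */
 *	data_top += data_size;		/* records grow up from the base */
 *	meta_bottom -= meta_size;	/* keys/buckets grow down from limit */
 *	return (1);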
*/ typedef struct dtrace_aggkey { uint32_t dtak_hashval; /* hash value */ uint32_t dtak_action:4; /* action -- 4 bits */ uint32_t dtak_size:28; /* size -- 28 bits */ caddr_t dtak_data; /* data pointer */ struct dtrace_aggkey *dtak_next; /* next in hash chain */ } dtrace_aggkey_t; typedef struct dtrace_aggbuffer { uintptr_t dtagb_hashsize; /* number of buckets */ uintptr_t dtagb_free; /* free list of keys */ dtrace_aggkey_t **dtagb_hash; /* hash table */ } dtrace_aggbuffer_t; /* * DTrace Speculations * * Speculations have a per-CPU buffer and a global state. Once a speculation * buffer has been committed or discarded, it cannot be reused until all CPUs * have taken the same action (commit or discard) on their respective * speculative buffer. However, because DTrace probes may execute in arbitrary * context, other CPUs cannot simply be cross-called at probe firing time to * perform the necessary commit or discard. The speculation states thus * optimize for the case that a speculative buffer is only active on one CPU at * the time of a commit() or discard() -- for if this is the case, other CPUs * need not take action, and the speculation is immediately available for * reuse. If the speculation is active on multiple CPUs, it must be * asynchronously cleaned -- potentially leading to a higher rate of dirty * speculative drops. The speculation states are as follows: * * DTRACESPEC_INACTIVE <= Initial state; inactive speculation * DTRACESPEC_ACTIVE <= Allocated, but not yet speculatively traced to * DTRACESPEC_ACTIVEONE <= Speculatively traced to on one CPU * DTRACESPEC_ACTIVEMANY <= Speculatively traced to on more than one CPU * DTRACESPEC_COMMITTING <= Currently being committed on one CPU * DTRACESPEC_COMMITTINGMANY <= Currently being committed on many CPUs * DTRACESPEC_DISCARDING <= Currently being discarded on many CPUs * * The state transition diagram is as follows: * * +----------------------------------------------------------+ * | | * | +------------+ | * | +-------------------| COMMITTING |<-----------------+ | * | | +------------+ | | * | | copied spec. ^ commit() on | | discard() on * | | into principal | active CPU | | active CPU * | | | commit() | | * V V | | | * +----------+ +--------+ +-----------+ * | INACTIVE |---------------->| ACTIVE |--------------->| ACTIVEONE | * +----------+ speculation() +--------+ speculate() +-----------+ * ^ ^ | | | * | | | discard() | | * | | asynchronously | discard() on | | speculate() * | | cleaned V inactive CPU | | on inactive * | | +------------+ | | CPU * | +-------------------| DISCARDING |<-----------------+ | * | +------------+ | * | asynchronously ^ | * | copied spec. | discard() | * | into principal +------------------------+ | * | | V * +----------------+ commit() +------------+ * | COMMITTINGMANY |<----------------------------------| ACTIVEMANY | * +----------------+ +------------+ */ typedef enum dtrace_speculation_state { DTRACESPEC_INACTIVE = 0, DTRACESPEC_ACTIVE, DTRACESPEC_ACTIVEONE, DTRACESPEC_ACTIVEMANY, DTRACESPEC_COMMITTING, DTRACESPEC_COMMITTINGMANY, DTRACESPEC_DISCARDING } dtrace_speculation_state_t; typedef struct dtrace_speculation { dtrace_speculation_state_t dtsp_state; /* current speculation state */ int dtsp_cleaning; /* non-zero if being cleaned */ dtrace_buffer_t *dtsp_buffer; /* speculative buffer */ } dtrace_speculation_t; /* * DTrace Dynamic Variables * * The dynamic variable problem is obviously decomposed into two subproblems: * allocating new dynamic storage, and freeing old dynamic storage.
The * presence of the second problem makes the first much more complicated -- or * rather, the absence of the second renders the first trivial. This is the * case with aggregations, for which there is effectively no deallocation of * dynamic storage. (Or more accurately, all dynamic storage is deallocated * when a snapshot is taken of the aggregation.) As DTrace dynamic variables * allow for both dynamic allocation and dynamic deallocation, the * implementation of dynamic variables is quite a bit more complicated than * that of their aggregation kin. * * We observe that allocating new dynamic storage is tricky only because the * size can vary -- the allocation problem is much easier if allocation sizes * are uniform. We further observe that in D, the size of dynamic variables is * actually _not_ dynamic -- dynamic variable sizes may be determined by static * analysis of DIF text. (This is true even of putatively dynamically-sized * objects like strings and stacks, the sizes of which are dictated by the * "stringsize" and "stackframes" variables, respectively.) We exploit this by * performing this analysis on all DIF before enabling any probes. For each * dynamic load or store, we calculate the dynamically-allocated size plus the * size of the dtrace_dynvar structure plus the storage required to key the * data. For all DIF, we take the largest value and dub it the _chunksize_. * We then divide dynamic memory into two parts: a hash table that is wide * enough to have every chunk in its own bucket, and a larger region of equal * chunksize units. Whenever we wish to dynamically allocate a variable, we * always allocate a single chunk of memory. Depending on the uniformity of * allocation, this will waste some amount of memory -- but it eliminates the * non-determinism inherent in traditional heap fragmentation. * * Dynamic objects are allocated by storing a non-zero value to them; they are * deallocated by storing a zero value to them. Dynamic variables are * complicated enormously by being shared between CPUs. In particular, * consider the following scenario: * * CPU A CPU B * +---------------------------------+ +---------------------------------+ * | | | | * | allocates dynamic object a[123] | | | * | by storing the value 345 to it | | | * | ---------> | * | | | wishing to load from object | * | | | a[123], performs lookup in | * | | | dynamic variable space | * | <--------- | * | deallocates object a[123] by | | | * | storing 0 to it | | | * | | | | * | allocates dynamic object b[567] | | performs load from a[123] | * | by storing the value 789 to it | | | * : : : : * . . . . * * This is obviously a race in the D program, but there are nonetheless only * two valid values for CPU B's load from a[123]: 345 or 0. Most importantly, * CPU B may _not_ see the value 789 for a[123]. * * There are essentially two ways to deal with this: * * (1) Explicitly spin-lock variables. That is, if CPU B wishes to load * from a[123], it needs to lock a[123] and hold the lock for the * duration that it wishes to manipulate it. * * (2) Avoid reusing freed chunks until it is known that no CPU is referring * to them. * * The implementation of (1) is rife with complexity, because it requires the * user of a dynamic variable to explicitly decree when they are done using it. * Were all variables by value, this perhaps wouldn't be debilitating -- but * dynamic variables of non-scalar types are tracked by reference. 
That is, if * a dynamic variable is, say, a string, and that variable is to be traced to, * say, the principal buffer, the DIF emulation code returns to the main * dtrace_probe() loop a pointer to the underlying storage, not the contents of * the storage. Further, code calling on DIF emulation would have to be aware * that the DIF emulation has returned a reference to a dynamic variable that * has been potentially locked. The variable would have to be unlocked after * the main dtrace_probe() loop is finished with the variable, and the main * dtrace_probe() loop would have to be careful to not call any further DIF * emulation while the variable is locked to avoid deadlock. More generally, * if one were to implement (1), DIF emulation code dealing with dynamic * variables could only deal with one dynamic variable at a time (lest deadlock * result). To sum, (1) exports too much subtlety to the users of dynamic * variables -- increasing maintenance burden and imposing serious constraints * on future DTrace development. * * The implementation of (2) is also complex, but the complexity is more * manageable. We need to be sure that when a variable is deallocated, it is * not placed on a traditional free list, but rather on a _dirty_ list. Once a * variable is on a dirty list, it cannot be found by CPUs performing a * subsequent lookup of the variable -- but it may still be in use by other * CPUs. To assure that all CPUs that may be seeing the old variable have * cleared out of probe context, a dtrace_sync() can be issued. Once the * dtrace_sync() has completed, it can be known that all CPUs are done * manipulating the dynamic variable -- the dirty list can be atomically * appended to the free list. Unfortunately, there's a slight hiccup in this * mechanism: dtrace_sync() may not be issued from probe context. The * dtrace_sync() must therefore be issued asynchronously from non-probe * context. For this we rely on the DTrace cleaner, a cyclic that runs at the * "cleanrate" frequency. To ease this implementation, we define several chunk * lists: * * - Dirty. Deallocated chunks, not yet cleaned. Not available. * * - Rinsing. Formerly dirty chunks that are currently being asynchronously * cleaned. Not available, but will be shortly. Dynamic variable * allocation may not spin or block for availability, however. * * - Clean. Clean chunks, ready for allocation -- but not on the free list. * * - Free. Available for allocation. * * Moreover, to avoid absurd contention, _each_ of these lists is implemented * on a per-CPU basis. This is only for performance, not correctness; chunks * may be allocated from another CPU's free list. The algorithm for allocation * then is this: * * (1) Attempt to atomically allocate from current CPU's free list. If list * is non-empty and allocation is successful, allocation is complete. * * (2) If the clean list is non-empty, atomically move it to the free list, * and reattempt (1). * * (3) If the dynamic variable space is in the CLEAN state, look for free * and clean lists on other CPUs by setting the current CPU to the next * CPU, and reattempting (1). If the next CPU is the current CPU (that * is, if all CPUs have been checked), atomically switch the state of * the dynamic variable space based on the following: * * - If no free chunks were found and no dirty chunks were found, * atomically set the state to EMPTY. * * - If dirty chunks were found, atomically set the state to DIRTY. * * - If rinsing chunks were found, atomically set the state to RINSING.
* * (4) Based on the state of the dynamic variable space, increment the * appropriate counter to indicate dynamic drops (if in EMPTY state) vs. * dynamic dirty drops (if in DIRTY state) vs. dynamic rinsing drops (if in * RINSING state). Fail the allocation. * * The cleaning cyclic operates with the following algorithm: for all CPUs * with a non-empty dirty list, atomically move the dirty list to the rinsing * list. Perform a dtrace_sync(). For all CPUs with a non-empty rinsing list, * atomically move the rinsing list to the clean list. Perform another * dtrace_sync(). By this point, all CPUs have seen the new clean list; the * state of the dynamic variable space can be restored to CLEAN. * * There exist two final races that merit explanation. The first is a simple * allocation race: * * CPU A CPU B * +---------------------------------+ +---------------------------------+ * | | | | * | allocates dynamic object a[123] | | allocates dynamic object a[123] | * | by storing the value 345 to it | | by storing the value 567 to it | * | | | | * : : : : * . . . . * * Again, this is a race in the D program. It can be resolved by having a[123] * hold the value 345 or a[123] hold the value 567 -- but it must be true that * a[123] have only _one_ of these values. (That is, the racing CPUs may not * put the same element twice on the same hash chain.) This is resolved * simply: before the allocation is undertaken, the start of the new chunk's * hash chain is noted. Later, after the allocation is complete, the hash * chain is atomically switched to point to the new element. If this fails * (because of either concurrent allocations or an allocation concurrent with a * deletion), the newly allocated chunk is deallocated to the dirty list, and * the whole process of looking up (and potentially allocating) the dynamic * variable is reattempted. * * The final race is a simple deallocation race: * * CPU A CPU B * +---------------------------------+ +---------------------------------+ * | | | | * | deallocates dynamic object | | deallocates dynamic object | * | a[123] by storing the value 0 | | a[123] by storing the value 0 | * | to it | | to it | * | | | | * : : : : * . . . . * * Once again, this is a race in the D program, but it is one that we must * handle without corrupting the underlying data structures. Because * deallocations require the deletion of a chunk from the middle of a hash * chain, we cannot use a single-word atomic operation to remove it. For this, * we add a spin lock to the hash buckets that is _only_ used for deallocations * (allocation races are handled as above). Further, this spin lock is _only_ * held for the duration of the delete; before control is returned to the DIF * emulation code, the hash bucket is unlocked.
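 *
 * A compressed sketch of the allocation algorithm above, with hypothetical
 * list_pop()/list_push()/atomic_swap() helpers standing in for the kernel's
 * compare-and-swap loops, and with the walk to other CPUs' lists in step
 * (3) elided:
 *
 *	for (;;) {
 *		chunk = list_pop(&cpu->free);		/* step (1) */
 *		if (chunk != NULL)
 *			return (chunk);
 *		clean = atomic_swap(&cpu->clean, NULL);
 *		if (clean == NULL)
 *			break;		/* step (3) would try other CPUs */
 *		while (clean != NULL) {			/* step (2) */
 *			next = clean->next;
 *			list_push(&cpu->free, clean);
 *			clean = next;
 *		}
 *	}
 *	cpu->drops++;					/* step (4) */
 *	return (NULL);
 *
 * Note that the dirty/rinsing quarantine is precisely what makes a simple
 * atomic pop safe here: a chunk cannot reappear on the free list while any
 * CPU in probe context might still hold a stale pointer to it.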
*/ typedef struct dtrace_key { uint64_t dttk_value; /* data value or data pointer */ uint64_t dttk_size; /* 0 if by-val, >0 if by-ref */ } dtrace_key_t; typedef struct dtrace_tuple { uint32_t dtt_nkeys; /* number of keys in tuple */ uint32_t dtt_pad; /* padding */ dtrace_key_t dtt_key[1]; /* array of tuple keys */ } dtrace_tuple_t; typedef struct dtrace_dynvar { uint64_t dtdv_hashval; /* hash value -- 0 if free */ struct dtrace_dynvar *dtdv_next; /* next on list or hash chain */ void *dtdv_data; /* pointer to data */ dtrace_tuple_t dtdv_tuple; /* tuple key */ } dtrace_dynvar_t; typedef enum dtrace_dynvar_op { DTRACE_DYNVAR_ALLOC, DTRACE_DYNVAR_NOALLOC, DTRACE_DYNVAR_DEALLOC } dtrace_dynvar_op_t; typedef struct dtrace_dynhash { dtrace_dynvar_t *dtdh_chain; /* hash chain for this bucket */ uintptr_t dtdh_lock; /* deallocation lock */ #ifdef _LP64 uintptr_t dtdh_pad[6]; /* pad to avoid false sharing */ #else uintptr_t dtdh_pad[14]; /* pad to avoid false sharing */ #endif } dtrace_dynhash_t; typedef struct dtrace_dstate_percpu { dtrace_dynvar_t *dtdsc_free; /* free list for this CPU */ dtrace_dynvar_t *dtdsc_dirty; /* dirty list for this CPU */ dtrace_dynvar_t *dtdsc_rinsing; /* rinsing list for this CPU */ dtrace_dynvar_t *dtdsc_clean; /* clean list for this CPU */ uint64_t dtdsc_drops; /* number of capacity drops */ uint64_t dtdsc_dirty_drops; /* number of dirty drops */ uint64_t dtdsc_rinsing_drops; /* number of rinsing drops */ #ifdef _LP64 uint64_t dtdsc_pad; /* pad to avoid false sharing */ #else uint64_t dtdsc_pad[2]; /* pad to avoid false sharing */ #endif } dtrace_dstate_percpu_t; typedef enum dtrace_dstate_state { DTRACE_DSTATE_CLEAN = 0, DTRACE_DSTATE_EMPTY, DTRACE_DSTATE_DIRTY, DTRACE_DSTATE_RINSING } dtrace_dstate_state_t; typedef struct dtrace_dstate { void *dtds_base; /* base of dynamic var. space */ size_t dtds_size; /* size of dynamic var. space */ size_t dtds_hashsize; /* number of buckets in hash */ size_t dtds_chunksize; /* size of each chunk */ dtrace_dynhash_t *dtds_hash; /* pointer to hash table */ dtrace_dstate_state_t dtds_state; /* current dynamic var. state */ dtrace_dstate_percpu_t *dtds_percpu; /* per-CPU dyn. var. state */ } dtrace_dstate_t; /* * DTrace Variable State * * The DTrace variable state tracks user-defined variables in its dtrace_vstate * structure. Each DTrace consumer has exactly one dtrace_vstate structure, * but some dtrace_vstate structures may exist without a corresponding DTrace * consumer (see "DTrace Helpers", below). As described in <sys/dtrace.h>, * user-defined variables can have one of three scopes: * * DIFV_SCOPE_GLOBAL => global scope * DIFV_SCOPE_THREAD => thread-local scope (i.e. "self->" variables) * DIFV_SCOPE_LOCAL => clause-local scope (i.e. "this->" variables) * * The variable state tracks variables by both their scope and their allocation * type: * * - The dtvs_globals and dtvs_locals members each point to an array of * dtrace_statvar structures. These structures contain both the variable * metadata (dtrace_difv structures) and the underlying storage for all * statically allocated variables, including statically allocated * DIFV_SCOPE_GLOBAL variables and all DIFV_SCOPE_LOCAL variables. * * - The dtvs_tlocals member points to an array of dtrace_difv structures for * DIFV_SCOPE_THREAD variables. As such, this array tracks _only_ the * variable metadata for DIFV_SCOPE_THREAD variables; the underlying storage * is allocated out of the dynamic variable space.
* * - The dtvs_dynvars member is the dynamic variable state associated with the * variable state. The dynamic variable state (described in "DTrace Dynamic * Variables", above) tracks all DIFV_SCOPE_THREAD variables and all * dynamically-allocated DIFV_SCOPE_GLOBAL variables. */ typedef struct dtrace_statvar { uint64_t dtsv_data; /* data or pointer to it */ size_t dtsv_size; /* size of pointed-to data */ int dtsv_refcnt; /* reference count */ dtrace_difv_t dtsv_var; /* variable metadata */ } dtrace_statvar_t; typedef struct dtrace_vstate { dtrace_state_t *dtvs_state; /* back pointer to state */ dtrace_statvar_t **dtvs_globals; /* statically-allocated glbls */ int dtvs_nglobals; /* number of globals */ dtrace_difv_t *dtvs_tlocals; /* thread-local metadata */ int dtvs_ntlocals; /* number of thread-locals */ dtrace_statvar_t **dtvs_locals; /* clause-local data */ int dtvs_nlocals; /* number of clause-locals */ dtrace_dstate_t dtvs_dynvars; /* dynamic variable state */ } dtrace_vstate_t; /* * DTrace Machine State * * In the process of processing a fired probe, DTrace needs to track and/or * cache some per-CPU state associated with that particular firing. This is * state that is always discarded after the probe firing has completed, and * much of it is not specific to any DTrace consumer, remaining valid across * all ECBs. This state is tracked in the dtrace_mstate structure. */ #define DTRACE_MSTATE_ARGS 0x00000001 #define DTRACE_MSTATE_PROBE 0x00000002 #define DTRACE_MSTATE_EPID 0x00000004 #define DTRACE_MSTATE_TIMESTAMP 0x00000008 #define DTRACE_MSTATE_STACKDEPTH 0x00000010 #define DTRACE_MSTATE_CALLER 0x00000020 #define DTRACE_MSTATE_IPL 0x00000040 #define DTRACE_MSTATE_FLTOFFS 0x00000080 #define DTRACE_MSTATE_WALLTIMESTAMP 0x00000100 #define DTRACE_MSTATE_USTACKDEPTH 0x00000200 #define DTRACE_MSTATE_UCALLER 0x00000400 typedef struct dtrace_mstate { uintptr_t dtms_scratch_base; /* base of scratch space */ uintptr_t dtms_scratch_ptr; /* current scratch pointer */ size_t dtms_scratch_size; /* scratch size */ uint32_t dtms_present; /* variables that are present */ uint64_t dtms_arg[5]; /* cached arguments */ dtrace_epid_t dtms_epid; /* current EPID */ uint64_t dtms_timestamp; /* cached timestamp */ hrtime_t dtms_walltimestamp; /* cached wall timestamp */ int dtms_stackdepth; /* cached stackdepth */ int dtms_ustackdepth; /* cached ustackdepth */ struct dtrace_probe *dtms_probe; /* current probe */ uintptr_t dtms_caller; /* cached caller */ uint64_t dtms_ucaller; /* cached user-level caller */ int dtms_ipl; /* cached interrupt pri lev */ int dtms_fltoffs; /* faulting DIFO offset */ uintptr_t dtms_strtok; /* saved strtok() pointer */ uintptr_t dtms_strtok_limit; /* upper bound of strtok ptr */ uint32_t dtms_access; /* memory access rights */ dtrace_difo_t *dtms_difo; /* current dif object */ file_t *dtms_getf; /* cached rval of getf() */ } dtrace_mstate_t; #define DTRACE_COND_OWNER 0x1 #define DTRACE_COND_USERMODE 0x2 #define DTRACE_COND_ZONEOWNER 0x4 #define DTRACE_PROBEKEY_MAXDEPTH 8 /* max glob recursion depth */ /* * Access flag used by dtrace_mstate.dtms_access. */ #define DTRACE_ACCESS_KERNEL 0x1 /* the priv to read kmem */ /* * DTrace Activity * * Each DTrace consumer is in one of several states, which (for purposes of * avoiding yet-another overloading of the noun "state") we call the current * _activity_. The activity transitions on dtrace_go() (from DTRACIOCGO), on * dtrace_stop() (from DTRACIOCSTOP) and on the exit() action. 
Activities may * only transition in one direction; the activity transition diagram is a * directed acyclic graph. The activity transition diagram is as follows: * * * +----------+ +--------+ +--------+ * | INACTIVE |------------------>| WARMUP |------------------>| ACTIVE | * +----------+ dtrace_go(), +--------+ dtrace_go(), +--------+ * before BEGIN | after BEGIN | | | * | | | | * exit() action | | | | * from BEGIN ECB | | | | * | | | | * v | | | * +----------+ exit() action | | | * +-----------------------------| DRAINING |<-------------------+ | | * | +----------+ | | * | | | | * | dtrace_stop(), | | | * | before END | | | * | | | | * | v | | * | +---------+ +----------+ | | * | | STOPPED |<----------------| COOLDOWN |<----------------------+ | * | +---------+ dtrace_stop(), +----------+ dtrace_stop(), | * | after END before END | * | | * | +--------+ | * +----------------------------->| KILLED |<--------------------------+ * deadman timeout or +--------+ deadman timeout or * killed consumer killed consumer * * Note that once a DTrace consumer has stopped tracing, there is no way to * restart it; if a DTrace consumer wishes to restart tracing, it must reopen * the DTrace pseudodevice. */ typedef enum dtrace_activity { DTRACE_ACTIVITY_INACTIVE = 0, /* not yet running */ DTRACE_ACTIVITY_WARMUP, /* while starting */ DTRACE_ACTIVITY_ACTIVE, /* running */ DTRACE_ACTIVITY_DRAINING, /* before stopping */ DTRACE_ACTIVITY_COOLDOWN, /* while stopping */ DTRACE_ACTIVITY_STOPPED, /* after stopping */ DTRACE_ACTIVITY_KILLED /* killed */ } dtrace_activity_t; /* * DTrace Helper Implementation * * A description of the helper architecture may be found in <sys/dtrace.h>. * Each process contains a pointer to its helpers in its p_dtrace_helpers * member. This is a pointer to a dtrace_helpers structure, which contains an * array of pointers to dtrace_helper structures, helper variable state (shared * among a process's helpers) and a generation count. (The generation count is * used to provide an identifier when a helper is added so that it may be * subsequently removed.) The dtrace_helper structure is self-explanatory, * containing pointers to the objects needed to execute the helper. Note that * helpers are _duplicated_ across fork(2), and destroyed on exec(2). No more * than dtrace_helpers_max are allowed per-process. */ #define DTRACE_HELPER_ACTION_USTACK 0 #define DTRACE_NHELPER_ACTIONS 1 typedef struct dtrace_helper_action { int dtha_generation; /* helper action generation */ int dtha_nactions; /* number of actions */ dtrace_difo_t *dtha_predicate; /* helper action predicate */ dtrace_difo_t **dtha_actions; /* array of actions */ struct dtrace_helper_action *dtha_next; /* next helper action */ } dtrace_helper_action_t; typedef struct dtrace_helper_provider { int dthp_generation; /* helper provider generation */ uint32_t dthp_ref; /* reference count */ dof_helper_t dthp_prov; /* DOF w/ provider and probes */ } dtrace_helper_provider_t; typedef struct dtrace_helpers { dtrace_helper_action_t **dthps_actions; /* array of helper actions */ dtrace_vstate_t dthps_vstate; /* helper action var.
state */ dtrace_helper_provider_t **dthps_provs; /* array of providers */ uint_t dthps_nprovs; /* count of providers */ uint_t dthps_maxprovs; /* provider array size */ int dthps_generation; /* current generation */ pid_t dthps_pid; /* pid of associated proc */ int dthps_deferred; /* helper in deferred list */ struct dtrace_helpers *dthps_next; /* next pointer */ struct dtrace_helpers *dthps_prev; /* prev pointer */ } dtrace_helpers_t; /* * DTrace Helper Action Tracing * * Debugging helper actions can be arduous. To ease the development and * debugging of helpers, DTrace contains a tracing-framework-within-a-tracing- * framework: helper tracing. If dtrace_helptrace_enabled is non-zero (which * it is by default on DEBUG kernels), all helper activity will be traced to a * global, in-kernel ring buffer. Each entry includes a pointer to the specific * helper, the location within the helper, and a trace of all local variables. * The ring buffer may be displayed in a human-readable format with the * ::dtrace_helptrace mdb(1) dcmd. */ #define DTRACE_HELPTRACE_NEXT (-1) #define DTRACE_HELPTRACE_DONE (-2) #define DTRACE_HELPTRACE_ERR (-3) typedef struct dtrace_helptrace { dtrace_helper_action_t *dtht_helper; /* helper action */ int dtht_where; /* where in helper action */ int dtht_nlocals; /* number of locals */ int dtht_fault; /* type of fault (if any) */ int dtht_fltoffs; /* DIF offset */ uint64_t dtht_illval; /* faulting value */ uint64_t dtht_locals[1]; /* local variables */ } dtrace_helptrace_t; /* * DTrace Credentials * * In probe context, we have limited flexibility to examine the credentials * of the DTrace consumer that created a particular enabling. We use * the Least Privilege interfaces to cache the consumer's cred pointer and * some facts about that credential in a dtrace_cred_t structure. These * can limit the consumer's breadth of visibility and what actions the * consumer may take. */ #define DTRACE_CRV_ALLPROC 0x01 #define DTRACE_CRV_KERNEL 0x02 #define DTRACE_CRV_ALLZONE 0x04 #define DTRACE_CRV_ALL (DTRACE_CRV_ALLPROC | DTRACE_CRV_KERNEL | \ DTRACE_CRV_ALLZONE) #define DTRACE_CRA_PROC 0x0001 #define DTRACE_CRA_PROC_CONTROL 0x0002 #define DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER 0x0004 #define DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE 0x0008 #define DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG 0x0010 #define DTRACE_CRA_KERNEL 0x0020 #define DTRACE_CRA_KERNEL_DESTRUCTIVE 0x0040 #define DTRACE_CRA_ALL (DTRACE_CRA_PROC | \ DTRACE_CRA_PROC_CONTROL | \ DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER | \ DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE | \ DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG | \ DTRACE_CRA_KERNEL | \ DTRACE_CRA_KERNEL_DESTRUCTIVE) typedef struct dtrace_cred { cred_t *dcr_cred; uint8_t dcr_destructive; uint8_t dcr_visible; uint16_t dcr_action; } dtrace_cred_t; /* * DTrace Consumer State * * Each DTrace consumer has an associated dtrace_state structure that contains * its in-kernel DTrace state -- including options, credentials, statistics and * pointers to ECBs, buffers, speculations and formats. A dtrace_state * structure is also allocated for anonymous enablings. When anonymous state * is grabbed, the grabbing consumer's dts_anon pointer is set to the grabbed * dtrace_state structure.
*/ struct dtrace_state { #ifdef illumos dev_t dts_dev; /* device */ #else struct cdev *dts_dev; /* device */ #endif int dts_necbs; /* total number of ECBs */ dtrace_ecb_t **dts_ecbs; /* array of ECBs */ dtrace_epid_t dts_epid; /* next EPID to allocate */ size_t dts_needed; /* greatest needed space */ struct dtrace_state *dts_anon; /* anon. state, if grabbed */ dtrace_activity_t dts_activity; /* current activity */ dtrace_vstate_t dts_vstate; /* variable state */ dtrace_buffer_t *dts_buffer; /* principal buffer */ dtrace_buffer_t *dts_aggbuffer; /* aggregation buffer */ dtrace_speculation_t *dts_speculations; /* speculation array */ int dts_nspeculations; /* number of speculations */ int dts_naggregations; /* number of aggregations */ dtrace_aggregation_t **dts_aggregations; /* aggregation array */ #ifdef illumos vmem_t *dts_aggid_arena; /* arena for aggregation IDs */ #else struct unrhdr *dts_aggid_arena; /* arena for aggregation IDs */ #endif uint64_t dts_errors; /* total number of errors */ uint32_t dts_speculations_busy; /* number of spec. busy */ uint32_t dts_speculations_unavail; /* number of spec unavail */ uint32_t dts_stkstroverflows; /* stack string tab overflows */ uint32_t dts_dblerrors; /* errors in ERROR probes */ uint32_t dts_reserve; /* space reserved for END */ hrtime_t dts_laststatus; /* time of last status */ #ifdef illumos cyclic_id_t dts_cleaner; /* cleaning cyclic */ cyclic_id_t dts_deadman; /* deadman cyclic */ #else struct callout dts_cleaner; /* Cleaning callout. */ struct callout dts_deadman; /* Deadman callout. */ #endif hrtime_t dts_alive; /* time last alive */ char dts_speculates; /* boolean: has speculations */ char dts_destructive; /* boolean: has dest. actions */ int dts_nformats; /* number of formats */ char **dts_formats; /* format string array */ dtrace_optval_t dts_options[DTRACEOPT_MAX]; /* options */ dtrace_cred_t dts_cred; /* credentials */ size_t dts_nretained; /* number of retained enabs */ int dts_getf; /* number of getf() calls */ + uint64_t dts_rstate[NCPU][2]; /* per-CPU random state */ }; struct dtrace_provider { dtrace_pattr_t dtpv_attr; /* provider attributes */ dtrace_ppriv_t dtpv_priv; /* provider privileges */ dtrace_pops_t dtpv_pops; /* provider operations */ char *dtpv_name; /* provider name */ void *dtpv_arg; /* provider argument */ hrtime_t dtpv_defunct; /* when made defunct */ struct dtrace_provider *dtpv_next; /* next provider */ }; struct dtrace_meta { dtrace_mops_t dtm_mops; /* meta provider operations */ char *dtm_name; /* meta provider name */ void *dtm_arg; /* meta provider user arg */ uint64_t dtm_count; /* no. of associated provs. */ }; /* * DTrace Enablings * * A dtrace_enabling structure is used to track a collection of ECB * descriptions -- before they have been turned into actual ECBs. This is * created as a result of DOF processing, and is generally used to generate * ECBs immediately thereafter. However, enablings are also generally * retained should the probes they describe be created at a later time; as * each new module or provider registers with the framework, the retained * enablings are reevaluated, with any new match resulting in new ECBs. To * prevent probes from being matched more than once, the enabling tracks the * last probe generation matched, and only matches probes from subsequent * generations. 
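 *
 * A minimal sketch of the generation check described above, using the real
 * dtpr_gen and dten_probegen fields (the surrounding matching loop and the
 * framework's generation counter are elided):
 *
 *	if (probe->dtpr_gen <= enab->dten_probegen)
 *		continue;	/* already matched in an earlier pass */
 *	/* ... create ECBs from the enabling's descriptions ... */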
*/ typedef struct dtrace_enabling { dtrace_ecbdesc_t **dten_desc; /* all ECB descriptions */ int dten_ndesc; /* number of ECB descriptions */ int dten_maxdesc; /* size of ECB array */ dtrace_vstate_t *dten_vstate; /* associated variable state */ dtrace_genid_t dten_probegen; /* matched probe generation */ dtrace_ecbdesc_t *dten_current; /* current ECB description */ int dten_error; /* current error value */ int dten_primed; /* boolean: set if primed */ struct dtrace_enabling *dten_prev; /* previous enabling */ struct dtrace_enabling *dten_next; /* next enabling */ } dtrace_enabling_t; /* * DTrace Anonymous Enablings * * Anonymous enablings are DTrace enablings that are not associated with a * controlling process, but rather derive their enabling from DOF stored as * properties in the dtrace.conf file. If there is an anonymous enabling, a * DTrace consumer state and enabling are created on attach. The state may be * subsequently grabbed by the first consumer specifying the "grabanon" * option. As long as an anonymous DTrace enabling exists, dtrace(7D) will * refuse to unload. */ typedef struct dtrace_anon { dtrace_state_t *dta_state; /* DTrace consumer state */ dtrace_enabling_t *dta_enabling; /* pointer to enabling */ processorid_t dta_beganon; /* which CPU BEGIN ran on */ } dtrace_anon_t; /* * DTrace Error Debugging */ #ifdef DEBUG #define DTRACE_ERRDEBUG #endif #ifdef DTRACE_ERRDEBUG typedef struct dtrace_errhash { const char *dter_msg; /* error message */ int dter_count; /* number of times seen */ } dtrace_errhash_t; #define DTRACE_ERRHASHSZ 256 /* must be > number of err msgs */ #endif /* DTRACE_ERRDEBUG */ /* * DTrace Toxic Ranges * * DTrace supports safe loads from probe context; if the address turns out to * be invalid, a bit will be set by the kernel indicating that DTrace * encountered a memory error, and DTrace will propagate the error to the user * accordingly. However, there may exist some regions of memory in which an * arbitrary load can change system state, and from which it is impossible to * recover after such a load has been attempted. Examples of this may * include memory in which programmable I/O registers are mapped (for which a * read may have some implications for the device) or (in the specific case of * UltraSPARC-I and -II) the virtual address hole. The platform is required * to make DTrace aware of these toxic ranges; DTrace will then check that * target addresses are not in a toxic range before attempting to issue a * safe load.
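 *
 * A minimal sketch of the resulting check, in terms of the dtrace_toxrange
 * structure defined below (the array and count names are illustrative; the
 * platform registers the real ranges):
 *
 *	for (i = 0; i < ntoxranges; i++) {
 *		if (addr + size > toxranges[i].dtt_base &&
 *		    addr < toxranges[i].dtt_limit)
 *			return (1);	/* toxic: refuse the safe load */
 *	}
 *	return (0);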
*/ typedef struct dtrace_toxrange { uintptr_t dtt_base; /* base of toxic range */ uintptr_t dtt_limit; /* limit of toxic range */ } dtrace_toxrange_t; #ifdef illumos extern uint64_t dtrace_getarg(int, int); #else extern uint64_t __noinline dtrace_getarg(int, int); #endif extern greg_t dtrace_getfp(void); extern int dtrace_getipl(void); extern uintptr_t dtrace_caller(int); extern uint32_t dtrace_cas32(uint32_t *, uint32_t, uint32_t); extern void *dtrace_casptr(volatile void *, volatile void *, volatile void *); extern void dtrace_copyin(uintptr_t, uintptr_t, size_t, volatile uint16_t *); extern void dtrace_copyinstr(uintptr_t, uintptr_t, size_t, volatile uint16_t *); extern void dtrace_copyout(uintptr_t, uintptr_t, size_t, volatile uint16_t *); extern void dtrace_copyoutstr(uintptr_t, uintptr_t, size_t, volatile uint16_t *); extern void dtrace_getpcstack(pc_t *, int, int, uint32_t *); extern ulong_t dtrace_getreg(struct trapframe *, uint_t); extern int dtrace_getstackdepth(int); extern void dtrace_getupcstack(uint64_t *, int); extern void dtrace_getufpstack(uint64_t *, uint64_t *, int); extern int dtrace_getustackdepth(void); extern uintptr_t dtrace_fulword(void *); extern uint8_t dtrace_fuword8(void *); extern uint16_t dtrace_fuword16(void *); extern uint32_t dtrace_fuword32(void *); extern uint64_t dtrace_fuword64(void *); extern void dtrace_probe_error(dtrace_state_t *, dtrace_epid_t, int, int, int, uintptr_t); extern int dtrace_assfail(const char *, const char *, int); extern int dtrace_attached(void); #ifdef illumos extern hrtime_t dtrace_gethrestime(void); #endif #ifdef __sparc extern void dtrace_flush_windows(void); extern void dtrace_flush_user_windows(void); extern uint_t dtrace_getotherwin(void); extern uint_t dtrace_getfprs(void); #else extern void dtrace_copy(uintptr_t, uintptr_t, size_t); extern void dtrace_copystr(uintptr_t, uintptr_t, size_t, volatile uint16_t *); #endif /* * DTrace Assertions * * DTrace calls ASSERT and VERIFY from probe context. To assure that a failed * ASSERT or VERIFY does not induce a markedly more catastrophic failure (e.g., * one from which a dump cannot be gleaned), DTrace must define its own ASSERT * and VERIFY macros to be ones that may safely be called from probe context. * This header file must thus be included by any DTrace component that calls * ASSERT and/or VERIFY from probe context, and _only_ by those components. * (The only exception to this is kernel debugging infrastructure at user-level * that doesn't depend on calling ASSERT.) 
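 *
 * Typical usage is unchanged from the ordinary macros -- for example,
 * ASSERT(MUTEX_HELD(&dtrace_lock)) -- only the failure path differs:
 * dtrace_assfail() fails the system in a way that remains safe from probe
 * context, so that a usable dump can still be gleaned.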
*/ #undef ASSERT #undef VERIFY #define VERIFY(EX) ((void)((EX) || \ dtrace_assfail(#EX, __FILE__, __LINE__))) #ifdef DEBUG #define ASSERT(EX) ((void)((EX) || \ dtrace_assfail(#EX, __FILE__, __LINE__))) #else #define ASSERT(X) ((void)0) #endif #ifdef __cplusplus } #endif #endif /* _SYS_DTRACE_IMPL_H */ Index: projects/ipsec/sys/cddl/contrib/opensolaris =================================================================== --- projects/ipsec/sys/cddl/contrib/opensolaris (revision 313186) +++ projects/ipsec/sys/cddl/contrib/opensolaris (revision 313187) Property changes on: projects/ipsec/sys/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/cddl/contrib/opensolaris:r313116-313186 Index: projects/ipsec/sys/cddl/dev/dtrace/x86/dis_tables.c =================================================================== --- projects/ipsec/sys/cddl/dev/dtrace/x86/dis_tables.c (revision 313186) +++ projects/ipsec/sys/cddl/dev/dtrace/x86/dis_tables.c (revision 313187) @@ -1,5012 +1,5597 @@ /* * * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright 2016 Joyent, Inc. */ /* * Copyright (c) 2010, Intel Corporation. * All rights reserved. */ /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ /* * $FreeBSD$ */ #include "dis_tables.h" /* BEGIN CSTYLED */ /* * Disassembly begins in dis_distable, which is equivalent to the One-byte * Opcode Map in the Intel IA32 ISA Reference (page A-6 in my copy). The * decoding loops then traverse out through the other tables as necessary to * decode a given instruction. * * The behavior of this file can be controlled by one of the following flags: * * DIS_TEXT Include text for disassembly * DIS_MEM Include memory-size calculations * * Either or both of these can be defined. * * This file is not, and will never be, cstyled. If anything, the tables should * be taken out another tab stop or two so nothing overlaps. */ /* * These functions must be provided for the consumer to do disassembly. */ #ifdef DIS_TEXT extern char *strncpy(char *, const char *, size_t); extern size_t strlen(const char *); extern int strcmp(const char *, const char *); extern int strncmp(const char *, const char *, size_t); extern size_t strlcat(char *, const char *, size_t); #endif #define TERM 0 /* used to indicate that the 'indirect' */ /* field terminates - no pointer. */ /* Used to decode instructions. 
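 *
 * As a loose sketch of the table traversal described above (the top-level
 * table is indexed by opcode nibbles; the names and control flow here are
 * illustrative, not the decoder's actual loop):
 *
 *	dp = &dis_distable[opcode >> 4][opcode & 0xf];
 *	while (dp->it_indirect != TERM)
 *		dp = dp->it_indirect + bits_from_next_byte();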
*/ typedef struct instable { struct instable *it_indirect; /* for decode op codes */ uchar_t it_adrmode; #ifdef DIS_TEXT char it_name[NCPS]; uint_t it_suffix:1; /* mnem + "w", "l", or "d" */ #endif #ifdef DIS_MEM uint_t it_size:16; #endif uint_t it_invalid64:1; /* opcode invalid in amd64 */ uint_t it_always64:1; /* 64 bit when in 64 bit mode */ uint_t it_invalid32:1; /* invalid in IA32 */ uint_t it_stackop:1; /* push/pop stack operation */ + uint_t it_vexwoxmm:1; /* VEX instructions that don't use XMM/YMM */ + uint_t it_avxsuf:1; /* AVX suffix required */ } instable_t; /* * Instruction formats. */ enum { UNKNOWN, MRw, IMlw, IMw, IR, OA, AO, MS, SM, Mv, Mw, M, /* register or memory */ MG9, /* register or memory in group 9 (prefix optional) */ Mb, /* register or memory, always byte sized */ MO, /* memory only (no registers) */ PREF, SWAPGS_RDTSCP, MONITOR_MWAIT, R, RA, SEG, MR, RM, RM_66r, /* RM, but with a required 0x66 prefix */ IA, MA, SD, AD, SA, D, INM, SO, BD, I, P, V, DSHIFT, /* for double shift that has an 8-bit immediate */ U, OVERRIDE, NORM, /* instructions w/o ModR/M byte, no memory access */ IMPLMEM, /* instructions w/o ModR/M byte, implicit mem access */ O, /* for call */ JTAB, /* jump table */ IMUL, /* for 186 iimul instr */ CBW, /* so data16 can be evaluated for cbw and variants */ MvI, /* for 186 logicals */ ENTER, /* for 186 enter instr */ RMw, /* for 286 arpl instr */ Ib, /* for push immediate byte */ F, /* for 287 instructions */ FF, /* for 287 instructions */ FFC, /* for 287 instructions */ DM, /* 16-bit data */ AM, /* 16-bit addr */ LSEG, /* for 3-bit seg reg encoding */ MIb, /* for 386 logicals */ SREG, /* for 386 special registers */ PREFIX, /* a REP instruction prefix */ LOCK, /* a LOCK instruction prefix */ INT3, /* The int 3 instruction, which has a fake operand */ INTx, /* The normal int instruction, with explicit int num */ DSHIFTcl, /* for double shift that implicitly uses %cl */ CWD, /* so data16 can be evaluated for cwd and variants */ RET, /* single immediate 16-bit operand */ MOVZ, /* for movs and movz, with different size operands */ CRC32, /* for crc32, with different size operands */ XADDB, /* for xaddb */ MOVSXZ, /* AMD64 mov sign extend 32 to 64 bit instruction */ MOVBE, /* movbe instruction */ /* * MMX/SIMD addressing modes. 
*/ MMO, /* Prefixable MMX/SIMD-Int mm/mem -> mm */ MMOIMPL, /* Prefixable MMX/SIMD-Int mm -> mm (mem) */ MMO3P, /* Prefixable MMX/SIMD-Int mm -> r32,imm8 */ MMOM3, /* Prefixable MMX/SIMD-Int mm -> r32 */ MMOS, /* Prefixable MMX/SIMD-Int mm -> mm/mem */ MMOMS, /* Prefixable MMX/SIMD-Int mm -> mem */ MMOPM, /* MMX/SIMD-Int mm/mem -> mm,imm8 */ MMOPM_66o, /* MMX/SIMD-Int 0x66 optional mm/mem -> mm,imm8 */ MMOPRM, /* Prefixable MMX/SIMD-Int r32/mem -> mm,imm8 */ MMOSH, /* Prefixable MMX mm,imm8 */ MM, /* MMX/SIMD-Int mm/mem -> mm */ MMS, /* MMX/SIMD-Int mm -> mm/mem */ MMSH, /* MMX mm,imm8 */ XMMO, /* Prefixable SIMD xmm/mem -> xmm */ XMMOS, /* Prefixable SIMD xmm -> xmm/mem */ XMMOPM, /* Prefixable SIMD xmm/mem w/to xmm,imm8 */ XMMOMX, /* Prefixable SIMD mm/mem -> xmm */ XMMOX3, /* Prefixable SIMD xmm -> r32 */ XMMOXMM, /* Prefixable SIMD xmm/mem -> mm */ XMMOM, /* Prefixable SIMD xmm -> mem */ XMMOMS, /* Prefixable SIMD mem -> xmm */ XMM, /* SIMD xmm/mem -> xmm */ XMM_66r, /* SIMD 0x66 prefix required xmm/mem -> xmm */ XMM_66o, /* SIMD 0x66 prefix optional xmm/mem -> xmm */ XMMXIMPL, /* SIMD xmm -> xmm (mem) */ XMM3P, /* SIMD xmm -> r32,imm8 */ XMM3PM_66r, /* SIMD 0x66 prefix required xmm -> r32/mem,imm8 */ XMMP, /* SIMD xmm/mem w/to xmm,imm8 */ XMMP_66o, /* SIMD 0x66 prefix optional xmm/mem w/to xmm,imm8 */ XMMP_66r, /* SIMD 0x66 prefix required xmm/mem w/to xmm,imm8 */ XMMPRM, /* SIMD r32/mem -> xmm,imm8 */ XMMPRM_66r, /* SIMD 0x66 prefix required r32/mem -> xmm,imm8 */ XMMS, /* SIMD xmm -> xmm/mem */ XMMM, /* SIMD mem -> xmm */ XMMM_66r, /* SIMD 0x66 prefix required mem -> xmm */ XMMMS, /* SIMD xmm -> mem */ XMM3MX, /* SIMD r32/mem -> xmm */ XMM3MXS, /* SIMD xmm -> r32/mem */ XMMSH, /* SIMD xmm,imm8 */ XMMXM3, /* SIMD xmm/mem -> r32 */ XMMX3, /* SIMD xmm -> r32 */ XMMXMM, /* SIMD xmm/mem -> mm */ XMMMX, /* SIMD mm -> xmm */ XMMXM, /* SIMD xmm -> mm */ XMMX2I, /* SIMD xmm -> xmm, imm, imm */ XMM2I, /* SIMD xmm, imm, imm */ XMMFENCE, /* SIMD lfence or mfence */ XMMSFNC, /* SIMD sfence (none or mem) */ XGETBV_XSETBV, VEX_NONE, /* VEX no operand */ VEX_MO, /* VEX mod_rm -> implicit reg */ VEX_RMrX, /* VEX VEX.vvvv, mod_rm -> mod_reg */ + VEX_VRMrX, /* VEX mod_rm, VEX.vvvv -> mod_rm */ VEX_RRX, /* VEX VEX.vvvv, mod_reg -> mod_rm */ VEX_RMRX, /* VEX VEX.vvvv, mod_rm, imm8[7:4] -> mod_reg */ VEX_MX, /* VEX mod_rm -> mod_reg */ VEX_MXI, /* VEX mod_rm, imm8 -> mod_reg */ VEX_XXI, /* VEX mod_rm, imm8 -> VEX.vvvv */ VEX_MR, /* VEX mod_rm -> mod_reg */ VEX_RRI, /* VEX mod_reg, mod_rm -> implicit(eflags/r32) */ VEX_RX, /* VEX mod_reg -> mod_rm */ VEX_RR, /* VEX mod_rm -> mod_reg */ VEX_RRi, /* VEX mod_rm, imm8 -> mod_reg */ VEX_RM, /* VEX mod_reg -> mod_rm */ + VEX_RIM, /* VEX mod_reg, imm8 -> mod_rm */ VEX_RRM, /* VEX VEX.vvvv, mod_reg -> mod_rm */ VEX_RMX, /* VEX VEX.vvvv, mod_rm -> mod_reg */ + VEX_SbVM, /* VEX SIB, VEX.vvvv -> mod_rm */ VMx, /* vmcall/vmlaunch/vmresume/vmxoff */ VMxo, /* VMx instruction with optional prefix */ - SVM /* AMD SVM instructions */ + SVM, /* AMD SVM instructions */ + BLS, /* BLSR, BLSMSK, BLSI */ + FMA, /* FMA instructions, all VEX_RMrX */ + ADX /* ADX instructions, support REX.w, mod_rm->mod_reg */ }; /* * VEX prefixes */ #define VEX_2bytes 0xC5 /* the first byte of two-byte form */ #define VEX_3bytes 0xC4 /* the first byte of three-byte form */ #define FILL 0x90 /* Fill byte used for alignment (nop) */ /* ** Register numbers for the i386 */ #define EAX_REGNO 0 #define ECX_REGNO 1 #define EDX_REGNO 2 #define EBX_REGNO 3 #define ESP_REGNO 4 #define EBP_REGNO 5 #define 
ESI_REGNO 6
#define	EDI_REGNO 7

/*
 * modes for immediate values
 */
#define	MODE_NONE	0
#define	MODE_IPREL	1	/* signed IP relative value */
#define	MODE_SIGNED	2	/* sign extended immediate */
#define	MODE_IMPLIED	3	/* constant value implied from opcode */
#define	MODE_OFFSET	4	/* offset part of an address */
#define	MODE_RIPREL	5	/* like IPREL, but from %rip (amd64) */

/*
 * The letters used in these macros are:
 *		IND - indirect to another table
 *		"T" - means to Terminate indirections (this is the final opcode)
 *		"S" - means "operand length suffix required"
+*		"Sa" - means AVX2 suffix (d/q) required
 *		"NS" - means "no suffix" which is the operand length suffix of the opcode
 *		"Z" - means instruction size arg required
 *		"u" - means the opcode is invalid in IA32 but valid in amd64
 *		"x" - means the opcode is invalid in amd64, but not IA32
 *		"y" - means the operand size is always 64 bits in 64 bit mode
 *		"p" - means push/pop stack operation
+*		"vr" - means VEX instruction that operates on normal registers, not fpu
 */

#if defined(DIS_TEXT) && defined(DIS_MEM)
#define	IND(table)		{(instable_t *)table, 0, "", 0, 0, 0, 0, 0, 0}
#define	INDx(table)		{(instable_t *)table, 0, "", 0, 0, 1, 0, 0, 0}
#define	TNS(name, amode)	{TERM, amode, name, 0, 0, 0, 0, 0, 0}
#define	TNSu(name, amode)	{TERM, amode, name, 0, 0, 0, 0, 1, 0}
#define	TNSx(name, amode)	{TERM, amode, name, 0, 0, 1, 0, 0, 0}
#define	TNSy(name, amode)	{TERM, amode, name, 0, 0, 0, 1, 0, 0}
#define	TNSyp(name, amode)	{TERM, amode, name, 0, 0, 0, 1, 0, 1}
#define	TNSZ(name, amode, sz)	{TERM, amode, name, 0, sz, 0, 0, 0, 0}
#define	TNSZy(name, amode, sz)	{TERM, amode, name, 0, sz, 0, 1, 0, 0}
+#define	TNSZvr(name, amode, sz)	{TERM, amode, name, 0, sz, 0, 0, 0, 0, 1}
#define	TS(name, amode)		{TERM, amode, name, 1, 0, 0, 0, 0, 0}
#define	TSx(name, amode)	{TERM, amode, name, 1, 0, 1, 0, 0, 0}
#define	TSy(name, amode)	{TERM, amode, name, 1, 0, 0, 1, 0, 0}
#define	TSp(name, amode)	{TERM, amode, name, 1, 0, 0, 0, 0, 1}
#define	TSZ(name, amode, sz)	{TERM, amode, name, 1, sz, 0, 0, 0, 0}
+#define	TSaZ(name, amode, sz)	{TERM, amode, name, 1, sz, 0, 0, 0, 0, 0, 1}
#define	TSZx(name, amode, sz)	{TERM, amode, name, 1, sz, 1, 0, 0, 0}
#define	TSZy(name, amode, sz)	{TERM, amode, name, 1, sz, 0, 1, 0, 0}
#define	INVALID			{TERM, UNKNOWN, "", 0, 0, 0, 0, 0}
#elif defined(DIS_TEXT)
#define	IND(table)		{(instable_t *)table, 0, "", 0, 0, 0, 0, 0}
#define	INDx(table)		{(instable_t *)table, 0, "", 0, 1, 0, 0, 0}
#define	TNS(name, amode)	{TERM, amode, name, 0, 0, 0, 0, 0}
#define	TNSu(name, amode)	{TERM, amode, name, 0, 0, 0, 1, 0}
#define	TNSx(name, amode)	{TERM, amode, name, 0, 1, 0, 0, 0}
#define	TNSy(name, amode)	{TERM, amode, name, 0, 0, 1, 0, 0}
#define	TNSyp(name, amode)	{TERM, amode, name, 0, 0, 1, 0, 1}
#define	TNSZ(name, amode, sz)	{TERM, amode, name, 0, 0, 0, 0, 0}
#define	TNSZy(name, amode, sz)	{TERM, amode, name, 0, 0, 1, 0, 0}
+#define	TNSZvr(name, amode, sz)	{TERM, amode, name, 0, 0, 0, 0, 0, 1}
#define	TS(name, amode)		{TERM, amode, name, 1, 0, 0, 0, 0}
#define	TSx(name, amode)	{TERM, amode, name, 1, 1, 0, 0, 0}
#define	TSy(name, amode)	{TERM, amode, name, 1, 0, 1, 0, 0}
#define	TSp(name, amode)	{TERM, amode, name, 1, 0, 0, 0, 1}
#define	TSZ(name, amode, sz)	{TERM, amode, name, 1, 0, 0, 0, 0}
+#define	TSaZ(name, amode, sz)	{TERM, amode, name, 1, 0, 0, 0, 0, 0, 1}
#define	TSZx(name, amode, sz)	{TERM, amode, name, 1, 1, 0, 0, 0}
#define	TSZy(name, amode, sz)	{TERM, amode, name, 1, 0, 1, 0, 0}
#define	INVALID			{TERM, UNKNOWN, "", 0, 0, 0, 0, 0}
#elif defined(DIS_MEM)
#define	IND(table)		{(instable_t *)table, 0, 0, 0, 0, 0, 0}
#define	INDx(table)		{(instable_t *)table, 0, 0, 1, 0, 0, 0}
#define	TNS(name, amode)	{TERM, amode, 0, 0, 0, 0, 0}
#define	TNSu(name, amode)	{TERM, amode, 0, 0, 0, 1, 0}
#define	TNSy(name, amode)	{TERM, amode, 0, 0, 1, 0, 0}
#define	TNSyp(name, amode)	{TERM, amode, 0, 0, 1, 0, 1}
#define	TNSx(name, amode)	{TERM, amode, 0, 1, 0, 0, 0}
#define	TNSZ(name, amode, sz)	{TERM, amode, sz, 0, 0, 0, 0}
#define	TNSZy(name, amode, sz)	{TERM, amode, sz, 0, 1, 0, 0}
+#define	TNSZvr(name, amode, sz)	{TERM, amode, sz, 0, 0, 0, 0, 1}
#define	TS(name, amode)		{TERM, amode, 0, 0, 0, 0, 0}
#define	TSx(name, amode)	{TERM, amode, 0, 1, 0, 0, 0}
#define	TSy(name, amode)	{TERM, amode, 0, 0, 1, 0, 0}
#define	TSp(name, amode)	{TERM, amode, 0, 0, 0, 0, 1}
#define	TSZ(name, amode, sz)	{TERM, amode, sz, 0, 0, 0, 0}
+#define	TSaZ(name, amode, sz)	{TERM, amode, sz, 0, 0, 0, 0, 0, 1}
#define	TSZx(name, amode, sz)	{TERM, amode, sz, 1, 0, 0, 0}
#define	TSZy(name, amode, sz)	{TERM, amode, sz, 0, 1, 0, 0}
#define	INVALID			{TERM, UNKNOWN, 0, 0, 0, 0, 0}
#else
#define	IND(table)		{(instable_t *)table, 0, 0, 0, 0, 0}
#define	INDx(table)		{(instable_t *)table, 0, 1, 0, 0, 0}
#define	TNS(name, amode)	{TERM, amode, 0, 0, 0, 0}
#define	TNSu(name, amode)	{TERM, amode, 0, 0, 1, 0}
#define	TNSy(name, amode)	{TERM, amode, 0, 1, 0, 0}
#define	TNSyp(name, amode)	{TERM, amode, 0, 1, 0, 1}
#define	TNSx(name, amode)	{TERM, amode, 1, 0, 0, 0}
#define	TNSZ(name, amode, sz)	{TERM, amode, 0, 0, 0, 0}
#define	TNSZy(name, amode, sz)	{TERM, amode, 0, 1, 0, 0}
+#define	TNSZvr(name, amode, sz)	{TERM, amode, 0, 0, 0, 0, 1}
#define	TS(name, amode)		{TERM, amode, 0, 0, 0, 0}
#define	TSx(name, amode)	{TERM, amode, 1, 0, 0, 0}
#define	TSy(name, amode)	{TERM, amode, 0, 1, 0, 0}
#define	TSp(name, amode)	{TERM, amode, 0, 0, 0, 1}
#define	TSZ(name, amode, sz)	{TERM, amode, 0, 0, 0, 0}
+#define	TSaZ(name, amode, sz)	{TERM, amode, 0, 0, 0, 0, 0, 1}
#define	TSZx(name, amode, sz)	{TERM, amode, 1, 0, 0, 0}
#define	TSZy(name, amode, sz)	{TERM, amode, 0, 1, 0, 0}
#define	INVALID			{TERM, UNKNOWN, 0, 0, 0, 0}
#endif

#ifdef DIS_TEXT
/*
 * this decodes the r_m field for modes 0, 1, 2 in 16 bit mode
 */
const char *const dis_addr16[3][8] = {
	"(%bx,%si)", "(%bx,%di)", "(%bp,%si)", "(%bp,%di)",
	"(%si)", "(%di)", "", "(%bx)",
	"(%bx,%si)", "(%bx,%di)", "(%bp,%si)", "(%bp,%di)",
	"(%si)", "(%di)", "(%bp)", "(%bx)",
	"(%bx,%si)", "(%bx,%di)", "(%bp,%si)", "(%bp,%di)",
	"(%si)", "(%di)", "(%bp)", "(%bx)",
};

/*
 * This decodes 32 bit addressing mode r_m field for modes 0, 1, 2
 */
const char *const dis_addr32_mode0[16] = {
	"(%eax)", "(%ecx)", "(%edx)", "(%ebx)", "", "", "(%esi)", "(%edi)",
	"(%r8d)", "(%r9d)", "(%r10d)", "(%r11d)", "", "", "(%r14d)", "(%r15d)"
};

const char *const dis_addr32_mode12[16] = {
	"(%eax)", "(%ecx)", "(%edx)", "(%ebx)", "", "(%ebp)", "(%esi)", "(%edi)",
	"(%r8d)", "(%r9d)", "(%r10d)", "(%r11d)", "", "(%r13d)", "(%r14d)", "(%r15d)"
};

/*
 * This decodes 64 bit addressing mode r_m field for modes 0, 1, 2
 */
const char *const dis_addr64_mode0[16] = {
	"(%rax)", "(%rcx)", "(%rdx)", "(%rbx)", "", "(%rip)", "(%rsi)", "(%rdi)",
	"(%r8)", "(%r9)", "(%r10)", "(%r11)", "(%r12)", "(%rip)", "(%r14)", "(%r15)"
};
const char *const dis_addr64_mode12[16] = {
	"(%rax)", "(%rcx)", "(%rdx)", "(%rbx)", "", "(%rbp)", "(%rsi)", "(%rdi)",
	"(%r8)", "(%r9)", "(%r10)", "(%r11)", "(%r12)", "(%r13)", "(%r14)", "(%r15)"
};

/*
 * decode for scale from SIB byte
 */
const char *const dis_scale_factor[4] = { ")", ",2)", ",4)",
",8)" }; /* + * decode for scale from VSIB byte, note that we always include the scale factor + * to match gas. + */ +const char *const dis_vscale_factor[4] = { ",1)", ",2)", ",4)", ",8)" }; + +/* * register decoding for normal references to registers (ie. not addressing) */ const char *const dis_REG8[16] = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh", "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b" }; const char *const dis_REG8_REX[16] = { "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil", "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b" }; const char *const dis_REG16[16] = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di", "%r8w", "%r9w", "%r10w", "%r11w", "%r12w", "%r13w", "%r14w", "%r15w" }; const char *const dis_REG32[16] = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" }; const char *const dis_REG64[16] = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" }; const char *const dis_DEBUGREG[16] = { "%db0", "%db1", "%db2", "%db3", "%db4", "%db5", "%db6", "%db7", "%db8", "%db9", "%db10", "%db11", "%db12", "%db13", "%db14", "%db15" }; const char *const dis_CONTROLREG[16] = { "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5?", "%cr6?", "%cr7?", "%cr8", "%cr9?", "%cr10?", "%cr11?", "%cr12?", "%cr13?", "%cr14?", "%cr15?" }; const char *const dis_TESTREG[16] = { "%tr0?", "%tr1?", "%tr2?", "%tr3", "%tr4", "%tr5", "%tr6", "%tr7", "%tr0?", "%tr1?", "%tr2?", "%tr3", "%tr4", "%tr5", "%tr6", "%tr7" }; const char *const dis_MMREG[16] = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" }; const char *const dis_XMMREG[16] = { "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15" }; const char *const dis_YMMREG[16] = { "%ymm0", "%ymm1", "%ymm2", "%ymm3", "%ymm4", "%ymm5", "%ymm6", "%ymm7", "%ymm8", "%ymm9", "%ymm10", "%ymm11", "%ymm12", "%ymm13", "%ymm14", "%ymm15" }; const char *const dis_SEGREG[16] = { "%es", "%cs", "%ss", "%ds", "%fs", "%gs", "", "", "%es", "%cs", "%ss", "%ds", "%fs", "%gs", "", "" }; /* * SIMD predicate suffixes */ const char *const dis_PREDSUFFIX[8] = { "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord" }; const char *const dis_AVXvgrp7[3][8] = { /*0 1 2 3 4 5 6 7*/ /*71*/ {"", "", "vpsrlw", "", "vpsraw", "", "vpsllw", ""}, /*72*/ {"", "", "vpsrld", "", "vpsrad", "", "vpslld", ""}, /*73*/ {"", "", "vpsrlq", "vpsrldq", "", "", "vpsllq", "vpslldq"} }; #endif /* DIS_TEXT */ /* * "decode table" for 64 bit mode MOVSXD instruction (opcode 0x63) */ const instable_t dis_opMOVSLD = TNS("movslq",MOVSXZ); /* * "decode table" for pause and clflush instructions */ const instable_t dis_opPause = TNS("pause", NORM); /* * Decode table for 0x0F00 opcodes */ const instable_t dis_op0F00[8] = { /* [0] */ TNS("sldt",M), TNS("str",M), TNSy("lldt",M), TNSy("ltr",M), /* [4] */ TNSZ("verr",M,2), TNSZ("verw",M,2), INVALID, INVALID, }; /* * Decode table for 0x0F01 opcodes */ const instable_t dis_op0F01[8] = { /* [0] */ TNSZ("sgdt",VMx,6), TNSZ("sidt",MONITOR_MWAIT,6), TNSZ("lgdt",XGETBV_XSETBV,6), TNSZ("lidt",SVM,6), /* [4] */ TNSZ("smsw",M,2), INVALID, TNSZ("lmsw",M,2), TNS("invlpg",SWAPGS_RDTSCP), }; /* * Decode table for 0x0F18 opcodes -- SIMD prefetch */ const instable_t dis_op0F18[8] = { /* [0] */ 
TNS("prefetchnta",PREF),TNS("prefetcht0",PREF), TNS("prefetcht1",PREF), TNS("prefetcht2",PREF), /* [4] */ INVALID, INVALID, INVALID, INVALID, }; /* * Decode table for 0x0FAE opcodes -- SIMD state save/restore */ const instable_t dis_op0FAE[8] = { /* [0] */ TNSZ("fxsave",M,512), TNSZ("fxrstor",M,512), TNS("ldmxcsr",M), TNS("stmxcsr",M), /* [4] */ TNSZ("xsave",M,512), TNS("lfence",XMMFENCE), TNS("mfence",XMMFENCE), TNS("sfence",XMMSFNC), }; /* * Decode table for 0x0FBA opcodes */ const instable_t dis_op0FBA[8] = { /* [0] */ INVALID, INVALID, INVALID, INVALID, /* [4] */ TS("bt",MIb), TS("bts",MIb), TS("btr",MIb), TS("btc",MIb), }; /* * Decode table for 0x0FC7 opcode (group 9) */ const instable_t dis_op0FC7[8] = { /* [0] */ INVALID, TNS("cmpxchg8b",M), INVALID, INVALID, /* [4] */ INVALID, INVALID, TNS("vmptrld",MG9), TNS("vmptrst",MG9), }; /* * Decode table for 0x0FC7 opcode (group 9) mode 3 */ const instable_t dis_op0FC7m3[8] = { /* [0] */ INVALID, INVALID, INVALID, INVALID, -/* [4] */ INVALID, INVALID, TNS("rdrand",MG9), INVALID, +/* [4] */ INVALID, INVALID, TNS("rdrand",MG9), TNS("rdseed", MG9), }; /* * Decode table for 0x0FC7 opcode with 0x66 prefix */ const instable_t dis_op660FC7[8] = { /* [0] */ INVALID, INVALID, INVALID, INVALID, /* [4] */ INVALID, INVALID, TNS("vmclear",M), INVALID, }; /* * Decode table for 0x0FC7 opcode with 0xF3 prefix */ const instable_t dis_opF30FC7[8] = { /* [0] */ INVALID, INVALID, INVALID, INVALID, /* [4] */ INVALID, INVALID, TNS("vmxon",M), INVALID, }; /* * Decode table for 0x0FC8 opcode -- 486 bswap instruction * *bit pattern: 0000 1111 1100 1reg */ const instable_t dis_op0FC8[4] = { /* [0] */ TNS("bswap",R), INVALID, INVALID, INVALID, }; /* * Decode table for 0x0F71, 0x0F72, and 0x0F73 opcodes -- MMX instructions */ const instable_t dis_op0F7123[4][8] = { { /* [70].0 */ INVALID, INVALID, INVALID, INVALID, /* .4 */ INVALID, INVALID, INVALID, INVALID, }, { /* [71].0 */ INVALID, INVALID, TNS("psrlw",MMOSH), INVALID, /* .4 */ TNS("psraw",MMOSH), INVALID, TNS("psllw",MMOSH), INVALID, }, { /* [72].0 */ INVALID, INVALID, TNS("psrld",MMOSH), INVALID, /* .4 */ TNS("psrad",MMOSH), INVALID, TNS("pslld",MMOSH), INVALID, }, { /* [73].0 */ INVALID, INVALID, TNS("psrlq",MMOSH), TNS("INVALID",MMOSH), /* .4 */ INVALID, INVALID, TNS("psllq",MMOSH), TNS("INVALID",MMOSH), } }; /* * Decode table for SIMD extensions to above 0x0F71-0x0F73 opcodes. */ const instable_t dis_opSIMD7123[32] = { /* [70].0 */ INVALID, INVALID, INVALID, INVALID, /* .4 */ INVALID, INVALID, INVALID, INVALID, /* [71].0 */ INVALID, INVALID, TNS("psrlw",XMMSH), INVALID, /* .4 */ TNS("psraw",XMMSH), INVALID, TNS("psllw",XMMSH), INVALID, /* [72].0 */ INVALID, INVALID, TNS("psrld",XMMSH), INVALID, /* .4 */ TNS("psrad",XMMSH), INVALID, TNS("pslld",XMMSH), INVALID, /* [73].0 */ INVALID, INVALID, TNS("psrlq",XMMSH), TNS("psrldq",XMMSH), /* .4 */ INVALID, INVALID, TNS("psllq",XMMSH), TNS("pslldq",XMMSH), }; /* * SIMD instructions have been wedged into the existing IA32 instruction * set through the use of prefixes. That is, while 0xf0 0x58 may be * addps, 0xf3 0xf0 0x58 (literally, repz addps) is a completely different * instruction - addss. At present, three prefixes have been coopted in * this manner - address size (0x66), repnz (0xf2) and repz (0xf3). The * following tables are used to provide the prefixed instruction names. * The arrays are sparse, but they're fast. */ /* * Decode table for SIMD instructions with the address size (0x66) prefix. 
*/ const instable_t dis_opSIMDdata16[256] = { /* [00] */ INVALID, INVALID, INVALID, INVALID, /* [04] */ INVALID, INVALID, INVALID, INVALID, /* [08] */ INVALID, INVALID, INVALID, INVALID, /* [0C] */ INVALID, INVALID, INVALID, INVALID, /* [10] */ TNSZ("movupd",XMM,16), TNSZ("movupd",XMMS,16), TNSZ("movlpd",XMMM,8), TNSZ("movlpd",XMMMS,8), /* [14] */ TNSZ("unpcklpd",XMM,16),TNSZ("unpckhpd",XMM,16),TNSZ("movhpd",XMMM,8), TNSZ("movhpd",XMMMS,8), /* [18] */ INVALID, INVALID, INVALID, INVALID, /* [1C] */ INVALID, INVALID, INVALID, INVALID, /* [20] */ INVALID, INVALID, INVALID, INVALID, /* [24] */ INVALID, INVALID, INVALID, INVALID, /* [28] */ TNSZ("movapd",XMM,16), TNSZ("movapd",XMMS,16), TNSZ("cvtpi2pd",XMMOMX,8),TNSZ("movntpd",XMMOMS,16), /* [2C] */ TNSZ("cvttpd2pi",XMMXMM,16),TNSZ("cvtpd2pi",XMMXMM,16),TNSZ("ucomisd",XMM,8),TNSZ("comisd",XMM,8), /* [30] */ INVALID, INVALID, INVALID, INVALID, /* [34] */ INVALID, INVALID, INVALID, INVALID, /* [38] */ INVALID, INVALID, INVALID, INVALID, /* [3C] */ INVALID, INVALID, INVALID, INVALID, /* [40] */ INVALID, INVALID, INVALID, INVALID, /* [44] */ INVALID, INVALID, INVALID, INVALID, /* [48] */ INVALID, INVALID, INVALID, INVALID, /* [4C] */ INVALID, INVALID, INVALID, INVALID, /* [50] */ TNS("movmskpd",XMMOX3), TNSZ("sqrtpd",XMM,16), INVALID, INVALID, /* [54] */ TNSZ("andpd",XMM,16), TNSZ("andnpd",XMM,16), TNSZ("orpd",XMM,16), TNSZ("xorpd",XMM,16), /* [58] */ TNSZ("addpd",XMM,16), TNSZ("mulpd",XMM,16), TNSZ("cvtpd2ps",XMM,16),TNSZ("cvtps2dq",XMM,16), /* [5C] */ TNSZ("subpd",XMM,16), TNSZ("minpd",XMM,16), TNSZ("divpd",XMM,16), TNSZ("maxpd",XMM,16), /* [60] */ TNSZ("punpcklbw",XMM,16),TNSZ("punpcklwd",XMM,16),TNSZ("punpckldq",XMM,16),TNSZ("packsswb",XMM,16), /* [64] */ TNSZ("pcmpgtb",XMM,16), TNSZ("pcmpgtw",XMM,16), TNSZ("pcmpgtd",XMM,16), TNSZ("packuswb",XMM,16), /* [68] */ TNSZ("punpckhbw",XMM,16),TNSZ("punpckhwd",XMM,16),TNSZ("punpckhdq",XMM,16),TNSZ("packssdw",XMM,16), /* [6C] */ TNSZ("punpcklqdq",XMM,16),TNSZ("punpckhqdq",XMM,16),TNSZ("movd",XMM3MX,4),TNSZ("movdqa",XMM,16), /* [70] */ TNSZ("pshufd",XMMP,16), INVALID, INVALID, INVALID, /* [74] */ TNSZ("pcmpeqb",XMM,16), TNSZ("pcmpeqw",XMM,16), TNSZ("pcmpeqd",XMM,16), INVALID, /* [78] */ TNSZ("extrq",XMM2I,16), TNSZ("extrq",XMM,16), INVALID, INVALID, -/* [7C] */ INVALID, INVALID, TNSZ("movd",XMM3MXS,4), TNSZ("movdqa",XMMS,16), +/* [7C] */ TNSZ("haddpd",XMM,16), TNSZ("hsubpd",XMM,16), TNSZ("movd",XMM3MXS,4), TNSZ("movdqa",XMMS,16), /* [80] */ INVALID, INVALID, INVALID, INVALID, /* [84] */ INVALID, INVALID, INVALID, INVALID, /* [88] */ INVALID, INVALID, INVALID, INVALID, /* [8C] */ INVALID, INVALID, INVALID, INVALID, /* [90] */ INVALID, INVALID, INVALID, INVALID, /* [94] */ INVALID, INVALID, INVALID, INVALID, /* [98] */ INVALID, INVALID, INVALID, INVALID, /* [9C] */ INVALID, INVALID, INVALID, INVALID, /* [A0] */ INVALID, INVALID, INVALID, INVALID, /* [A4] */ INVALID, INVALID, INVALID, INVALID, /* [A8] */ INVALID, INVALID, INVALID, INVALID, /* [AC] */ INVALID, INVALID, INVALID, INVALID, /* [B0] */ INVALID, INVALID, INVALID, INVALID, /* [B4] */ INVALID, INVALID, INVALID, INVALID, /* [B8] */ INVALID, INVALID, INVALID, INVALID, /* [BC] */ INVALID, INVALID, INVALID, INVALID, /* [C0] */ INVALID, INVALID, TNSZ("cmppd",XMMP,16), INVALID, /* [C4] */ TNSZ("pinsrw",XMMPRM,2),TNS("pextrw",XMM3P), TNSZ("shufpd",XMMP,16), INVALID, /* [C8] */ INVALID, INVALID, INVALID, INVALID, /* [CC] */ INVALID, INVALID, INVALID, INVALID, -/* [D0] */ INVALID, TNSZ("psrlw",XMM,16), TNSZ("psrld",XMM,16), TNSZ("psrlq",XMM,16), +/* [D0] */ 
TNSZ("addsubpd",XMM,16),TNSZ("psrlw",XMM,16), TNSZ("psrld",XMM,16), TNSZ("psrlq",XMM,16), /* [D4] */ TNSZ("paddq",XMM,16), TNSZ("pmullw",XMM,16), TNSZ("movq",XMMS,8), TNS("pmovmskb",XMMX3), /* [D8] */ TNSZ("psubusb",XMM,16), TNSZ("psubusw",XMM,16), TNSZ("pminub",XMM,16), TNSZ("pand",XMM,16), /* [DC] */ TNSZ("paddusb",XMM,16), TNSZ("paddusw",XMM,16), TNSZ("pmaxub",XMM,16), TNSZ("pandn",XMM,16), /* [E0] */ TNSZ("pavgb",XMM,16), TNSZ("psraw",XMM,16), TNSZ("psrad",XMM,16), TNSZ("pavgw",XMM,16), /* [E4] */ TNSZ("pmulhuw",XMM,16), TNSZ("pmulhw",XMM,16), TNSZ("cvttpd2dq",XMM,16),TNSZ("movntdq",XMMS,16), /* [E8] */ TNSZ("psubsb",XMM,16), TNSZ("psubsw",XMM,16), TNSZ("pminsw",XMM,16), TNSZ("por",XMM,16), /* [EC] */ TNSZ("paddsb",XMM,16), TNSZ("paddsw",XMM,16), TNSZ("pmaxsw",XMM,16), TNSZ("pxor",XMM,16), /* [F0] */ INVALID, TNSZ("psllw",XMM,16), TNSZ("pslld",XMM,16), TNSZ("psllq",XMM,16), /* [F4] */ TNSZ("pmuludq",XMM,16), TNSZ("pmaddwd",XMM,16), TNSZ("psadbw",XMM,16), TNSZ("maskmovdqu", XMMXIMPL,16), /* [F8] */ TNSZ("psubb",XMM,16), TNSZ("psubw",XMM,16), TNSZ("psubd",XMM,16), TNSZ("psubq",XMM,16), /* [FC] */ TNSZ("paddb",XMM,16), TNSZ("paddw",XMM,16), TNSZ("paddd",XMM,16), INVALID, }; const instable_t dis_opAVX660F[256] = { /* [00] */ INVALID, INVALID, INVALID, INVALID, /* [04] */ INVALID, INVALID, INVALID, INVALID, /* [08] */ INVALID, INVALID, INVALID, INVALID, /* [0C] */ INVALID, INVALID, INVALID, INVALID, /* [10] */ TNSZ("vmovupd",VEX_MX,16), TNSZ("vmovupd",VEX_RX,16), TNSZ("vmovlpd",VEX_RMrX,8), TNSZ("vmovlpd",VEX_RM,8), /* [14] */ TNSZ("vunpcklpd",VEX_RMrX,16),TNSZ("vunpckhpd",VEX_RMrX,16),TNSZ("vmovhpd",VEX_RMrX,8), TNSZ("vmovhpd",VEX_RM,8), /* [18] */ INVALID, INVALID, INVALID, INVALID, /* [1C] */ INVALID, INVALID, INVALID, INVALID, /* [20] */ INVALID, INVALID, INVALID, INVALID, /* [24] */ INVALID, INVALID, INVALID, INVALID, /* [28] */ TNSZ("vmovapd",VEX_MX,16), TNSZ("vmovapd",VEX_RX,16), INVALID, TNSZ("vmovntpd",VEX_RM,16), /* [2C] */ INVALID, INVALID, TNSZ("vucomisd",VEX_MX,8),TNSZ("vcomisd",VEX_MX,8), /* [30] */ INVALID, INVALID, INVALID, INVALID, /* [34] */ INVALID, INVALID, INVALID, INVALID, /* [38] */ INVALID, INVALID, INVALID, INVALID, /* [3C] */ INVALID, INVALID, INVALID, INVALID, /* [40] */ INVALID, INVALID, INVALID, INVALID, /* [44] */ INVALID, INVALID, INVALID, INVALID, /* [48] */ INVALID, INVALID, INVALID, INVALID, /* [4C] */ INVALID, INVALID, INVALID, INVALID, /* [50] */ TNS("vmovmskpd",VEX_MR), TNSZ("vsqrtpd",VEX_MX,16), INVALID, INVALID, /* [54] */ TNSZ("vandpd",VEX_RMrX,16), TNSZ("vandnpd",VEX_RMrX,16), TNSZ("vorpd",VEX_RMrX,16), TNSZ("vxorpd",VEX_RMrX,16), /* [58] */ TNSZ("vaddpd",VEX_RMrX,16), TNSZ("vmulpd",VEX_RMrX,16), TNSZ("vcvtpd2ps",VEX_MX,16),TNSZ("vcvtps2dq",VEX_MX,16), /* [5C] */ TNSZ("vsubpd",VEX_RMrX,16), TNSZ("vminpd",VEX_RMrX,16), TNSZ("vdivpd",VEX_RMrX,16), TNSZ("vmaxpd",VEX_RMrX,16), /* [60] */ TNSZ("vpunpcklbw",VEX_RMrX,16),TNSZ("vpunpcklwd",VEX_RMrX,16),TNSZ("vpunpckldq",VEX_RMrX,16),TNSZ("vpacksswb",VEX_RMrX,16), /* [64] */ TNSZ("vpcmpgtb",VEX_RMrX,16), TNSZ("vpcmpgtw",VEX_RMrX,16), TNSZ("vpcmpgtd",VEX_RMrX,16), TNSZ("vpackuswb",VEX_RMrX,16), /* [68] */ TNSZ("vpunpckhbw",VEX_RMrX,16),TNSZ("vpunpckhwd",VEX_RMrX,16),TNSZ("vpunpckhdq",VEX_RMrX,16),TNSZ("vpackssdw",VEX_RMrX,16), /* [6C] */ TNSZ("vpunpcklqdq",VEX_RMrX,16),TNSZ("vpunpckhqdq",VEX_RMrX,16),TNSZ("vmovd",VEX_MX,4),TNSZ("vmovdqa",VEX_MX,16), /* [70] */ TNSZ("vpshufd",VEX_MXI,16), TNSZ("vgrp71",VEX_XXI,16), TNSZ("vgrp72",VEX_XXI,16), TNSZ("vgrp73",VEX_XXI,16), /* [74] */ TNSZ("vpcmpeqb",VEX_RMrX,16), 
TNSZ("vpcmpeqw",VEX_RMrX,16), TNSZ("vpcmpeqd",VEX_RMrX,16), INVALID, /* [78] */ INVALID, INVALID, INVALID, INVALID, /* [7C] */ TNSZ("vhaddpd",VEX_RMrX,16), TNSZ("vhsubpd",VEX_RMrX,16), TNSZ("vmovd",VEX_RR,4), TNSZ("vmovdqa",VEX_RX,16), /* [80] */ INVALID, INVALID, INVALID, INVALID, /* [84] */ INVALID, INVALID, INVALID, INVALID, /* [88] */ INVALID, INVALID, INVALID, INVALID, /* [8C] */ INVALID, INVALID, INVALID, INVALID, /* [90] */ INVALID, INVALID, INVALID, INVALID, /* [94] */ INVALID, INVALID, INVALID, INVALID, /* [98] */ INVALID, INVALID, INVALID, INVALID, /* [9C] */ INVALID, INVALID, INVALID, INVALID, /* [A0] */ INVALID, INVALID, INVALID, INVALID, /* [A4] */ INVALID, INVALID, INVALID, INVALID, /* [A8] */ INVALID, INVALID, INVALID, INVALID, /* [AC] */ INVALID, INVALID, INVALID, INVALID, /* [B0] */ INVALID, INVALID, INVALID, INVALID, /* [B4] */ INVALID, INVALID, INVALID, INVALID, /* [B8] */ INVALID, INVALID, INVALID, INVALID, /* [BC] */ INVALID, INVALID, INVALID, INVALID, /* [C0] */ INVALID, INVALID, TNSZ("vcmppd",VEX_RMRX,16), INVALID, /* [C4] */ TNSZ("vpinsrw",VEX_RMRX,2),TNS("vpextrw",VEX_MR), TNSZ("vshufpd",VEX_RMRX,16), INVALID, /* [C8] */ INVALID, INVALID, INVALID, INVALID, /* [CC] */ INVALID, INVALID, INVALID, INVALID, /* [D0] */ TNSZ("vaddsubpd",VEX_RMrX,16),TNSZ("vpsrlw",VEX_RMrX,16), TNSZ("vpsrld",VEX_RMrX,16), TNSZ("vpsrlq",VEX_RMrX,16), /* [D4] */ TNSZ("vpaddq",VEX_RMrX,16), TNSZ("vpmullw",VEX_RMrX,16), TNSZ("vmovq",VEX_RX,8), TNS("vpmovmskb",VEX_MR), /* [D8] */ TNSZ("vpsubusb",VEX_RMrX,16), TNSZ("vpsubusw",VEX_RMrX,16), TNSZ("vpminub",VEX_RMrX,16), TNSZ("vpand",VEX_RMrX,16), /* [DC] */ TNSZ("vpaddusb",VEX_RMrX,16), TNSZ("vpaddusw",VEX_RMrX,16), TNSZ("vpmaxub",VEX_RMrX,16), TNSZ("vpandn",VEX_RMrX,16), /* [E0] */ TNSZ("vpavgb",VEX_RMrX,16), TNSZ("vpsraw",VEX_RMrX,16), TNSZ("vpsrad",VEX_RMrX,16), TNSZ("vpavgw",VEX_RMrX,16), /* [E4] */ TNSZ("vpmulhuw",VEX_RMrX,16), TNSZ("vpmulhw",VEX_RMrX,16), TNSZ("vcvttpd2dq",VEX_MX,16),TNSZ("vmovntdq",VEX_RM,16), /* [E8] */ TNSZ("vpsubsb",VEX_RMrX,16), TNSZ("vpsubsw",VEX_RMrX,16), TNSZ("vpminsw",VEX_RMrX,16), TNSZ("vpor",VEX_RMrX,16), /* [EC] */ TNSZ("vpaddsb",VEX_RMrX,16), TNSZ("vpaddsw",VEX_RMrX,16), TNSZ("vpmaxsw",VEX_RMrX,16), TNSZ("vpxor",VEX_RMrX,16), /* [F0] */ INVALID, TNSZ("vpsllw",VEX_RMrX,16), TNSZ("vpslld",VEX_RMrX,16), TNSZ("vpsllq",VEX_RMrX,16), /* [F4] */ TNSZ("vpmuludq",VEX_RMrX,16), TNSZ("vpmaddwd",VEX_RMrX,16), TNSZ("vpsadbw",VEX_RMrX,16), TNS("vmaskmovdqu",VEX_MX), /* [F8] */ TNSZ("vpsubb",VEX_RMrX,16), TNSZ("vpsubw",VEX_RMrX,16), TNSZ("vpsubd",VEX_RMrX,16), TNSZ("vpsubq",VEX_RMrX,16), /* [FC] */ TNSZ("vpaddb",VEX_RMrX,16), TNSZ("vpaddw",VEX_RMrX,16), TNSZ("vpaddd",VEX_RMrX,16), INVALID, }; /* * Decode table for SIMD instructions with the repnz (0xf2) prefix. 
*/ const instable_t dis_opSIMDrepnz[256] = { /* [00] */ INVALID, INVALID, INVALID, INVALID, /* [04] */ INVALID, INVALID, INVALID, INVALID, /* [08] */ INVALID, INVALID, INVALID, INVALID, /* [0C] */ INVALID, INVALID, INVALID, INVALID, -/* [10] */ TNSZ("movsd",XMM,8), TNSZ("movsd",XMMS,8), INVALID, INVALID, +/* [10] */ TNSZ("movsd",XMM,8), TNSZ("movsd",XMMS,8), TNSZ("movddup",XMM,8), INVALID, /* [14] */ INVALID, INVALID, INVALID, INVALID, /* [18] */ INVALID, INVALID, INVALID, INVALID, /* [1C] */ INVALID, INVALID, INVALID, INVALID, /* [20] */ INVALID, INVALID, INVALID, INVALID, /* [24] */ INVALID, INVALID, INVALID, INVALID, /* [28] */ INVALID, INVALID, TNSZ("cvtsi2sd",XMM3MX,4),TNSZ("movntsd",XMMMS,8), /* [2C] */ TNSZ("cvttsd2si",XMMXM3,8),TNSZ("cvtsd2si",XMMXM3,8),INVALID, INVALID, /* [30] */ INVALID, INVALID, INVALID, INVALID, /* [34] */ INVALID, INVALID, INVALID, INVALID, /* [38] */ INVALID, INVALID, INVALID, INVALID, /* [3C] */ INVALID, INVALID, INVALID, INVALID, /* [40] */ INVALID, INVALID, INVALID, INVALID, /* [44] */ INVALID, INVALID, INVALID, INVALID, /* [48] */ INVALID, INVALID, INVALID, INVALID, /* [4C] */ INVALID, INVALID, INVALID, INVALID, /* [50] */ INVALID, TNSZ("sqrtsd",XMM,8), INVALID, INVALID, /* [54] */ INVALID, INVALID, INVALID, INVALID, /* [58] */ TNSZ("addsd",XMM,8), TNSZ("mulsd",XMM,8), TNSZ("cvtsd2ss",XMM,8), INVALID, /* [5C] */ TNSZ("subsd",XMM,8), TNSZ("minsd",XMM,8), TNSZ("divsd",XMM,8), TNSZ("maxsd",XMM,8), /* [60] */ INVALID, INVALID, INVALID, INVALID, /* [64] */ INVALID, INVALID, INVALID, INVALID, /* [68] */ INVALID, INVALID, INVALID, INVALID, /* [6C] */ INVALID, INVALID, INVALID, INVALID, /* [70] */ TNSZ("pshuflw",XMMP,16),INVALID, INVALID, INVALID, /* [74] */ INVALID, INVALID, INVALID, INVALID, /* [78] */ TNSZ("insertq",XMMX2I,16),TNSZ("insertq",XMM,8),INVALID, INVALID, -/* [7C] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ TNSZ("haddps",XMM,16), TNSZ("hsubps",XMM,16), INVALID, INVALID, /* [80] */ INVALID, INVALID, INVALID, INVALID, /* [84] */ INVALID, INVALID, INVALID, INVALID, /* [88] */ INVALID, INVALID, INVALID, INVALID, /* [0C] */ INVALID, INVALID, INVALID, INVALID, /* [90] */ INVALID, INVALID, INVALID, INVALID, /* [94] */ INVALID, INVALID, INVALID, INVALID, /* [98] */ INVALID, INVALID, INVALID, INVALID, /* [9C] */ INVALID, INVALID, INVALID, INVALID, /* [A0] */ INVALID, INVALID, INVALID, INVALID, /* [A4] */ INVALID, INVALID, INVALID, INVALID, /* [A8] */ INVALID, INVALID, INVALID, INVALID, /* [AC] */ INVALID, INVALID, INVALID, INVALID, /* [B0] */ INVALID, INVALID, INVALID, INVALID, /* [B4] */ INVALID, INVALID, INVALID, INVALID, /* [B8] */ INVALID, INVALID, INVALID, INVALID, /* [BC] */ INVALID, INVALID, INVALID, INVALID, /* [C0] */ INVALID, INVALID, TNSZ("cmpsd",XMMP,8), INVALID, /* [C4] */ INVALID, INVALID, INVALID, INVALID, /* [C8] */ INVALID, INVALID, INVALID, INVALID, /* [CC] */ INVALID, INVALID, INVALID, INVALID, -/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D0] */ TNSZ("addsubps",XMM,16),INVALID, INVALID, INVALID, /* [D4] */ INVALID, INVALID, TNS("movdq2q",XMMXM), INVALID, /* [D8] */ INVALID, INVALID, INVALID, INVALID, /* [DC] */ INVALID, INVALID, INVALID, INVALID, /* [E0] */ INVALID, INVALID, INVALID, INVALID, /* [E4] */ INVALID, INVALID, TNSZ("cvtpd2dq",XMM,16),INVALID, /* [E8] */ INVALID, INVALID, INVALID, INVALID, /* [EC] */ INVALID, INVALID, INVALID, INVALID, -/* [F0] */ INVALID, INVALID, INVALID, INVALID, +/* [F0] */ TNS("lddqu",XMMM), INVALID, INVALID, INVALID, /* [F4] */ INVALID, INVALID, INVALID, INVALID, /* [F8] */ INVALID, INVALID, 
INVALID, INVALID, /* [FC] */ INVALID, INVALID, INVALID, INVALID, }; const instable_t dis_opAVXF20F[256] = { /* [00] */ INVALID, INVALID, INVALID, INVALID, /* [04] */ INVALID, INVALID, INVALID, INVALID, /* [08] */ INVALID, INVALID, INVALID, INVALID, /* [0C] */ INVALID, INVALID, INVALID, INVALID, /* [10] */ TNSZ("vmovsd",VEX_RMrX,8), TNSZ("vmovsd",VEX_RRX,8), TNSZ("vmovddup",VEX_MX,8), INVALID, /* [14] */ INVALID, INVALID, INVALID, INVALID, /* [18] */ INVALID, INVALID, INVALID, INVALID, /* [1C] */ INVALID, INVALID, INVALID, INVALID, /* [20] */ INVALID, INVALID, INVALID, INVALID, /* [24] */ INVALID, INVALID, INVALID, INVALID, /* [28] */ INVALID, INVALID, TNSZ("vcvtsi2sd",VEX_RMrX,4),INVALID, /* [2C] */ TNSZ("vcvttsd2si",VEX_MR,8),TNSZ("vcvtsd2si",VEX_MR,8),INVALID, INVALID, /* [30] */ INVALID, INVALID, INVALID, INVALID, /* [34] */ INVALID, INVALID, INVALID, INVALID, /* [38] */ INVALID, INVALID, INVALID, INVALID, /* [3C] */ INVALID, INVALID, INVALID, INVALID, /* [40] */ INVALID, INVALID, INVALID, INVALID, /* [44] */ INVALID, INVALID, INVALID, INVALID, /* [48] */ INVALID, INVALID, INVALID, INVALID, /* [4C] */ INVALID, INVALID, INVALID, INVALID, /* [50] */ INVALID, TNSZ("vsqrtsd",VEX_RMrX,8), INVALID, INVALID, /* [54] */ INVALID, INVALID, INVALID, INVALID, /* [58] */ TNSZ("vaddsd",VEX_RMrX,8), TNSZ("vmulsd",VEX_RMrX,8), TNSZ("vcvtsd2ss",VEX_RMrX,8), INVALID, /* [5C] */ TNSZ("vsubsd",VEX_RMrX,8), TNSZ("vminsd",VEX_RMrX,8), TNSZ("vdivsd",VEX_RMrX,8), TNSZ("vmaxsd",VEX_RMrX,8), /* [60] */ INVALID, INVALID, INVALID, INVALID, /* [64] */ INVALID, INVALID, INVALID, INVALID, /* [68] */ INVALID, INVALID, INVALID, INVALID, /* [6C] */ INVALID, INVALID, INVALID, INVALID, /* [70] */ TNSZ("vpshuflw",VEX_MXI,16),INVALID, INVALID, INVALID, /* [74] */ INVALID, INVALID, INVALID, INVALID, /* [78] */ INVALID, INVALID, INVALID, INVALID, /* [7C] */ TNSZ("vhaddps",VEX_RMrX,8), TNSZ("vhsubps",VEX_RMrX,8), INVALID, INVALID, /* [80] */ INVALID, INVALID, INVALID, INVALID, /* [84] */ INVALID, INVALID, INVALID, INVALID, /* [88] */ INVALID, INVALID, INVALID, INVALID, /* [0C] */ INVALID, INVALID, INVALID, INVALID, /* [90] */ INVALID, INVALID, INVALID, INVALID, /* [94] */ INVALID, INVALID, INVALID, INVALID, /* [98] */ INVALID, INVALID, INVALID, INVALID, /* [9C] */ INVALID, INVALID, INVALID, INVALID, /* [A0] */ INVALID, INVALID, INVALID, INVALID, /* [A4] */ INVALID, INVALID, INVALID, INVALID, /* [A8] */ INVALID, INVALID, INVALID, INVALID, /* [AC] */ INVALID, INVALID, INVALID, INVALID, /* [B0] */ INVALID, INVALID, INVALID, INVALID, /* [B4] */ INVALID, INVALID, INVALID, INVALID, /* [B8] */ INVALID, INVALID, INVALID, INVALID, /* [BC] */ INVALID, INVALID, INVALID, INVALID, /* [C0] */ INVALID, INVALID, TNSZ("vcmpsd",VEX_RMRX,8), INVALID, /* [C4] */ INVALID, INVALID, INVALID, INVALID, /* [C8] */ INVALID, INVALID, INVALID, INVALID, /* [CC] */ INVALID, INVALID, INVALID, INVALID, /* [D0] */ TNSZ("vaddsubps",VEX_RMrX,8), INVALID, INVALID, INVALID, /* [D4] */ INVALID, INVALID, INVALID, INVALID, /* [D8] */ INVALID, INVALID, INVALID, INVALID, /* [DC] */ INVALID, INVALID, INVALID, INVALID, /* [E0] */ INVALID, INVALID, INVALID, INVALID, /* [E4] */ INVALID, INVALID, TNSZ("vcvtpd2dq",VEX_MX,16),INVALID, /* [E8] */ INVALID, INVALID, INVALID, INVALID, /* [EC] */ INVALID, INVALID, INVALID, INVALID, /* [F0] */ TNSZ("vlddqu",VEX_MX,16), INVALID, INVALID, INVALID, /* [F4] */ INVALID, INVALID, INVALID, INVALID, /* [F8] */ INVALID, INVALID, INVALID, INVALID, /* [FC] */ INVALID, INVALID, INVALID, INVALID, }; +const instable_t dis_opAVXF20F3A[256] = { 
+/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ INVALID, INVALID, INVALID, INVALID, +/* [14] */ INVALID, INVALID, INVALID, INVALID, +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ INVALID, INVALID, INVALID, INVALID, +/* [2C] */ INVALID, INVALID, INVALID, INVALID, + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, INVALID, INVALID, INVALID, +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ INVALID, INVALID, INVALID, INVALID, +/* [5C] */ INVALID, INVALID, INVALID, INVALID, + +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, INVALID, + +/* [70] */ INVALID, INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, INVALID, INVALID, + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, INVALID, INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, INVALID, INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, INVALID, + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, INVALID, INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, + +/* [F0] */ TNSZvr("rorx",VEX_MXI,6),INVALID, INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + +const instable_t dis_opAVXF20F38[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ INVALID, INVALID, INVALID, INVALID, +/* [14] */ INVALID, INVALID, INVALID, INVALID, +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + 
+/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ INVALID, INVALID, INVALID, INVALID, +/* [2C] */ INVALID, INVALID, INVALID, INVALID, + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, INVALID, INVALID, INVALID, +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ INVALID, INVALID, INVALID, INVALID, +/* [5C] */ INVALID, INVALID, INVALID, INVALID, + +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, INVALID, + +/* [70] */ INVALID, INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, INVALID, INVALID, + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, INVALID, INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, INVALID, INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, INVALID, + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, INVALID, INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, + +/* [F0] */ INVALID, INVALID, INVALID, INVALID, +/* [F4] */ INVALID, TNSZvr("pdep",VEX_RMrX,5),TNSZvr("mulx",VEX_RMrX,5),TNSZvr("shrx",VEX_VRMrX,5), +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + +const instable_t dis_opAVXF30F38[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ INVALID, INVALID, INVALID, INVALID, +/* [14] */ INVALID, INVALID, INVALID, INVALID, +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ INVALID, INVALID, INVALID, INVALID, +/* [2C] */ INVALID, INVALID, INVALID, INVALID, + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ 
INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, INVALID, INVALID, INVALID, +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ INVALID, INVALID, INVALID, INVALID, +/* [5C] */ INVALID, INVALID, INVALID, INVALID, + +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, INVALID, + +/* [70] */ INVALID, INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, INVALID, INVALID, + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, INVALID, INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, INVALID, INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, INVALID, + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, INVALID, INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, + +/* [F0] */ INVALID, INVALID, INVALID, INVALID, +/* [F4] */ INVALID, TNSZvr("pext",VEX_RMrX,5),INVALID, TNSZvr("sarx",VEX_VRMrX,5), +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; /* * Decode table for SIMD instructions with the repz (0xf3) prefix. 
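 *
 * The dis_opAVXF20F38 and dis_opAVXF30F38 tables above (pdep, mulx, shrx,
 * pext, sarx) are reachable only through the three-byte VEX form, whose
 * five "mmmmm" bits name the opcode map.  A hedged sketch (locals
 * hypothetical):
 *
 *	uint_t mm = vex_byte1 & 0x1f;	// 1 = 0x0F, 2 = 0x0F38, 3 = 0x0F3A
 *	if (mm == 0x2 && pp == 0x3)	// 0xF2-implied, map 0x0F38
 *		dp = &dis_opAVXF20F38[opcode_byte];
 *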
*/ const instable_t dis_opSIMDrepz[256] = { /* [00] */ INVALID, INVALID, INVALID, INVALID, /* [04] */ INVALID, INVALID, INVALID, INVALID, /* [08] */ INVALID, INVALID, INVALID, INVALID, /* [0C] */ INVALID, INVALID, INVALID, INVALID, -/* [10] */ TNSZ("movss",XMM,4), TNSZ("movss",XMMS,4), INVALID, INVALID, -/* [14] */ INVALID, INVALID, INVALID, INVALID, +/* [10] */ TNSZ("movss",XMM,4), TNSZ("movss",XMMS,4), TNSZ("movsldup",XMM,16),INVALID, +/* [14] */ INVALID, INVALID, TNSZ("movshdup",XMM,16),INVALID, /* [18] */ INVALID, INVALID, INVALID, INVALID, /* [1C] */ INVALID, INVALID, INVALID, INVALID, /* [20] */ INVALID, INVALID, INVALID, INVALID, /* [24] */ INVALID, INVALID, INVALID, INVALID, /* [28] */ INVALID, INVALID, TNSZ("cvtsi2ss",XMM3MX,4),TNSZ("movntss",XMMMS,4), /* [2C] */ TNSZ("cvttss2si",XMMXM3,4),TNSZ("cvtss2si",XMMXM3,4),INVALID, INVALID, /* [30] */ INVALID, INVALID, INVALID, INVALID, /* [34] */ INVALID, INVALID, INVALID, INVALID, /* [38] */ INVALID, INVALID, INVALID, INVALID, /* [3C] */ INVALID, INVALID, INVALID, INVALID, /* [40] */ INVALID, INVALID, INVALID, INVALID, /* [44] */ INVALID, INVALID, INVALID, INVALID, /* [48] */ INVALID, INVALID, INVALID, INVALID, /* [4C] */ INVALID, INVALID, INVALID, INVALID, /* [50] */ INVALID, TNSZ("sqrtss",XMM,4), TNSZ("rsqrtss",XMM,4), TNSZ("rcpss",XMM,4), /* [54] */ INVALID, INVALID, INVALID, INVALID, /* [58] */ TNSZ("addss",XMM,4), TNSZ("mulss",XMM,4), TNSZ("cvtss2sd",XMM,4), TNSZ("cvttps2dq",XMM,16), /* [5C] */ TNSZ("subss",XMM,4), TNSZ("minss",XMM,4), TNSZ("divss",XMM,4), TNSZ("maxss",XMM,4), /* [60] */ INVALID, INVALID, INVALID, INVALID, /* [64] */ INVALID, INVALID, INVALID, INVALID, /* [68] */ INVALID, INVALID, INVALID, INVALID, /* [6C] */ INVALID, INVALID, INVALID, TNSZ("movdqu",XMM,16), /* [70] */ TNSZ("pshufhw",XMMP,16),INVALID, INVALID, INVALID, /* [74] */ INVALID, INVALID, INVALID, INVALID, /* [78] */ INVALID, INVALID, INVALID, INVALID, /* [7C] */ INVALID, INVALID, TNSZ("movq",XMM,8), TNSZ("movdqu",XMMS,16), /* [80] */ INVALID, INVALID, INVALID, INVALID, /* [84] */ INVALID, INVALID, INVALID, INVALID, /* [88] */ INVALID, INVALID, INVALID, INVALID, /* [0C] */ INVALID, INVALID, INVALID, INVALID, /* [90] */ INVALID, INVALID, INVALID, INVALID, /* [94] */ INVALID, INVALID, INVALID, INVALID, /* [98] */ INVALID, INVALID, INVALID, INVALID, /* [9C] */ INVALID, INVALID, INVALID, INVALID, /* [A0] */ INVALID, INVALID, INVALID, INVALID, /* [A4] */ INVALID, INVALID, INVALID, INVALID, /* [A8] */ INVALID, INVALID, INVALID, INVALID, /* [AC] */ INVALID, INVALID, INVALID, INVALID, /* [B0] */ INVALID, INVALID, INVALID, INVALID, /* [B4] */ INVALID, INVALID, INVALID, INVALID, /* [B8] */ TS("popcnt",MRw), INVALID, INVALID, INVALID, -/* [BC] */ INVALID, TS("lzcnt",MRw), INVALID, INVALID, +/* [BC] */ TNSZ("tzcnt",MRw,5), TS("lzcnt",MRw), INVALID, INVALID, /* [C0] */ INVALID, INVALID, TNSZ("cmpss",XMMP,4), INVALID, /* [C4] */ INVALID, INVALID, INVALID, INVALID, /* [C8] */ INVALID, INVALID, INVALID, INVALID, /* [CC] */ INVALID, INVALID, INVALID, INVALID, /* [D0] */ INVALID, INVALID, INVALID, INVALID, /* [D4] */ INVALID, INVALID, TNS("movq2dq",XMMMX), INVALID, /* [D8] */ INVALID, INVALID, INVALID, INVALID, /* [DC] */ INVALID, INVALID, INVALID, INVALID, /* [E0] */ INVALID, INVALID, INVALID, INVALID, /* [E4] */ INVALID, INVALID, TNSZ("cvtdq2pd",XMM,8), INVALID, /* [E8] */ INVALID, INVALID, INVALID, INVALID, /* [EC] */ INVALID, INVALID, INVALID, INVALID, /* [F0] */ INVALID, INVALID, INVALID, INVALID, /* [F4] */ INVALID, INVALID, INVALID, INVALID, /* [F8] */ INVALID, 
INVALID, INVALID, INVALID, /* [FC] */ INVALID, INVALID, INVALID, INVALID, }; const instable_t dis_opAVXF30F[256] = { /* [00] */ INVALID, INVALID, INVALID, INVALID, /* [04] */ INVALID, INVALID, INVALID, INVALID, /* [08] */ INVALID, INVALID, INVALID, INVALID, /* [0C] */ INVALID, INVALID, INVALID, INVALID, /* [10] */ TNSZ("vmovss",VEX_RMrX,4), TNSZ("vmovss",VEX_RRX,4), TNSZ("vmovsldup",VEX_MX,4), INVALID, /* [14] */ INVALID, INVALID, TNSZ("vmovshdup",VEX_MX,4), INVALID, /* [18] */ INVALID, INVALID, INVALID, INVALID, /* [1C] */ INVALID, INVALID, INVALID, INVALID, /* [20] */ INVALID, INVALID, INVALID, INVALID, /* [24] */ INVALID, INVALID, INVALID, INVALID, /* [28] */ INVALID, INVALID, TNSZ("vcvtsi2ss",VEX_RMrX,4),INVALID, /* [2C] */ TNSZ("vcvttss2si",VEX_MR,4),TNSZ("vcvtss2si",VEX_MR,4),INVALID, INVALID, /* [30] */ INVALID, INVALID, INVALID, INVALID, /* [34] */ INVALID, INVALID, INVALID, INVALID, /* [38] */ INVALID, INVALID, INVALID, INVALID, /* [3C] */ INVALID, INVALID, INVALID, INVALID, /* [40] */ INVALID, INVALID, INVALID, INVALID, /* [44] */ INVALID, INVALID, INVALID, INVALID, /* [48] */ INVALID, INVALID, INVALID, INVALID, /* [4C] */ INVALID, INVALID, INVALID, INVALID, /* [50] */ INVALID, TNSZ("vsqrtss",VEX_RMrX,4), TNSZ("vrsqrtss",VEX_RMrX,4), TNSZ("vrcpss",VEX_RMrX,4), /* [54] */ INVALID, INVALID, INVALID, INVALID, /* [58] */ TNSZ("vaddss",VEX_RMrX,4), TNSZ("vmulss",VEX_RMrX,4), TNSZ("vcvtss2sd",VEX_RMrX,4), TNSZ("vcvttps2dq",VEX_MX,16), /* [5C] */ TNSZ("vsubss",VEX_RMrX,4), TNSZ("vminss",VEX_RMrX,4), TNSZ("vdivss",VEX_RMrX,4), TNSZ("vmaxss",VEX_RMrX,4), /* [60] */ INVALID, INVALID, INVALID, INVALID, /* [64] */ INVALID, INVALID, INVALID, INVALID, /* [68] */ INVALID, INVALID, INVALID, INVALID, /* [6C] */ INVALID, INVALID, INVALID, TNSZ("vmovdqu",VEX_MX,16), /* [70] */ TNSZ("vpshufhw",VEX_MXI,16),INVALID, INVALID, INVALID, /* [74] */ INVALID, INVALID, INVALID, INVALID, /* [78] */ INVALID, INVALID, INVALID, INVALID, /* [7C] */ INVALID, INVALID, TNSZ("vmovq",VEX_MX,8), TNSZ("vmovdqu",VEX_RX,16), /* [80] */ INVALID, INVALID, INVALID, INVALID, /* [84] */ INVALID, INVALID, INVALID, INVALID, /* [88] */ INVALID, INVALID, INVALID, INVALID, /* [0C] */ INVALID, INVALID, INVALID, INVALID, /* [90] */ INVALID, INVALID, INVALID, INVALID, /* [94] */ INVALID, INVALID, INVALID, INVALID, /* [98] */ INVALID, INVALID, INVALID, INVALID, /* [9C] */ INVALID, INVALID, INVALID, INVALID, /* [A0] */ INVALID, INVALID, INVALID, INVALID, /* [A4] */ INVALID, INVALID, INVALID, INVALID, /* [A8] */ INVALID, INVALID, INVALID, INVALID, /* [AC] */ INVALID, INVALID, INVALID, INVALID, /* [B0] */ INVALID, INVALID, INVALID, INVALID, /* [B4] */ INVALID, INVALID, INVALID, INVALID, /* [B8] */ INVALID, INVALID, INVALID, INVALID, /* [BC] */ INVALID, INVALID, INVALID, INVALID, /* [C0] */ INVALID, INVALID, TNSZ("vcmpss",VEX_RMRX,4), INVALID, /* [C4] */ INVALID, INVALID, INVALID, INVALID, /* [C8] */ INVALID, INVALID, INVALID, INVALID, /* [CC] */ INVALID, INVALID, INVALID, INVALID, /* [D0] */ INVALID, INVALID, INVALID, INVALID, /* [D4] */ INVALID, INVALID, INVALID, INVALID, /* [D8] */ INVALID, INVALID, INVALID, INVALID, /* [DC] */ INVALID, INVALID, INVALID, INVALID, /* [E0] */ INVALID, INVALID, INVALID, INVALID, /* [E4] */ INVALID, INVALID, TNSZ("vcvtdq2pd",VEX_MX,8), INVALID, /* [E8] */ INVALID, INVALID, INVALID, INVALID, /* [EC] */ INVALID, INVALID, INVALID, INVALID, /* [F0] */ INVALID, INVALID, INVALID, INVALID, /* [F4] */ INVALID, INVALID, INVALID, INVALID, /* [F8] */ INVALID, INVALID, INVALID, INVALID, /* [FC] */ INVALID, INVALID, 
INVALID, INVALID, }; /* * The following two tables are used to encode crc32 and movbe * since they share the same opcodes. */ const instable_t dis_op0F38F0[2] = { /* [00] */ TNS("crc32b",CRC32), TS("movbe",MOVBE), }; const instable_t dis_op0F38F1[2] = { /* [00] */ TS("crc32",CRC32), TS("movbe",MOVBE), }; +/* + * The following table is used to distinguish between adox and adcx which share + * the same opcodes. + */ +const instable_t dis_op0F38F6[2] = { +/* [00] */ TNS("adcx",ADX), + TNS("adox",ADX), +}; + const instable_t dis_op0F38[256] = { /* [00] */ TNSZ("pshufb",XMM_66o,16),TNSZ("phaddw",XMM_66o,16),TNSZ("phaddd",XMM_66o,16),TNSZ("phaddsw",XMM_66o,16), /* [04] */ TNSZ("pmaddubsw",XMM_66o,16),TNSZ("phsubw",XMM_66o,16), TNSZ("phsubd",XMM_66o,16),TNSZ("phsubsw",XMM_66o,16), /* [08] */ TNSZ("psignb",XMM_66o,16),TNSZ("psignw",XMM_66o,16),TNSZ("psignd",XMM_66o,16),TNSZ("pmulhrsw",XMM_66o,16), /* [0C] */ INVALID, INVALID, INVALID, INVALID, /* [10] */ TNSZ("pblendvb",XMM_66r,16),INVALID, INVALID, INVALID, /* [14] */ TNSZ("blendvps",XMM_66r,16),TNSZ("blendvpd",XMM_66r,16),INVALID, TNSZ("ptest",XMM_66r,16), /* [18] */ INVALID, INVALID, INVALID, INVALID, /* [1C] */ TNSZ("pabsb",XMM_66o,16),TNSZ("pabsw",XMM_66o,16),TNSZ("pabsd",XMM_66o,16),INVALID, /* [20] */ TNSZ("pmovsxbw",XMM_66r,16),TNSZ("pmovsxbd",XMM_66r,16),TNSZ("pmovsxbq",XMM_66r,16),TNSZ("pmovsxwd",XMM_66r,16), /* [24] */ TNSZ("pmovsxwq",XMM_66r,16),TNSZ("pmovsxdq",XMM_66r,16),INVALID, INVALID, /* [28] */ TNSZ("pmuldq",XMM_66r,16),TNSZ("pcmpeqq",XMM_66r,16),TNSZ("movntdqa",XMMM_66r,16),TNSZ("packusdw",XMM_66r,16), /* [2C] */ INVALID, INVALID, INVALID, INVALID, /* [30] */ TNSZ("pmovzxbw",XMM_66r,16),TNSZ("pmovzxbd",XMM_66r,16),TNSZ("pmovzxbq",XMM_66r,16),TNSZ("pmovzxwd",XMM_66r,16), /* [34] */ TNSZ("pmovzxwq",XMM_66r,16),TNSZ("pmovzxdq",XMM_66r,16),INVALID, TNSZ("pcmpgtq",XMM_66r,16), /* [38] */ TNSZ("pminsb",XMM_66r,16),TNSZ("pminsd",XMM_66r,16),TNSZ("pminuw",XMM_66r,16),TNSZ("pminud",XMM_66r,16), /* [3C] */ TNSZ("pmaxsb",XMM_66r,16),TNSZ("pmaxsd",XMM_66r,16),TNSZ("pmaxuw",XMM_66r,16),TNSZ("pmaxud",XMM_66r,16), /* [40] */ TNSZ("pmulld",XMM_66r,16),TNSZ("phminposuw",XMM_66r,16),INVALID, INVALID, /* [44] */ INVALID, INVALID, INVALID, INVALID, /* [48] */ INVALID, INVALID, INVALID, INVALID, /* [4C] */ INVALID, INVALID, INVALID, INVALID, /* [50] */ INVALID, INVALID, INVALID, INVALID, /* [54] */ INVALID, INVALID, INVALID, INVALID, /* [58] */ INVALID, INVALID, INVALID, INVALID, /* [5C] */ INVALID, INVALID, INVALID, INVALID, /* [60] */ INVALID, INVALID, INVALID, INVALID, /* [64] */ INVALID, INVALID, INVALID, INVALID, /* [68] */ INVALID, INVALID, INVALID, INVALID, /* [6C] */ INVALID, INVALID, INVALID, INVALID, /* [70] */ INVALID, INVALID, INVALID, INVALID, /* [74] */ INVALID, INVALID, INVALID, INVALID, /* [78] */ INVALID, INVALID, INVALID, INVALID, /* [7C] */ INVALID, INVALID, INVALID, INVALID, /* [80] */ TNSy("invept", RM_66r), TNSy("invvpid", RM_66r),TNSy("invpcid", RM_66r),INVALID, /* [84] */ INVALID, INVALID, INVALID, INVALID, /* [88] */ INVALID, INVALID, INVALID, INVALID, /* [8C] */ INVALID, INVALID, INVALID, INVALID, /* [90] */ INVALID, INVALID, INVALID, INVALID, /* [94] */ INVALID, INVALID, INVALID, INVALID, /* [98] */ INVALID, INVALID, INVALID, INVALID, /* [9C] */ INVALID, INVALID, INVALID, INVALID, /* [A0] */ INVALID, INVALID, INVALID, INVALID, /* [A4] */ INVALID, INVALID, INVALID, INVALID, /* [A8] */ INVALID, INVALID, INVALID, INVALID, /* [AC] */ INVALID, INVALID, INVALID, INVALID, /* [B0] */ INVALID, INVALID, INVALID, INVALID, /* [B4] */ 
INVALID, INVALID, INVALID, INVALID, /* [B8] */ INVALID, INVALID, INVALID, INVALID, /* [BC] */ INVALID, INVALID, INVALID, INVALID, /* [C0] */ INVALID, INVALID, INVALID, INVALID, /* [C4] */ INVALID, INVALID, INVALID, INVALID, -/* [C8] */ INVALID, INVALID, INVALID, INVALID, -/* [CC] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ TNSZ("sha1nexte",XMM,16),TNSZ("sha1msg1",XMM,16),TNSZ("sha1msg2",XMM,16),TNSZ("sha256rnds2",XMM,16), +/* [CC] */ TNSZ("sha256msg1",XMM,16),TNSZ("sha256msg2",XMM,16),INVALID, INVALID, /* [D0] */ INVALID, INVALID, INVALID, INVALID, /* [D4] */ INVALID, INVALID, INVALID, INVALID, /* [D8] */ INVALID, INVALID, INVALID, TNSZ("aesimc",XMM_66r,16), /* [DC] */ TNSZ("aesenc",XMM_66r,16),TNSZ("aesenclast",XMM_66r,16),TNSZ("aesdec",XMM_66r,16),TNSZ("aesdeclast",XMM_66r,16), /* [E0] */ INVALID, INVALID, INVALID, INVALID, /* [E4] */ INVALID, INVALID, INVALID, INVALID, /* [E8] */ INVALID, INVALID, INVALID, INVALID, /* [EC] */ INVALID, INVALID, INVALID, INVALID, /* [F0] */ IND(dis_op0F38F0), IND(dis_op0F38F1), INVALID, INVALID, -/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F4] */ INVALID, INVALID, IND(dis_op0F38F6), INVALID, /* [F8] */ INVALID, INVALID, INVALID, INVALID, /* [FC] */ INVALID, INVALID, INVALID, INVALID, }; const instable_t dis_opAVX660F38[256] = { /* [00] */ TNSZ("vpshufb",VEX_RMrX,16),TNSZ("vphaddw",VEX_RMrX,16),TNSZ("vphaddd",VEX_RMrX,16),TNSZ("vphaddsw",VEX_RMrX,16), /* [04] */ TNSZ("vpmaddubsw",VEX_RMrX,16),TNSZ("vphsubw",VEX_RMrX,16), TNSZ("vphsubd",VEX_RMrX,16),TNSZ("vphsubsw",VEX_RMrX,16), /* [08] */ TNSZ("vpsignb",VEX_RMrX,16),TNSZ("vpsignw",VEX_RMrX,16),TNSZ("vpsignd",VEX_RMrX,16),TNSZ("vpmulhrsw",VEX_RMrX,16), /* [0C] */ TNSZ("vpermilps",VEX_RMrX,8),TNSZ("vpermilpd",VEX_RMrX,16),TNSZ("vtestps",VEX_RRI,8), TNSZ("vtestpd",VEX_RRI,16), /* [10] */ INVALID, INVALID, INVALID, TNSZ("vcvtph2ps",VEX_MX,16), -/* [14] */ INVALID, INVALID, INVALID, TNSZ("vptest",VEX_RRI,16), +/* [14] */ INVALID, INVALID, TNSZ("vpermps",VEX_RMrX,16),TNSZ("vptest",VEX_RRI,16), /* [18] */ TNSZ("vbroadcastss",VEX_MX,4),TNSZ("vbroadcastsd",VEX_MX,8),TNSZ("vbroadcastf128",VEX_MX,16),INVALID, /* [1C] */ TNSZ("vpabsb",VEX_MX,16),TNSZ("vpabsw",VEX_MX,16),TNSZ("vpabsd",VEX_MX,16),INVALID, /* [20] */ TNSZ("vpmovsxbw",VEX_MX,16),TNSZ("vpmovsxbd",VEX_MX,16),TNSZ("vpmovsxbq",VEX_MX,16),TNSZ("vpmovsxwd",VEX_MX,16), /* [24] */ TNSZ("vpmovsxwq",VEX_MX,16),TNSZ("vpmovsxdq",VEX_MX,16),INVALID, INVALID, /* [28] */ TNSZ("vpmuldq",VEX_RMrX,16),TNSZ("vpcmpeqq",VEX_RMrX,16),TNSZ("vmovntdqa",VEX_MX,16),TNSZ("vpackusdw",VEX_RMrX,16), /* [2C] */ TNSZ("vmaskmovps",VEX_RMrX,8),TNSZ("vmaskmovpd",VEX_RMrX,16),TNSZ("vmaskmovps",VEX_RRM,8),TNSZ("vmaskmovpd",VEX_RRM,16), /* [30] */ TNSZ("vpmovzxbw",VEX_MX,16),TNSZ("vpmovzxbd",VEX_MX,16),TNSZ("vpmovzxbq",VEX_MX,16),TNSZ("vpmovzxwd",VEX_MX,16), -/* [34] */ TNSZ("vpmovzxwq",VEX_MX,16),TNSZ("vpmovzxdq",VEX_MX,16),INVALID, TNSZ("vpcmpgtq",VEX_RMrX,16), +/* [34] */ TNSZ("vpmovzxwq",VEX_MX,16),TNSZ("vpmovzxdq",VEX_MX,16),TNSZ("vpermd",VEX_RMrX,16),TNSZ("vpcmpgtq",VEX_RMrX,16), /* [38] */ TNSZ("vpminsb",VEX_RMrX,16),TNSZ("vpminsd",VEX_RMrX,16),TNSZ("vpminuw",VEX_RMrX,16),TNSZ("vpminud",VEX_RMrX,16), /* [3C] */ TNSZ("vpmaxsb",VEX_RMrX,16),TNSZ("vpmaxsd",VEX_RMrX,16),TNSZ("vpmaxuw",VEX_RMrX,16),TNSZ("vpmaxud",VEX_RMrX,16), /* [40] */ TNSZ("vpmulld",VEX_RMrX,16),TNSZ("vphminposuw",VEX_MX,16),INVALID, INVALID, -/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, TSaZ("vpsrlv",VEX_RMrX,16),TNSZ("vpsravd",VEX_RMrX,16),TSaZ("vpsllv",VEX_RMrX,16), /* [48] */ 
INVALID, INVALID, INVALID, INVALID, /* [4C] */ INVALID, INVALID, INVALID, INVALID, /* [50] */ INVALID, INVALID, INVALID, INVALID, /* [54] */ INVALID, INVALID, INVALID, INVALID, -/* [58] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ TNSZ("vpbroadcastd",VEX_MX,16),TNSZ("vpbroadcastq",VEX_MX,16),TNSZ("vbroadcasti128",VEX_MX,16),INVALID, /* [5C] */ INVALID, INVALID, INVALID, INVALID, /* [60] */ INVALID, INVALID, INVALID, INVALID, /* [64] */ INVALID, INVALID, INVALID, INVALID, /* [68] */ INVALID, INVALID, INVALID, INVALID, /* [6C] */ INVALID, INVALID, INVALID, INVALID, /* [70] */ INVALID, INVALID, INVALID, INVALID, /* [74] */ INVALID, INVALID, INVALID, INVALID, -/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ TNSZ("vpbroadcastb",VEX_MX,16),TNSZ("vpbroadcastw",VEX_MX,16),INVALID, INVALID, /* [7C] */ INVALID, INVALID, INVALID, INVALID, /* [80] */ INVALID, INVALID, INVALID, INVALID, /* [84] */ INVALID, INVALID, INVALID, INVALID, /* [88] */ INVALID, INVALID, INVALID, INVALID, -/* [8C] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ TSaZ("vpmaskmov",VEX_RMrX,16),INVALID, TSaZ("vpmaskmov",VEX_RRM,16),INVALID, -/* [90] */ INVALID, INVALID, INVALID, INVALID, -/* [94] */ INVALID, INVALID, INVALID, INVALID, -/* [98] */ INVALID, INVALID, INVALID, INVALID, -/* [9C] */ INVALID, INVALID, INVALID, INVALID, +/* [90] */ TNSZ("vpgatherd",VEX_SbVM,16),TNSZ("vpgatherq",VEX_SbVM,16),TNSZ("vgatherdp",VEX_SbVM,16),TNSZ("vgatherqp",VEX_SbVM,16), +/* [94] */ INVALID, INVALID, TNSZ("vfmaddsub132p",FMA,16),TNSZ("vfmsubadd132p",FMA,16), +/* [98] */ TNSZ("vfmadd132p",FMA,16),TNSZ("vfmadd132s",FMA,16),TNSZ("vfmsub132p",FMA,16),TNSZ("vfmsub132s",FMA,16), +/* [9C] */ TNSZ("vfnmadd132p",FMA,16),TNSZ("vfnmadd132s",FMA,16),TNSZ("vfnmsub132p",FMA,16),TNSZ("vfnmsub132s",FMA,16), /* [A0] */ INVALID, INVALID, INVALID, INVALID, -/* [A4] */ INVALID, INVALID, INVALID, INVALID, -/* [A8] */ INVALID, INVALID, INVALID, INVALID, -/* [AC] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, TNSZ("vfmaddsub213p",FMA,16),TNSZ("vfmsubadd213p",FMA,16), +/* [A8] */ TNSZ("vfmadd213p",FMA,16),TNSZ("vfmadd213s",FMA,16),TNSZ("vfmsub213p",FMA,16),TNSZ("vfmsub213s",FMA,16), +/* [AC] */ TNSZ("vfnmadd213p",FMA,16),TNSZ("vfnmadd213s",FMA,16),TNSZ("vfnmsub213p",FMA,16),TNSZ("vfnmsub213s",FMA,16), /* [B0] */ INVALID, INVALID, INVALID, INVALID, -/* [B4] */ INVALID, INVALID, INVALID, INVALID, -/* [B8] */ INVALID, INVALID, INVALID, INVALID, -/* [BC] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, TNSZ("vfmaddsub231p",FMA,16),TNSZ("vfmsubadd231p",FMA,16), +/* [B8] */ TNSZ("vfmadd231p",FMA,16),TNSZ("vfmadd231s",FMA,16),TNSZ("vfmsub231p",FMA,16),TNSZ("vfmsub231s",FMA,16), +/* [BC] */ TNSZ("vfnmadd231p",FMA,16),TNSZ("vfnmadd231s",FMA,16),TNSZ("vfnmsub231p",FMA,16),TNSZ("vfnmsub231s",FMA,16), /* [C0] */ INVALID, INVALID, INVALID, INVALID, /* [C4] */ INVALID, INVALID, INVALID, INVALID, /* [C8] */ INVALID, INVALID, INVALID, INVALID, /* [CC] */ INVALID, INVALID, INVALID, INVALID, /* [D0] */ INVALID, INVALID, INVALID, INVALID, /* [D4] */ INVALID, INVALID, INVALID, INVALID, /* [D8] */ INVALID, INVALID, INVALID, TNSZ("vaesimc",VEX_MX,16), /* [DC] */ TNSZ("vaesenc",VEX_RMrX,16),TNSZ("vaesenclast",VEX_RMrX,16),TNSZ("vaesdec",VEX_RMrX,16),TNSZ("vaesdeclast",VEX_RMrX,16), /* [E0] */ INVALID, INVALID, INVALID, INVALID, /* [E4] */ INVALID, INVALID, INVALID, INVALID, /* [E8] */ INVALID, INVALID, INVALID, INVALID, /* [EC] */ INVALID, INVALID, INVALID, INVALID, /* [F0] */ IND(dis_op0F38F0), IND(dis_op0F38F1), INVALID, 
INVALID, -/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, TNSZvr("shlx",VEX_VRMrX,5), /* [F8] */ INVALID, INVALID, INVALID, INVALID, /* [FC] */ INVALID, INVALID, INVALID, INVALID, }; const instable_t dis_op0F3A[256] = { /* [00] */ INVALID, INVALID, INVALID, INVALID, /* [04] */ INVALID, INVALID, INVALID, INVALID, /* [08] */ TNSZ("roundps",XMMP_66r,16),TNSZ("roundpd",XMMP_66r,16),TNSZ("roundss",XMMP_66r,16),TNSZ("roundsd",XMMP_66r,16), /* [0C] */ TNSZ("blendps",XMMP_66r,16),TNSZ("blendpd",XMMP_66r,16),TNSZ("pblendw",XMMP_66r,16),TNSZ("palignr",XMMP_66o,16), /* [10] */ INVALID, INVALID, INVALID, INVALID, /* [14] */ TNSZ("pextrb",XMM3PM_66r,8),TNSZ("pextrw",XMM3PM_66r,16),TSZ("pextr",XMM3PM_66r,16),TNSZ("extractps",XMM3PM_66r,16), /* [18] */ INVALID, INVALID, INVALID, INVALID, /* [1C] */ INVALID, INVALID, INVALID, INVALID, /* [20] */ TNSZ("pinsrb",XMMPRM_66r,8),TNSZ("insertps",XMMP_66r,16),TSZ("pinsr",XMMPRM_66r,16),INVALID, /* [24] */ INVALID, INVALID, INVALID, INVALID, /* [28] */ INVALID, INVALID, INVALID, INVALID, /* [2C] */ INVALID, INVALID, INVALID, INVALID, /* [30] */ INVALID, INVALID, INVALID, INVALID, /* [34] */ INVALID, INVALID, INVALID, INVALID, /* [38] */ INVALID, INVALID, INVALID, INVALID, /* [3C] */ INVALID, INVALID, INVALID, INVALID, /* [40] */ TNSZ("dpps",XMMP_66r,16),TNSZ("dppd",XMMP_66r,16),TNSZ("mpsadbw",XMMP_66r,16),INVALID, /* [44] */ TNSZ("pclmulqdq",XMMP_66r,16),INVALID, INVALID, INVALID, /* [48] */ INVALID, INVALID, INVALID, INVALID, /* [4C] */ INVALID, INVALID, INVALID, INVALID, /* [50] */ INVALID, INVALID, INVALID, INVALID, /* [54] */ INVALID, INVALID, INVALID, INVALID, /* [58] */ INVALID, INVALID, INVALID, INVALID, /* [5C] */ INVALID, INVALID, INVALID, INVALID, /* [60] */ TNSZ("pcmpestrm",XMMP_66r,16),TNSZ("pcmpestri",XMMP_66r,16),TNSZ("pcmpistrm",XMMP_66r,16),TNSZ("pcmpistri",XMMP_66r,16), /* [64] */ INVALID, INVALID, INVALID, INVALID, /* [68] */ INVALID, INVALID, INVALID, INVALID, /* [6C] */ INVALID, INVALID, INVALID, INVALID, /* [70] */ INVALID, INVALID, INVALID, INVALID, /* [74] */ INVALID, INVALID, INVALID, INVALID, /* [78] */ INVALID, INVALID, INVALID, INVALID, /* [7C] */ INVALID, INVALID, INVALID, INVALID, /* [80] */ INVALID, INVALID, INVALID, INVALID, /* [84] */ INVALID, INVALID, INVALID, INVALID, /* [88] */ INVALID, INVALID, INVALID, INVALID, /* [8C] */ INVALID, INVALID, INVALID, INVALID, /* [90] */ INVALID, INVALID, INVALID, INVALID, /* [94] */ INVALID, INVALID, INVALID, INVALID, /* [98] */ INVALID, INVALID, INVALID, INVALID, /* [9C] */ INVALID, INVALID, INVALID, INVALID, /* [A0] */ INVALID, INVALID, INVALID, INVALID, /* [A4] */ INVALID, INVALID, INVALID, INVALID, /* [A8] */ INVALID, INVALID, INVALID, INVALID, /* [AC] */ INVALID, INVALID, INVALID, INVALID, /* [B0] */ INVALID, INVALID, INVALID, INVALID, /* [B4] */ INVALID, INVALID, INVALID, INVALID, /* [B8] */ INVALID, INVALID, INVALID, INVALID, /* [BC] */ INVALID, INVALID, INVALID, INVALID, /* [C0] */ INVALID, INVALID, INVALID, INVALID, /* [C4] */ INVALID, INVALID, INVALID, INVALID, /* [C8] */ INVALID, INVALID, INVALID, INVALID, -/* [CC] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ TNSZ("sha1rnds4",XMMP,16),INVALID, INVALID, INVALID, /* [D0] */ INVALID, INVALID, INVALID, INVALID, /* [D4] */ INVALID, INVALID, INVALID, INVALID, /* [D8] */ INVALID, INVALID, INVALID, INVALID, /* [DC] */ INVALID, INVALID, INVALID, TNSZ("aeskeygenassist",XMMP_66r,16), /* [E0] */ INVALID, INVALID, INVALID, INVALID, /* [E4] */ INVALID, INVALID, INVALID, INVALID, /* [E8] */ INVALID, 
INVALID, INVALID, INVALID, /* [EC] */ INVALID, INVALID, INVALID, INVALID, /* [F0] */ INVALID, INVALID, INVALID, INVALID, /* [F4] */ INVALID, INVALID, INVALID, INVALID, /* [F8] */ INVALID, INVALID, INVALID, INVALID, /* [FC] */ INVALID, INVALID, INVALID, INVALID, }; const instable_t dis_opAVX660F3A[256] = { -/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [00] */ TNSZ("vpermq",VEX_MXI,16),TNSZ("vpermpd",VEX_MXI,16),TNSZ("vpblendd",VEX_RMRX,16),INVALID, /* [04] */ TNSZ("vpermilps",VEX_MXI,8),TNSZ("vpermilpd",VEX_MXI,16),TNSZ("vperm2f128",VEX_RMRX,16),INVALID, /* [08] */ TNSZ("vroundps",VEX_MXI,16),TNSZ("vroundpd",VEX_MXI,16),TNSZ("vroundss",VEX_RMRX,16),TNSZ("vroundsd",VEX_RMRX,16), /* [0C] */ TNSZ("vblendps",VEX_RMRX,16),TNSZ("vblendpd",VEX_RMRX,16),TNSZ("vpblendw",VEX_RMRX,16),TNSZ("vpalignr",VEX_RMRX,16), /* [10] */ INVALID, INVALID, INVALID, INVALID, /* [14] */ TNSZ("vpextrb",VEX_RRi,8),TNSZ("vpextrw",VEX_RRi,16),TNSZ("vpextrd",VEX_RRi,16),TNSZ("vextractps",VEX_RM,16), /* [18] */ TNSZ("vinsertf128",VEX_RMRX,16),TNSZ("vextractf128",VEX_RX,16),INVALID, INVALID, /* [1C] */ INVALID, TNSZ("vcvtps2ph",VEX_RX,16), INVALID, INVALID, /* [20] */ TNSZ("vpinsrb",VEX_RMRX,8),TNSZ("vinsertps",VEX_RMRX,16),TNSZ("vpinsrd",VEX_RMRX,16),INVALID, /* [24] */ INVALID, INVALID, INVALID, INVALID, /* [28] */ INVALID, INVALID, INVALID, INVALID, /* [2C] */ INVALID, INVALID, INVALID, INVALID, /* [30] */ INVALID, INVALID, INVALID, INVALID, /* [34] */ INVALID, INVALID, INVALID, INVALID, -/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ TNSZ("vinserti128",VEX_RMRX,16),TNSZ("vextracti128",VEX_RIM,16),INVALID, INVALID, /* [3C] */ INVALID, INVALID, INVALID, INVALID, /* [40] */ TNSZ("vdpps",VEX_RMRX,16),TNSZ("vdppd",VEX_RMRX,16),TNSZ("vmpsadbw",VEX_RMRX,16),INVALID, -/* [44] */ TNSZ("vpclmulqdq",VEX_RMRX,16),INVALID, INVALID, INVALID, +/* [44] */ TNSZ("vpclmulqdq",VEX_RMRX,16),INVALID, TNSZ("vperm2i128",VEX_RMRX,16),INVALID, /* [48] */ INVALID, INVALID, TNSZ("vblendvps",VEX_RMRX,8), TNSZ("vblendvpd",VEX_RMRX,16), /* [4C] */ TNSZ("vpblendvb",VEX_RMRX,16),INVALID, INVALID, INVALID, /* [50] */ INVALID, INVALID, INVALID, INVALID, /* [54] */ INVALID, INVALID, INVALID, INVALID, /* [58] */ INVALID, INVALID, INVALID, INVALID, /* [5C] */ INVALID, INVALID, INVALID, INVALID, /* [60] */ TNSZ("vpcmpestrm",VEX_MXI,16),TNSZ("vpcmpestri",VEX_MXI,16),TNSZ("vpcmpistrm",VEX_MXI,16),TNSZ("vpcmpistri",VEX_MXI,16), /* [64] */ INVALID, INVALID, INVALID, INVALID, /* [68] */ INVALID, INVALID, INVALID, INVALID, /* [6C] */ INVALID, INVALID, INVALID, INVALID, /* [70] */ INVALID, INVALID, INVALID, INVALID, /* [74] */ INVALID, INVALID, INVALID, INVALID, /* [78] */ INVALID, INVALID, INVALID, INVALID, /* [7C] */ INVALID, INVALID, INVALID, INVALID, /* [80] */ INVALID, INVALID, INVALID, INVALID, /* [84] */ INVALID, INVALID, INVALID, INVALID, /* [88] */ INVALID, INVALID, INVALID, INVALID, /* [8C] */ INVALID, INVALID, INVALID, INVALID, /* [90] */ INVALID, INVALID, INVALID, INVALID, /* [94] */ INVALID, INVALID, INVALID, INVALID, /* [98] */ INVALID, INVALID, INVALID, INVALID, /* [9C] */ INVALID, INVALID, INVALID, INVALID, /* [A0] */ INVALID, INVALID, INVALID, INVALID, /* [A4] */ INVALID, INVALID, INVALID, INVALID, /* [A8] */ INVALID, INVALID, INVALID, INVALID, /* [AC] */ INVALID, INVALID, INVALID, INVALID, /* [B0] */ INVALID, INVALID, INVALID, INVALID, /* [B4] */ INVALID, INVALID, INVALID, INVALID, /* [B8] */ INVALID, INVALID, INVALID, INVALID, /* [BC] */ INVALID, INVALID, INVALID, INVALID, /* [C0] */ INVALID, INVALID, INVALID, INVALID, /* [C4] 
*/ INVALID, INVALID, INVALID, INVALID, /* [C8] */ INVALID, INVALID, INVALID, INVALID, /* [CC] */ INVALID, INVALID, INVALID, INVALID, /* [D0] */ INVALID, INVALID, INVALID, INVALID, /* [D4] */ INVALID, INVALID, INVALID, INVALID, /* [D8] */ INVALID, INVALID, INVALID, INVALID, /* [DC] */ INVALID, INVALID, INVALID, TNSZ("vaeskeygenassist",VEX_MXI,16), /* [E0] */ INVALID, INVALID, INVALID, INVALID, /* [E4] */ INVALID, INVALID, INVALID, INVALID, /* [E8] */ INVALID, INVALID, INVALID, INVALID, /* [EC] */ INVALID, INVALID, INVALID, INVALID, /* [F0] */ INVALID, INVALID, INVALID, INVALID, /* [F4] */ INVALID, INVALID, INVALID, INVALID, /* [F8] */ INVALID, INVALID, INVALID, INVALID, /* [FC] */ INVALID, INVALID, INVALID, INVALID, }; /* + * Decode table for 0x0F0D which uses the reg field of the ModR/M byte + * to indicate a sub-code. + */ +const instable_t dis_op0F0D[8] = { +/* [00] */ INVALID, TNS("prefetchw",PREF), TNS("prefetchwt1",PREF),INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +}; + +/* * Decode table for 0x0F opcodes */ const instable_t dis_op0F[16][16] = { { /* [00] */ IND(dis_op0F00), IND(dis_op0F01), TNS("lar",MR), TNS("lsl",MR), /* [04] */ INVALID, TNS("syscall",NORM), TNS("clts",NORM), TNS("sysret",NORM), /* [08] */ TNS("invd",NORM), TNS("wbinvd",NORM), INVALID, TNS("ud2",NORM), -/* [0C] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, IND(dis_op0F0D), INVALID, INVALID, }, { /* [10] */ TNSZ("movups",XMMO,16), TNSZ("movups",XMMOS,16),TNSZ("movlps",XMMO,8), TNSZ("movlps",XMMOS,8), /* [14] */ TNSZ("unpcklps",XMMO,16),TNSZ("unpckhps",XMMO,16),TNSZ("movhps",XMMOM,8),TNSZ("movhps",XMMOMS,8), /* [18] */ IND(dis_op0F18), INVALID, INVALID, INVALID, /* [1C] */ INVALID, INVALID, INVALID, TS("nop",Mw), }, { /* [20] */ TSy("mov",SREG), TSy("mov",SREG), TSy("mov",SREG), TSy("mov",SREG), /* [24] */ TSx("mov",SREG), INVALID, TSx("mov",SREG), INVALID, /* [28] */ TNSZ("movaps",XMMO,16), TNSZ("movaps",XMMOS,16),TNSZ("cvtpi2ps",XMMOMX,8),TNSZ("movntps",XMMOS,16), /* [2C] */ TNSZ("cvttps2pi",XMMOXMM,8),TNSZ("cvtps2pi",XMMOXMM,8),TNSZ("ucomiss",XMMO,4),TNSZ("comiss",XMMO,4), }, { /* [30] */ TNS("wrmsr",NORM), TNS("rdtsc",NORM), TNS("rdmsr",NORM), TNS("rdpmc",NORM), /* [34] */ TNSx("sysenter",NORM), TNSx("sysexit",NORM), INVALID, INVALID, /* [38] */ INVALID, INVALID, INVALID, INVALID, /* [3C] */ INVALID, INVALID, INVALID, INVALID, }, { /* [40] */ TS("cmovx.o",MR), TS("cmovx.no",MR), TS("cmovx.b",MR), TS("cmovx.ae",MR), /* [44] */ TS("cmovx.e",MR), TS("cmovx.ne",MR), TS("cmovx.be",MR), TS("cmovx.a",MR), /* [48] */ TS("cmovx.s",MR), TS("cmovx.ns",MR), TS("cmovx.pe",MR), TS("cmovx.po",MR), /* [4C] */ TS("cmovx.l",MR), TS("cmovx.ge",MR), TS("cmovx.le",MR), TS("cmovx.g",MR), }, { /* [50] */ TNS("movmskps",XMMOX3), TNSZ("sqrtps",XMMO,16), TNSZ("rsqrtps",XMMO,16),TNSZ("rcpps",XMMO,16), /* [54] */ TNSZ("andps",XMMO,16), TNSZ("andnps",XMMO,16), TNSZ("orps",XMMO,16), TNSZ("xorps",XMMO,16), /* [58] */ TNSZ("addps",XMMO,16), TNSZ("mulps",XMMO,16), TNSZ("cvtps2pd",XMMO,8),TNSZ("cvtdq2ps",XMMO,16), /* [5C] */ TNSZ("subps",XMMO,16), TNSZ("minps",XMMO,16), TNSZ("divps",XMMO,16), TNSZ("maxps",XMMO,16), }, { /* [60] */ TNSZ("punpcklbw",MMO,4),TNSZ("punpcklwd",MMO,4),TNSZ("punpckldq",MMO,4),TNSZ("packsswb",MMO,8), /* [64] */ TNSZ("pcmpgtb",MMO,8), TNSZ("pcmpgtw",MMO,8), TNSZ("pcmpgtd",MMO,8), TNSZ("packuswb",MMO,8), /* [68] */ TNSZ("punpckhbw",MMO,8),TNSZ("punpckhwd",MMO,8),TNSZ("punpckhdq",MMO,8),TNSZ("packssdw",MMO,8), /* [6C] */ TNSZ("INVALID",MMO,0), TNSZ("INVALID",MMO,0), TNSZ("movd",MMO,4),
TNSZ("movq",MMO,8), }, { /* [70] */ TNSZ("pshufw",MMOPM,8), TNS("psrXXX",MR), TNS("psrXXX",MR), TNS("psrXXX",MR), /* [74] */ TNSZ("pcmpeqb",MMO,8), TNSZ("pcmpeqw",MMO,8), TNSZ("pcmpeqd",MMO,8), TNS("emms",NORM), /* [78] */ TNSy("vmread",RM), TNSy("vmwrite",MR), INVALID, INVALID, /* [7C] */ INVALID, INVALID, TNSZ("movd",MMOS,4), TNSZ("movq",MMOS,8), }, { /* [80] */ TNS("jo",D), TNS("jno",D), TNS("jb",D), TNS("jae",D), /* [84] */ TNS("je",D), TNS("jne",D), TNS("jbe",D), TNS("ja",D), /* [88] */ TNS("js",D), TNS("jns",D), TNS("jp",D), TNS("jnp",D), /* [8C] */ TNS("jl",D), TNS("jge",D), TNS("jle",D), TNS("jg",D), }, { /* [90] */ TNS("seto",Mb), TNS("setno",Mb), TNS("setb",Mb), TNS("setae",Mb), /* [94] */ TNS("sete",Mb), TNS("setne",Mb), TNS("setbe",Mb), TNS("seta",Mb), /* [98] */ TNS("sets",Mb), TNS("setns",Mb), TNS("setp",Mb), TNS("setnp",Mb), /* [9C] */ TNS("setl",Mb), TNS("setge",Mb), TNS("setle",Mb), TNS("setg",Mb), }, { /* [A0] */ TSp("push",LSEG), TSp("pop",LSEG), TNS("cpuid",NORM), TS("bt",RMw), /* [A4] */ TS("shld",DSHIFT), TS("shld",DSHIFTcl), INVALID, INVALID, /* [A8] */ TSp("push",LSEG), TSp("pop",LSEG), TNS("rsm",NORM), TS("bts",RMw), /* [AC] */ TS("shrd",DSHIFT), TS("shrd",DSHIFTcl), IND(dis_op0FAE), TS("imul",MRw), }, { /* [B0] */ TNS("cmpxchgb",RMw), TS("cmpxchg",RMw), TS("lss",MR), TS("btr",RMw), /* [B4] */ TS("lfs",MR), TS("lgs",MR), TS("movzb",MOVZ), TNS("movzwl",MOVZ), /* [B8] */ TNS("INVALID",MRw), INVALID, IND(dis_op0FBA), TS("btc",RMw), /* [BC] */ TS("bsf",MRw), TS("bsr",MRw), TS("movsb",MOVZ), TNS("movswl",MOVZ), }, { /* [C0] */ TNS("xaddb",XADDB), TS("xadd",RMw), TNSZ("cmpps",XMMOPM,16),TNS("movnti",RM), /* [C4] */ TNSZ("pinsrw",MMOPRM,2),TNS("pextrw",MMO3P), TNSZ("shufps",XMMOPM,16),IND(dis_op0FC7), /* [C8] */ INVALID, INVALID, INVALID, INVALID, /* [CC] */ INVALID, INVALID, INVALID, INVALID, }, { /* [D0] */ INVALID, TNSZ("psrlw",MMO,8), TNSZ("psrld",MMO,8), TNSZ("psrlq",MMO,8), /* [D4] */ TNSZ("paddq",MMO,8), TNSZ("pmullw",MMO,8), TNSZ("INVALID",MMO,0), TNS("pmovmskb",MMOM3), /* [D8] */ TNSZ("psubusb",MMO,8), TNSZ("psubusw",MMO,8), TNSZ("pminub",MMO,8), TNSZ("pand",MMO,8), /* [DC] */ TNSZ("paddusb",MMO,8), TNSZ("paddusw",MMO,8), TNSZ("pmaxub",MMO,8), TNSZ("pandn",MMO,8), }, { /* [E0] */ TNSZ("pavgb",MMO,8), TNSZ("psraw",MMO,8), TNSZ("psrad",MMO,8), TNSZ("pavgw",MMO,8), /* [E4] */ TNSZ("pmulhuw",MMO,8), TNSZ("pmulhw",MMO,8), TNS("INVALID",XMMO), TNSZ("movntq",MMOMS,8), /* [E8] */ TNSZ("psubsb",MMO,8), TNSZ("psubsw",MMO,8), TNSZ("pminsw",MMO,8), TNSZ("por",MMO,8), /* [EC] */ TNSZ("paddsb",MMO,8), TNSZ("paddsw",MMO,8), TNSZ("pmaxsw",MMO,8), TNSZ("pxor",MMO,8), }, { /* [F0] */ INVALID, TNSZ("psllw",MMO,8), TNSZ("pslld",MMO,8), TNSZ("psllq",MMO,8), /* [F4] */ TNSZ("pmuludq",MMO,8), TNSZ("pmaddwd",MMO,8), TNSZ("psadbw",MMO,8), TNSZ("maskmovq",MMOIMPL,8), /* [F8] */ TNSZ("psubb",MMO,8), TNSZ("psubw",MMO,8), TNSZ("psubd",MMO,8), TNSZ("psubq",MMO,8), /* [FC] */ TNSZ("paddb",MMO,8), TNSZ("paddw",MMO,8), TNSZ("paddd",MMO,8), INVALID, } }; const instable_t dis_opAVX0F[16][16] = { { /* [00] */ INVALID, INVALID, INVALID, INVALID, /* [04] */ INVALID, INVALID, INVALID, INVALID, /* [08] */ INVALID, INVALID, INVALID, INVALID, /* [0C] */ INVALID, INVALID, INVALID, INVALID, }, { /* [10] */ TNSZ("vmovups",VEX_MX,16), TNSZ("vmovups",VEX_RM,16),TNSZ("vmovlps",VEX_RMrX,8), TNSZ("vmovlps",VEX_RM,8), /* [14] */ TNSZ("vunpcklps",VEX_RMrX,16),TNSZ("vunpckhps",VEX_RMrX,16),TNSZ("vmovhps",VEX_RMrX,8),TNSZ("vmovhps",VEX_RM,8), /* [18] */ INVALID, INVALID, INVALID, INVALID, /* [1C] */ INVALID, 
INVALID, INVALID, INVALID, }, { /* [20] */ INVALID, INVALID, INVALID, INVALID, /* [24] */ INVALID, INVALID, INVALID, INVALID, /* [28] */ TNSZ("vmovaps",VEX_MX,16), TNSZ("vmovaps",VEX_RX,16),INVALID, TNSZ("vmovntps",VEX_RM,16), /* [2C] */ INVALID, INVALID, TNSZ("vucomiss",VEX_MX,4),TNSZ("vcomiss",VEX_MX,4), }, { /* [30] */ INVALID, INVALID, INVALID, INVALID, /* [34] */ INVALID, INVALID, INVALID, INVALID, /* [38] */ INVALID, INVALID, INVALID, INVALID, /* [3C] */ INVALID, INVALID, INVALID, INVALID, }, { /* [40] */ INVALID, INVALID, INVALID, INVALID, /* [44] */ INVALID, INVALID, INVALID, INVALID, /* [48] */ INVALID, INVALID, INVALID, INVALID, /* [4C] */ INVALID, INVALID, INVALID, INVALID, }, { /* [50] */ TNS("vmovmskps",VEX_MR), TNSZ("vsqrtps",VEX_MX,16), TNSZ("vrsqrtps",VEX_MX,16),TNSZ("vrcpps",VEX_MX,16), /* [54] */ TNSZ("vandps",VEX_RMrX,16), TNSZ("vandnps",VEX_RMrX,16), TNSZ("vorps",VEX_RMrX,16), TNSZ("vxorps",VEX_RMrX,16), /* [58] */ TNSZ("vaddps",VEX_RMrX,16), TNSZ("vmulps",VEX_RMrX,16), TNSZ("vcvtps2pd",VEX_MX,8),TNSZ("vcvtdq2ps",VEX_MX,16), /* [5C] */ TNSZ("vsubps",VEX_RMrX,16), TNSZ("vminps",VEX_RMrX,16), TNSZ("vdivps",VEX_RMrX,16), TNSZ("vmaxps",VEX_RMrX,16), }, { /* [60] */ INVALID, INVALID, INVALID, INVALID, /* [64] */ INVALID, INVALID, INVALID, INVALID, /* [68] */ INVALID, INVALID, INVALID, INVALID, /* [6C] */ INVALID, INVALID, INVALID, INVALID, }, { /* [70] */ INVALID, INVALID, INVALID, INVALID, /* [74] */ INVALID, INVALID, INVALID, TNS("vzeroupper", VEX_NONE), /* [78] */ INVALID, INVALID, INVALID, INVALID, /* [7C] */ INVALID, INVALID, INVALID, INVALID, }, { /* [80] */ INVALID, INVALID, INVALID, INVALID, /* [84] */ INVALID, INVALID, INVALID, INVALID, /* [88] */ INVALID, INVALID, INVALID, INVALID, /* [8C] */ INVALID, INVALID, INVALID, INVALID, }, { /* [90] */ INVALID, INVALID, INVALID, INVALID, /* [94] */ INVALID, INVALID, INVALID, INVALID, /* [98] */ INVALID, INVALID, INVALID, INVALID, /* [9C] */ INVALID, INVALID, INVALID, INVALID, }, { /* [A0] */ INVALID, INVALID, INVALID, INVALID, /* [A4] */ INVALID, INVALID, INVALID, INVALID, /* [A8] */ INVALID, INVALID, INVALID, INVALID, /* [AC] */ INVALID, INVALID, TNSZ("vldmxcsr",VEX_MO,2), INVALID, }, { /* [B0] */ INVALID, INVALID, INVALID, INVALID, /* [B4] */ INVALID, INVALID, INVALID, INVALID, /* [B8] */ INVALID, INVALID, INVALID, INVALID, /* [BC] */ INVALID, INVALID, INVALID, INVALID, }, { /* [C0] */ INVALID, INVALID, TNSZ("vcmpps",VEX_RMRX,16),INVALID, /* [C4] */ INVALID, INVALID, TNSZ("vshufps",VEX_RMRX,16),INVALID, /* [C8] */ INVALID, INVALID, INVALID, INVALID, /* [CC] */ INVALID, INVALID, INVALID, INVALID, }, { /* [D0] */ INVALID, INVALID, INVALID, INVALID, /* [D4] */ INVALID, INVALID, INVALID, INVALID, /* [D8] */ INVALID, INVALID, INVALID, INVALID, /* [DC] */ INVALID, INVALID, INVALID, INVALID, }, { /* [E0] */ INVALID, INVALID, INVALID, INVALID, /* [E4] */ INVALID, INVALID, INVALID, INVALID, /* [E8] */ INVALID, INVALID, INVALID, INVALID, /* [EC] */ INVALID, INVALID, INVALID, INVALID, }, { -/* [F0] */ INVALID, INVALID, INVALID, INVALID, -/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F0] */ INVALID, INVALID, TNSZvr("andn",VEX_RMrX,5),TNSZvr("bls",BLS,5), +/* [F4] */ INVALID, TNSZvr("bzhi",VEX_VRMrX,5),INVALID, TNSZvr("bextr",VEX_VRMrX,5), /* [F8] */ INVALID, INVALID, INVALID, INVALID, /* [FC] */ INVALID, INVALID, INVALID, INVALID, } }; /* * Decode table for 0x80 opcodes */ const instable_t dis_op80[8] = { /* [0] */ TNS("addb",IMlw), TNS("orb",IMw), TNS("adcb",IMlw), TNS("sbbb",IMlw), /* [4] */ TNS("andb",IMw), TNS("subb",IMlw), 
TNS("xorb",IMw), TNS("cmpb",IMlw), }; /* * Decode table for 0x81 opcodes. */ const instable_t dis_op81[8] = { /* [0] */ TS("add",IMlw), TS("or",IMw), TS("adc",IMlw), TS("sbb",IMlw), /* [4] */ TS("and",IMw), TS("sub",IMlw), TS("xor",IMw), TS("cmp",IMlw), }; /* * Decode table for 0x82 opcodes. */ const instable_t dis_op82[8] = { /* [0] */ TNSx("addb",IMlw), TNSx("orb",IMlw), TNSx("adcb",IMlw), TNSx("sbbb",IMlw), /* [4] */ TNSx("andb",IMlw), TNSx("subb",IMlw), TNSx("xorb",IMlw), TNSx("cmpb",IMlw), }; /* * Decode table for 0x83 opcodes. */ const instable_t dis_op83[8] = { /* [0] */ TS("add",IMlw), TS("or",IMlw), TS("adc",IMlw), TS("sbb",IMlw), /* [4] */ TS("and",IMlw), TS("sub",IMlw), TS("xor",IMlw), TS("cmp",IMlw), }; /* * Decode table for 0xC0 opcodes. */ const instable_t dis_opC0[8] = { /* [0] */ TNS("rolb",MvI), TNS("rorb",MvI), TNS("rclb",MvI), TNS("rcrb",MvI), /* [4] */ TNS("shlb",MvI), TNS("shrb",MvI), INVALID, TNS("sarb",MvI), }; /* * Decode table for 0xD0 opcodes. */ const instable_t dis_opD0[8] = { /* [0] */ TNS("rolb",Mv), TNS("rorb",Mv), TNS("rclb",Mv), TNS("rcrb",Mv), /* [4] */ TNS("shlb",Mv), TNS("shrb",Mv), TNS("salb",Mv), TNS("sarb",Mv), }; /* * Decode table for 0xC1 opcodes. * 186 instruction set */ const instable_t dis_opC1[8] = { /* [0] */ TS("rol",MvI), TS("ror",MvI), TS("rcl",MvI), TS("rcr",MvI), /* [4] */ TS("shl",MvI), TS("shr",MvI), TS("sal",MvI), TS("sar",MvI), }; /* * Decode table for 0xD1 opcodes. */ const instable_t dis_opD1[8] = { /* [0] */ TS("rol",Mv), TS("ror",Mv), TS("rcl",Mv), TS("rcr",Mv), /* [4] */ TS("shl",Mv), TS("shr",Mv), TS("sal",Mv), TS("sar",Mv), }; /* * Decode table for 0xD2 opcodes. */ const instable_t dis_opD2[8] = { /* [0] */ TNS("rolb",Mv), TNS("rorb",Mv), TNS("rclb",Mv), TNS("rcrb",Mv), /* [4] */ TNS("shlb",Mv), TNS("shrb",Mv), TNS("salb",Mv), TNS("sarb",Mv), }; /* * Decode table for 0xD3 opcodes. */ const instable_t dis_opD3[8] = { /* [0] */ TS("rol",Mv), TS("ror",Mv), TS("rcl",Mv), TS("rcr",Mv), /* [4] */ TS("shl",Mv), TS("shr",Mv), TS("salb",Mv), TS("sar",Mv), }; /* * Decode table for 0xF6 opcodes. */ const instable_t dis_opF6[8] = { /* [0] */ TNS("testb",IMw), TNS("testb",IMw), TNS("notb",Mw), TNS("negb",Mw), /* [4] */ TNS("mulb",MA), TNS("imulb",MA), TNS("divb",MA), TNS("idivb",MA), }; /* * Decode table for 0xF7 opcodes. */ const instable_t dis_opF7[8] = { /* [0] */ TS("test",IMw), TS("test",IMw), TS("not",Mw), TS("neg",Mw), /* [4] */ TS("mul",MA), TS("imul",MA), TS("div",MA), TS("idiv",MA), }; /* * Decode table for 0xFE opcodes. */ const instable_t dis_opFE[8] = { /* [0] */ TNS("incb",Mw), TNS("decb",Mw), INVALID, INVALID, /* [4] */ INVALID, INVALID, INVALID, INVALID, }; /* * Decode table for 0xFF opcodes. 
*/ const instable_t dis_opFF[8] = { /* [0] */ TS("inc",Mw), TS("dec",Mw), TNSyp("call",INM), TNS("lcall",INM), /* [4] */ TNSy("jmp",INM), TNS("ljmp",INM), TSp("push",M), INVALID, }; /* for 287 instructions, which are a mess to decode */ const instable_t dis_opFP1n2[8][8] = { { /* bit pattern: 1101 1xxx MODxx xR/M */ /* [0,0] */ TNS("fadds",M), TNS("fmuls",M), TNS("fcoms",M), TNS("fcomps",M), /* [0,4] */ TNS("fsubs",M), TNS("fsubrs",M), TNS("fdivs",M), TNS("fdivrs",M), }, { /* [1,0] */ TNS("flds",M), INVALID, TNS("fsts",M), TNS("fstps",M), /* [1,4] */ TNSZ("fldenv",M,28), TNSZ("fldcw",M,2), TNSZ("fnstenv",M,28), TNSZ("fnstcw",M,2), }, { /* [2,0] */ TNS("fiaddl",M), TNS("fimull",M), TNS("ficoml",M), TNS("ficompl",M), /* [2,4] */ TNS("fisubl",M), TNS("fisubrl",M), TNS("fidivl",M), TNS("fidivrl",M), }, { -/* [3,0] */ TNS("fildl",M), INVALID, TNS("fistl",M), TNS("fistpl",M), +/* [3,0] */ TNS("fildl",M), TNSZ("fisttpl",M,4), TNS("fistl",M), TNS("fistpl",M), /* [3,4] */ INVALID, TNSZ("fldt",M,10), INVALID, TNSZ("fstpt",M,10), }, { /* [4,0] */ TNSZ("faddl",M,8), TNSZ("fmull",M,8), TNSZ("fcoml",M,8), TNSZ("fcompl",M,8), /* [4,1] */ TNSZ("fsubl",M,8), TNSZ("fsubrl",M,8), TNSZ("fdivl",M,8), TNSZ("fdivrl",M,8), }, { -/* [5,0] */ TNSZ("fldl",M,8), INVALID, TNSZ("fstl",M,8), TNSZ("fstpl",M,8), +/* [5,0] */ TNSZ("fldl",M,8), TNSZ("fisttpll",M,8), TNSZ("fstl",M,8), TNSZ("fstpl",M,8), /* [5,4] */ TNSZ("frstor",M,108), INVALID, TNSZ("fnsave",M,108), TNSZ("fnstsw",M,2), }, { /* [6,0] */ TNSZ("fiadd",M,2), TNSZ("fimul",M,2), TNSZ("ficom",M,2), TNSZ("ficomp",M,2), /* [6,4] */ TNSZ("fisub",M,2), TNSZ("fisubr",M,2), TNSZ("fidiv",M,2), TNSZ("fidivr",M,2), }, { -/* [7,0] */ TNSZ("fild",M,2), INVALID, TNSZ("fist",M,2), TNSZ("fistp",M,2), +/* [7,0] */ TNSZ("fild",M,2), TNSZ("fisttp",M,2), TNSZ("fist",M,2), TNSZ("fistp",M,2), /* [7,4] */ TNSZ("fbld",M,10), TNSZ("fildll",M,8), TNSZ("fbstp",M,10), TNSZ("fistpll",M,8), } }; const instable_t dis_opFP3[8][8] = { { /* bit pattern: 1101 1xxx 11xx xREG */ /* [0,0] */ TNS("fadd",FF), TNS("fmul",FF), TNS("fcom",F), TNS("fcomp",F), /* [0,4] */ TNS("fsub",FF), TNS("fsubr",FF), TNS("fdiv",FF), TNS("fdivr",FF), }, { /* [1,0] */ TNS("fld",F), TNS("fxch",F), TNS("fnop",NORM), TNS("fstp",F), /* [1,4] */ INVALID, INVALID, INVALID, INVALID, }, { /* [2,0] */ INVALID, INVALID, INVALID, INVALID, /* [2,4] */ INVALID, TNS("fucompp",NORM), INVALID, INVALID, }, { /* [3,0] */ INVALID, INVALID, INVALID, INVALID, /* [3,4] */ INVALID, INVALID, INVALID, INVALID, }, { /* [4,0] */ TNS("fadd",FF), TNS("fmul",FF), TNS("fcom",F), TNS("fcomp",F), /* [4,4] */ TNS("fsub",FF), TNS("fsubr",FF), TNS("fdiv",FF), TNS("fdivr",FF), }, { /* [5,0] */ TNS("ffree",F), TNS("fxch",F), TNS("fst",F), TNS("fstp",F), /* [5,4] */ TNS("fucom",F), TNS("fucomp",F), INVALID, INVALID, }, { /* [6,0] */ TNS("faddp",FF), TNS("fmulp",FF), TNS("fcomp",F), TNS("fcompp",NORM), /* [6,4] */ TNS("fsubp",FF), TNS("fsubrp",FF), TNS("fdivp",FF), TNS("fdivrp",FF), }, { /* [7,0] */ TNS("ffreep",F), TNS("fxch",F), TNS("fstp",F), TNS("fstp",F), /* [7,4] */ TNS("fnstsw",M), TNS("fucomip",FFC), TNS("fcomip",FFC), INVALID, } }; const instable_t dis_opFP4[4][8] = { { /* bit pattern: 1101 1001 111x xxxx */ /* [0,0] */ TNS("fchs",NORM), TNS("fabs",NORM), INVALID, INVALID, /* [0,4] */ TNS("ftst",NORM), TNS("fxam",NORM), TNS("ftstp",NORM), INVALID, }, { /* [1,0] */ TNS("fld1",NORM), TNS("fldl2t",NORM), TNS("fldl2e",NORM), TNS("fldpi",NORM), /* [1,4] */ TNS("fldlg2",NORM), TNS("fldln2",NORM), TNS("fldz",NORM), INVALID, }, { /* [2,0] */
TNS("f2xm1",NORM), TNS("fyl2x",NORM), TNS("fptan",NORM), TNS("fpatan",NORM), /* [2,4] */ TNS("fxtract",NORM), TNS("fprem1",NORM), TNS("fdecstp",NORM), TNS("fincstp",NORM), }, { /* [3,0] */ TNS("fprem",NORM), TNS("fyl2xp1",NORM), TNS("fsqrt",NORM), TNS("fsincos",NORM), /* [3,4] */ TNS("frndint",NORM), TNS("fscale",NORM), TNS("fsin",NORM), TNS("fcos",NORM), } }; const instable_t dis_opFP5[8] = { /* bit pattern: 1101 1011 111x xxxx */ /* [0] */ TNS("feni",NORM), TNS("fdisi",NORM), TNS("fnclex",NORM), TNS("fninit",NORM), /* [4] */ TNS("fsetpm",NORM), TNS("frstpm",NORM), INVALID, INVALID, }; const instable_t dis_opFP6[8] = { /* bit pattern: 1101 1011 11yy yxxx */ /* [00] */ TNS("fcmov.nb",FF), TNS("fcmov.ne",FF), TNS("fcmov.nbe",FF), TNS("fcmov.nu",FF), /* [04] */ INVALID, TNS("fucomi",F), TNS("fcomi",F), INVALID, }; const instable_t dis_opFP7[8] = { /* bit pattern: 1101 1010 11yy yxxx */ /* [00] */ TNS("fcmov.b",FF), TNS("fcmov.e",FF), TNS("fcmov.be",FF), TNS("fcmov.u",FF), /* [04] */ INVALID, INVALID, INVALID, INVALID, }; /* * Main decode table for the op codes. The first two nibbles * will be used as an index into the table. If there is a * a need to further decode an instruction, the array to be * referenced is indicated with the other two entries being * empty. */ const instable_t dis_distable[16][16] = { { /* [0,0] */ TNS("addb",RMw), TS("add",RMw), TNS("addb",MRw), TS("add",MRw), /* [0,4] */ TNS("addb",IA), TS("add",IA), TSx("push",SEG), TSx("pop",SEG), /* [0,8] */ TNS("orb",RMw), TS("or",RMw), TNS("orb",MRw), TS("or",MRw), /* [0,C] */ TNS("orb",IA), TS("or",IA), TSx("push",SEG), IND(dis_op0F), }, { /* [1,0] */ TNS("adcb",RMw), TS("adc",RMw), TNS("adcb",MRw), TS("adc",MRw), /* [1,4] */ TNS("adcb",IA), TS("adc",IA), TSx("push",SEG), TSx("pop",SEG), /* [1,8] */ TNS("sbbb",RMw), TS("sbb",RMw), TNS("sbbb",MRw), TS("sbb",MRw), /* [1,C] */ TNS("sbbb",IA), TS("sbb",IA), TSx("push",SEG), TSx("pop",SEG), }, { /* [2,0] */ TNS("andb",RMw), TS("and",RMw), TNS("andb",MRw), TS("and",MRw), /* [2,4] */ TNS("andb",IA), TS("and",IA), TNSx("%es:",OVERRIDE), TNSx("daa",NORM), /* [2,8] */ TNS("subb",RMw), TS("sub",RMw), TNS("subb",MRw), TS("sub",MRw), /* [2,C] */ TNS("subb",IA), TS("sub",IA), TNS("%cs:",OVERRIDE), TNSx("das",NORM), }, { /* [3,0] */ TNS("xorb",RMw), TS("xor",RMw), TNS("xorb",MRw), TS("xor",MRw), /* [3,4] */ TNS("xorb",IA), TS("xor",IA), TNSx("%ss:",OVERRIDE), TNSx("aaa",NORM), /* [3,8] */ TNS("cmpb",RMw), TS("cmp",RMw), TNS("cmpb",MRw), TS("cmp",MRw), /* [3,C] */ TNS("cmpb",IA), TS("cmp",IA), TNSx("%ds:",OVERRIDE), TNSx("aas",NORM), }, { /* [4,0] */ TSx("inc",R), TSx("inc",R), TSx("inc",R), TSx("inc",R), /* [4,4] */ TSx("inc",R), TSx("inc",R), TSx("inc",R), TSx("inc",R), /* [4,8] */ TSx("dec",R), TSx("dec",R), TSx("dec",R), TSx("dec",R), /* [4,C] */ TSx("dec",R), TSx("dec",R), TSx("dec",R), TSx("dec",R), }, { /* [5,0] */ TSp("push",R), TSp("push",R), TSp("push",R), TSp("push",R), /* [5,4] */ TSp("push",R), TSp("push",R), TSp("push",R), TSp("push",R), /* [5,8] */ TSp("pop",R), TSp("pop",R), TSp("pop",R), TSp("pop",R), /* [5,C] */ TSp("pop",R), TSp("pop",R), TSp("pop",R), TSp("pop",R), }, { /* [6,0] */ TSZx("pusha",IMPLMEM,28),TSZx("popa",IMPLMEM,28), TSx("bound",MR), TNS("arpl",RMw), /* [6,4] */ TNS("%fs:",OVERRIDE), TNS("%gs:",OVERRIDE), TNS("data16",DM), TNS("addr16",AM), /* [6,8] */ TSp("push",I), TS("imul",IMUL), TSp("push",Ib), TS("imul",IMUL), /* [6,C] */ TNSZ("insb",IMPLMEM,1), TSZ("ins",IMPLMEM,4), TNSZ("outsb",IMPLMEM,1),TSZ("outs",IMPLMEM,4), }, { /* [7,0] */ TNSy("jo",BD), 
TNSy("jno",BD), TNSy("jb",BD), TNSy("jae",BD), /* [7,4] */ TNSy("je",BD), TNSy("jne",BD), TNSy("jbe",BD), TNSy("ja",BD), /* [7,8] */ TNSy("js",BD), TNSy("jns",BD), TNSy("jp",BD), TNSy("jnp",BD), /* [7,C] */ TNSy("jl",BD), TNSy("jge",BD), TNSy("jle",BD), TNSy("jg",BD), }, { /* [8,0] */ IND(dis_op80), IND(dis_op81), INDx(dis_op82), IND(dis_op83), /* [8,4] */ TNS("testb",RMw), TS("test",RMw), TNS("xchgb",RMw), TS("xchg",RMw), /* [8,8] */ TNS("movb",RMw), TS("mov",RMw), TNS("movb",MRw), TS("mov",MRw), /* [8,C] */ TNS("movw",SM), TS("lea",MR), TNS("movw",MS), TSp("pop",M), }, { /* [9,0] */ TNS("nop",NORM), TS("xchg",RA), TS("xchg",RA), TS("xchg",RA), /* [9,4] */ TS("xchg",RA), TS("xchg",RA), TS("xchg",RA), TS("xchg",RA), /* [9,8] */ TNS("cXtX",CBW), TNS("cXtX",CWD), TNSx("lcall",SO), TNS("fwait",NORM), /* [9,C] */ TSZy("pushf",IMPLMEM,4),TSZy("popf",IMPLMEM,4), TNS("sahf",NORM), TNS("lahf",NORM), }, { /* [A,0] */ TNS("movb",OA), TS("mov",OA), TNS("movb",AO), TS("mov",AO), /* [A,4] */ TNSZ("movsb",SD,1), TS("movs",SD), TNSZ("cmpsb",SD,1), TS("cmps",SD), /* [A,8] */ TNS("testb",IA), TS("test",IA), TNS("stosb",AD), TS("stos",AD), /* [A,C] */ TNS("lodsb",SA), TS("lods",SA), TNS("scasb",AD), TS("scas",AD), }, { /* [B,0] */ TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), /* [B,4] */ TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), /* [B,8] */ TS("mov",IR), TS("mov",IR), TS("mov",IR), TS("mov",IR), /* [B,C] */ TS("mov",IR), TS("mov",IR), TS("mov",IR), TS("mov",IR), }, { /* [C,0] */ IND(dis_opC0), IND(dis_opC1), TNSyp("ret",RET), TNSyp("ret",NORM), /* [C,4] */ TNSx("les",MR), TNSx("lds",MR), TNS("movb",IMw), TS("mov",IMw), /* [C,8] */ TNSyp("enter",ENTER), TNSyp("leave",NORM), TNS("lret",RET), TNS("lret",NORM), /* [C,C] */ TNS("int",INT3), TNS("int",INTx), TNSx("into",NORM), TNS("iret",NORM), }, { /* [D,0] */ IND(dis_opD0), IND(dis_opD1), IND(dis_opD2), IND(dis_opD3), /* [D,4] */ TNSx("aam",U), TNSx("aad",U), TNSx("falc",NORM), TNSZ("xlat",IMPLMEM,1), /* 287 instructions. Note that although the indirect field */ /* indicates opFP1n2 for further decoding, this is not necessarily */ /* the case since the opFP arrays are not partitioned according to key1 */ /* and key2. opFP1n2 is given only to indicate that we haven't */ /* finished decoding the instruction. */ /* [D,8] */ IND(dis_opFP1n2), IND(dis_opFP1n2), IND(dis_opFP1n2), IND(dis_opFP1n2), /* [D,C] */ IND(dis_opFP1n2), IND(dis_opFP1n2), IND(dis_opFP1n2), IND(dis_opFP1n2), }, { /* [E,0] */ TNSy("loopnz",BD), TNSy("loopz",BD), TNSy("loop",BD), TNSy("jcxz",BD), /* [E,4] */ TNS("inb",P), TS("in",P), TNS("outb",P), TS("out",P), /* [E,8] */ TNSyp("call",D), TNSy("jmp",D), TNSx("ljmp",SO), TNSy("jmp",BD), /* [E,C] */ TNS("inb",V), TS("in",V), TNS("outb",V), TS("out",V), }, { /* [F,0] */ TNS("lock",LOCK), TNS("icebp", NORM), TNS("repnz",PREFIX), TNS("repz",PREFIX), /* [F,4] */ TNS("hlt",NORM), TNS("cmc",NORM), IND(dis_opF6), IND(dis_opF7), /* [F,8] */ TNS("clc",NORM), TNS("stc",NORM), TNS("cli",NORM), TNS("sti",NORM), /* [F,C] */ TNS("cld",NORM), TNS("std",NORM), IND(dis_opFE), IND(dis_opFF), } }; /* END CSTYLED */ /* * common functions to decode and disassemble an x86 or amd64 instruction */ /* * These are the individual fields of a REX prefix. 
Note that a REX * prefix with none of these set is still needed to: * - use the MOVSXD (sign extend 32 to 64 bits) instruction * - access the %sil, %dil, %bpl, %spl registers */ #define REX_W 0x08 /* 64 bit operand size when set */ #define REX_R 0x04 /* high order bit extension of ModRM reg field */ #define REX_X 0x02 /* high order bit extension of SIB index field */ #define REX_B 0x01 /* extends ModRM r_m, SIB base, or opcode reg */ /* * These are the individual fields of a VEX prefix. */ #define VEX_R 0x08 /* REX.R in 1's complement form */ #define VEX_X 0x04 /* REX.X in 1's complement form */ #define VEX_B 0x02 /* REX.B in 1's complement form */ /* Vector Length, 0: scalar or 128-bit vector, 1: 256-bit vector */ #define VEX_L 0x04 #define VEX_W 0x08 /* opcode specific, use like REX.W */ #define VEX_m 0x1F /* VEX m-mmmm field */ #define VEX_v 0x78 /* VEX register specifier */ #define VEX_p 0x03 /* VEX pp field, opcode extension */ /* VEX m-mmmm field, only used by the three byte prefix */ #define VEX_m_0F 0x01 /* implied 0F leading opcode byte */ #define VEX_m_0F38 0x02 /* implied 0F 38 leading opcode byte */ #define VEX_m_0F3A 0x03 /* implied 0F 3A leading opcode byte */ /* VEX pp field, providing equivalent functionality of a SIMD prefix */ #define VEX_p_66 0x01 #define VEX_p_F3 0x02 #define VEX_p_F2 0x03 /* * Even in 64 bit mode, usually only 4 byte immediate operands are supported. */ static int isize[] = {1, 2, 4, 4}; static int isize64[] = {1, 2, 4, 8}; /* * Just a bunch of useful macros. */ #define WBIT(x) (x & 0x1) /* to get w bit */ #define REGNO(x) (x & 0x7) /* to get 3 bit register */ #define VBIT(x) ((x)>>1 & 0x1) /* to get 'v' bit */ #define OPSIZE(osize, wbit) ((wbit) ? isize[osize] : 1) #define OPSIZE64(osize, wbit) ((wbit) ? isize64[osize] : 1) #define REG_ONLY 3 /* mode to indicate a register operand (not memory) */ #define BYTE_OPND 0 /* w-bit value indicating byte register */ #define LONG_OPND 1 /* w-bit value indicating opnd_size register */ #define MM_OPND 2 /* "value" used to indicate a mmx reg */ #define XMM_OPND 3 /* "value" used to indicate a xmm reg */ #define SEG_OPND 4 /* "value" used to indicate a segment reg */ #define CONTROL_OPND 5 /* "value" used to indicate a control reg */ #define DEBUG_OPND 6 /* "value" used to indicate a debug reg */ #define TEST_OPND 7 /* "value" used to indicate a test reg */ #define WORD_OPND 8 /* w-bit value indicating word size reg */ #define YMM_OPND 9 /* "value" used to indicate a ymm reg */ /* + * The AVX2 gather instructions are a bit of a mess. While there's a pattern, + * there's not really a consistent scheme that we can use to know what the mode + * is supposed to be for a given type. Various instructions, like VPGATHERDD, + * always match the value of VEX_L. Other instructions, like VPGATHERDQ, have + * some registers match VEX_L, but the VSIB is always XMM. + * + * The simplest way to deal with this is to just define a table based on the + * instruction opcodes, which are 0x90-0x93, so we subtract 0x90 to index into + * them. + * + * We further have to subdivide this based on the value of VEX_W and the value + * of VEX_L. The array is constructed to be indexed as: + * [opcode - 0x90][VEX_W][VEX_L].
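+ * For example, vpgatherdq (opcode 0x90) with both VEX_W and VEX_L set indexes + * dis_vgather[0][1][1]: the mask and destination are YMM registers but the + * VSIB index register is an XMM register, and a "q" suffix is appended.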
+ */ +/* w = 0, 0x90 */ +typedef struct dis_gather_regs { + uint_t dgr_arg0; /* src reg */ + uint_t dgr_arg1; /* vsib reg */ + uint_t dgr_arg2; /* dst reg */ + char *dgr_suffix; /* suffix to append */ +} dis_gather_regs_t; + +static dis_gather_regs_t dis_vgather[4][2][2] = { + { + /* op 0x90, W.0 */ + { + { XMM_OPND, XMM_OPND, XMM_OPND, "d" }, + { YMM_OPND, YMM_OPND, YMM_OPND, "d" } + }, + /* op 0x90, W.1 */ + { + { XMM_OPND, XMM_OPND, XMM_OPND, "q" }, + { YMM_OPND, XMM_OPND, YMM_OPND, "q" } + } + }, + { + /* op 0x91, W.0 */ + { + { XMM_OPND, XMM_OPND, XMM_OPND, "d" }, + { XMM_OPND, YMM_OPND, XMM_OPND, "d" }, + }, + /* op 0x91, W.1 */ + { + { XMM_OPND, XMM_OPND, XMM_OPND, "q" }, + { YMM_OPND, YMM_OPND, YMM_OPND, "q" }, + } + }, + { + /* op 0x92, W.0 */ + { + { XMM_OPND, XMM_OPND, XMM_OPND, "s" }, + { YMM_OPND, YMM_OPND, YMM_OPND, "s" } + }, + /* op 0x92, W.1 */ + { + { XMM_OPND, XMM_OPND, XMM_OPND, "d" }, + { YMM_OPND, XMM_OPND, YMM_OPND, "d" } + } + }, + { + /* op 0x93, W.0 */ + { + { XMM_OPND, XMM_OPND, XMM_OPND, "s" }, + { XMM_OPND, YMM_OPND, XMM_OPND, "s" } + }, + /* op 0x93, W.1 */ + { + { XMM_OPND, XMM_OPND, XMM_OPND, "d" }, + { YMM_OPND, YMM_OPND, YMM_OPND, "d" } + } + } +}; + +/* * Get the next byte and separate the op code into the high and low nibbles. */ static int dtrace_get_opcode(dis86_t *x, uint_t *high, uint_t *low) { int byte; /* * x86 instructions have a maximum length of 15 bytes. Bail out if * we try to read more. */ if (x->d86_len >= 15) return (x->d86_error = 1); if (x->d86_error) return (1); byte = x->d86_get_byte(x->d86_data); if (byte < 0) return (x->d86_error = 1); x->d86_bytes[x->d86_len++] = byte; *low = byte & 0xf; /* ----xxxx low 4 bits */ *high = byte >> 4 & 0xf; /* xxxx---- bits 7 to 4 */ return (0); } /* * Get and decode an SIB (scaled index base) byte */ static void dtrace_get_SIB(dis86_t *x, uint_t *ss, uint_t *index, uint_t *base) { int byte; if (x->d86_error) return; byte = x->d86_get_byte(x->d86_data); if (byte < 0) { x->d86_error = 1; return; } x->d86_bytes[x->d86_len++] = byte; *base = byte & 0x7; *index = (byte >> 3) & 0x7; *ss = (byte >> 6) & 0x3; } /* * Get the byte following the op code and separate it into the * mode, register, and r/m fields. */ static void dtrace_get_modrm(dis86_t *x, uint_t *mode, uint_t *reg, uint_t *r_m) { if (x->d86_got_modrm == 0) { if (x->d86_rmindex == -1) x->d86_rmindex = x->d86_len; dtrace_get_SIB(x, mode, reg, r_m); x->d86_got_modrm = 1; } } /* * Adjust register selection based on any REX prefix bits present. */ /*ARGSUSED*/ static void dtrace_rex_adjust(uint_t rex_prefix, uint_t mode, uint_t *reg, uint_t *r_m) { if (reg != NULL && r_m == NULL) { if (rex_prefix & REX_B) *reg += 8; } else { if (reg != NULL && (REX_R & rex_prefix) != 0) *reg += 8; if (r_m != NULL && (REX_B & rex_prefix) != 0) *r_m += 8; } } /* * Adjust register selection based on any VEX prefix bits present. * Notes: VEX.R, VEX.X and VEX.B use the inverted form compared with REX prefix */ /*ARGSUSED*/ static void dtrace_vex_adjust(uint_t vex_byte1, uint_t mode, uint_t *reg, uint_t *r_m) { if (reg != NULL && r_m == NULL) { if (!(vex_byte1 & VEX_B)) *reg += 8; } else { if (reg != NULL && ((VEX_R & vex_byte1) == 0)) *reg += 8; if (r_m != NULL && ((VEX_B & vex_byte1) == 0)) *r_m += 8; } } /* * Get an immediate operand of the given size, with sign extension. 
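* For example, a one byte immediate of 0xfe is stored in d86_value as the * sign-extended value 0xfffffffffffffffe.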
*/ static void dtrace_imm_opnd(dis86_t *x, int wbit, int size, int opindex) { int i; int byte; int valsize; if (x->d86_numopnds < opindex + 1) x->d86_numopnds = opindex + 1; switch (wbit) { case BYTE_OPND: valsize = 1; break; case LONG_OPND: if (x->d86_opnd_size == SIZE16) valsize = 2; else if (x->d86_opnd_size == SIZE32) valsize = 4; else valsize = 8; break; case MM_OPND: case XMM_OPND: case YMM_OPND: case SEG_OPND: case CONTROL_OPND: case DEBUG_OPND: case TEST_OPND: valsize = size; break; case WORD_OPND: valsize = 2; break; } if (valsize < size) valsize = size; if (x->d86_error) return; x->d86_opnd[opindex].d86_value = 0; for (i = 0; i < size; ++i) { byte = x->d86_get_byte(x->d86_data); if (byte < 0) { x->d86_error = 1; return; } x->d86_bytes[x->d86_len++] = byte; x->d86_opnd[opindex].d86_value |= (uint64_t)byte << (i * 8); } /* Do sign extension */ if (x->d86_bytes[x->d86_len - 1] & 0x80) { for (; i < sizeof (uint64_t); i++) x->d86_opnd[opindex].d86_value |= (uint64_t)0xff << (i * 8); } #ifdef DIS_TEXT x->d86_opnd[opindex].d86_mode = MODE_SIGNED; x->d86_opnd[opindex].d86_value_size = valsize; x->d86_imm_bytes += size; #endif } /* * Get an ip relative operand of the given size, with sign extension. */ static void dtrace_disp_opnd(dis86_t *x, int wbit, int size, int opindex) { dtrace_imm_opnd(x, wbit, size, opindex); #ifdef DIS_TEXT x->d86_opnd[opindex].d86_mode = MODE_IPREL; #endif } /* * Check to see if there is a segment override prefix pending. * If so, print it in the current 'operand' location and set * the override flag back to false. */ /*ARGSUSED*/ static void dtrace_check_override(dis86_t *x, int opindex) { #ifdef DIS_TEXT if (x->d86_seg_prefix) { (void) strlcat(x->d86_opnd[opindex].d86_prefix, x->d86_seg_prefix, PFIXLEN); } #endif x->d86_seg_prefix = NULL; } /* * Process a single instruction Register or Memory operand. * * mode = addressing mode from ModRM byte * r_m = r_m (or reg if mode == 3) field from ModRM byte * wbit = indicates which register (8bit, 16bit, ... MMX, etc.) set to use. * o = index of operand that we are processing (0, 1 or 2) * * the value of reg or r_m must have already been adjusted for any REX prefix. 
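* For example, mode == REG_ONLY with r_m == 1 and wbit == LONG_OPND names * %cx, %ecx or %rcx, depending on the operand size in effect.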
*/ /*ARGSUSED*/ static void dtrace_get_operand(dis86_t *x, uint_t mode, uint_t r_m, int wbit, int opindex) { int have_SIB = 0; /* flag presence of scale-index-byte */ uint_t ss; /* scale-factor from opcode */ uint_t index; /* index register number */ uint_t base; /* base register number */ int dispsize; /* size of displacement in bytes */ #ifdef DIS_TEXT char *opnd = x->d86_opnd[opindex].d86_opnd; #endif if (x->d86_numopnds < opindex + 1) x->d86_numopnds = opindex + 1; if (x->d86_error) return; /* * first handle a simple register */ if (mode == REG_ONLY) { #ifdef DIS_TEXT switch (wbit) { case MM_OPND: (void) strlcat(opnd, dis_MMREG[r_m], OPLEN); break; case XMM_OPND: (void) strlcat(opnd, dis_XMMREG[r_m], OPLEN); break; case YMM_OPND: (void) strlcat(opnd, dis_YMMREG[r_m], OPLEN); break; case SEG_OPND: (void) strlcat(opnd, dis_SEGREG[r_m], OPLEN); break; case CONTROL_OPND: (void) strlcat(opnd, dis_CONTROLREG[r_m], OPLEN); break; case DEBUG_OPND: (void) strlcat(opnd, dis_DEBUGREG[r_m], OPLEN); break; case TEST_OPND: (void) strlcat(opnd, dis_TESTREG[r_m], OPLEN); break; case BYTE_OPND: if (x->d86_rex_prefix == 0) (void) strlcat(opnd, dis_REG8[r_m], OPLEN); else (void) strlcat(opnd, dis_REG8_REX[r_m], OPLEN); break; case WORD_OPND: (void) strlcat(opnd, dis_REG16[r_m], OPLEN); break; case LONG_OPND: if (x->d86_opnd_size == SIZE16) (void) strlcat(opnd, dis_REG16[r_m], OPLEN); else if (x->d86_opnd_size == SIZE32) (void) strlcat(opnd, dis_REG32[r_m], OPLEN); else (void) strlcat(opnd, dis_REG64[r_m], OPLEN); break; } #endif /* DIS_TEXT */ return; } /* * if symbolic representation, skip override prefix, if any */ dtrace_check_override(x, opindex); /* * Handle 16 bit memory references first, since they decode * the mode values more simply. * mode 1 is r_m + 8 bit displacement * mode 2 is r_m + 16 bit displacement * mode 0 is just r_m, unless r_m is 6 which is 16 bit disp */ if (x->d86_addr_size == SIZE16) { if ((mode == 0 && r_m == 6) || mode == 2) dtrace_imm_opnd(x, WORD_OPND, 2, opindex); else if (mode == 1) dtrace_imm_opnd(x, BYTE_OPND, 1, opindex); #ifdef DIS_TEXT if (mode == 0 && r_m == 6) x->d86_opnd[opindex].d86_mode = MODE_SIGNED; else if (mode == 0) x->d86_opnd[opindex].d86_mode = MODE_NONE; else x->d86_opnd[opindex].d86_mode = MODE_OFFSET; (void) strlcat(opnd, dis_addr16[mode][r_m], OPLEN); #endif return; } /* * 32 and 64 bit addressing modes are more complex since they * can involve an SIB (scaled index and base) byte to decode. */ if (r_m == ESP_REGNO || r_m == ESP_REGNO + 8) { have_SIB = 1; dtrace_get_SIB(x, &ss, &index, &base); if (x->d86_error) return; if (base != 5 || mode != 0) if (x->d86_rex_prefix & REX_B) base += 8; if (x->d86_rex_prefix & REX_X) index += 8; } else { base = r_m; } /* * Compute the displacement size and get its bytes */ dispsize = 0; if (mode == 1) dispsize = 1; else if (mode == 2) dispsize = 4; else if ((r_m & 7) == EBP_REGNO || (have_SIB && (base & 7) == EBP_REGNO)) dispsize = 4; if (dispsize > 0) { dtrace_imm_opnd(x, dispsize == 4 ? 
LONG_OPND : BYTE_OPND, dispsize, opindex); if (x->d86_error) return; } #ifdef DIS_TEXT if (dispsize > 0) x->d86_opnd[opindex].d86_mode = MODE_OFFSET; if (have_SIB == 0) { if (x->d86_mode == SIZE32) { if (mode == 0) (void) strlcat(opnd, dis_addr32_mode0[r_m], OPLEN); else (void) strlcat(opnd, dis_addr32_mode12[r_m], OPLEN); } else { if (mode == 0) { (void) strlcat(opnd, dis_addr64_mode0[r_m], OPLEN); if (r_m == 5) { x->d86_opnd[opindex].d86_mode = MODE_RIPREL; } } else { (void) strlcat(opnd, dis_addr64_mode12[r_m], OPLEN); } } } else { uint_t need_paren = 0; char **regs; + char **bregs; + const char *const *sf; if (x->d86_mode == SIZE32) /* NOTE this is not addr_size! */ regs = (char **)dis_REG32; else regs = (char **)dis_REG64; + if (x->d86_vsib != 0) { + if (wbit == YMM_OPND) /* NOTE this is not addr_size! */ + bregs = (char **)dis_YMMREG; + else + bregs = (char **)dis_XMMREG; + sf = dis_vscale_factor; + } else { + bregs = regs; + sf = dis_scale_factor; + } + /* * print the base (if any) */ if (base == EBP_REGNO && mode == 0) { - if (index != ESP_REGNO) { + if (index != ESP_REGNO || x->d86_vsib != 0) { (void) strlcat(opnd, "(", OPLEN); need_paren = 1; } } else { (void) strlcat(opnd, "(", OPLEN); (void) strlcat(opnd, regs[base], OPLEN); need_paren = 1; } /* * print the index (if any) */ - if (index != ESP_REGNO) { + if (index != ESP_REGNO || x->d86_vsib) { (void) strlcat(opnd, ",", OPLEN); - (void) strlcat(opnd, regs[index], OPLEN); - (void) strlcat(opnd, dis_scale_factor[ss], OPLEN); + (void) strlcat(opnd, bregs[index], OPLEN); + (void) strlcat(opnd, sf[ss], OPLEN); } else if (need_paren) (void) strlcat(opnd, ")", OPLEN); } #endif } /* * Operand sequence for standard instruction involving one register * and one register/memory operand. * wbit indicates a byte(0) or opnd_size(1) operation * vbit indicates direction (0 for "opcode r,r_m") or (1 for "opcode r_m, r") */ #define STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, vbit) { \ dtrace_get_modrm(x, &mode, &reg, &r_m); \ dtrace_rex_adjust(rex_prefix, mode, &reg, &r_m); \ dtrace_get_operand(x, mode, r_m, wbit, vbit); \ dtrace_get_operand(x, REG_ONLY, reg, wbit, 1 - vbit); \ } /* * Similar to above, but allows for the two operands to be of different * classes (i.e. wbit). * wbit is for the r_m operand * w2 is for the reg operand */ #define MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, w2, vbit) { \ dtrace_get_modrm(x, &mode, &reg, &r_m); \ dtrace_rex_adjust(rex_prefix, mode, &reg, &r_m); \ dtrace_get_operand(x, mode, r_m, wbit, vbit); \ dtrace_get_operand(x, REG_ONLY, reg, w2, 1 - vbit); \ } /* * Similar, but for 2 operands plus an immediate. * vbit indicates direction * 0 for "opcode imm, r, r_m" or * 1 for "opcode imm, r_m, r" */ #define THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, w2, immsize, vbit) { \ dtrace_get_modrm(x, &mode, &reg, &r_m); \ dtrace_rex_adjust(rex_prefix, mode, &reg, &r_m); \ dtrace_get_operand(x, mode, r_m, wbit, 2-vbit); \ dtrace_get_operand(x, REG_ONLY, reg, w2, 1+vbit); \ dtrace_imm_opnd(x, wbit, immsize, 0); \ } /* * Similar, but for 2 operands plus two immediates. */ #define FOUROPERAND(x, mode, reg, r_m, rex_prefix, wbit, w2, immsize) { \ dtrace_get_modrm(x, &mode, &reg, &r_m); \ dtrace_rex_adjust(rex_prefix, mode, &reg, &r_m); \ dtrace_get_operand(x, mode, r_m, wbit, 2); \ dtrace_get_operand(x, REG_ONLY, reg, w2, 3); \ dtrace_imm_opnd(x, wbit, immsize, 1); \ dtrace_imm_opnd(x, wbit, immsize, 0); \ } /* * One operand plus two immediates.
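* Note that every macro above leads with dtrace_get_modrm(), which is * idempotent (it checks d86_got_modrm), so the ModR/M byte is consumed only * once. The immediates are always read last, into the low operand slots.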
*/ #define ONEOPERAND_TWOIMM(x, mode, reg, r_m, rex_prefix, wbit, immsize) { \ dtrace_get_modrm(x, &mode, &reg, &r_m); \ dtrace_rex_adjust(rex_prefix, mode, &reg, &r_m); \ dtrace_get_operand(x, mode, r_m, wbit, 2); \ dtrace_imm_opnd(x, wbit, immsize, 1); \ dtrace_imm_opnd(x, wbit, immsize, 0); \ } /* * Disassemble a single x86 or amd64 instruction. * * Mode determines the default operating mode (SIZE16, SIZE32 or SIZE64) * for interpreting instructions. * * returns non-zero for bad opcode */ int dtrace_disx86(dis86_t *x, uint_t cpu_mode) { instable_t *dp; /* decode table being used */ #ifdef DIS_TEXT uint_t i; #endif #ifdef DIS_MEM uint_t nomem = 0; #define NOMEM (nomem = 1) #else #define NOMEM /* nothing */ #endif uint_t opnd_size; /* SIZE16, SIZE32 or SIZE64 */ uint_t addr_size; /* SIZE16, SIZE32 or SIZE64 */ uint_t wbit; /* opcode wbit, 0 is 8 bit, !0 for opnd_size */ uint_t w2; /* wbit value for second operand */ uint_t vbit; uint_t mode = 0; /* mode value from ModRM byte */ uint_t reg; /* reg value from ModRM byte */ uint_t r_m; /* r_m value from ModRM byte */ uint_t opcode1; /* high nibble of 1st byte */ uint_t opcode2; /* low nibble of 1st byte */ uint_t opcode3; /* extra opcode bits usually from ModRM byte */ uint_t opcode4; /* high nibble of 2nd byte */ uint_t opcode5; /* low nibble of 2nd byte */ uint_t opcode6; /* high nibble of 3rd byte */ uint_t opcode7; /* low nibble of 3rd byte */ uint_t opcode_bytes = 1; /* * legacy prefixes come in 5 flavors; you should have only one of each */ uint_t opnd_size_prefix = 0; uint_t addr_size_prefix = 0; uint_t segment_prefix = 0; uint_t lock_prefix = 0; uint_t rep_prefix = 0; uint_t rex_prefix = 0; /* amd64 register extension prefix */ /* * Intel VEX instruction encoding prefix and fields */ /* 0xC4 means 3 byte prefix, 0xC5 means 2 byte prefix */ uint_t vex_prefix = 0; /* * VEX prefix byte 1, includes vex.r, vex.x and vex.b * (for the 3 byte prefix) */ uint_t vex_byte1 = 0; /* * For 32-bit mode, we need to prefetch the next byte to * distinguish between AVX and les/lds */ uint_t vex_prefetch = 0; uint_t vex_m = 0; uint_t vex_v = 0; uint_t vex_p = 0; uint_t vex_R = 1; uint_t vex_X = 1; uint_t vex_B = 1; uint_t vex_W = 0; uint_t vex_L; + dis_gather_regs_t *vreg; +#ifdef DIS_TEXT + /* Instruction name for BLS* family of instructions */ + char *blsinstr; +#endif size_t off; instable_t dp_mmx; x->d86_len = 0; x->d86_rmindex = -1; x->d86_error = 0; #ifdef DIS_TEXT x->d86_numopnds = 0; x->d86_seg_prefix = NULL; x->d86_mnem[0] = 0; for (i = 0; i < 4; ++i) { x->d86_opnd[i].d86_opnd[0] = 0; x->d86_opnd[i].d86_prefix[0] = 0; x->d86_opnd[i].d86_value_size = 0; x->d86_opnd[i].d86_value = 0; x->d86_opnd[i].d86_mode = MODE_NONE; } #endif x->d86_rex_prefix = 0; x->d86_got_modrm = 0; x->d86_memsize = 0; + x->d86_vsib = 0; if (cpu_mode == SIZE16) { opnd_size = SIZE16; addr_size = SIZE16; } else if (cpu_mode == SIZE32) { opnd_size = SIZE32; addr_size = SIZE32; } else { opnd_size = SIZE32; addr_size = SIZE64; } /* * Get one opcode byte and check for zero padding that follows * jump tables. */ if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) goto error; if (opcode1 == 0 && opcode2 == 0 && x->d86_check_func != NULL && x->d86_check_func(x->d86_data)) { #ifdef DIS_TEXT (void) strncpy(x->d86_mnem, ".byte\t0", OPLEN); #endif goto done; } /* * Gather up legacy x86 prefix bytes.
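* A single instruction may carry several of these; e.g. in 64-bit mode * f0 66 0f c1 0b is lock xadd %cx,(%rbx), where f0 is the lock prefix and * 66 shrinks the operand size to 16 bits.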
*/ for (;;) { uint_t *which_prefix = NULL; dp = (instable_t *)&dis_distable[opcode1][opcode2]; switch (dp->it_adrmode) { case PREFIX: which_prefix = &rep_prefix; break; case LOCK: which_prefix = &lock_prefix; break; case OVERRIDE: which_prefix = &segment_prefix; #ifdef DIS_TEXT x->d86_seg_prefix = (char *)dp->it_name; #endif if (dp->it_invalid64 && cpu_mode == SIZE64) goto error; break; case AM: which_prefix = &addr_size_prefix; break; case DM: which_prefix = &opnd_size_prefix; break; } if (which_prefix == NULL) break; *which_prefix = (opcode1 << 4) | opcode2; if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) goto error; } /* * Handle amd64 mode PREFIX values. * Some of the segment prefixes are no-ops. (only FS/GS actually work) * We might have a REX prefix (opcodes 0x40-0x4f) */ if (cpu_mode == SIZE64) { if (segment_prefix != 0x64 && segment_prefix != 0x65) segment_prefix = 0; if (opcode1 == 0x4) { rex_prefix = (opcode1 << 4) | opcode2; if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) goto error; dp = (instable_t *)&dis_distable[opcode1][opcode2]; } else if (opcode1 == 0xC && (opcode2 == 0x4 || opcode2 == 0x5)) { /* AVX instructions */ vex_prefix = (opcode1 << 4) | opcode2; x->d86_rex_prefix = 0x40; } } else if (opcode1 == 0xC && (opcode2 == 0x4 || opcode2 == 0x5)) { /* LDS, LES or AVX */ dtrace_get_modrm(x, &mode, &reg, &r_m); vex_prefetch = 1; if (mode == REG_ONLY) { /* AVX */ vex_prefix = (opcode1 << 4) | opcode2; x->d86_rex_prefix = 0x40; opcode3 = (((mode << 3) | reg)>>1) & 0x0F; opcode4 = ((reg << 3) | r_m) & 0x0F; } } if (vex_prefix == VEX_2bytes) { if (!vex_prefetch) { if (dtrace_get_opcode(x, &opcode3, &opcode4) != 0) goto error; } vex_R = ((opcode3 & VEX_R) & 0x0F) >> 3; vex_L = ((opcode4 & VEX_L) & 0x0F) >> 2; vex_v = (((opcode3 << 4) | opcode4) & VEX_v) >> 3; vex_p = opcode4 & VEX_p; /* * The vex.x and vex.b bits are not defined in the two byte * vex prefix; their default values are 1 */ vex_byte1 = (opcode3 & VEX_R) | VEX_X | VEX_B; if (vex_R == 0) x->d86_rex_prefix |= REX_R; if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) goto error; switch (vex_p) { case VEX_p_66: dp = (instable_t *) &dis_opAVX660F[(opcode1 << 4) | opcode2]; break; case VEX_p_F3: dp = (instable_t *) &dis_opAVXF30F[(opcode1 << 4) | opcode2]; break; case VEX_p_F2: dp = (instable_t *) &dis_opAVXF20F [(opcode1 << 4) | opcode2]; break; default: dp = (instable_t *) &dis_opAVX0F[opcode1][opcode2]; } } else if (vex_prefix == VEX_3bytes) { if (!vex_prefetch) { if (dtrace_get_opcode(x, &opcode3, &opcode4) != 0) goto error; } vex_R = (opcode3 & VEX_R) >> 3; vex_X = (opcode3 & VEX_X) >> 2; vex_B = (opcode3 & VEX_B) >> 1; vex_m = (((opcode3 << 4) | opcode4) & VEX_m); vex_byte1 = opcode3 & (VEX_R | VEX_X | VEX_B); if (vex_R == 0) x->d86_rex_prefix |= REX_R; if (vex_X == 0) x->d86_rex_prefix |= REX_X; if (vex_B == 0) x->d86_rex_prefix |= REX_B; if (dtrace_get_opcode(x, &opcode5, &opcode6) != 0) goto error; vex_W = (opcode5 & VEX_W) >> 3; vex_L = (opcode6 & VEX_L) >> 2; vex_v = (((opcode5 << 4) | opcode6) & VEX_v) >> 3; vex_p = opcode6 & VEX_p; if (vex_W) x->d86_rex_prefix |= REX_W; /* Only these three vex_m values are valid; others are reserved */ if ((vex_m != VEX_m_0F) && (vex_m != VEX_m_0F38) && (vex_m != VEX_m_0F3A)) goto error; if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) goto error; switch (vex_p) { case VEX_p_66: if (vex_m == VEX_m_0F) { dp = (instable_t *) &dis_opAVX660F [(opcode1 << 4) | opcode2]; } else if (vex_m == VEX_m_0F38) { dp = (instable_t *) &dis_opAVX660F38 [(opcode1 << 4) | opcode2]; } else if
(vex_m == VEX_m_0F3A) { dp = (instable_t *) &dis_opAVX660F3A [(opcode1 << 4) | opcode2]; } else { goto error; } break; case VEX_p_F3: if (vex_m == VEX_m_0F) { dp = (instable_t *) &dis_opAVXF30F [(opcode1 << 4) | opcode2]; + } else if (vex_m == VEX_m_0F38) { + dp = (instable_t *) + &dis_opAVXF30F38 + [(opcode1 << 4) | opcode2]; } else { goto error; } break; case VEX_p_F2: if (vex_m == VEX_m_0F) { dp = (instable_t *) &dis_opAVXF20F [(opcode1 << 4) | opcode2]; + } else if (vex_m == VEX_m_0F3A) { + dp = (instable_t *) + &dis_opAVXF20F3A + [(opcode1 << 4) | opcode2]; + } else if (vex_m == VEX_m_0F38) { + dp = (instable_t *) + &dis_opAVXF20F38 + [(opcode1 << 4) | opcode2]; } else { goto error; } break; default: dp = (instable_t *) &dis_opAVX0F[opcode1][opcode2]; } } if (vex_prefix) { - if (vex_L) - wbit = YMM_OPND; - else - wbit = XMM_OPND; + if (dp->it_vexwoxmm) { + wbit = LONG_OPND; + } else { + if (vex_L) + wbit = YMM_OPND; + else + wbit = XMM_OPND; + } } /* * Deal with selection of operand and address size now. * Note that the REX.W bit being set causes opnd_size_prefix to be * ignored. */ if (cpu_mode == SIZE64) { if ((rex_prefix & REX_W) || vex_W) opnd_size = SIZE64; else if (opnd_size_prefix) opnd_size = SIZE16; if (addr_size_prefix) addr_size = SIZE32; } else if (cpu_mode == SIZE32) { if (opnd_size_prefix) opnd_size = SIZE16; if (addr_size_prefix) addr_size = SIZE16; } else { if (opnd_size_prefix) opnd_size = SIZE32; if (addr_size_prefix) addr_size = SIZE32; } /* * The pause instruction - a repz'd nop. This doesn't fit * with any of the other prefix goop added for SSE, so we'll * special-case it here. */ if (rep_prefix == 0xf3 && opcode1 == 0x9 && opcode2 == 0x0) { rep_prefix = 0; dp = (instable_t *)&dis_opPause; } /* * Some 386 instructions have 2 bytes of opcode before the mod_r/m * byte so we may need to perform a table indirection. */ if (dp->it_indirect == (instable_t *)dis_op0F) { if (dtrace_get_opcode(x, &opcode4, &opcode5) != 0) goto error; opcode_bytes = 2; if (opcode4 == 0x7 && opcode5 >= 0x1 && opcode5 <= 0x3) { uint_t subcode; if (dtrace_get_opcode(x, &opcode6, &opcode7) != 0) goto error; opcode_bytes = 3; subcode = ((opcode6 & 0x3) << 1) | ((opcode7 & 0x8) >> 3); dp = (instable_t *)&dis_op0F7123[opcode5][subcode]; } else if ((opcode4 == 0xc) && (opcode5 >= 0x8)) { dp = (instable_t *)&dis_op0FC8[0]; } else if ((opcode4 == 0x3) && (opcode5 == 0xA)) { opcode_bytes = 3; if (dtrace_get_opcode(x, &opcode6, &opcode7) != 0) goto error; if (opnd_size == SIZE16) opnd_size = SIZE32; dp = (instable_t *)&dis_op0F3A[(opcode6<<4)|opcode7]; #ifdef DIS_TEXT if (strcmp(dp->it_name, "INVALID") == 0) goto error; #endif switch (dp->it_adrmode) { + case XMMP: + break; case XMMP_66r: case XMMPRM_66r: case XMM3PM_66r: if (opnd_size_prefix == 0) { goto error; } break; case XMMP_66o: if (opnd_size_prefix == 0) { /* SSSE3 MMX instructions */ dp_mmx = *dp; dp = &dp_mmx; dp->it_adrmode = MMOPM_66o; #ifdef DIS_MEM dp->it_size = 8; #endif } break; default: goto error; } } else if ((opcode4 == 0x3) && (opcode5 == 0x8)) { opcode_bytes = 3; if (dtrace_get_opcode(x, &opcode6, &opcode7) != 0) goto error; dp = (instable_t *)&dis_op0F38[(opcode6<<4)|opcode7]; /* * Both crc32 and movbe have the same 3rd opcode * byte of either 0xF0 or 0xF1, so we use another * indirection to distinguish between the two. 
if (dp->it_indirect == (instable_t *)dis_op0F38F0 || dp->it_indirect == (instable_t *)dis_op0F38F1) { dp = dp->it_indirect; if (rep_prefix != 0xF2) { /* It is movbe */ dp++; } } + + /* + * The adx family of instructions (adcx and adox) + * continue the classic Intel tradition of abusing + * arbitrary prefixes that don't actually carry the + * prefix's usual meaning. Therefore, if we find either + * the opnd_size_prefix or rep_prefix we end up zeroing + * it out after making our determination so as to ensure + * that we don't get confused and accidentally print + * repz prefixes and the like on these instructions. + * + * In addition, these instructions are actually much + * closer to AVX instructions in semantics. Importantly, + * they always default to having 32-bit operands. + * However, if and only if the CPU is in 64-bit mode, + * REX.W promotes the operand size to 64 bits and REX.R + * allows registers r8-r15 to be used. + */ + if (dp->it_indirect == (instable_t *)dis_op0F38F6) { + dp = dp->it_indirect; + if (opnd_size_prefix == 0 && + rep_prefix == 0xf3) { + /* It is adox */ + dp++; + } else if (opnd_size_prefix != 0x66 && + rep_prefix != 0) { + /* It isn't adcx */ + goto error; + } + opnd_size_prefix = 0; + rep_prefix = 0; + opnd_size = SIZE32; + if (rex_prefix & REX_W) + opnd_size = SIZE64; + } + #ifdef DIS_TEXT if (strcmp(dp->it_name, "INVALID") == 0) goto error; #endif switch (dp->it_adrmode) { + case ADX: + case XMM: + break; case RM_66r: case XMM_66r: case XMMM_66r: if (opnd_size_prefix == 0) { goto error; } break; case XMM_66o: if (opnd_size_prefix == 0) { /* SSSE3 MMX instructions */ dp_mmx = *dp; dp = &dp_mmx; dp->it_adrmode = MM; #ifdef DIS_MEM dp->it_size = 8; #endif } break; case CRC32: if (rep_prefix != 0xF2) { goto error; } rep_prefix = 0; break; case MOVBE: if (rep_prefix != 0x0) { goto error; } break; default: goto error; } } else { dp = (instable_t *)&dis_op0F[opcode4][opcode5]; } } /* * If still not at a TERM decode entry, then a ModRM byte * exists and its fields further decode the instruction. */ x->d86_got_modrm = 0; if (dp->it_indirect != TERM) { dtrace_get_modrm(x, &mode, &opcode3, &r_m); if (x->d86_error) goto error; reg = opcode3; /* * decode 287 instructions (D8-DF) from opcodeN */ if (opcode1 == 0xD && opcode2 >= 0x8) { if (opcode2 == 0xB && mode == 0x3 && opcode3 == 4) dp = (instable_t *)&dis_opFP5[r_m]; else if (opcode2 == 0xA && mode == 0x3 && opcode3 < 4) dp = (instable_t *)&dis_opFP7[opcode3]; else if (opcode2 == 0xB && mode == 0x3) dp = (instable_t *)&dis_opFP6[opcode3]; else if (opcode2 == 0x9 && mode == 0x3 && opcode3 >= 4) dp = (instable_t *)&dis_opFP4[opcode3 - 4][r_m]; else if (mode == 0x3) dp = (instable_t *) &dis_opFP3[opcode2 - 8][opcode3]; else dp = (instable_t *) &dis_opFP1n2[opcode2 - 8][opcode3]; } else { dp = (instable_t *)dp->it_indirect + opcode3; } } /* * In amd64 mode, the ARPL opcode is changed to MOVSXD * (sign extend 32bit to 64 bit) */ if ((vex_prefix == 0) && cpu_mode == SIZE64 && opcode1 == 0x6 && opcode2 == 0x3) dp = (instable_t *)&dis_opMOVSLD; /* * at this point we should have a correct (or invalid) opcode */ if (cpu_mode == SIZE64 && dp->it_invalid64 || cpu_mode != SIZE64 && dp->it_invalid32) goto error; if (dp->it_indirect != TERM) goto error; /* - * deal with MMX/SSE opcodes which are changed by prefixes + * Deal with MMX/SSE opcodes which are changed by prefixes. Note, we do + * need to include UNKNOWN below, as we may have instructions that + * actually have a prefix, but don't exist in any other form.
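+ * For example, 66 4f 0f 38 f6 c1 is adcx %r9,%r8: the 66 prefix selects + * adcx rather than adox, and the REX.W bit in 4f promotes the operands to + * 64 bits.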
	 */
	switch (dp->it_adrmode) {
+	case UNKNOWN:
	case MMO:
	case MMOIMPL:
	case MMO3P:
	case MMOM3:
	case MMOMS:
	case MMOPM:
	case MMOPRM:
	case MMOS:
	case XMMO:
	case XMMOM:
	case XMMOMS:
	case XMMOPM:
	case XMMOS:
	case XMMOMX:
	case XMMOX3:
	case XMMOXMM:
		/*
		 * This is horrible. Some SIMD instructions take the
		 * form 0x0F 0x?? ..., which is easily decoded using the
		 * existing tables. Other SIMD instructions use various
		 * prefix bytes to overload existing instructions. For
		 * example, addps is 0F, 58, whereas addss is F3 (repz),
		 * 0F, 58. Presumably someone got a raise for this.
		 *
		 * If we see one of the instructions which can be
		 * modified in this way (if we've got one of the SIMDO*
		 * address modes), we'll check to see if the last prefix
		 * was a repz. If it was, we strip the prefix from the
		 * mnemonic, and we indirect using the dis_opSIMDrepz
		 * table.
		 */

		/*
		 * Calculate our offset in dis_op0F
		 */
		if ((uintptr_t)dp - (uintptr_t)dis_op0F > sizeof (dis_op0F))
			goto error;

		off = ((uintptr_t)dp - (uintptr_t)dis_op0F) /
		    sizeof (instable_t);

		/*
		 * Rewrite if this instruction used one of the magic prefixes.
		 */
		if (rep_prefix) {
			if (rep_prefix == 0xf2)
				dp = (instable_t *)&dis_opSIMDrepnz[off];
			else
				dp = (instable_t *)&dis_opSIMDrepz[off];
			rep_prefix = 0;
		} else if (opnd_size_prefix) {
			dp = (instable_t *)&dis_opSIMDdata16[off];
			opnd_size_prefix = 0;
			if (opnd_size == SIZE16)
				opnd_size = SIZE32;
		}
		break;

	case MG9:
		/*
		 * More horribleness: the group 9 (0x0F 0xC7) instructions are
		 * allowed an optional prefix of 0x66 or 0xF3. This is similar
		 * to the SIMD business described above, but with a different
		 * addressing mode (and an indirect table), so we deal with it
		 * separately (if similarly).
		 *
		 * Intel further complicated this with the release of Ivy
		 * Bridge, where they overloaded these instructions based on
		 * the ModR/M bytes. The VMX instructions have a mode of 0
		 * since they are memory instructions, but the rdrand
		 * instructions have a mode of 0b11 (REG_ONLY) because they
		 * only operate on registers. While there are different
		 * prefix formats, for now it is sufficient to use a single
		 * different table.
		 */

		/*
		 * Calculate our offset in dis_op0FC7 (the group 9 table)
		 */
		if ((uintptr_t)dp - (uintptr_t)dis_op0FC7 > sizeof (dis_op0FC7))
			goto error;

		off = ((uintptr_t)dp - (uintptr_t)dis_op0FC7) /
		    sizeof (instable_t);

		/*
		 * If we have a mode of 0b11 then we have to rewrite this.
		 */
		dtrace_get_modrm(x, &mode, &reg, &r_m);
		if (mode == REG_ONLY) {
			dp = (instable_t *)&dis_op0FC7m3[off];
			break;
		}

		/*
		 * Rewrite if this instruction used one of the magic prefixes.
		 */
		if (rep_prefix) {
			if (rep_prefix == 0xf3)
				dp = (instable_t *)&dis_opF30FC7[off];
			else
				goto error;
			rep_prefix = 0;
		} else if (opnd_size_prefix) {
			dp = (instable_t *)&dis_op660FC7[off];
			opnd_size_prefix = 0;
			if (opnd_size == SIZE16)
				opnd_size = SIZE32;
		}
		break;

	case MMOSH:
		/*
		 * As with the "normal" SIMD instructions, the MMX
		 * shuffle instructions are overloaded. These
		 * instructions, however, are special in that they use
		 * an extra byte, and thus an extra table. As of this
		 * writing, they only use the opnd_size prefix.
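		 *
		 * For example, 0f 71 /2 ib is psrlw with an mm register,
		 * while 66 0f 71 /2 ib selects the xmm form through the
		 * dis_opSIMD7123 table (encodings cited for illustration).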
*/ /* * Calculate our offset in dis_op0F7123 */ if ((uintptr_t)dp - (uintptr_t)dis_op0F7123 > sizeof (dis_op0F7123)) goto error; if (opnd_size_prefix) { off = ((uintptr_t)dp - (uintptr_t)dis_op0F7123) / sizeof (instable_t); dp = (instable_t *)&dis_opSIMD7123[off]; opnd_size_prefix = 0; if (opnd_size == SIZE16) opnd_size = SIZE32; } break; case MRw: if (rep_prefix) { if (rep_prefix == 0xf3) { /* * Calculate our offset in dis_op0F */ if ((uintptr_t)dp - (uintptr_t)dis_op0F > sizeof (dis_op0F)) goto error; off = ((uintptr_t)dp - (uintptr_t)dis_op0F) / sizeof (instable_t); dp = (instable_t *)&dis_opSIMDrepz[off]; rep_prefix = 0; } else { goto error; } } break; } /* * In 64 bit mode, some opcodes automatically use opnd_size == SIZE64. */ if (cpu_mode == SIZE64) if (dp->it_always64 || (opnd_size == SIZE32 && dp->it_stackop)) opnd_size = SIZE64; #ifdef DIS_TEXT /* * At this point most instructions can format the opcode mnemonic * including the prefixes. */ if (lock_prefix) (void) strlcat(x->d86_mnem, "lock ", OPLEN); if (rep_prefix == 0xf2) (void) strlcat(x->d86_mnem, "repnz ", OPLEN); else if (rep_prefix == 0xf3) (void) strlcat(x->d86_mnem, "repz ", OPLEN); if (cpu_mode == SIZE64 && addr_size_prefix) (void) strlcat(x->d86_mnem, "addr32 ", OPLEN); if (dp->it_adrmode != CBW && dp->it_adrmode != CWD && dp->it_adrmode != XMMSFNC) { if (strcmp(dp->it_name, "INVALID") == 0) goto error; (void) strlcat(x->d86_mnem, dp->it_name, OPLEN); - if (dp->it_suffix) { + if (dp->it_avxsuf && dp->it_suffix) { + (void) strlcat(x->d86_mnem, vex_W != 0 ? "q" : "d", + OPLEN); + } else if (dp->it_suffix) { char *types[] = {"", "w", "l", "q"}; if (opcode_bytes == 2 && opcode4 == 4) { /* It's a cmovx.yy. Replace the suffix x */ for (i = 5; i < OPLEN; i++) { if (x->d86_mnem[i] == '.') break; } x->d86_mnem[i - 1] = *types[opnd_size]; } else if ((opnd_size == 2) && (opcode_bytes == 3) && ((opcode6 == 1 && opcode7 == 6) || (opcode6 == 2 && opcode7 == 2))) { /* * To handle PINSRD and PEXTRD */ (void) strlcat(x->d86_mnem, "d", OPLEN); } else { (void) strlcat(x->d86_mnem, types[opnd_size], OPLEN); } } } #endif /* * Process operands based on the addressing modes. 
*/ x->d86_mode = cpu_mode; /* * In vex mode the rex_prefix has no meaning */ if (!vex_prefix) x->d86_rex_prefix = rex_prefix; x->d86_opnd_size = opnd_size; x->d86_addr_size = addr_size; vbit = 0; /* initialize for mem/reg -> reg */ switch (dp->it_adrmode) { /* * amd64 instruction to sign extend 32 bit reg/mem operands * into 64 bit register values */ case MOVSXZ: #ifdef DIS_TEXT if (rex_prefix == 0) (void) strncpy(x->d86_mnem, "movzld", OPLEN); #endif dtrace_get_modrm(x, &mode, ®, &r_m); dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); x->d86_opnd_size = SIZE64; dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); x->d86_opnd_size = opnd_size = SIZE32; wbit = LONG_OPND; dtrace_get_operand(x, mode, r_m, wbit, 0); break; /* * movsbl movsbw movsbq (0x0FBE) or movswl movswq (0x0FBF) * movzbl movzbw movzbq (0x0FB6) or movzwl movzwq (0x0FB7) * wbit lives in 2nd byte, note that operands * are different sized */ case MOVZ: if (rex_prefix & REX_W) { /* target register size = 64 bit */ x->d86_mnem[5] = 'q'; } dtrace_get_modrm(x, &mode, ®, &r_m); dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); x->d86_opnd_size = opnd_size = SIZE16; wbit = WBIT(opcode5); dtrace_get_operand(x, mode, r_m, wbit, 0); break; case CRC32: opnd_size = SIZE32; if (rex_prefix & REX_W) opnd_size = SIZE64; x->d86_opnd_size = opnd_size; dtrace_get_modrm(x, &mode, ®, &r_m); dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); wbit = WBIT(opcode7); if (opnd_size_prefix) x->d86_opnd_size = opnd_size = SIZE16; dtrace_get_operand(x, mode, r_m, wbit, 0); break; case MOVBE: opnd_size = SIZE32; if (rex_prefix & REX_W) opnd_size = SIZE64; x->d86_opnd_size = opnd_size; dtrace_get_modrm(x, &mode, ®, &r_m); dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); wbit = WBIT(opcode7); if (opnd_size_prefix) x->d86_opnd_size = opnd_size = SIZE16; if (wbit) { /* reg -> mem */ dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 0); dtrace_get_operand(x, mode, r_m, wbit, 1); } else { /* mem -> reg */ dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); dtrace_get_operand(x, mode, r_m, wbit, 0); } break; /* * imul instruction, with either 8-bit or longer immediate * opcode 0x6B for byte, sign-extended displacement, 0x69 for word(s) */ case IMUL: wbit = LONG_OPND; THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, OPSIZE(opnd_size, opcode2 == 0x9), 1); break; /* memory or register operand to register, with 'w' bit */ case MRw: + case ADX: wbit = WBIT(opcode2); STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0); break; /* register to memory or register operand, with 'w' bit */ /* arpl happens to fit here also because it is odd */ case RMw: if (opcode_bytes == 2) wbit = WBIT(opcode5); else wbit = WBIT(opcode2); STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); break; /* xaddb instruction */ case XADDB: wbit = 0; STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); break; /* MMX register to memory or register operand */ case MMS: case MMOS: #ifdef DIS_TEXT wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; #else wbit = LONG_OPND; #endif MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 1); break; /* MMX register to memory */ case MMOMS: dtrace_get_modrm(x, &mode, ®, &r_m); if (mode == REG_ONLY) goto error; wbit = MM_OPND; MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 1); break; /* Double shift. Has immediate operand specifying the shift. 
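	 * For example (bytes cited for illustration), 0f a4 d0 04 is
	 * shld $0x4, %edx, %eax: the immediate prints first, then
	 * ModR/M.reg, then ModR/M.r/m.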
*/ case DSHIFT: wbit = LONG_OPND; dtrace_get_modrm(x, &mode, ®, &r_m); dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); dtrace_get_operand(x, mode, r_m, wbit, 2); dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); dtrace_imm_opnd(x, wbit, 1, 0); break; /* * Double shift. With no immediate operand, specifies using %cl. */ case DSHIFTcl: wbit = LONG_OPND; STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); break; /* immediate to memory or register operand */ case IMlw: wbit = WBIT(opcode2); dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); dtrace_get_operand(x, mode, r_m, wbit, 1); /* * Have long immediate for opcode 0x81, but not 0x80 nor 0x83 */ dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, opcode2 == 1), 0); break; /* immediate to memory or register operand with the */ /* 'w' bit present */ case IMw: wbit = WBIT(opcode2); dtrace_get_modrm(x, &mode, ®, &r_m); dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); dtrace_get_operand(x, mode, r_m, wbit, 1); dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, wbit), 0); break; /* immediate to register with register in low 3 bits */ /* of op code */ case IR: /* w-bit here (with regs) is bit 3 */ wbit = opcode2 >>3 & 0x1; reg = REGNO(opcode2); dtrace_rex_adjust(rex_prefix, mode, ®, NULL); mode = REG_ONLY; r_m = reg; dtrace_get_operand(x, mode, r_m, wbit, 1); dtrace_imm_opnd(x, wbit, OPSIZE64(opnd_size, wbit), 0); break; /* MMX immediate shift of register */ case MMSH: case MMOSH: wbit = MM_OPND; goto mm_shift; /* in next case */ /* SIMD immediate shift of register */ case XMMSH: wbit = XMM_OPND; mm_shift: reg = REGNO(opcode7); dtrace_rex_adjust(rex_prefix, mode, ®, NULL); dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); dtrace_imm_opnd(x, wbit, 1, 0); NOMEM; break; /* accumulator to memory operand */ case AO: vbit = 1; /*FALLTHROUGH*/ /* memory operand to accumulator */ case OA: wbit = WBIT(opcode2); dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 1 - vbit); dtrace_imm_opnd(x, wbit, OPSIZE64(addr_size, LONG_OPND), vbit); #ifdef DIS_TEXT x->d86_opnd[vbit].d86_mode = MODE_OFFSET; #endif break; /* segment register to memory or register operand */ case SM: vbit = 1; /*FALLTHROUGH*/ /* memory or register operand to segment register */ case MS: dtrace_get_modrm(x, &mode, ®, &r_m); dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); dtrace_get_operand(x, mode, r_m, LONG_OPND, vbit); dtrace_get_operand(x, REG_ONLY, reg, SEG_OPND, 1 - vbit); break; /* * rotate or shift instructions, which may shift by 1 or * consult the cl register, depending on the 'v' bit */ case Mv: vbit = VBIT(opcode2); wbit = WBIT(opcode2); dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); dtrace_get_operand(x, mode, r_m, wbit, 1); #ifdef DIS_TEXT if (vbit) { (void) strlcat(x->d86_opnd[0].d86_opnd, "%cl", OPLEN); } else { x->d86_opnd[0].d86_mode = MODE_SIGNED; x->d86_opnd[0].d86_value_size = 1; x->d86_opnd[0].d86_value = 1; } #endif break; /* * immediate rotate or shift instructions */ case MvI: wbit = WBIT(opcode2); normal_imm_mem: dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); dtrace_get_operand(x, mode, r_m, wbit, 1); dtrace_imm_opnd(x, wbit, 1, 0); break; /* bit test instructions */ case MIb: wbit = LONG_OPND; goto normal_imm_mem; /* single memory or register operand with 'w' bit present */ case Mw: wbit = WBIT(opcode2); just_mem: dtrace_get_modrm(x, &mode, ®, &r_m); dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); dtrace_get_operand(x, mode, r_m, wbit, 0); break; case SWAPGS_RDTSCP: if (cpu_mode == SIZE64 && mode == 3 && r_m == 0) { #ifdef DIS_TEXT (void) strncpy(x->d86_mnem, "swapgs", OPLEN); 
#endif
			NOMEM;
			break;
		} else if (mode == 3 && r_m == 1) {
#ifdef DIS_TEXT
			(void) strncpy(x->d86_mnem, "rdtscp", OPLEN);
#endif
			NOMEM;
			break;
		}
		/*FALLTHROUGH*/

	/* prefetch instruction - memory operand, but no memory access */
	case PREF:
		NOMEM;
		/*FALLTHROUGH*/

	/* single memory or register operand */
	case M:
	case MG9:
		wbit = LONG_OPND;
		goto just_mem;

	/* single memory or register byte operand */
	case Mb:
		wbit = BYTE_OPND;
		goto just_mem;

	case VMx:
		if (mode == 3) {
#ifdef DIS_TEXT
			char *vminstr;

			switch (r_m) {
			case 1:
				vminstr = "vmcall";
				break;
			case 2:
				vminstr = "vmlaunch";
				break;
			case 3:
				vminstr = "vmresume";
				break;
			case 4:
				vminstr = "vmxoff";
				break;
			default:
				goto error;
			}

			(void) strncpy(x->d86_mnem, vminstr, OPLEN);
#else
			if (r_m < 1 || r_m > 4)
				goto error;
#endif
			NOMEM;
			break;
		}
		/*FALLTHROUGH*/
	case SVM:
		if (mode == 3) {
#ifdef DIS_TEXT
			char *vinstr;

			switch (r_m) {
			case 0:
				vinstr = "vmrun";
				break;
			case 1:
				vinstr = "vmmcall";
				break;
			case 2:
				vinstr = "vmload";
				break;
			case 3:
				vinstr = "vmsave";
				break;
			case 4:
				vinstr = "stgi";
				break;
			case 5:
				vinstr = "clgi";
				break;
			case 6:
				vinstr = "skinit";
				break;
			case 7:
				vinstr = "invlpga";
				break;
			}
			(void) strncpy(x->d86_mnem, vinstr, OPLEN);
#endif
			NOMEM;
			break;
		}
		/*FALLTHROUGH*/
	case MONITOR_MWAIT:
		if (mode == 3) {
			if (r_m == 0) {
#ifdef DIS_TEXT
				(void) strncpy(x->d86_mnem, "monitor", OPLEN);
#endif
				NOMEM;
				break;
			} else if (r_m == 1) {
#ifdef DIS_TEXT
				(void) strncpy(x->d86_mnem, "mwait", OPLEN);
#endif
				NOMEM;
				break;
+			} else if (r_m == 2) {
+#ifdef DIS_TEXT
+				(void) strncpy(x->d86_mnem, "clac", OPLEN);
+#endif
+				NOMEM;
+				break;
+			} else if (r_m == 3) {
+#ifdef DIS_TEXT
+				(void) strncpy(x->d86_mnem, "stac", OPLEN);
+#endif
+				NOMEM;
+				break;
			} else {
				goto error;
			}
		}
		/*FALLTHROUGH*/
	case XGETBV_XSETBV:
		if (mode == 3) {
			if (r_m == 0) {
#ifdef DIS_TEXT
				(void) strncpy(x->d86_mnem, "xgetbv", OPLEN);
#endif
				NOMEM;
				break;
			} else if (r_m == 1) {
#ifdef DIS_TEXT
				(void) strncpy(x->d86_mnem, "xsetbv", OPLEN);
#endif
				NOMEM;
				break;
			} else {
				goto error;
			}
		}
		/*FALLTHROUGH*/
	case MO:
		/* Similar to M, but only memory (no direct registers) */
		wbit = LONG_OPND;
		dtrace_get_modrm(x, &mode, &reg, &r_m);
		if (mode == 3)
			goto error;
		dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m);
		dtrace_get_operand(x, mode, r_m, wbit, 0);
		break;

	/* move special register to register or reverse if vbit */
	case SREG:
		switch (opcode5) {
		case 2:
			vbit = 1;
			/*FALLTHROUGH*/
		case 0:
			wbit = CONTROL_OPND;
			break;
		case 3:
			vbit = 1;
			/*FALLTHROUGH*/
		case 1:
			wbit = DEBUG_OPND;
			break;
		case 6:
			vbit = 1;
			/*FALLTHROUGH*/
		case 4:
			wbit = TEST_OPND;
			break;
		}
		dtrace_get_modrm(x, &mode, &reg, &r_m);
		dtrace_rex_adjust(rex_prefix, mode, &reg, &r_m);
		dtrace_get_operand(x, REG_ONLY, reg, wbit, vbit);
		dtrace_get_operand(x, REG_ONLY, r_m, LONG_OPND, 1 - vbit);
		NOMEM;
		break;

	/*
	 * single register operand with register in the low 3
	 * bits of op code
	 */
	case R:
		if (opcode_bytes == 2)
			reg = REGNO(opcode5);
		else
			reg = REGNO(opcode2);
		dtrace_rex_adjust(rex_prefix, mode, &reg, NULL);
		dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 0);
		NOMEM;
		break;

	/*
	 * register to accumulator with register in the low 3
	 * bits of op code, xchg instructions
	 */
	case RA:
		NOMEM;
		reg = REGNO(opcode2);
		dtrace_rex_adjust(rex_prefix, mode, &reg, NULL);
		dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 0);
		dtrace_get_operand(x, REG_ONLY, EAX_REGNO, LONG_OPND, 1);
		break;

	/*
	 * single segment register operand, with register in
	 * bits 3-4 of op code byte
	 */
	case SEG:
		NOMEM;
		reg = (x->d86_bytes[x->d86_len - 1] >> 3) & 0x3;
		dtrace_get_operand(x, REG_ONLY, reg, SEG_OPND, 0);
		break;

	/*
	 *
single segment register operand, with register in * bits 3-5 of op code */ case LSEG: NOMEM; /* long seg reg from opcode */ reg = (x->d86_bytes[x->d86_len - 1] >> 3) & 0x7; dtrace_get_operand(x, REG_ONLY, reg, SEG_OPND, 0); break; /* memory or register operand to register */ case MR: if (vex_prefetch) x->d86_got_modrm = 1; wbit = LONG_OPND; STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0); break; case RM: case RM_66r: wbit = LONG_OPND; STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); break; /* MMX/SIMD-Int memory or mm reg to mm reg */ case MM: case MMO: #ifdef DIS_TEXT wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; #else wbit = LONG_OPND; #endif MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 0); break; case MMOIMPL: #ifdef DIS_TEXT wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; #else wbit = LONG_OPND; #endif dtrace_get_modrm(x, &mode, ®, &r_m); if (mode != REG_ONLY) goto error; dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); dtrace_get_operand(x, mode, r_m, wbit, 0); dtrace_get_operand(x, REG_ONLY, reg, MM_OPND, 1); mode = 0; /* change for memory access size... */ break; /* MMX/SIMD-Int and SIMD-FP predicated mm reg to r32 */ case MMO3P: wbit = MM_OPND; goto xmm3p; case XMM3P: wbit = XMM_OPND; xmm3p: dtrace_get_modrm(x, &mode, ®, &r_m); if (mode != REG_ONLY) goto error; THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, 1, 1); NOMEM; break; case XMM3PM_66r: THREEOPERAND(x, mode, reg, r_m, rex_prefix, LONG_OPND, XMM_OPND, 1, 0); break; /* MMX/SIMD-Int predicated r32/mem to mm reg */ case MMOPRM: wbit = LONG_OPND; w2 = MM_OPND; goto xmmprm; case XMMPRM: case XMMPRM_66r: wbit = LONG_OPND; w2 = XMM_OPND; xmmprm: THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, w2, 1, 1); break; /* MMX/SIMD-Int predicated mm/mem to mm reg */ case MMOPM: case MMOPM_66o: wbit = w2 = MM_OPND; goto xmmprm; /* MMX/SIMD-Int mm reg to r32 */ case MMOM3: NOMEM; dtrace_get_modrm(x, &mode, ®, &r_m); if (mode != REG_ONLY) goto error; wbit = MM_OPND; MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, 0); break; /* SIMD memory or xmm reg operand to xmm reg */ case XMM: case XMM_66o: case XMM_66r: case XMMO: case XMMXIMPL: wbit = XMM_OPND; STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0); if (dp->it_adrmode == XMMXIMPL && mode != REG_ONLY) goto error; #ifdef DIS_TEXT /* * movlps and movhlps share opcodes. They differ in the * addressing modes allowed for their operands. * movhps and movlhps behave similarly. */ if (mode == REG_ONLY) { if (strcmp(dp->it_name, "movlps") == 0) (void) strncpy(x->d86_mnem, "movhlps", OPLEN); else if (strcmp(dp->it_name, "movhps") == 0) (void) strncpy(x->d86_mnem, "movlhps", OPLEN); } #endif if (dp->it_adrmode == XMMXIMPL) mode = 0; /* change for memory access size... 
*/ break; /* SIMD xmm reg to memory or xmm reg */ case XMMS: case XMMOS: case XMMMS: case XMMOMS: dtrace_get_modrm(x, &mode, ®, &r_m); #ifdef DIS_TEXT if ((strcmp(dp->it_name, "movlps") == 0 || strcmp(dp->it_name, "movhps") == 0 || strcmp(dp->it_name, "movntps") == 0) && mode == REG_ONLY) goto error; #endif wbit = XMM_OPND; MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 1); break; /* SIMD memory to xmm reg */ case XMMM: case XMMM_66r: case XMMOM: wbit = XMM_OPND; dtrace_get_modrm(x, &mode, ®, &r_m); #ifdef DIS_TEXT if (mode == REG_ONLY) { if (strcmp(dp->it_name, "movhps") == 0) (void) strncpy(x->d86_mnem, "movlhps", OPLEN); else goto error; } #endif MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 0); break; /* SIMD memory or r32 to xmm reg */ case XMM3MX: wbit = LONG_OPND; MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 0); break; case XMM3MXS: wbit = LONG_OPND; MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 1); break; /* SIMD memory or mm reg to xmm reg */ case XMMOMX: /* SIMD mm to xmm */ case XMMMX: wbit = MM_OPND; MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 0); break; /* SIMD memory or xmm reg to mm reg */ case XMMXMM: case XMMOXMM: case XMMXM: wbit = XMM_OPND; MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 0); break; /* SIMD memory or xmm reg to r32 */ case XMMXM3: wbit = XMM_OPND; MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, 0); break; /* SIMD xmm to r32 */ case XMMX3: case XMMOX3: dtrace_get_modrm(x, &mode, ®, &r_m); if (mode != REG_ONLY) goto error; dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); dtrace_get_operand(x, mode, r_m, XMM_OPND, 0); dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); NOMEM; break; /* SIMD predicated memory or xmm reg with/to xmm reg */ case XMMP: case XMMP_66r: case XMMP_66o: case XMMOPM: wbit = XMM_OPND; THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 1, 1); #ifdef DIS_TEXT /* * cmpps and cmpss vary their instruction name based * on the value of imm8. Other XMMP instructions, * such as shufps, require explicit specification of * the predicate. 
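		 *
		 * For example, 0f c2 c1 00 renders as cmpeqps %xmm1, %xmm0
		 * rather than cmpps $0x0, %xmm1, %xmm0 (bytes cited for
		 * illustration).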
*/ if (dp->it_name[0] == 'c' && dp->it_name[1] == 'm' && dp->it_name[2] == 'p' && strlen(dp->it_name) == 5) { uchar_t pred = x->d86_opnd[0].d86_value & 0xff; if (pred >= (sizeof (dis_PREDSUFFIX) / sizeof (char *))) goto error; (void) strncpy(x->d86_mnem, "cmp", OPLEN); (void) strlcat(x->d86_mnem, dis_PREDSUFFIX[pred], OPLEN); (void) strlcat(x->d86_mnem, dp->it_name + strlen(dp->it_name) - 2, OPLEN); x->d86_opnd[0] = x->d86_opnd[1]; x->d86_opnd[1] = x->d86_opnd[2]; x->d86_numopnds = 2; } #endif break; case XMMX2I: FOUROPERAND(x, mode, reg, r_m, rex_prefix, XMM_OPND, XMM_OPND, 1); NOMEM; break; case XMM2I: ONEOPERAND_TWOIMM(x, mode, reg, r_m, rex_prefix, XMM_OPND, 1); NOMEM; break; /* immediate operand to accumulator */ case IA: wbit = WBIT(opcode2); dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 1); dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, wbit), 0); NOMEM; break; /* memory or register operand to accumulator */ case MA: wbit = WBIT(opcode2); dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); dtrace_get_operand(x, mode, r_m, wbit, 0); break; /* si register to di register used to reference memory */ case SD: #ifdef DIS_TEXT dtrace_check_override(x, 0); x->d86_numopnds = 2; if (addr_size == SIZE64) { (void) strlcat(x->d86_opnd[0].d86_opnd, "(%rsi)", OPLEN); (void) strlcat(x->d86_opnd[1].d86_opnd, "(%rdi)", OPLEN); } else if (addr_size == SIZE32) { (void) strlcat(x->d86_opnd[0].d86_opnd, "(%esi)", OPLEN); (void) strlcat(x->d86_opnd[1].d86_opnd, "(%edi)", OPLEN); } else { (void) strlcat(x->d86_opnd[0].d86_opnd, "(%si)", OPLEN); (void) strlcat(x->d86_opnd[1].d86_opnd, "(%di)", OPLEN); } #endif wbit = LONG_OPND; break; /* accumulator to di register */ case AD: wbit = WBIT(opcode2); #ifdef DIS_TEXT dtrace_check_override(x, 1); x->d86_numopnds = 2; dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 0); if (addr_size == SIZE64) (void) strlcat(x->d86_opnd[1].d86_opnd, "(%rdi)", OPLEN); else if (addr_size == SIZE32) (void) strlcat(x->d86_opnd[1].d86_opnd, "(%edi)", OPLEN); else (void) strlcat(x->d86_opnd[1].d86_opnd, "(%di)", OPLEN); #endif break; /* si register to accumulator */ case SA: wbit = WBIT(opcode2); #ifdef DIS_TEXT dtrace_check_override(x, 0); x->d86_numopnds = 2; if (addr_size == SIZE64) (void) strlcat(x->d86_opnd[0].d86_opnd, "(%rsi)", OPLEN); else if (addr_size == SIZE32) (void) strlcat(x->d86_opnd[0].d86_opnd, "(%esi)", OPLEN); else (void) strlcat(x->d86_opnd[0].d86_opnd, "(%si)", OPLEN); dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 1); #endif break; /* * single operand, a 16/32 bit displacement */ case D: wbit = LONG_OPND; dtrace_disp_opnd(x, wbit, OPSIZE(opnd_size, LONG_OPND), 0); NOMEM; break; /* jmp/call indirect to memory or register operand */ case INM: #ifdef DIS_TEXT (void) strlcat(x->d86_opnd[0].d86_prefix, "*", OPLEN); #endif dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); dtrace_get_operand(x, mode, r_m, LONG_OPND, 0); wbit = LONG_OPND; break; /* * for long jumps and long calls -- a new code segment * register and an offset in IP -- stored in object * code in reverse order. Note - not valid in amd64 */ case SO: dtrace_check_override(x, 1); wbit = LONG_OPND; dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, LONG_OPND), 1); #ifdef DIS_TEXT x->d86_opnd[1].d86_mode = MODE_SIGNED; #endif /* will now get segment operand */ dtrace_imm_opnd(x, wbit, 2, 0); break; /* * jmp/call. single operand, 8 bit displacement. 
	 * added to current EIP in 'compofff'
	 */
	case BD:
		dtrace_disp_opnd(x, BYTE_OPND, 1, 0);
		NOMEM;
		break;

	/* single 32/16 bit immediate operand */
	case I:
		wbit = LONG_OPND;
		dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, LONG_OPND), 0);
		break;

	/* single 8 bit immediate operand */
	case Ib:
		wbit = LONG_OPND;
		dtrace_imm_opnd(x, wbit, 1, 0);
		break;

	case ENTER:
		wbit = LONG_OPND;
		dtrace_imm_opnd(x, wbit, 2, 0);
		dtrace_imm_opnd(x, wbit, 1, 1);
		switch (opnd_size) {
		case SIZE64:
			x->d86_memsize = (x->d86_opnd[1].d86_value + 1) * 8;
			break;
		case SIZE32:
			x->d86_memsize = (x->d86_opnd[1].d86_value + 1) * 4;
			break;
		case SIZE16:
			x->d86_memsize = (x->d86_opnd[1].d86_value + 1) * 2;
			break;
		}
		break;

	/* 16-bit immediate operand */
	case RET:
		wbit = LONG_OPND;
		dtrace_imm_opnd(x, wbit, 2, 0);
		break;

	/* single 8 bit port operand */
	case P:
		dtrace_check_override(x, 0);
		dtrace_imm_opnd(x, BYTE_OPND, 1, 0);
		NOMEM;
		break;

	/* single operand, dx register (variable port instruction) */
	case V:
		x->d86_numopnds = 1;
		dtrace_check_override(x, 0);
#ifdef DIS_TEXT
		(void) strlcat(x->d86_opnd[0].d86_opnd, "(%dx)", OPLEN);
#endif
		NOMEM;
		break;

	/*
	 * The int instruction, which has two forms:
	 * int 3 (breakpoint) or
	 * int n, where n is indicated in the subsequent
	 * byte (format Ib). In the int 3 instruction (opcode 0xCC),
	 * although the 3 looks like an operand, it is implied by the
	 * opcode; it must be converted to the correct base and output.
	 */
	case INT3:
#ifdef DIS_TEXT
		x->d86_numopnds = 1;
		x->d86_opnd[0].d86_mode = MODE_SIGNED;
		x->d86_opnd[0].d86_value_size = 1;
		x->d86_opnd[0].d86_value = 3;
#endif
		NOMEM;
		break;

	/* single 8 bit immediate operand */
	case INTx:
		dtrace_imm_opnd(x, BYTE_OPND, 1, 0);
		NOMEM;
		break;

	/* an unused byte must be discarded */
	case U:
		if (x->d86_get_byte(x->d86_data) < 0)
			goto error;
		x->d86_len++;
		NOMEM;
		break;

	case CBW:
#ifdef DIS_TEXT
		if (opnd_size == SIZE16)
			(void) strlcat(x->d86_mnem, "cbtw", OPLEN);
		else if (opnd_size == SIZE32)
			(void) strlcat(x->d86_mnem, "cwtl", OPLEN);
		else
			(void) strlcat(x->d86_mnem, "cltq", OPLEN);
#endif
		wbit = LONG_OPND;
		NOMEM;
		break;

	case CWD:
#ifdef DIS_TEXT
		if (opnd_size == SIZE16)
			(void) strlcat(x->d86_mnem, "cwtd", OPLEN);
		else if (opnd_size == SIZE32)
			(void) strlcat(x->d86_mnem, "cltd", OPLEN);
		else
			(void) strlcat(x->d86_mnem, "cqtd", OPLEN);
#endif
		wbit = LONG_OPND;
		NOMEM;
		break;

	case XMMSFNC:
		/*
		 * The instruction is sfence if mode is REG_ONLY; if mode
		 * isn't REG_ONLY, the mnemonic should be 'clflush'.
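		 *
		 * That is, 0f ae f8 is sfence, while 0f ae /7 with a memory
		 * operand is clflush; e.g. 0f ae 38 is clflush (%eax) in
		 * 32-bit mode (bytes cited for illustration).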
		 */
		dtrace_get_modrm(x, &mode, &reg, &r_m);
		/* sfence doesn't take operands */
#ifdef DIS_TEXT
		if (mode == REG_ONLY) {
			(void) strlcat(x->d86_mnem, "sfence", OPLEN);
		} else {
			(void) strlcat(x->d86_mnem, "clflush", OPLEN);
			dtrace_rex_adjust(rex_prefix, mode, &reg, &r_m);
			dtrace_get_operand(x, mode, r_m, BYTE_OPND, 0);
			NOMEM;
		}
#else
		if (mode != REG_ONLY) {
			dtrace_rex_adjust(rex_prefix, mode, &reg, &r_m);
			dtrace_get_operand(x, mode, r_m, LONG_OPND, 0);
			NOMEM;
		}
#endif
		break;

	/*
	 * no disassembly; the mnemonic was all there was, so go on
	 */
	case NORM:
		if (dp->it_invalid32 && cpu_mode != SIZE64)
			goto error;
		NOMEM;
		/*FALLTHROUGH*/
	case IMPLMEM:
		break;

	case XMMFENCE:
		/*
		 * XRSTOR and LFENCE share the same opcode but differ in mode
		 */
		dtrace_get_modrm(x, &mode, &reg, &r_m);

		if (mode == REG_ONLY) {
			/*
			 * Only the following exact byte sequences are allowed:
			 *
			 *	0f ae e8	lfence
			 *	0f ae f0	mfence
			 */
			if ((uint8_t)x->d86_bytes[x->d86_len - 1] != 0xe8 &&
			    (uint8_t)x->d86_bytes[x->d86_len - 1] != 0xf0)
				goto error;
		} else {
#ifdef DIS_TEXT
			(void) strncpy(x->d86_mnem, "xrstor", OPLEN);
#endif
			dtrace_rex_adjust(rex_prefix, mode, &reg, &r_m);
			dtrace_get_operand(x, mode, r_m, BYTE_OPND, 0);
		}
		break;

	/* float reg */
	case F:
#ifdef DIS_TEXT
		x->d86_numopnds = 1;
		(void) strlcat(x->d86_opnd[0].d86_opnd, "%st(X)", OPLEN);
		x->d86_opnd[0].d86_opnd[4] = r_m + '0';
#endif
		NOMEM;
		break;

	/* float reg to float reg, with ret bit present */
	case FF:
		vbit = opcode2 >> 2 & 0x1;	/* vbit = 1: st -> st(i) */
		/*FALLTHROUGH*/
	case FFC:				/* case for vbit always = 0 */
#ifdef DIS_TEXT
		x->d86_numopnds = 2;
		(void) strlcat(x->d86_opnd[1 - vbit].d86_opnd, "%st", OPLEN);
		(void) strlcat(x->d86_opnd[vbit].d86_opnd, "%st(X)", OPLEN);
		x->d86_opnd[vbit].d86_opnd[4] = r_m + '0';
#endif
		NOMEM;
		break;

	/* AVX instructions */
	case VEX_MO:
		/* op(ModR/M.r/m) */
		x->d86_numopnds = 1;
		dtrace_get_modrm(x, &mode, &reg, &r_m);
#ifdef DIS_TEXT
		if ((dp == &dis_opAVX0F[0xA][0xE]) && (reg == 3))
			(void) strncpy(x->d86_mnem, "vstmxcsr", OPLEN);
#endif
		dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
		dtrace_get_operand(x, mode, r_m, wbit, 0);
		break;

	case VEX_RMrX:
+	case FMA:
		/* ModR/M.reg := op(VEX.vvvv, ModR/M.r/m) */
		x->d86_numopnds = 3;
		dtrace_get_modrm(x, &mode, &reg, &r_m);
		dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);

+		/*
+		 * In classic Intel fashion, the opcodes for all of the FMA
+		 * instructions have two possible mnemonics which vary by one
+		 * letter, selected by the value of the VEX.W bit. When VEX.W
+		 * is one they take the 'd' suffix, and when it is zero they
+		 * take the 's' suffix. Otherwise, the FMA instructions are
+		 * all a standard VEX_RMrX.
+		 */
+#ifdef DIS_TEXT
+		if (dp->it_adrmode == FMA) {
+			size_t len = strlen(dp->it_name);
+			(void) strncpy(x->d86_mnem, dp->it_name, OPLEN);
+			if (len + 1 < OPLEN) {
+				(void) strncpy(x->d86_mnem + len,
+				    vex_W != 0 ?
"d" : "s", OPLEN - len); + } + } +#endif + if (mode != REG_ONLY) { if ((dp == &dis_opAVXF20F[0x10]) || (dp == &dis_opAVXF30F[0x10])) { /* vmovsd , */ /* or vmovss , */ x->d86_numopnds = 2; goto L_VEX_MX; } } dtrace_get_operand(x, REG_ONLY, reg, wbit, 2); /* * VEX prefix uses the 1's complement form to encode the * XMM/YMM regs */ dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 1); if ((dp == &dis_opAVXF20F[0x2A]) || (dp == &dis_opAVXF30F[0x2A])) { /* * vcvtsi2si , , or vcvtsi2ss , * , */ wbit = LONG_OPND; } #ifdef DIS_TEXT else if ((mode == REG_ONLY) && (dp == &dis_opAVX0F[0x1][0x6])) { /* vmovlhps */ (void) strncpy(x->d86_mnem, "vmovlhps", OPLEN); } else if ((mode == REG_ONLY) && (dp == &dis_opAVX0F[0x1][0x2])) { /* vmovhlps */ (void) strncpy(x->d86_mnem, "vmovhlps", OPLEN); } #endif dtrace_get_operand(x, mode, r_m, wbit, 0); break; + case VEX_VRMrX: + /* ModR/M.reg := op(MODR/M.r/m, VEX.vvvv) */ + x->d86_numopnds = 3; + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + dtrace_get_operand(x, REG_ONLY, reg, wbit, 2); + /* + * VEX prefix uses the 1's complement form to encode the + * XMM/YMM regs + */ + dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 0); + + dtrace_get_operand(x, mode, r_m, wbit, 1); + break; + + case VEX_SbVM: + /* ModR/M.reg := op(MODR/M.r/m, VSIB, VEX.vvvv) */ + x->d86_numopnds = 3; + x->d86_vsib = 1; + + /* + * All instructions that use VSIB are currently a mess. See the + * comment around the dis_gather_regs_t structure definition. + */ + + vreg = &dis_vgather[opcode2][vex_W][vex_L]; + +#ifdef DIS_TEXT + (void) strncpy(x->d86_mnem, dp->it_name, OPLEN); + (void) strlcat(x->d86_mnem + strlen(dp->it_name), + vreg->dgr_suffix, OPLEN - strlen(dp->it_name)); +#endif + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + dtrace_get_operand(x, REG_ONLY, reg, vreg->dgr_arg2, 2); + /* + * VEX prefix uses the 1's complement form to encode the + * XMM/YMM regs + */ + dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), vreg->dgr_arg0, + 0); + dtrace_get_operand(x, mode, r_m, vreg->dgr_arg1, 1); + break; + case VEX_RRX: /* ModR/M.rm := op(VEX.vvvv, ModR/M.reg) */ x->d86_numopnds = 3; dtrace_get_modrm(x, &mode, ®, &r_m); dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); if (mode != REG_ONLY) { if ((dp == &dis_opAVXF20F[0x11]) || (dp == &dis_opAVXF30F[0x11])) { /* vmovsd , */ /* or vmovss , */ x->d86_numopnds = 2; goto L_VEX_RM; } } dtrace_get_operand(x, mode, r_m, wbit, 2); dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 1); dtrace_get_operand(x, REG_ONLY, reg, wbit, 0); break; case VEX_RMRX: /* ModR/M.reg := op(VEX.vvvv, ModR/M.r_m, imm8[7:4]) */ x->d86_numopnds = 4; dtrace_get_modrm(x, &mode, ®, &r_m); dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); dtrace_get_operand(x, REG_ONLY, reg, wbit, 3); dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 2); if (dp == &dis_opAVX660F3A[0x18]) { /* vinsertf128 , , , */ dtrace_get_operand(x, mode, r_m, XMM_OPND, 1); } else if ((dp == &dis_opAVX660F3A[0x20]) || (dp == & dis_opAVX660F[0xC4])) { /* vpinsrb , , , */ /* or vpinsrw , , , */ dtrace_get_operand(x, mode, r_m, LONG_OPND, 1); } else if (dp == &dis_opAVX660F3A[0x22]) { /* vpinsrd/q , , , */ #ifdef DIS_TEXT if (vex_W) x->d86_mnem[6] = 'q'; #endif dtrace_get_operand(x, mode, r_m, LONG_OPND, 1); } else { dtrace_get_operand(x, mode, r_m, wbit, 1); } /* one byte immediate number */ dtrace_imm_opnd(x, wbit, 1, 0); /* vblendvpd, vblendvps, vblendvb use the imm encode the regs */ if ((dp == &dis_opAVX660F3A[0x4A]) || 
(dp == &dis_opAVX660F3A[0x4B]) || (dp == &dis_opAVX660F3A[0x4C])) { #ifdef DIS_TEXT int regnum = (x->d86_opnd[0].d86_value & 0xF0) >> 4; #endif x->d86_opnd[0].d86_mode = MODE_NONE; #ifdef DIS_TEXT if (vex_L) (void) strncpy(x->d86_opnd[0].d86_opnd, dis_YMMREG[regnum], OPLEN); else (void) strncpy(x->d86_opnd[0].d86_opnd, dis_XMMREG[regnum], OPLEN); #endif } break; case VEX_MX: /* ModR/M.reg := op(ModR/M.rm) */ x->d86_numopnds = 2; dtrace_get_modrm(x, &mode, ®, &r_m); dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); L_VEX_MX: if ((dp == &dis_opAVXF20F[0xE6]) || (dp == &dis_opAVX660F[0x5A]) || (dp == &dis_opAVX660F[0xE6])) { /* vcvtpd2dq , */ /* or vcvtpd2ps , */ /* or vcvttpd2dq , */ dtrace_get_operand(x, REG_ONLY, reg, XMM_OPND, 1); dtrace_get_operand(x, mode, r_m, wbit, 0); } else if ((dp == &dis_opAVXF30F[0xE6]) || (dp == &dis_opAVX0F[0x5][0xA]) || - (dp == &dis_opAVX660F38[0x13])) { + (dp == &dis_opAVX660F38[0x13]) || + (dp == &dis_opAVX660F38[0x18]) || + (dp == &dis_opAVX660F38[0x19]) || + (dp == &dis_opAVX660F38[0x58]) || + (dp == &dis_opAVX660F38[0x78]) || + (dp == &dis_opAVX660F38[0x79]) || + (dp == &dis_opAVX660F38[0x59])) { /* vcvtdq2pd , */ /* or vcvtps2pd , */ + /* or vcvtph2ps , */ + /* or vbroadcasts* , */ dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); dtrace_get_operand(x, mode, r_m, XMM_OPND, 0); } else if (dp == &dis_opAVX660F[0x6E]) { /* vmovd/q , */ #ifdef DIS_TEXT if (vex_W) x->d86_mnem[4] = 'q'; #endif dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); dtrace_get_operand(x, mode, r_m, LONG_OPND, 0); } else { dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); dtrace_get_operand(x, mode, r_m, wbit, 0); } break; case VEX_MXI: /* ModR/M.reg := op(ModR/M.rm, imm8) */ x->d86_numopnds = 3; dtrace_get_modrm(x, &mode, ®, &r_m); dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); dtrace_get_operand(x, REG_ONLY, reg, wbit, 2); dtrace_get_operand(x, mode, r_m, wbit, 1); /* one byte immediate number */ dtrace_imm_opnd(x, wbit, 1, 0); break; case VEX_XXI: /* VEX.vvvv := op(ModR/M.rm, imm8) */ x->d86_numopnds = 3; dtrace_get_modrm(x, &mode, ®, &r_m); #ifdef DIS_TEXT (void) strncpy(x->d86_mnem, dis_AVXvgrp7[opcode2 - 1][reg], OPLEN); #endif dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 2); dtrace_get_operand(x, REG_ONLY, r_m, wbit, 1); /* one byte immediate number */ dtrace_imm_opnd(x, wbit, 1, 0); break; case VEX_MR: /* ModR/M.reg (reg32/64) := op(ModR/M.rm) */ if (dp == &dis_opAVX660F[0xC5]) { /* vpextrw , , */ x->d86_numopnds = 2; vbit = 2; } else { x->d86_numopnds = 2; vbit = 1; } dtrace_get_modrm(x, &mode, ®, &r_m); dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, vbit); dtrace_get_operand(x, mode, r_m, wbit, vbit - 1); if (vbit == 2) dtrace_imm_opnd(x, wbit, 1, 0); break; case VEX_RRI: /* implicit(eflags/r32) := op(ModR/M.reg, ModR/M.rm) */ x->d86_numopnds = 2; dtrace_get_modrm(x, &mode, ®, &r_m); dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); dtrace_get_operand(x, mode, r_m, wbit, 0); break; case VEX_RX: /* ModR/M.rm := op(ModR/M.reg) */ /* vextractf128 || vcvtps2ph */ if (dp == &dis_opAVX660F3A[0x19] || dp == &dis_opAVX660F3A[0x1d]) { x->d86_numopnds = 3; dtrace_get_modrm(x, &mode, ®, &r_m); dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); dtrace_get_operand(x, mode, r_m, XMM_OPND, 2); dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); /* one byte immediate number */ dtrace_imm_opnd(x, wbit, 1, 0); break; } x->d86_numopnds = 2; dtrace_get_modrm(x, &mode, ®, &r_m); 
		dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
		dtrace_get_operand(x, mode, r_m, wbit, 1);
		dtrace_get_operand(x, REG_ONLY, reg, wbit, 0);
		break;

	case VEX_RR:
		/* ModR/M.rm := op(ModR/M.reg) */
		x->d86_numopnds = 2;
		dtrace_get_modrm(x, &mode, &reg, &r_m);
		dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);

		if (dp == &dis_opAVX660F[0x7E]) {
			/* vmovd/q <xmm>, <r/m> */
#ifdef DIS_TEXT
			if (vex_W)
				x->d86_mnem[4] = 'q';
#endif
			dtrace_get_operand(x, mode, r_m, LONG_OPND, 1);
		} else
			dtrace_get_operand(x, mode, r_m, wbit, 1);

		dtrace_get_operand(x, REG_ONLY, reg, wbit, 0);
		break;

	case VEX_RRi:
		/* ModR/M.rm := op(ModR/M.reg, imm) */
		x->d86_numopnds = 3;
		dtrace_get_modrm(x, &mode, &reg, &r_m);
		dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);

#ifdef DIS_TEXT
		if (dp == &dis_opAVX660F3A[0x16]) {
			/* vpextrd/q <imm8>, <xmm>, <r/m> */
			if (vex_W)
				x->d86_mnem[6] = 'q';
		}
#endif
		dtrace_get_operand(x, mode, r_m, LONG_OPND, 2);
		dtrace_get_operand(x, REG_ONLY, reg, wbit, 1);

		/* one byte immediate number */
		dtrace_imm_opnd(x, wbit, 1, 0);
		break;

+	case VEX_RIM:
+		/* ModR/M.rm := op(ModR/M.reg, imm) */
+		x->d86_numopnds = 3;
+		dtrace_get_modrm(x, &mode, &reg, &r_m);
+		dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
+
+		dtrace_get_operand(x, mode, r_m, XMM_OPND, 2);
+		dtrace_get_operand(x, REG_ONLY, reg, wbit, 1);
+		/* one byte immediate number */
+		dtrace_imm_opnd(x, wbit, 1, 0);
+		break;
+
	case VEX_RM:
		/* ModR/M.rm := op(ModR/M.reg) */
		if (dp == &dis_opAVX660F3A[0x17]) {
			/* vextractps */
			x->d86_numopnds = 3;
			dtrace_get_modrm(x, &mode, &reg, &r_m);
			dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
			dtrace_get_operand(x, mode, r_m, LONG_OPND, 2);
			dtrace_get_operand(x, REG_ONLY, reg, wbit, 1);
			/* one byte immediate number */
			dtrace_imm_opnd(x, wbit, 1, 0);
			break;
		}
		x->d86_numopnds = 2;
		dtrace_get_modrm(x, &mode, &reg, &r_m);
		dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
L_VEX_RM:
		vbit = 1;
		dtrace_get_operand(x, mode, r_m, wbit, vbit);
		dtrace_get_operand(x, REG_ONLY, reg, wbit, vbit - 1);
		break;

	case VEX_RRM:
		/* ModR/M.rm := op(VEX.vvvv, ModR/M.reg) */
		x->d86_numopnds = 3;
		dtrace_get_modrm(x, &mode, &reg, &r_m);
		dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
		dtrace_get_operand(x, mode, r_m, wbit, 2);
		/* VEX uses the 1's complement form to encode the regs */
		dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 1);
		dtrace_get_operand(x, REG_ONLY, reg, wbit, 0);
		break;

	case VEX_RMX:
		/* ModR/M.reg := op(VEX.vvvv, ModR/M.rm) */
		x->d86_numopnds = 3;
		dtrace_get_modrm(x, &mode, &reg, &r_m);
		dtrace_vex_adjust(vex_byte1, mode, &reg, &r_m);
		dtrace_get_operand(x, REG_ONLY, reg, wbit, 2);
		dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 1);
		dtrace_get_operand(x, REG_ONLY, r_m, wbit, 0);
		break;

	case VEX_NONE:
#ifdef DIS_TEXT
		if (vex_L)
			(void) strncpy(x->d86_mnem, "vzeroall", OPLEN);
#endif
		break;

+	case BLS: {
+
+		/*
+		 * The BLS instructions are VEX instructions based on
+		 * VEX.0F38.F3; however, they are considered the special
+		 * group 17 and, like everything else, they use bits 3-5 of
+		 * the ModR/M byte to determine the sub-instruction. Unlike
+		 * many others, such as the VMX instructions, these are valid
+		 * both for memory and register forms.
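+		 *
+		 * For example (bytes cited for illustration), c4 e2 78 f3 c8
+		 * decodes as blsr %eax, %eax: ModR/M.reg is 1 to select blsr,
+		 * VEX.vvvv names the destination and ModR/M.r/m the source.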
+ */ + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + switch (reg) { + case 1: +#ifdef DIS_TEXT + blsinstr = "blsr"; +#endif + break; + case 2: +#ifdef DIS_TEXT + blsinstr = "blsmsk"; +#endif + break; + case 3: +#ifdef DIS_TEXT + blsinstr = "blsi"; +#endif + break; + default: + goto error; + } + + x->d86_numopnds = 2; +#ifdef DIS_TEXT + (void) strncpy(x->d86_mnem, blsinstr, OPLEN); +#endif + dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 1); + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + } /* an invalid op code */ case AM: case DM: case OVERRIDE: case PREFIX: case UNKNOWN: NOMEM; default: goto error; } /* end switch */ if (x->d86_error) goto error; done: #ifdef DIS_MEM /* * compute the size of any memory accessed by the instruction */ if (x->d86_memsize != 0) { return (0); } else if (dp->it_stackop) { switch (opnd_size) { case SIZE16: x->d86_memsize = 2; break; case SIZE32: x->d86_memsize = 4; break; case SIZE64: x->d86_memsize = 8; break; } } else if (nomem || mode == REG_ONLY) { x->d86_memsize = 0; } else if (dp->it_size != 0) { /* * In 64 bit mode descriptor table entries * go up to 10 bytes and popf/pushf are always 8 bytes */ if (x->d86_mode == SIZE64 && dp->it_size == 6) x->d86_memsize = 10; else if (x->d86_mode == SIZE64 && opcode1 == 0x9 && (opcode2 == 0xc || opcode2 == 0xd)) x->d86_memsize = 8; else x->d86_memsize = dp->it_size; } else if (wbit == 0) { x->d86_memsize = 1; } else if (wbit == LONG_OPND) { if (opnd_size == SIZE64) x->d86_memsize = 8; else if (opnd_size == SIZE32) x->d86_memsize = 4; else x->d86_memsize = 2; } else if (wbit == SEG_OPND) { x->d86_memsize = 4; } else { x->d86_memsize = 8; } #endif return (0); error: #ifdef DIS_TEXT (void) strlcat(x->d86_mnem, "undef", OPLEN); #endif return (1); } #ifdef DIS_TEXT /* * Some instructions should have immediate operands printed * as unsigned integers. We compare against this table. */ static char *unsigned_ops[] = { "or", "and", "xor", "test", "in", "out", "lcall", "ljmp", "rcr", "rcl", "ror", "rol", "shl", "shr", "sal", "psr", "psl", 0 }; static int isunsigned_op(char *opcode) { char *where; int i; int is_unsigned = 0; /* * Work back to start of last mnemonic, since we may have * prefixes on some opcodes. */ where = opcode + strlen(opcode) - 1; while (where > opcode && *where != ' ') --where; if (*where == ' ') ++where; for (i = 0; unsigned_ops[i]; ++i) { if (strncmp(where, unsigned_ops[i], strlen(unsigned_ops[i]))) continue; is_unsigned = 1; break; } return (is_unsigned); } /* * Print a numeric immediate into end of buf, maximum length buflen. * The immediate may be an address or a displacement. Mask is set * for address size. If the immediate is a "small negative", or * if it's a negative displacement of any magnitude, print as -. * Respect the "octal" flag. "Small negative" is defined as "in the * interval [NEG_LIMIT, 0)". * * Also, "isunsigned_op()" instructions never print negatives. * * Return whether we decided to print a negative value or not. */ #define NEG_LIMIT -255 enum {IMM, DISP}; enum {POS, TRY_NEG}; static int print_imm(dis86_t *dis, uint64_t usv, uint64_t mask, char *buf, size_t buflen, int disp, int try_neg) { int curlen; int64_t sv = (int64_t)usv; int octal = dis->d86_flags & DIS_F_OCTAL; curlen = strlen(buf); if (try_neg == TRY_NEG && sv < 0 && (disp || sv >= NEG_LIMIT) && !isunsigned_op(dis->d86_mnem)) { dis->d86_sprintf_func(buf + curlen, buflen - curlen, octal ? 
"-0%llo" : "-0x%llx", (-sv) & mask); return (1); } else { if (disp == DISP) dis->d86_sprintf_func(buf + curlen, buflen - curlen, octal ? "+0%llo" : "+0x%llx", usv & mask); else dis->d86_sprintf_func(buf + curlen, buflen - curlen, octal ? "0%llo" : "0x%llx", usv & mask); return (0); } } static int log2(int size) { switch (size) { case 1: return (0); case 2: return (1); case 4: return (2); case 8: return (3); } return (0); } /* ARGSUSED */ void dtrace_disx86_str(dis86_t *dis, uint_t mode, uint64_t pc, char *buf, size_t buflen) { uint64_t reltgt = 0; uint64_t tgt = 0; int curlen; int (*lookup)(void *, uint64_t, char *, size_t); int i; int64_t sv; uint64_t usv, mask, save_mask, save_usv; static uint64_t masks[] = {0xffU, 0xffffU, 0xffffffffU, 0xffffffffffffffffULL}; save_usv = 0; dis->d86_sprintf_func(buf, buflen, "%-6s ", dis->d86_mnem); /* * For PC-relative jumps, the pc is really the next pc after executing * this instruction, so increment it appropriately. */ pc += dis->d86_len; for (i = 0; i < dis->d86_numopnds; i++) { d86opnd_t *op = &dis->d86_opnd[i]; if (i != 0) (void) strlcat(buf, ",", buflen); (void) strlcat(buf, op->d86_prefix, buflen); /* * sv is for the signed, possibly-truncated immediate or * displacement; usv retains the original size and * unsignedness for symbol lookup. */ sv = usv = op->d86_value; /* * About masks: for immediates that represent * addresses, the appropriate display size is * the effective address size of the instruction. * This includes MODE_OFFSET, MODE_IPREL, and * MODE_RIPREL. Immediates that are simply * immediate values should display in the operand's * size, however, since they don't represent addresses. */ /* d86_addr_size is SIZEnn, which is log2(real size) */ mask = masks[dis->d86_addr_size]; /* d86_value_size and d86_imm_bytes are in bytes */ if (op->d86_mode == MODE_SIGNED || op->d86_mode == MODE_IMPLIED) mask = masks[log2(op->d86_value_size)]; switch (op->d86_mode) { case MODE_NONE: (void) strlcat(buf, op->d86_opnd, buflen); break; case MODE_SIGNED: case MODE_IMPLIED: case MODE_OFFSET: tgt = usv; if (dis->d86_seg_prefix) (void) strlcat(buf, dis->d86_seg_prefix, buflen); if (op->d86_mode == MODE_SIGNED || op->d86_mode == MODE_IMPLIED) { (void) strlcat(buf, "$", buflen); } if (print_imm(dis, usv, mask, buf, buflen, IMM, TRY_NEG) && (op->d86_mode == MODE_SIGNED || op->d86_mode == MODE_IMPLIED)) { /* * We printed a negative value for an * immediate that wasn't a * displacement. Note that fact so we can * print the positive value as an * annotation. */ save_usv = usv; save_mask = mask; } (void) strlcat(buf, op->d86_opnd, buflen); break; case MODE_IPREL: case MODE_RIPREL: reltgt = pc + sv; switch (mode) { case SIZE16: reltgt = (uint16_t)reltgt; break; case SIZE32: reltgt = (uint32_t)reltgt; break; } (void) print_imm(dis, usv, mask, buf, buflen, DISP, TRY_NEG); if (op->d86_mode == MODE_RIPREL) (void) strlcat(buf, "(%rip)", buflen); break; } } /* * The symbol lookups may result in false positives, * particularly on object files, where small numbers may match * the 0-relative non-relocated addresses of symbols. 
*/ lookup = dis->d86_sym_lookup; if (tgt != 0) { if ((dis->d86_flags & DIS_F_NOIMMSYM) == 0 && lookup(dis->d86_data, tgt, NULL, 0) == 0) { (void) strlcat(buf, "\t<", buflen); curlen = strlen(buf); lookup(dis->d86_data, tgt, buf + curlen, buflen - curlen); (void) strlcat(buf, ">", buflen); } /* * If we printed a negative immediate above, print the * positive in case our heuristic was unhelpful */ if (save_usv) { (void) strlcat(buf, "\t<", buflen); (void) print_imm(dis, save_usv, save_mask, buf, buflen, IMM, POS); (void) strlcat(buf, ">", buflen); } } if (reltgt != 0) { /* Print symbol or effective address for reltgt */ (void) strlcat(buf, "\t<", buflen); curlen = strlen(buf); lookup(dis->d86_data, reltgt, buf + curlen, buflen - curlen); (void) strlcat(buf, ">", buflen); } } #endif /* DIS_TEXT */ Index: projects/ipsec/sys/cddl/dev/dtrace/x86/dis_tables.h =================================================================== --- projects/ipsec/sys/cddl/dev/dtrace/x86/dis_tables.h (revision 313186) +++ projects/ipsec/sys/cddl/dev/dtrace/x86/dis_tables.h (revision 313187) @@ -1,110 +1,111 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1988 AT&T */ /* All Rights Reserved */ /* * $FreeBSD$ */ #ifndef _DIS_TABLES_H #define _DIS_TABLES_H /* * Constants and prototypes for the IA32 disassembler backend. See dis_tables.c * for usage information and documentation. 
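 *
 * A minimal consumer (sketch; error handling omitted) fills in d86_data,
 * d86_get_byte and d86_check_func below, calls dtrace_disx86(&x, SIZE64),
 * and on success reads d86_len for the decoded length; with DIS_TEXT,
 * dtrace_disx86_str() can then format the instruction. The names used here
 * are exactly the fields and functions declared in this header.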
*/ #ifdef __cplusplus extern "C" { #endif #include #include /* * values for cpu mode */ #define SIZE16 1 #define SIZE32 2 #define SIZE64 3 #define OPLEN 256 #define PFIXLEN 8 #define NCPS 20 /* number of chars per symbol */ /* * data structures that must be provided to dtrace_dis86() */ typedef struct d86opnd { char d86_opnd[OPLEN]; /* symbolic rep of operand */ char d86_prefix[PFIXLEN]; /* any prefix string or "" */ uint_t d86_mode; /* mode for immediate */ uint_t d86_value_size; /* size in bytes of d86_value */ uint64_t d86_value; /* immediate value of opnd */ } d86opnd_t; typedef struct dis86 { uint_t d86_mode; uint_t d86_error; uint_t d86_len; /* instruction length */ int d86_rmindex; /* index of modrm byte or -1 */ uint_t d86_memsize; /* size of memory referenced */ char d86_bytes[16]; /* bytes of instruction */ char d86_mnem[OPLEN]; uint_t d86_numopnds; uint_t d86_rex_prefix; /* value of REX prefix if !0 */ char *d86_seg_prefix; /* segment prefix, if any */ uint_t d86_opnd_size; uint_t d86_addr_size; uint_t d86_got_modrm; + uint_t d86_vsib; /* Has a VSIB */ struct d86opnd d86_opnd[4]; /* up to 4 operands */ int (*d86_check_func)(void *); int (*d86_get_byte)(void *); #ifdef DIS_TEXT int (*d86_sym_lookup)(void *, uint64_t, char *, size_t); int (*d86_sprintf_func)(char *, size_t, const char *, ...); int d86_flags; uint_t d86_imm_bytes; #endif void *d86_data; } dis86_t; extern int dtrace_disx86(dis86_t *x, uint_t cpu_mode); #define DIS_F_OCTAL 0x1 /* Print all numbers in octal */ #define DIS_F_NOIMMSYM 0x2 /* Don't print symbols for immediates (.o) */ #ifdef DIS_TEXT extern void dtrace_disx86_str(dis86_t *x, uint_t cpu_mode, uint64_t pc, char *buf, size_t len); #endif #ifdef __cplusplus } #endif #endif /* _DIS_TABLES_H */ Index: projects/ipsec/sys/conf/files.pc98 =================================================================== --- projects/ipsec/sys/conf/files.pc98 (revision 313186) +++ projects/ipsec/sys/conf/files.pc98 (nonexistent) @@ -1,280 +0,0 @@ -# This file tells config what files go into building a kernel, -# files marked standard are always included. -# -# modified for PC-9801/PC-9821 -# -# $FreeBSD$ -# -# The long compile-with and dependency lines are required because of -# limitations in config: backslash-newline doesn't work in strings, and -# dependency lines other than the first are silently ignored. -# -linux_genassym.o optional compat_linux \ - dependency "$S/i386/linux/linux_genassym.c" \ - compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ - no-obj no-implicit-rule \ - clean "linux_genassym.o" -# -linux_assym.h optional compat_linux \ - dependency "$S/kern/genassym.sh linux_genassym.o" \ - compile-with "sh $S/kern/genassym.sh linux_genassym.o > ${.TARGET}" \ - no-obj no-implicit-rule before-depend \ - clean "linux_assym.h" -# -linux_locore.o optional compat_linux \ - dependency "linux_assym.h $S/i386/linux/linux_locore.s" \ - compile-with "${CC} -x assembler-with-cpp -DLOCORE -shared -s -pipe -I. 
-I$S -Werror -Wall -fno-common -nostdinc -nostdlib -Wl,-T$S/i386/linux/linux_vdso.lds.s -Wl,-soname=linux_vdso.so,--eh-frame-hdr,-fPIC,-warn-common ${.IMPSRC} -o ${.TARGET}" \ - no-obj no-implicit-rule \ - clean "linux_locore.o" -# -linux_vdso.so optional compat_linux \ - dependency "linux_locore.o" \ - compile-with "${OBJCOPY} --input-target binary --output-target elf32-i386-freebsd --binary-architecture i386 linux_locore.o ${.TARGET}" \ - no-implicit-rule \ - clean "linux_vdso.so" -# -svr4_genassym.o optional compat_svr4 \ - dependency "$S/i386/svr4/svr4_genassym.c" \ - compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ - no-obj no-implicit-rule \ - clean "svr4_genassym.o" -# -svr4_assym.h optional compat_svr4 \ - dependency "$S/kern/genassym.sh svr4_genassym.o" \ - compile-with "sh $S/kern/genassym.sh svr4_genassym.o > ${.TARGET}" \ - no-obj no-implicit-rule before-depend \ - clean "svr4_assym.h" -# -ukbdmap.h optional ukbd_dflt_keymap \ - compile-with "kbdcontrol -P ${S:S/sys$/share/}/vt/keymaps -P ${S:S/sys$/share/}/syscons/keymaps -L ${UKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > ukbdmap.h" \ - no-obj no-implicit-rule before-depend \ - clean "ukbdmap.h" -# -cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}" -compat/linprocfs/linprocfs.c optional linprocfs -compat/linsysfs/linsysfs.c optional linsysfs -compat/linux/linux_event.c optional compat_linux -compat/linux/linux_emul.c optional compat_linux -compat/linux/linux_file.c optional compat_linux -compat/linux/linux_fork.c optional compat_linux -compat/linux/linux_futex.c optional compat_linux -compat/linux/linux_getcwd.c optional compat_linux -compat/linux/linux_ioctl.c optional compat_linux -compat/linux/linux_ipc.c optional compat_linux -compat/linux/linux_mib.c optional compat_linux -compat/linux/linux_misc.c optional compat_linux -compat/linux/linux_mmap.c optional compat_linux -compat/linux/linux_signal.c optional compat_linux -compat/linux/linux_socket.c optional compat_linux -compat/linux/linux_stats.c optional compat_linux -compat/linux/linux_sysctl.c optional compat_linux -compat/linux/linux_time.c optional compat_linux -compat/linux/linux_timer.c optional compat_linux -compat/linux/linux_uid16.c optional compat_linux -compat/linux/linux_util.c optional compat_linux -compat/linux/linux_vdso.c optional compat_linux -compat/linux/linux.c optional compat_linux -compat/svr4/imgact_svr4.c optional compat_svr4 -compat/svr4/svr4_fcntl.c optional compat_svr4 -compat/svr4/svr4_filio.c optional compat_svr4 -compat/svr4/svr4_ioctl.c optional compat_svr4 -compat/svr4/svr4_ipc.c optional compat_svr4 -compat/svr4/svr4_misc.c optional compat_svr4 -compat/svr4/svr4_resource.c optional compat_svr4 -compat/svr4/svr4_signal.c optional compat_svr4 -compat/svr4/svr4_socket.c optional compat_svr4 -compat/svr4/svr4_sockio.c optional compat_svr4 -compat/svr4/svr4_stat.c optional compat_svr4 -compat/svr4/svr4_stream.c optional compat_svr4 -compat/svr4/svr4_syscallnames.c optional compat_svr4 -compat/svr4/svr4_sysent.c optional compat_svr4 -compat/svr4/svr4_sysvec.c optional compat_svr4 -compat/svr4/svr4_termios.c optional compat_svr4 -bf_enc.o optional crypto | ipsec | ipsec_support \ - dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \ - compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 
${ASM_CFLAGS} ${WERROR} ${.IMPSRC}" \ - no-implicit-rule -crypto/des/arch/i386/des_enc.S optional crypto | ipsec | \ - ipsec_support | netsmb -dev/agp/agp_ali.c optional agp -dev/agp/agp_amd.c optional agp -dev/agp/agp_i810.c optional agp -dev/agp/agp_intel.c optional agp -dev/agp/agp_nvidia.c optional agp -dev/agp/agp_sis.c optional agp -dev/agp/agp_via.c optional agp -dev/aic/aic_cbus.c optional aic isa -dev/ce/ceddk.c optional ce -dev/ce/if_ce.c optional ce -dev/ce/tau32-ddk.c optional ce \ - compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" -dev/cp/cpddk.c optional cp -dev/cp/if_cp.c optional cp -dev/ct/bshw_machdep.c optional ct -dev/ct/ct.c optional ct -dev/ct/ct_isa.c optional ct isa -dev/ed/if_ed_cbus.c optional ed isa -dev/ed/if_ed_wd80x3.c optional ed isa -dev/fb/fb.c optional fb | gdc -dev/fdc/fdc.c optional fdc -dev/fdc/fdc_cbus.c optional fdc isa -dev/fe/if_fe_cbus.c optional fe isa -dev/hwpmc/hwpmc_amd.c optional hwpmc -dev/hwpmc/hwpmc_intel.c optional hwpmc -dev/hwpmc/hwpmc_core.c optional hwpmc -dev/hwpmc/hwpmc_uncore.c optional hwpmc -dev/hwpmc/hwpmc_pentium.c optional hwpmc -dev/hwpmc/hwpmc_piv.c optional hwpmc -dev/hwpmc/hwpmc_ppro.c optional hwpmc -dev/hwpmc/hwpmc_tsc.c optional hwpmc -dev/hwpmc/hwpmc_x86.c optional hwpmc -dev/io/iodev.c optional io -dev/le/if_le_cbus.c optional le isa -dev/mse/mse.c optional mse -dev/mse/mse_cbus.c optional mse isa -dev/sbni/if_sbni.c optional sbni -dev/sbni/if_sbni_pci.c optional sbni pci -dev/sio/sio_pccard.c optional sio pccard -dev/sio/sio_pci.c optional sio pci -dev/sio/sio_puc.c optional sio puc -dev/snc/dp83932.c optional snc -dev/snc/dp83932subr.c optional snc -dev/snc/if_snc.c optional snc -dev/snc/if_snc_cbus.c optional snc isa -dev/snc/if_snc_pccard.c optional snc pccard -dev/speaker/spkr.c optional speaker -dev/syscons/apm/apm_saver.c optional apm_saver apm -dev/uart/uart_cpu_pc98.c optional uart -i386/bios/apm.c optional apm -#i386/i386/apic_vector.s optional apic -i386/i386/atomic.c standard \ - compile-with "${CC} -c ${CFLAGS} ${DEFINED_PROF:S/^$/-fomit-frame-pointer/} ${.IMPSRC}" -i386/i386/bios.c standard -i386/i386/bioscall.s standard -i386/i386/bpf_jit_machdep.c optional bpf_jitter -i386/i386/db_disasm.c optional ddb -i386/i386/db_interface.c optional ddb -i386/i386/db_trace.c optional ddb -i386/i386/elf_machdep.c standard -i386/i386/exception.s standard -i386/i386/gdb_machdep.c optional gdb -i386/i386/i686_mem.c optional mem -i386/i386/in_cksum.c optional inet | inet6 -i386/i386/initcpu.c standard -i386/i386/io.c optional io -i386/i386/k6_mem.c optional mem -i386/i386/locore.s standard no-obj -i386/i386/machdep.c standard -i386/i386/mem.c optional mem -i386/i386/minidump_machdep.c standard -i386/i386/mp_clock.c optional smp -i386/i386/mp_machdep.c optional smp -i386/i386/mpboot.s optional smp -i386/i386/perfmon.c optional perfmon -i386/i386/pmap.c standard -i386/i386/ptrace_machdep.c standard -i386/i386/support.s standard -i386/i386/swtch.s standard -i386/i386/sys_machdep.c standard -i386/i386/trap.c standard -i386/i386/uio_machdep.c standard -i386/i386/vm86.c standard -i386/i386/vm_machdep.c standard -i386/ibcs2/ibcs2_errno.c optional ibcs2 -i386/ibcs2/ibcs2_fcntl.c optional ibcs2 -i386/ibcs2/ibcs2_ioctl.c optional ibcs2 -i386/ibcs2/ibcs2_ipc.c optional ibcs2 -i386/ibcs2/ibcs2_isc.c optional ibcs2 -i386/ibcs2/ibcs2_isc_sysent.c optional ibcs2 -i386/ibcs2/ibcs2_misc.c optional ibcs2 -i386/ibcs2/ibcs2_msg.c optional ibcs2 -i386/ibcs2/ibcs2_other.c optional ibcs2 -i386/ibcs2/ibcs2_signal.c optional ibcs2 
-i386/ibcs2/ibcs2_socksys.c optional ibcs2 -i386/ibcs2/ibcs2_stat.c optional ibcs2 -i386/ibcs2/ibcs2_sysent.c optional ibcs2 -i386/ibcs2/ibcs2_sysi86.c optional ibcs2 -i386/ibcs2/ibcs2_sysvec.c optional ibcs2 -i386/ibcs2/ibcs2_util.c optional ibcs2 -i386/ibcs2/ibcs2_xenix.c optional ibcs2 -i386/ibcs2/ibcs2_xenix_sysent.c optional ibcs2 -i386/ibcs2/imgact_coff.c optional ibcs2 -i386/isa/elink.c optional ep | ie -i386/isa/npx.c optional npx -i386/isa/pmtimer.c optional pmtimer -i386/isa/prof_machdep.c optional profiling-routine -i386/linux/imgact_linux.c optional compat_linux -i386/linux/linux_dummy.c optional compat_linux -i386/linux/linux_machdep.c optional compat_linux -i386/linux/linux_ptrace.c optional compat_linux -i386/linux/linux_support.s optional compat_linux \ - dependency "linux_assym.h" -i386/linux/linux_sysent.c optional compat_linux -i386/linux/linux_sysvec.c optional compat_linux -i386/pci/pci_cfgreg.c optional pci -i386/pci/pci_pir.c optional pci -i386/svr4/svr4_locore.s optional compat_svr4 \ - dependency "svr4_assym.h" \ - warning "COMPAT_SVR4 is broken and should be avoided" -i386/svr4/svr4_machdep.c optional compat_svr4 -kern/kern_clocksource.c standard -kern/imgact_aout.c optional compat_aout -kern/imgact_gzip.c optional gzip -kern/subr_sfbuf.c standard -libkern/divdi3.c standard -libkern/ffsll.c standard -libkern/flsll.c standard -libkern/memmove.c standard -libkern/memset.c standard -libkern/moddi3.c standard -libkern/qdivrem.c standard -libkern/ucmpdi2.c standard -libkern/udivdi3.c standard -libkern/umoddi3.c standard -pc98/apm/apm_bioscall.S optional apm -pc98/cbus/cbus_dma.c optional isa -pc98/cbus/gdc.c optional gdc -pc98/cbus/nmi.c standard -pc98/cbus/olpt.c optional olpt -pc98/cbus/pckbd.c optional pckbd -pc98/cbus/pcrtc.c standard -pc98/cbus/pmc.c optional pmc -pc98/cbus/scgdcrndr.c optional sc gdc -pc98/cbus/scterm-sck.c optional sc -pc98/cbus/scvtb.c optional sc -pc98/cbus/sio.c optional sio -pc98/cbus/sio_cbus.c optional sio isa -pc98/cbus/syscons_cbus.c optional sc -pc98/pc98/busio.s standard -pc98/pc98/busiosubr.c standard -pc98/pc98/canbepm.c optional canbepm -pc98/pc98/canbus.c optional canbus -pc98/pc98/canbus_if.m optional canbus -pc98/pc98/pc98_machdep.c standard -# -# x86 shared code between IA32, AMD64 and PC98 architectures -# -x86/isa/atpic.c optional atpic -x86/isa/clock.c standard -x86/isa/isa.c optional isa -x86/pci/pci_bus.c optional pci -x86/x86/autoconf.c standard -x86/x86/busdma_bounce.c standard -x86/x86/busdma_machdep.c standard -x86/x86/cpu_machdep.c standard -x86/x86/dump_machdep.c standard -x86/x86/identcpu.c standard -x86/x86/intr_machdep.c standard -x86/x86/io_apic.c optional apic -x86/x86/legacy.c standard -x86/x86/local_apic.c optional apic -x86/x86/mca.c standard -x86/x86/mptable.c optional apic -x86/x86/mptable_pci.c optional apic pci -x86/x86/mp_x86.c optional smp -x86/x86/mp_watchdog.c optional mp_watchdog smp -x86/x86/msi.c optional apic pci -x86/x86/nexus.c standard -x86/x86/stack_machdep.c optional ddb | stack -x86/x86/tsc.c standard -x86/x86/delay.c standard Property changes on: projects/ipsec/sys/conf/files.pc98 ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/ipsec/sys/conf/files.i386 =================================================================== --- projects/ipsec/sys/conf/files.i386 (revision 313186) +++ projects/ipsec/sys/conf/files.i386 (revision 313187) @@ -1,648 +1,648 @@ # This file tells config 
what files go into building a kernel, # files marked standard are always included. # # $FreeBSD$ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and # dependency lines other than the first are silently ignored. # cloudabi32_vdso.o optional compat_cloudabi32 \ dependency "$S/contrib/cloudabi/cloudabi_vdso_i686.S" \ compile-with "${CC} -x assembler-with-cpp -shared -nostdinc -nostdlib -Wl,-T$S/compat/cloudabi/cloudabi_vdso.lds $S/contrib/cloudabi/cloudabi_vdso_i686.S -o ${.TARGET}" \ no-obj no-implicit-rule \ clean "cloudabi32_vdso.o" # cloudabi32_vdso_blob.o optional compat_cloudabi32 \ dependency "cloudabi32_vdso.o" \ compile-with "${OBJCOPY} --input-target binary --output-target elf32-i386-freebsd --binary-architecture i386 cloudabi32_vdso.o ${.TARGET}" \ no-implicit-rule \ clean "cloudabi32_vdso_blob.o" # linux_genassym.o optional compat_linux \ dependency "$S/i386/linux/linux_genassym.c" \ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ no-obj no-implicit-rule \ clean "linux_genassym.o" # linux_assym.h optional compat_linux \ dependency "$S/kern/genassym.sh linux_genassym.o" \ compile-with "sh $S/kern/genassym.sh linux_genassym.o > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "linux_assym.h" # linux_locore.o optional compat_linux \ dependency "linux_assym.h $S/i386/linux/linux_locore.s" \ compile-with "${CC} -x assembler-with-cpp -DLOCORE -shared -s -pipe -I. -I$S -Werror -Wall -fno-common -nostdinc -nostdlib -Wl,-T$S/i386/linux/linux_vdso.lds.s -Wl,-soname=linux_vdso.so,--eh-frame-hdr,-fPIC,-warn-common ${.IMPSRC} -o ${.TARGET}" \ no-obj no-implicit-rule \ clean "linux_locore.o" # linux_vdso.so optional compat_linux \ dependency "linux_locore.o" \ compile-with "${OBJCOPY} --input-target binary --output-target elf32-i386-freebsd --binary-architecture i386 linux_locore.o ${.TARGET}" \ no-implicit-rule \ clean "linux_vdso.so" # svr4_genassym.o optional compat_svr4 \ dependency "$S/i386/svr4/svr4_genassym.c" \ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ no-obj no-implicit-rule \ clean "svr4_genassym.o" # svr4_assym.h optional compat_svr4 \ dependency "$S/kern/genassym.sh svr4_genassym.o" \ compile-with "sh $S/kern/genassym.sh svr4_genassym.o > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "svr4_assym.h" # font.h optional sc_dflt_font \ compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'static u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'static u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'static u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \ no-obj no-implicit-rule before-depend \ clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8" # atkbdmap.h optional atkbd_dflt_keymap \ compile-with "kbdcontrol -P ${S:S/sys$/share/}/vt/keymaps -P ${S:S/sys$/share/}/syscons/keymaps -L ${ATKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > atkbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "atkbdmap.h" # ukbdmap.h optional ukbd_dflt_keymap \ compile-with "kbdcontrol -P ${S:S/sys$/share/}/vt/keymaps -P ${S:S/sys$/share/}/syscons/keymaps -L ${UKBD_DFLT_KEYMAP} | sed -e 's/^static 
keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > ukbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "ukbdmap.h" # hpt27xx_lib.o optional hpt27xx \ dependency "$S/dev/hpt27xx/i386-elf.hpt27xx_lib.o.uu" \ compile-with "uudecode < $S/dev/hpt27xx/i386-elf.hpt27xx_lib.o.uu" \ no-implicit-rule # hptmvraid.o optional hptmv \ dependency "$S/dev/hptmv/i386-elf.raid.o.uu" \ compile-with "uudecode < $S/dev/hptmv/i386-elf.raid.o.uu" \ no-implicit-rule # hptnr_lib.o optional hptnr \ dependency "$S/dev/hptnr/i386-elf.hptnr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptnr/i386-elf.hptnr_lib.o.uu" \ no-implicit-rule # hptrr_lib.o optional hptrr \ dependency "$S/dev/hptrr/i386-elf.hptrr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptrr/i386-elf.hptrr_lib.o.uu" \ no-implicit-rule # cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S optional zfs | dtrace compile-with "${ZFS_S}" cddl/dev/dtrace/i386/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/i386/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/x86/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" cddl/dev/dtrace/x86/dis_tables.c optional dtrace_fbt | dtraceall compile-with "${DTRACE_C}" cddl/dev/dtrace/x86/instr_size.c optional dtrace_fbt | dtraceall compile-with "${DTRACE_C}" compat/linprocfs/linprocfs.c optional linprocfs compat/linsysfs/linsysfs.c optional linsysfs compat/linux/linux_event.c optional compat_linux compat/linux/linux_emul.c optional compat_linux compat/linux/linux_file.c optional compat_linux compat/linux/linux_fork.c optional compat_linux compat/linux/linux_futex.c optional compat_linux compat/linux/linux_getcwd.c optional compat_linux compat/linux/linux_ioctl.c optional compat_linux compat/linux/linux_ipc.c optional compat_linux compat/linux/linux_mib.c optional compat_linux compat/linux/linux_misc.c optional compat_linux compat/linux/linux_mmap.c optional compat_linux compat/linux/linux_signal.c optional compat_linux compat/linux/linux_socket.c optional compat_linux compat/linux/linux_stats.c optional compat_linux compat/linux/linux_sysctl.c optional compat_linux compat/linux/linux_time.c optional compat_linux compat/linux/linux_timer.c optional compat_linux compat/linux/linux_uid16.c optional compat_linux compat/linux/linux_util.c optional compat_linux compat/linux/linux_vdso.c optional compat_linux compat/linux/linux.c optional compat_linux compat/ndis/kern_ndis.c optional ndisapi pci compat/ndis/kern_windrv.c optional ndisapi pci compat/ndis/subr_hal.c optional ndisapi pci compat/ndis/subr_ndis.c optional ndisapi pci compat/ndis/subr_ntoskrnl.c optional ndisapi pci compat/ndis/subr_pe.c optional ndisapi pci compat/ndis/subr_usbd.c optional ndisapi pci compat/ndis/winx32_wrap.S optional ndisapi pci compat/svr4/imgact_svr4.c optional compat_svr4 compat/svr4/svr4_fcntl.c optional compat_svr4 compat/svr4/svr4_filio.c optional compat_svr4 compat/svr4/svr4_ioctl.c optional compat_svr4 compat/svr4/svr4_ipc.c optional compat_svr4 compat/svr4/svr4_misc.c optional compat_svr4 compat/svr4/svr4_resource.c optional compat_svr4 compat/svr4/svr4_signal.c optional compat_svr4 compat/svr4/svr4_socket.c optional compat_svr4 compat/svr4/svr4_sockio.c optional compat_svr4 compat/svr4/svr4_stat.c optional compat_svr4 compat/svr4/svr4_stream.c optional compat_svr4 compat/svr4/svr4_syscallnames.c optional compat_svr4 compat/svr4/svr4_sysent.c optional compat_svr4 compat/svr4/svr4_sysvec.c optional compat_svr4 
compat/svr4/svr4_termios.c optional compat_svr4 bf_enc.o optional crypto | ipsec | ipsec_support \ dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \ compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 ${ASM_CFLAGS} ${WERROR} ${.IMPSRC}" \ no-implicit-rule crypto/aesni/aeskeys_i386.S optional aesni crypto/aesni/aesni.c optional aesni aesni_ghash.o optional aesni \ dependency "$S/crypto/aesni/aesni_ghash.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${NO_WCAST_QUAL} ${PROF} -mmmx -msse -msse4 -maes -mpclmul ${.IMPSRC}" \ no-implicit-rule \ clean "aesni_ghash.o" aesni_wrap.o optional aesni \ dependency "$S/crypto/aesni/aesni_wrap.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${NO_WCAST_QUAL} ${PROF} -mmmx -msse -msse4 -maes ${.IMPSRC}" \ no-implicit-rule \ clean "aesni_wrap.o" crypto/des/arch/i386/des_enc.S optional crypto | ipsec | ipsec_support | netsmb crypto/via/padlock.c optional padlock crypto/via/padlock_cipher.c optional padlock crypto/via/padlock_hash.c optional padlock dev/advansys/adv_isa.c optional adv isa dev/agp/agp_ali.c optional agp dev/agp/agp_amd.c optional agp dev/agp/agp_amd64.c optional agp dev/agp/agp_ati.c optional agp dev/agp/agp_i810.c optional agp dev/agp/agp_intel.c optional agp dev/agp/agp_nvidia.c optional agp dev/agp/agp_sis.c optional agp dev/agp/agp_via.c optional agp dev/aic/aic_isa.c optional aic isa dev/amdsbwd/amdsbwd.c optional amdsbwd dev/amdtemp/amdtemp.c optional amdtemp dev/arcmsr/arcmsr.c optional arcmsr pci dev/asmc/asmc.c optional asmc isa dev/atkbdc/atkbd.c optional atkbd atkbdc dev/atkbdc/atkbd_atkbdc.c optional atkbd atkbdc dev/atkbdc/atkbdc.c optional atkbdc dev/atkbdc/atkbdc_isa.c optional atkbdc isa dev/atkbdc/atkbdc_subr.c optional atkbdc dev/atkbdc/psm.c optional psm atkbdc dev/bxe/bxe.c optional bxe pci dev/bxe/bxe_stats.c optional bxe pci dev/bxe/bxe_debug.c optional bxe pci dev/bxe/ecore_sp.c optional bxe pci dev/bxe/bxe_elink.c optional bxe pci dev/bxe/57710_init_values.c optional bxe pci dev/bxe/57711_init_values.c optional bxe pci dev/bxe/57712_init_values.c optional bxe pci dev/ce/ceddk.c optional ce dev/ce/if_ce.c optional ce dev/ce/tau32-ddk.c optional ce \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/cm/if_cm_isa.c optional cm isa dev/coretemp/coretemp.c optional coretemp dev/cp/cpddk.c optional cp dev/cp/if_cp.c optional cp dev/cpuctl/cpuctl.c optional cpuctl dev/ctau/ctau.c optional ctau dev/ctau/ctddk.c optional ctau dev/ctau/if_ct.c optional ctau dev/cx/csigma.c optional cx dev/cx/cxddk.c optional cx dev/cx/if_cx.c optional cx dev/dpms/dpms.c optional dpms dev/ed/if_ed_3c503.c optional ed isa ed_3c503 dev/ed/if_ed_isa.c optional ed isa dev/ed/if_ed_wd80x3.c optional ed isa dev/ed/if_ed_hpp.c optional ed isa ed_hpp dev/ed/if_ed_sic.c optional ed isa ed_sic dev/fb/fb.c optional fb | vga dev/fb/s3_pci.c optional s3pci dev/fb/vesa.c optional vga vesa dev/fb/vga.c optional vga dev/fdc/fdc.c optional fdc dev/fdc/fdc_acpi.c optional fdc dev/fdc/fdc_isa.c optional fdc isa dev/fdc/fdc_pccard.c optional fdc pccard dev/fe/if_fe_isa.c optional fe isa dev/glxiic/glxiic.c optional glxiic dev/glxsb/glxsb.c optional glxsb dev/glxsb/glxsb_hash.c optional glxsb dev/gpio/bytgpio.c optional bytgpio dev/hpt27xx/hpt27xx_os_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_osm_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_config.c optional hpt27xx dev/hptmv/entry.c optional hptmv dev/hptmv/mv.c optional hptmv 
dev/hptmv/gui_lib.c optional hptmv dev/hptmv/hptproc.c optional hptmv dev/hptmv/ioctl.c optional hptmv dev/hptnr/hptnr_os_bsd.c optional hptnr dev/hptnr/hptnr_osm_bsd.c optional hptnr dev/hptnr/hptnr_config.c optional hptnr dev/hptrr/hptrr_os_bsd.c optional hptrr dev/hptrr/hptrr_osm_bsd.c optional hptrr dev/hptrr/hptrr_config.c optional hptrr dev/hwpmc/hwpmc_amd.c optional hwpmc dev/hwpmc/hwpmc_intel.c optional hwpmc dev/hwpmc/hwpmc_core.c optional hwpmc dev/hwpmc/hwpmc_uncore.c optional hwpmc dev/hwpmc/hwpmc_pentium.c optional hwpmc dev/hwpmc/hwpmc_piv.c optional hwpmc dev/hwpmc/hwpmc_ppro.c optional hwpmc dev/hwpmc/hwpmc_tsc.c optional hwpmc dev/hwpmc/hwpmc_x86.c optional hwpmc dev/hyperv/pcib/vmbus_pcib.c optional hyperv pci dev/hyperv/netvsc/hn_nvs.c optional hyperv dev/hyperv/netvsc/hn_rndis.c optional hyperv dev/hyperv/netvsc/if_hn.c optional hyperv dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c optional hyperv dev/hyperv/utilities/hv_kvp.c optional hyperv dev/hyperv/utilities/hv_snapshot.c optional hyperv dev/hyperv/utilities/vmbus_heartbeat.c optional hyperv dev/hyperv/utilities/vmbus_ic.c optional hyperv dev/hyperv/utilities/vmbus_shutdown.c optional hyperv dev/hyperv/utilities/vmbus_timesync.c optional hyperv dev/hyperv/vmbus/hyperv.c optional hyperv dev/hyperv/vmbus/hyperv_busdma.c optional hyperv dev/hyperv/vmbus/vmbus.c optional hyperv pci dev/hyperv/vmbus/vmbus_br.c optional hyperv dev/hyperv/vmbus/vmbus_chan.c optional hyperv dev/hyperv/vmbus/vmbus_et.c optional hyperv dev/hyperv/vmbus/vmbus_if.m optional hyperv dev/hyperv/vmbus/vmbus_xact.c optional hyperv dev/hyperv/vmbus/i386/hyperv_machdep.c optional hyperv dev/hyperv/vmbus/i386/vmbus_vector.S optional hyperv dev/ichwd/ichwd.c optional ichwd dev/if_ndis/if_ndis.c optional ndis dev/if_ndis/if_ndis_pccard.c optional ndis pccard dev/if_ndis/if_ndis_pci.c optional ndis cardbus | ndis pci dev/if_ndis/if_ndis_usb.c optional ndis usb dev/intel/spi.c optional intelspi dev/io/iodev.c optional io dev/ipmi/ipmi.c optional ipmi dev/ipmi/ipmi_acpi.c optional ipmi acpi dev/ipmi/ipmi_isa.c optional ipmi isa dev/ipmi/ipmi_kcs.c optional ipmi dev/ipmi/ipmi_smic.c optional ipmi dev/ipmi/ipmi_smbus.c optional ipmi smbus dev/ipmi/ipmi_smbios.c optional ipmi dev/ipmi/ipmi_ssif.c optional ipmi smbus dev/ipmi/ipmi_pci.c optional ipmi pci dev/ipmi/ipmi_linux.c optional ipmi compat_linux dev/le/if_le_isa.c optional le isa dev/mse/mse.c optional mse dev/mse/mse_isa.c optional mse isa dev/nfe/if_nfe.c optional nfe pci dev/ntb/if_ntb/if_ntb.c optional if_ntb dev/ntb/ntb_transport.c optional if_ntb dev/ntb/ntb.c optional if_ntb | ntb_hw dev/ntb/ntb_if.m optional if_ntb | ntb_hw dev/ntb/ntb_hw/ntb_hw.c optional ntb_hw dev/nvd/nvd.c optional nvd nvme dev/nvme/nvme.c optional nvme dev/nvme/nvme_ctrlr.c optional nvme dev/nvme/nvme_ctrlr_cmd.c optional nvme dev/nvme/nvme_ns.c optional nvme dev/nvme/nvme_ns_cmd.c optional nvme dev/nvme/nvme_qpair.c optional nvme dev/nvme/nvme_sysctl.c optional nvme dev/nvme/nvme_test.c optional nvme dev/nvme/nvme_util.c optional nvme dev/nvram/nvram.c optional nvram isa dev/ofw/ofwpci.c optional fdt pci dev/pcf/pcf_isa.c optional pcf dev/random/ivy.c optional rdrand_rng dev/random/nehemiah.c optional padlock_rng dev/sbni/if_sbni.c optional sbni dev/sbni/if_sbni_isa.c optional sbni isa dev/sbni/if_sbni_pci.c optional sbni pci dev/sio/sio.c optional sio dev/sio/sio_isa.c optional sio isa dev/sio/sio_pccard.c optional sio pccard dev/sio/sio_pci.c optional sio pci dev/sio/sio_puc.c optional sio puc dev/speaker/spkr.c optional 
speaker dev/syscons/apm/apm_saver.c optional apm_saver apm dev/syscons/scterm-teken.c optional sc dev/syscons/scvesactl.c optional sc vga vesa dev/syscons/scvgarndr.c optional sc vga dev/syscons/scvtb.c optional sc dev/tpm/tpm.c optional tpm dev/tpm/tpm_acpi.c optional tpm acpi dev/tpm/tpm_isa.c optional tpm isa dev/uart/uart_cpu_x86.c optional uart dev/viawd/viawd.c optional viawd dev/vmware/vmxnet3/if_vmx.c optional vmx dev/acpica/acpi_if.m standard dev/acpica/acpi_hpet.c optional acpi dev/acpica/acpi_timer.c optional acpi dev/acpi_support/acpi_wmi_if.m standard dev/wbwd/wbwd.c optional wbwd dev/wpi/if_wpi.c optional wpi dev/isci/isci.c optional isci dev/isci/isci_controller.c optional isci dev/isci/isci_domain.c optional isci dev/isci/isci_interrupt.c optional isci dev/isci/isci_io_request.c optional isci dev/isci/isci_logger.c optional isci dev/isci/isci_oem_parameters.c optional isci dev/isci/isci_remote_device.c optional isci dev/isci/isci_sysctl.c optional isci dev/isci/isci_task_request.c optional isci dev/isci/isci_timer.c optional isci dev/isci/scil/sati.c optional isci dev/isci/scil/sati_abort_task_set.c optional isci dev/isci/scil/sati_atapi.c optional isci dev/isci/scil/sati_device.c optional isci dev/isci/scil/sati_inquiry.c optional isci dev/isci/scil/sati_log_sense.c optional isci dev/isci/scil/sati_lun_reset.c optional isci dev/isci/scil/sati_mode_pages.c optional isci dev/isci/scil/sati_mode_select.c optional isci dev/isci/scil/sati_mode_sense.c optional isci dev/isci/scil/sati_mode_sense_10.c optional isci dev/isci/scil/sati_mode_sense_6.c optional isci dev/isci/scil/sati_move.c optional isci dev/isci/scil/sati_passthrough.c optional isci dev/isci/scil/sati_read.c optional isci dev/isci/scil/sati_read_buffer.c optional isci dev/isci/scil/sati_read_capacity.c optional isci dev/isci/scil/sati_reassign_blocks.c optional isci dev/isci/scil/sati_report_luns.c optional isci dev/isci/scil/sati_request_sense.c optional isci dev/isci/scil/sati_start_stop_unit.c optional isci dev/isci/scil/sati_synchronize_cache.c optional isci dev/isci/scil/sati_test_unit_ready.c optional isci dev/isci/scil/sati_unmap.c optional isci dev/isci/scil/sati_util.c optional isci dev/isci/scil/sati_verify.c optional isci dev/isci/scil/sati_write.c optional isci dev/isci/scil/sati_write_and_verify.c optional isci dev/isci/scil/sati_write_buffer.c optional isci dev/isci/scil/sati_write_long.c optional isci dev/isci/scil/sci_abstract_list.c optional isci dev/isci/scil/sci_base_controller.c optional isci dev/isci/scil/sci_base_domain.c optional isci dev/isci/scil/sci_base_iterator.c optional isci dev/isci/scil/sci_base_library.c optional isci dev/isci/scil/sci_base_logger.c optional isci dev/isci/scil/sci_base_memory_descriptor_list.c optional isci dev/isci/scil/sci_base_memory_descriptor_list_decorator.c optional isci dev/isci/scil/sci_base_object.c optional isci dev/isci/scil/sci_base_observer.c optional isci dev/isci/scil/sci_base_phy.c optional isci dev/isci/scil/sci_base_port.c optional isci dev/isci/scil/sci_base_remote_device.c optional isci dev/isci/scil/sci_base_request.c optional isci dev/isci/scil/sci_base_state_machine.c optional isci dev/isci/scil/sci_base_state_machine_logger.c optional isci dev/isci/scil/sci_base_state_machine_observer.c optional isci dev/isci/scil/sci_base_subject.c optional isci dev/isci/scil/sci_util.c optional isci dev/isci/scil/scic_sds_controller.c optional isci dev/isci/scil/scic_sds_library.c optional isci dev/isci/scil/scic_sds_pci.c optional isci 
dev/isci/scil/scic_sds_phy.c optional isci dev/isci/scil/scic_sds_port.c optional isci dev/isci/scil/scic_sds_port_configuration_agent.c optional isci dev/isci/scil/scic_sds_remote_device.c optional isci dev/isci/scil/scic_sds_remote_node_context.c optional isci dev/isci/scil/scic_sds_remote_node_table.c optional isci dev/isci/scil/scic_sds_request.c optional isci dev/isci/scil/scic_sds_sgpio.c optional isci dev/isci/scil/scic_sds_smp_remote_device.c optional isci dev/isci/scil/scic_sds_smp_request.c optional isci dev/isci/scil/scic_sds_ssp_request.c optional isci dev/isci/scil/scic_sds_stp_packet_request.c optional isci dev/isci/scil/scic_sds_stp_remote_device.c optional isci dev/isci/scil/scic_sds_stp_request.c optional isci dev/isci/scil/scic_sds_unsolicited_frame_control.c optional isci dev/isci/scil/scif_sas_controller.c optional isci dev/isci/scil/scif_sas_controller_state_handlers.c optional isci dev/isci/scil/scif_sas_controller_states.c optional isci dev/isci/scil/scif_sas_domain.c optional isci dev/isci/scil/scif_sas_domain_state_handlers.c optional isci dev/isci/scil/scif_sas_domain_states.c optional isci dev/isci/scil/scif_sas_high_priority_request_queue.c optional isci dev/isci/scil/scif_sas_internal_io_request.c optional isci dev/isci/scil/scif_sas_io_request.c optional isci dev/isci/scil/scif_sas_io_request_state_handlers.c optional isci dev/isci/scil/scif_sas_io_request_states.c optional isci dev/isci/scil/scif_sas_library.c optional isci dev/isci/scil/scif_sas_remote_device.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substates.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substates.c optional isci dev/isci/scil/scif_sas_remote_device_state_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_states.c optional isci dev/isci/scil/scif_sas_request.c optional isci dev/isci/scil/scif_sas_smp_activity_clear_affiliation.c optional isci dev/isci/scil/scif_sas_smp_io_request.c optional isci dev/isci/scil/scif_sas_smp_phy.c optional isci dev/isci/scil/scif_sas_smp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_io_request.c optional isci dev/isci/scil/scif_sas_stp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_task_request.c optional isci dev/isci/scil/scif_sas_task_request.c optional isci dev/isci/scil/scif_sas_task_request_state_handlers.c optional isci dev/isci/scil/scif_sas_task_request_states.c optional isci dev/isci/scil/scif_sas_timer.c optional isci i386/acpica/acpi_machdep.c optional acpi acpi_wakecode.o optional acpi \ dependency "$S/i386/acpica/acpi_wakecode.S assym.s" \ compile-with "${NORMAL_S}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.o" acpi_wakecode.bin optional acpi \ dependency "acpi_wakecode.o" \ compile-with "${OBJCOPY} -S -O binary acpi_wakecode.o ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.bin" acpi_wakecode.h optional acpi \ dependency "acpi_wakecode.bin" \ compile-with "file2c -sx 'static char wakecode[] = {' '};' < acpi_wakecode.bin > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.h" acpi_wakedata.h optional acpi \ dependency "acpi_wakecode.o" \ compile-with '${NM} -n --defined-only acpi_wakecode.o | while read offset dummy what; do echo "#define $${what} 0x$${offset}"; done > ${.TARGET}' \ no-obj no-implicit-rule before-depend \ clean "acpi_wakedata.h" # 
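The four acpi_wakecode rules above form a small build pipeline: assemble the real-mode wake trampoline (NORMAL_S), flatten it to a raw binary with objcopy -S -O binary, embed that binary as a C array with file2c, and turn its symbol offsets into #defines with nm. As a rough sketch, the two generated headers look something like the following; the byte values and symbol names are hypothetical stand-ins, since the real contents are produced from acpi_wakecode.o at build time.

/* acpi_wakecode.h: the trampoline embedded as a C array by file2c */
static char wakecode[] = {
	0xfa, 0xfc, 0x0f, 0x20, 0xc0,	/* illustrative bytes of acpi_wakecode.bin */
};

/* acpi_wakedata.h: one #define per symbol, from "nm -n --defined-only" */
#define wakeup_start 0x0000	/* hypothetical symbol and offset */
#define wakeup_gdt 0x0040	/* hypothetical symbol and offset */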
i386/bios/apm.c optional apm i386/bios/mca_machdep.c optional mca i386/bios/smapi.c optional smapi i386/bios/smapi_bios.S optional smapi i386/cloudabi32/cloudabi32_sysvec.c optional compat_cloudabi32 #i386/i386/apic_vector.s optional apic i386/i386/atomic.c standard \ compile-with "${CC} -c ${CFLAGS} ${DEFINED_PROF:S/^$/-fomit-frame-pointer/} ${.IMPSRC}" i386/i386/bios.c standard i386/i386/bioscall.s standard i386/i386/bpf_jit_machdep.c optional bpf_jitter i386/i386/db_disasm.c optional ddb i386/i386/db_interface.c optional ddb i386/i386/db_trace.c optional ddb i386/i386/elan-mmcr.c optional cpu_elan | cpu_soekris i386/i386/elf_machdep.c standard i386/i386/exception.s standard i386/i386/gdb_machdep.c optional gdb i386/i386/geode.c optional cpu_geode i386/i386/i686_mem.c optional mem i386/i386/in_cksum.c optional inet | inet6 i386/i386/initcpu.c standard i386/i386/io.c optional io i386/i386/k6_mem.c optional mem i386/i386/locore.s standard no-obj i386/i386/longrun.c optional cpu_enable_longrun i386/i386/machdep.c standard i386/i386/mem.c optional mem i386/i386/minidump_machdep.c standard i386/i386/mp_clock.c optional smp i386/i386/mp_machdep.c optional smp i386/i386/mpboot.s optional smp i386/i386/perfmon.c optional perfmon i386/i386/pmap.c standard i386/i386/ptrace_machdep.c standard i386/i386/support.s standard i386/i386/swtch.s standard i386/i386/sys_machdep.c standard i386/i386/trap.c standard i386/i386/uio_machdep.c standard i386/i386/vm86.c standard i386/i386/vm_machdep.c standard i386/ibcs2/ibcs2_errno.c optional ibcs2 i386/ibcs2/ibcs2_fcntl.c optional ibcs2 i386/ibcs2/ibcs2_ioctl.c optional ibcs2 i386/ibcs2/ibcs2_ipc.c optional ibcs2 i386/ibcs2/ibcs2_isc.c optional ibcs2 i386/ibcs2/ibcs2_isc_sysent.c optional ibcs2 i386/ibcs2/ibcs2_misc.c optional ibcs2 i386/ibcs2/ibcs2_msg.c optional ibcs2 i386/ibcs2/ibcs2_other.c optional ibcs2 i386/ibcs2/ibcs2_signal.c optional ibcs2 i386/ibcs2/ibcs2_socksys.c optional ibcs2 i386/ibcs2/ibcs2_stat.c optional ibcs2 i386/ibcs2/ibcs2_sysent.c optional ibcs2 i386/ibcs2/ibcs2_sysi86.c optional ibcs2 i386/ibcs2/ibcs2_sysvec.c optional ibcs2 i386/ibcs2/ibcs2_util.c optional ibcs2 i386/ibcs2/ibcs2_xenix.c optional ibcs2 i386/ibcs2/ibcs2_xenix_sysent.c optional ibcs2 i386/ibcs2/imgact_coff.c optional ibcs2 i386/isa/elink.c optional ep | ie -i386/isa/npx.c optional npx +i386/isa/npx.c standard i386/isa/pmtimer.c optional pmtimer i386/isa/prof_machdep.c optional profiling-routine i386/linux/imgact_linux.c optional compat_linux i386/linux/linux_dummy.c optional compat_linux i386/linux/linux_machdep.c optional compat_linux i386/linux/linux_ptrace.c optional compat_linux i386/linux/linux_support.s optional compat_linux \ dependency "linux_assym.h" i386/linux/linux_sysent.c optional compat_linux i386/linux/linux_sysvec.c optional compat_linux i386/pci/pci_cfgreg.c optional pci i386/pci/pci_pir.c optional pci i386/svr4/svr4_locore.s optional compat_svr4 \ dependency "svr4_assym.h" \ warning "COMPAT_SVR4 is broken and should be avoided" i386/svr4/svr4_machdep.c optional compat_svr4 # isa/syscons_isa.c optional sc isa/vga_isa.c optional vga kern/kern_clocksource.c standard kern/imgact_aout.c optional compat_aout kern/imgact_gzip.c optional gzip kern/subr_sfbuf.c standard crc32_sse42.o standard \ dependency "$S/libkern/x86/crc32_sse42.c" \ compile-with "${CC} -c ${CFLAGS:N-nostdinc} ${WERROR} ${PROF} -msse4 ${.IMPSRC}" \ no-implicit-rule \ clean "crc32_sse42.o" libkern/divdi3.c standard libkern/ffsll.c standard libkern/flsll.c standard libkern/memmove.c standard 
libkern/memset.c standard libkern/moddi3.c standard libkern/qdivrem.c standard libkern/ucmpdi2.c standard libkern/udivdi3.c standard libkern/umoddi3.c standard i386/xbox/xbox.c optional xbox i386/xbox/xboxfb.c optional xboxfb dev/fb/boot_font.c optional xboxfb i386/xbox/pic16l.s optional xbox # # x86 real mode BIOS support, required by dpms/pci/vesa # compat/x86bios/x86bios.c optional x86bios | dpms | pci | vesa # # bvm console # dev/bvm/bvm_console.c optional bvmconsole dev/bvm/bvm_dbg.c optional bvmdebug # # x86 shared code between IA32 and AMD64 architectures # x86/acpica/OsdEnvironment.c optional acpi x86/acpica/acpi_apm.c optional acpi x86/acpica/acpi_wakeup.c optional acpi x86/acpica/madt.c optional acpi apic x86/acpica/srat.c optional acpi x86/bios/smbios.c optional smbios x86/bios/vpd.c optional vpd x86/cpufreq/est.c optional cpufreq x86/cpufreq/hwpstate.c optional cpufreq x86/cpufreq/p4tcc.c optional cpufreq x86/cpufreq/powernow.c optional cpufreq x86/cpufreq/smist.c optional cpufreq x86/iommu/busdma_dmar.c optional acpi acpi_dmar pci x86/iommu/intel_ctx.c optional acpi acpi_dmar pci x86/iommu/intel_drv.c optional acpi acpi_dmar pci x86/iommu/intel_fault.c optional acpi acpi_dmar pci x86/iommu/intel_gas.c optional acpi acpi_dmar pci x86/iommu/intel_idpgtbl.c optional acpi acpi_dmar pci x86/iommu/intel_intrmap.c optional acpi acpi_dmar pci x86/iommu/intel_qi.c optional acpi acpi_dmar pci x86/iommu/intel_quirks.c optional acpi acpi_dmar pci x86/iommu/intel_utils.c optional acpi acpi_dmar pci x86/isa/atpic.c optional atpic x86/isa/atrtc.c standard x86/isa/clock.c standard x86/isa/elcr.c optional atpic | apic x86/isa/isa.c optional isa x86/isa/isa_dma.c optional isa x86/isa/nmi.c standard x86/isa/orm.c optional isa x86/pci/pci_bus.c optional pci x86/pci/qpi.c optional pci x86/x86/autoconf.c standard x86/x86/bus_machdep.c standard x86/x86/busdma_bounce.c standard x86/x86/busdma_machdep.c standard x86/x86/cpu_machdep.c standard x86/x86/dump_machdep.c standard x86/x86/fdt_machdep.c optional fdt x86/x86/identcpu.c standard x86/x86/intr_machdep.c standard x86/x86/io_apic.c optional apic x86/x86/legacy.c standard x86/x86/local_apic.c optional apic x86/x86/mca.c standard x86/x86/mptable.c optional apic x86/x86/mptable_pci.c optional apic pci x86/x86/mp_x86.c optional smp x86/x86/mp_watchdog.c optional mp_watchdog smp x86/x86/msi.c optional apic pci x86/x86/nexus.c standard x86/x86/stack_machdep.c optional ddb | stack x86/x86/tsc.c standard x86/x86/pvclock.c standard x86/x86/delay.c standard x86/xen/hvm.c optional xenhvm x86/xen/xen_intr.c optional xenhvm x86/xen/xen_apic.c optional xenhvm x86/xen/xenpv.c optional xenhvm x86/xen/xen_nexus.c optional xenhvm x86/xen/xen_msi.c optional xenhvm Index: projects/ipsec/sys/conf/options.i386 =================================================================== --- projects/ipsec/sys/conf/options.i386 (revision 313186) +++ projects/ipsec/sys/conf/options.i386 (revision 313187) @@ -1,127 +1,124 @@ # $FreeBSD$ # Options specific to the i386 platform kernels AUTO_EOI_1 opt_auto_eoi.h AUTO_EOI_2 opt_auto_eoi.h BROKEN_KEYBOARD_RESET opt_reset.h COUNT_XINVLTLB_HITS opt_smp.h COUNT_IPIS opt_smp.h DISABLE_PG_G opt_pmap.h DISABLE_PSE opt_pmap.h I586_PMC_GUPROF opt_i586_guprof.h MAXMEM MPTABLE_FORCE_HTT MP_WATCHDOG NKPT opt_pmap.h PERFMON PMAP_SHPGPERPROC opt_pmap.h POWERFAIL_NMI opt_trap.h PV_STATS opt_pmap.h # Options for emulators. 
These should only be used at config time, so # they are handled like options for static filesystems # (see src/sys/conf/options), except for broken debugging options. COMPAT_AOUT opt_dontuse.h IBCS2 opt_dontuse.h COMPAT_LINUX opt_dontuse.h COMPAT_SVR4 opt_dontuse.h DEBUG_SVR4 opt_svr4.h LINPROCFS opt_dontuse.h LINSYSFS opt_dontuse.h NDISAPI opt_dontuse.h # Change KVM size. Changes things all over the kernel. KVA_PAGES opt_global.h # Physical address extensions and support for >4G ram. As above. PAE opt_global.h # Use PAE page tables, but limit memory support to 4GB. # This keeps the i386 non-PAE KBI, in particular, drivers see # 32bit vm_paddr_t. PAE_TABLES opt_global.h TIMER_FREQ opt_clock.h CPU_ATHLON_SSE_HACK opt_cpu.h CPU_BLUELIGHTNING_3X opt_cpu.h CPU_BLUELIGHTNING_FPU_OP_CACHE opt_cpu.h CPU_BTB_EN opt_cpu.h CPU_CYRIX_NO_LOCK opt_cpu.h CPU_DIRECT_MAPPED_CACHE opt_cpu.h CPU_DISABLE_5X86_LSSER opt_cpu.h -CPU_DISABLE_CMPXCHG opt_global.h # XXX global, unlike other CPU_* -CPU_DISABLE_SSE opt_cpu.h CPU_ELAN opt_cpu.h CPU_ELAN_PPS opt_cpu.h CPU_ELAN_XTAL opt_cpu.h CPU_ENABLE_LONGRUN opt_cpu.h CPU_FASTER_5X86_FPU opt_cpu.h CPU_GEODE opt_cpu.h CPU_I486_ON_386 opt_cpu.h CPU_IORT opt_cpu.h CPU_L2_LATENCY opt_cpu.h CPU_LOOP_EN opt_cpu.h CPU_PPRO2CELERON opt_cpu.h CPU_RSTK_EN opt_cpu.h CPU_SOEKRIS opt_cpu.h CPU_SUSP_HLT opt_cpu.h CPU_UPGRADE_HW_CACHE opt_cpu.h CPU_WT_ALLOC opt_cpu.h CYRIX_CACHE_REALLY_WORKS opt_cpu.h CYRIX_CACHE_WORKS opt_cpu.h NO_F00F_HACK opt_cpu.h NO_MEMORY_HOLE opt_cpu.h # The CPU type affects the endian conversion functions all over the kernel. I486_CPU opt_global.h I586_CPU opt_global.h I686_CPU opt_global.h # options for serial support COM_ESP opt_sio.h COM_MULTIPORT opt_sio.h CONSPEED opt_sio.h GDBSPEED opt_sio.h COM_NO_ACPI opt_sio.h VGA_ALT_SEQACCESS opt_vga.h VGA_DEBUG opt_vga.h VGA_NO_FONT_LOADING opt_vga.h VGA_NO_MODE_CHANGE opt_vga.h VGA_SLOW_IOACCESS opt_vga.h VGA_WIDTH90 opt_vga.h VESA VESA_DEBUG opt_vesa.h # AGP debugging support AGP_DEBUG opt_agp.h PSM_DEBUG opt_psm.h PSM_HOOKRESUME opt_psm.h PSM_RESETAFTERSUSPEND opt_psm.h ATKBD_DFLT_KEYMAP opt_atkbd.h # Video spigot SPIGOT_UNSECURE opt_spigot.h # Enables NETGRAPH support for Cronyx adapters NETGRAPH_CRONYX opt_ng_cronyx.h # Device options DEV_APIC opt_apic.h DEV_ATPIC opt_atpic.h -DEV_NPX opt_npx.h # Debugging NPX_DEBUG opt_npx.h # BPF just-in-time compiler BPF_JITTER opt_bpf.h XENHVM opt_global.h # options for the Intel C600 SAS driver (isci) ISCI_LOGGING opt_isci.h Index: projects/ipsec/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c =================================================================== --- projects/ipsec/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c (revision 313186) +++ projects/ipsec/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c (revision 313187) @@ -1,806 +1,1002 @@ /* * Copyright (c) 2012, 2013 Adrian Chadd . * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include "opt_ah.h" #include "ah.h" #include "ah_internal.h" #include "ah_devid.h" #include "ah_desc.h" #include "ar9300.h" #include "ar9300reg.h" #include "ar9300phy.h" #include "ar9300desc.h" #include "ar9300_freebsd.h" #include "ar9300_stub.h" #include "ar9300_stub_funcs.h" #define FIX_NOISE_FLOOR 1 #define NEXT_TBTT_NOW 5 static HAL_BOOL ar9300ClrMulticastFilterIndex(struct ath_hal *ah, uint32_t ix); static HAL_BOOL ar9300SetMulticastFilterIndex(struct ath_hal *ah, uint32_t ix); static void ar9300_beacon_set_beacon_timers(struct ath_hal *ah, const HAL_BEACON_TIMERS *bt); static void ar9300SetChainMasks(struct ath_hal *ah, uint32_t tx_chainmask, uint32_t rx_chainmask) { AH9300(ah)->ah_tx_chainmask = tx_chainmask & AH_PRIVATE(ah)->ah_caps.halTxChainMask; AH9300(ah)->ah_rx_chainmask = rx_chainmask & AH_PRIVATE(ah)->ah_caps.halRxChainMask; } static u_int ar9300GetSlotTime(struct ath_hal *ah) { u_int clks = OS_REG_READ(ah, AR_D_GBL_IFS_SLOT) & 0xffff; return (ath_hal_mac_usec(ah, clks)); /* convert from system clocks */ } static HAL_BOOL ar9300_freebsd_set_tx_power_limit(struct ath_hal *ah, uint32_t limit) { return (ar9300_set_tx_power_limit(ah, limit, 0, 0)); } static uint64_t ar9300_get_next_tbtt(struct ath_hal *ah) { return (OS_REG_READ(ah, AR_NEXT_TBTT_TIMER)); } /* * TODO: implement the antenna diversity control for AR9485 and * other LNA mixing based NICs. * * For now we'll just go with the HAL default and make these no-ops. */ static HAL_ANT_SETTING ar9300_freebsd_get_antenna_switch(struct ath_hal *ah) { return (HAL_ANT_VARIABLE); } static HAL_BOOL ar9300_freebsd_set_antenna_switch(struct ath_hal *ah, HAL_ANT_SETTING setting) { return (AH_TRUE); } static u_int ar9300_freebsd_get_cts_timeout(struct ath_hal *ah) { u_int clks = MS(OS_REG_READ(ah, AR_TIME_OUT), AR_TIME_OUT_CTS); return ath_hal_mac_usec(ah, clks); /* convert from system clocks */ } static void ar9300_freebsd_set_tsf64(struct ath_hal *ah, uint64_t tsf64) { /* * XXX TODO: read ar5416SetTsf64() - we should wait before we do * this. 
*/ OS_REG_WRITE(ah, AR_TSF_L32, tsf64 & 0xffffffff); OS_REG_WRITE(ah, AR_TSF_U32, (tsf64 >> 32) & 0xffffffff); } +/* Flags for pulse_bw_info */ +#define PRI_CH_RADAR_FOUND 0x01 +#define EXT_CH_RADAR_FOUND 0x02 +#define EXT_CH_RADAR_EARLY_FOUND 0x04 + +static HAL_BOOL +ar9300_freebsd_proc_radar_event(struct ath_hal *ah, struct ath_rx_status *rxs, + uint64_t fulltsf, const char *buf, HAL_DFS_EVENT *event) +{ + HAL_BOOL doDfsExtCh; + HAL_BOOL doDfsEnhanced; + HAL_BOOL doDfsCombinedRssi; + + uint8_t rssi = 0, ext_rssi = 0; + uint8_t pulse_bw_info = 0, pulse_length_ext = 0, pulse_length_pri = 0; + uint32_t dur = 0; + int pri_found = 1, ext_found = 0; + int early_ext = 0; + int is_dc = 0; + uint16_t datalen; /* length from the RX status field */ + + /* Check whether the given phy error is a radar event */ + if ((rxs->rs_phyerr != HAL_PHYERR_RADAR) && + (rxs->rs_phyerr != HAL_PHYERR_FALSE_RADAR_EXT)) { + return AH_FALSE; + } + + /* Grab copies of the capabilities; just to make the code clearer */ + doDfsExtCh = AH_PRIVATE(ah)->ah_caps.halExtChanDfsSupport; + doDfsEnhanced = AH_PRIVATE(ah)->ah_caps.halEnhancedDfsSupport; + doDfsCombinedRssi = AH_PRIVATE(ah)->ah_caps.halUseCombinedRadarRssi; + + datalen = rxs->rs_datalen; + + /* If hardware supports it, use combined RSSI, else use chain 0 RSSI */ + if (doDfsCombinedRssi) + rssi = (uint8_t) rxs->rs_rssi; + else + rssi = (uint8_t) rxs->rs_rssi_ctl[0]; + + /* Set this; but only use it if doDfsExtCh is set */ + ext_rssi = (uint8_t) rxs->rs_rssi_ext[0]; + + /* Cap it at 0 if the RSSI is a negative number */ + if (rssi & 0x80) + rssi = 0; + + if (ext_rssi & 0x80) + ext_rssi = 0; + + /* + * Fetch the relevant data from the frame + */ + if (doDfsExtCh) { + if (datalen < 3) + return AH_FALSE; + + /* Last three bytes of the frame are of interest */ + pulse_length_pri = *(buf + datalen - 3); + pulse_length_ext = *(buf + datalen - 2); + pulse_bw_info = *(buf + datalen - 1); + HALDEBUG(ah, HAL_DEBUG_DFS, "%s: rssi=%d, ext_rssi=%d, pulse_length_pri=%d," + " pulse_length_ext=%d, pulse_bw_info=%x\n", + __func__, rssi, ext_rssi, pulse_length_pri, pulse_length_ext, + pulse_bw_info); + } else { + /* The pulse width is byte 0 of the data */ + if (datalen >= 1) + dur = ((uint8_t) buf[0]) & 0xff; + else + dur = 0; + + if (dur == 0 && rssi == 0) { + HALDEBUG(ah, HAL_DEBUG_DFS, "%s: dur and rssi are 0\n", __func__); + return AH_FALSE; + } + + HALDEBUG(ah, HAL_DEBUG_DFS, "%s: rssi=%d, dur=%d\n", __func__, rssi, dur); + + /* Single-channel only */ + pri_found = 1; + ext_found = 0; + } + + /* + * If doing extended channel data, pulse_bw_info must + * have one of the flags set. + */ + if (doDfsExtCh && pulse_bw_info == 0x0) + return AH_FALSE; + + /* + * If the extended channel data is available, calculate + * which to pay attention to. 
+ */ + if (doDfsExtCh) { + /* If pulse is on DC, take the larger duration of the two */ + if ((pulse_bw_info & EXT_CH_RADAR_FOUND) && + (pulse_bw_info & PRI_CH_RADAR_FOUND)) { + is_dc = 1; + if (pulse_length_ext > pulse_length_pri) { + dur = pulse_length_ext; + pri_found = 0; + ext_found = 1; + } else { + dur = pulse_length_pri; + pri_found = 1; + ext_found = 0; + } + } else if (pulse_bw_info & EXT_CH_RADAR_EARLY_FOUND) { + dur = pulse_length_ext; + pri_found = 0; + ext_found = 1; + early_ext = 1; + } else if (pulse_bw_info & PRI_CH_RADAR_FOUND) { + dur = pulse_length_pri; + pri_found = 1; + ext_found = 0; + } else if (pulse_bw_info & EXT_CH_RADAR_FOUND) { + dur = pulse_length_ext; + pri_found = 0; + ext_found = 1; + } + + } + + /* + * For enhanced DFS (Merlin and later), pulse_bw_info has + * implications for selecting the correct RSSI value. + */ + if (doDfsEnhanced) { + switch (pulse_bw_info & 0x03) { + case 0: + /* No radar? */ + rssi = 0; + break; + case PRI_CH_RADAR_FOUND: + /* Radar in primary channel */ + /* Cannot use ctrl channel RSSI if ext channel is stronger */ + if (ext_rssi >= (rssi + 3)) { + rssi = 0; + } + break; + case EXT_CH_RADAR_FOUND: + /* Radar in extended channel */ + /* Cannot use ext channel RSSI if ctrl channel is stronger */ + if (rssi >= (ext_rssi + 12)) { + rssi = 0; + } else { + rssi = ext_rssi; + } + break; + case (PRI_CH_RADAR_FOUND | EXT_CH_RADAR_FOUND): + /* When both are present, use stronger one */ + if (rssi < ext_rssi) + rssi = ext_rssi; + break; + } + } + + /* + * If not doing enhanced DFS, choose the ext channel if + * it is stronger than the main channel + */ + if (doDfsExtCh && !doDfsEnhanced) { + if ((ext_rssi > rssi) && (ext_rssi < 128)) + rssi = ext_rssi; + } + + /* + * XXX what happens if the above code decides the RSSI + * XXX wasn't valid, and sets it to 0? + */ + + /* + * Fill out dfs_event structure.
+ */ + event->re_full_ts = fulltsf; + event->re_ts = rxs->rs_tstamp; + event->re_rssi = rssi; + event->re_dur = dur; + + event->re_flags = 0; + if (pri_found) + event->re_flags |= HAL_DFS_EVENT_PRICH; + if (ext_found) + event->re_flags |= HAL_DFS_EVENT_EXTCH; + if (early_ext) + event->re_flags |= HAL_DFS_EVENT_EXTEARLY; + if (is_dc) + event->re_flags |= HAL_DFS_EVENT_ISDC; + + return AH_TRUE; +} + void ar9300_attach_freebsd_ops(struct ath_hal *ah) { /* Global functions */ ah->ah_detach = ar9300_detach; ah->ah_getRateTable = ar9300_get_rate_table; /* Reset functions */ ah->ah_reset = ar9300_reset_freebsd; ah->ah_phyDisable = ar9300_phy_disable; ah->ah_disable = ar9300_disable; ah->ah_configPCIE = ar9300_config_pcie_freebsd; // ah->ah_disablePCIE = ar9300_disable_pcie_phy; ah->ah_setPCUConfig = ar9300_set_pcu_config; // perCalibration ah->ah_perCalibrationN = ar9300_per_calibration_freebsd; ah->ah_resetCalValid = ar9300_reset_cal_valid_freebsd; ah->ah_setTxPowerLimit = ar9300_freebsd_set_tx_power_limit; ah->ah_getChanNoise = ath_hal_getChanNoise; /* Transmit functions */ ah->ah_setupTxQueue = ar9300_setup_tx_queue; ah->ah_setTxQueueProps = ar9300_set_tx_queue_props; ah->ah_getTxQueueProps = ar9300_get_tx_queue_props; ah->ah_releaseTxQueue = ar9300_release_tx_queue; ah->ah_resetTxQueue = ar9300_reset_tx_queue; ah->ah_getTxDP = ar9300_get_tx_dp; ah->ah_setTxDP = ar9300_set_tx_dp; ah->ah_numTxPending = ar9300_num_tx_pending; ah->ah_startTxDma = ar9300_start_tx_dma; ah->ah_stopTxDma = ar9300_stop_tx_dma_freebsd; ah->ah_setupTxDesc = ar9300_freebsd_setup_tx_desc; ah->ah_setupXTxDesc = ar9300_freebsd_setup_x_tx_desc; ah->ah_fillTxDesc = ar9300_freebsd_fill_tx_desc; ah->ah_procTxDesc = ar9300_freebsd_proc_tx_desc; ah->ah_getTxIntrQueue = ar9300_get_tx_intr_queue; // reqTxIntrDesc ah->ah_getTxCompletionRates = ar9300_freebsd_get_tx_completion_rates; ah->ah_setTxDescLink = ar9300_set_desc_link; ah->ah_getTxDescLink = ar9300_freebsd_get_desc_link; ah->ah_getTxDescLinkPtr = ar9300_get_desc_link_ptr; ah->ah_setupTxStatusRing = ar9300_setup_tx_status_ring; ah->ah_getTxRawTxDesc = ar9300_get_raw_tx_desc; ah->ah_updateTxTrigLevel = ar9300_update_tx_trig_level; /* RX functions */ ah->ah_getRxDP = ar9300_get_rx_dp; ah->ah_setRxDP = ar9300_set_rx_dp; ah->ah_enableReceive = ar9300_enable_receive; ah->ah_stopDmaReceive = ar9300_stop_dma_receive_freebsd; ah->ah_startPcuReceive = ar9300_start_pcu_receive_freebsd; ah->ah_stopPcuReceive = ar9300_stop_pcu_receive; ah->ah_setMulticastFilter = ar9300_set_multicast_filter; ah->ah_setMulticastFilterIndex = ar9300SetMulticastFilterIndex; ah->ah_clrMulticastFilterIndex = ar9300ClrMulticastFilterIndex; ah->ah_getRxFilter = ar9300_get_rx_filter; ah->ah_setRxFilter = ar9300_set_rx_filter; /* setupRxDesc */ ah->ah_procRxDesc = ar9300_proc_rx_desc_freebsd; ah->ah_rxMonitor = ar9300_ani_rxmonitor_freebsd; ah->ah_aniPoll = ar9300_ani_poll_freebsd; ah->ah_procMibEvent = ar9300_process_mib_intr; /* Misc functions */ ah->ah_getCapability = ar9300_get_capability; ah->ah_setCapability = ar9300_set_capability; ah->ah_getDiagState = ar9300_get_diag_state; ah->ah_getMacAddress = ar9300_get_mac_address; ah->ah_setMacAddress = ar9300_set_mac_address; ah->ah_getBssIdMask = ar9300_get_bss_id_mask; ah->ah_setBssIdMask = ar9300_set_bss_id_mask; ah->ah_setRegulatoryDomain = ar9300_set_regulatory_domain; ah->ah_setLedState = ar9300_set_led_state; ah->ah_writeAssocid = ar9300_write_associd; ah->ah_gpioCfgInput = ar9300_gpio_cfg_input; ah->ah_gpioCfgOutput = ar9300_gpio_cfg_output; ah->ah_gpioGet 
= ar9300_gpio_get; ah->ah_gpioSet = ar9300_gpio_set; ah->ah_gpioSetIntr = ar9300_gpio_set_intr; /* polarity */ /* mask */ ah->ah_getTsf32 = ar9300_get_tsf32; ah->ah_getTsf64 = ar9300_get_tsf64; ah->ah_resetTsf = ar9300_reset_tsf; ah->ah_setTsf64 = ar9300_freebsd_set_tsf64; ah->ah_detectCardPresent = ar9300_detect_card_present; // ah->ah_updateMibCounters = ar9300_update_mib_counters; ah->ah_getRfGain = ar9300_get_rfgain; ah->ah_getDefAntenna = ar9300_get_def_antenna; ah->ah_setDefAntenna = ar9300_set_def_antenna; ah->ah_getAntennaSwitch = ar9300_freebsd_get_antenna_switch; ah->ah_setAntennaSwitch = ar9300_freebsd_set_antenna_switch; // ah->ah_setSifsTime = ar9300_set_sifs_time; // ah->ah_getSifsTime = ar9300_get_sifs_time; ah->ah_setSlotTime = ar9300_set_slot_time; ah->ah_getSlotTime = ar9300GetSlotTime; ah->ah_getAckTimeout = ar9300_get_ack_timeout; ah->ah_setAckTimeout = ar9300_set_ack_timeout; // XXX ack/ctsrate // XXX CTS timeout ah->ah_getCTSTimeout = ar9300_freebsd_get_cts_timeout; // XXX decompmask // coverageclass ah->ah_setQuiet = ar9300_set_quiet; ah->ah_getMibCycleCounts = ar9300_freebsd_get_mib_cycle_counts; /* DFS functions */ ah->ah_enableDfs = ar9300_enable_dfs; ah->ah_getDfsThresh = ar9300_get_dfs_thresh; ah->ah_getDfsDefaultThresh = ar9300_get_default_dfs_thresh; - // procradarevent + ah->ah_procRadarEvent = ar9300_freebsd_proc_radar_event; ah->ah_isFastClockEnabled = ar9300_is_fast_clock_enabled; - ah->ah_get11nExtBusy = ar9300_get_11n_ext_busy; + ah->ah_get11nExtBusy = ar9300_get_11n_ext_busy; + ah->ah_setDfsCacTxQuiet = ar9300_cac_tx_quiet; /* Spectral Scan Functions */ ah->ah_spectralConfigure = ar9300_configure_spectral_scan; ah->ah_spectralGetConfig = ar9300_get_spectral_params; ah->ah_spectralStart = ar9300_start_spectral_scan; ah->ah_spectralStop = ar9300_stop_spectral_scan; ah->ah_spectralIsEnabled = ar9300_is_spectral_enabled; ah->ah_spectralIsActive = ar9300_is_spectral_active; /* Key cache functions */ ah->ah_getKeyCacheSize = ar9300_get_key_cache_size; ah->ah_resetKeyCacheEntry = ar9300_reset_key_cache_entry; ah->ah_isKeyCacheEntryValid = ar9300_is_key_cache_entry_valid; ah->ah_setKeyCacheEntry = ar9300_set_key_cache_entry; ah->ah_setKeyCacheEntryMac = ar9300_set_key_cache_entry_mac; /* Power management functions */ ah->ah_setPowerMode = ar9300_set_power_mode; ah->ah_getPowerMode = ar9300_get_power_mode; /* Beacon functions */ /* ah_setBeaconTimers */ ah->ah_beaconInit = ar9300_freebsd_beacon_init; ah->ah_setBeaconTimers = ar9300_beacon_set_beacon_timers; ah->ah_setStationBeaconTimers = ar9300_set_sta_beacon_timers; /* ah_resetStationBeaconTimers */ ah->ah_getNextTBTT = ar9300_get_next_tbtt; /* Interrupt functions */ ah->ah_isInterruptPending = ar9300_is_interrupt_pending; ah->ah_getPendingInterrupts = ar9300_get_pending_interrupts_freebsd; ah->ah_getInterrupts = ar9300_get_interrupts; ah->ah_setInterrupts = ar9300_set_interrupts_freebsd; /* Regulatory/internal functions */ // AH_PRIVATE(ah)->ah_getNfAdjust = ar9300_get_nf_adjust; AH_PRIVATE(ah)->ah_eepromRead = ar9300_eeprom_read_word; // AH_PRIVATE(ah)->ah_getChipPowerLimits = ar9300_get_chip_power_limits; AH_PRIVATE(ah)->ah_getWirelessModes = ar9300_get_wireless_modes; AH_PRIVATE(ah)->ah_getChannelEdges = ar9300_get_channel_edges; AH_PRIVATE(ah)->ah_eepromRead = ar9300_eeprom_read_word; /* XXX ah_eeprom */ /* XXX ah_eeversion */ /* XXX ah_eepromDetach */ /* XXX ah_eepromGet */ AH_PRIVATE(ah)->ah_eepromGet = ar9300_eeprom_get_freebsd; /* XXX ah_eepromSet */ /* XXX ah_getSpurChan */ /* XXX ah_eepromDiag */ /* 
802.11n functions */ ah->ah_chainTxDesc = ar9300_freebsd_chain_tx_desc; ah->ah_setupFirstTxDesc= ar9300_freebsd_setup_first_tx_desc; ah->ah_setupLastTxDesc = ar9300_freebsd_setup_last_tx_desc; ah->ah_set11nRateScenario = ar9300_freebsd_set_11n_rate_scenario; ah->ah_set11nTxDesc = ar9300_freebsd_setup_11n_desc; ah->ah_set11nAggrFirst = ar9300_set_11n_aggr_first; ah->ah_set11nAggrMiddle = ar9300_set_11n_aggr_middle; ah->ah_set11nAggrLast = ar9300_set_11n_aggr_last; ah->ah_clr11nAggr = ar9300_clr_11n_aggr; ah->ah_set11nBurstDuration = ar9300_set_11n_burst_duration; /* ah_get11nExtBusy */ ah->ah_set11nMac2040 = ar9300_set_11n_mac2040; ah->ah_setChainMasks = ar9300SetChainMasks; /* ah_get11nRxClear */ /* ah_set11nRxClear */ /* bluetooth coexistence functions */ ah->ah_btCoexSetInfo = ar9300_set_bt_coex_info; ah->ah_btCoexSetConfig = ar9300_bt_coex_config; ah->ah_btCoexSetQcuThresh = ar9300_bt_coex_set_qcu_thresh; ah->ah_btCoexSetWeights = ar9300_bt_coex_set_weights; ah->ah_btCoexSetBmissThresh = ar9300_bt_coex_setup_bmiss_thresh; ah->ah_btCoexSetParameter = ar9300_bt_coex_set_parameter; ah->ah_btCoexDisable = ar9300_bt_coex_disable; ah->ah_btCoexEnable = ar9300_bt_coex_enable; /* MCI bluetooth functions */ if (AR_SREV_JUPITER(ah) || AR_SREV_APHRODITE(ah)) { /* * Note: these are done in attach too for now, because * at this point we haven't yet setup the mac/bb revision * values, so this code is effectively NULL. * However, I'm leaving this here so people digging * into the code (a) see the MCI bits here, and (b) * are now told they should look elsewhere for * these methods. */ ah->ah_btCoexSetWeights = ar9300_mci_bt_coex_set_weights; ah->ah_btCoexDisable = ar9300_mci_bt_coex_disable; ah->ah_btCoexEnable = ar9300_mci_bt_coex_enable; } ah->ah_btMciSetup = ar9300_mci_setup; ah->ah_btMciSendMessage = ar9300_mci_send_message; ah->ah_btMciGetInterrupt = ar9300_mci_get_interrupt; ah->ah_btMciState = ar9300_mci_state; ah->ah_btMciDetach = ar9300_mci_detach; /* LNA diversity functions */ ah->ah_divLnaConfGet = ar9300_ant_div_comb_get_config; ah->ah_divLnaConfSet = ar9300_ant_div_comb_set_config; } HAL_BOOL ar9300_reset_freebsd(struct ath_hal *ah, HAL_OPMODE opmode, struct ieee80211_channel *chan, HAL_BOOL bChannelChange, HAL_RESET_TYPE resetType, HAL_STATUS *status) { HAL_BOOL r; HAL_HT_MACMODE macmode; struct ath_hal_private *ap = AH_PRIVATE(ah); macmode = IEEE80211_IS_CHAN_HT40(chan) ? HAL_HT_MACMODE_2040 : HAL_HT_MACMODE_20; r = ar9300_reset(ah, opmode, chan, macmode, ap->ah_caps.halTxChainMask, ap->ah_caps.halRxChainMask, HAL_HT_EXTPROTSPACING_20, /* always 20Mhz channel spacing */ bChannelChange, status, AH_FALSE); /* XXX should really extend ath_hal_reset() */ return (r); } void ar9300_config_pcie_freebsd(struct ath_hal *ah, HAL_BOOL restore, HAL_BOOL powerOff) { ar9300_config_pci_power_save(ah, restore ? 1 : 0, powerOff ? 1 : 0); } /* * This is a copy from ar9300_eeprom_get(), purely because the FreeBSD * API is very silly and inconsistent. * * The AR93xx HAL doesn't call the eepromGetFlag() function, so this * only occurs for FreeBSD code. * * When I fix this particular API, I'll undo this. 
*/ HAL_STATUS ar9300_eeprom_get_freebsd(struct ath_hal *ah, int param, void *val) { switch (param) { case AR_EEP_FSTCLK_5G: return HAL_OK; default: ath_hal_printf(ah, "%s: called, param=%d\n", __func__, param); return HAL_EIO; } } HAL_BOOL ar9300_stop_tx_dma_freebsd(struct ath_hal *ah, u_int q) { return ar9300_stop_tx_dma(ah, q, 1000); } void ar9300_ani_poll_freebsd(struct ath_hal *ah, const struct ieee80211_channel *chan) { HAL_NODE_STATS stats; HAL_ANISTATS anistats; HAL_SURVEY_SAMPLE survey; OS_MEMZERO(&stats, sizeof(stats)); OS_MEMZERO(&anistats, sizeof(anistats)); OS_MEMZERO(&survey, sizeof(survey)); ar9300_ani_ar_poll(ah, &stats, chan, &anistats); /* * If ANI stats are valid, use them to update the * channel survey. */ if (anistats.valid) { survey.cycle_count = anistats.cyclecnt_diff; survey.chan_busy = anistats.rxclr_cnt; survey.ext_chan_busy = anistats.extrxclr_cnt; survey.tx_busy = anistats.txframecnt_diff; survey.rx_busy = anistats.rxframecnt_diff; ath_hal_survey_add_sample(ah, &survey); } } /* * Setup the configuration parameters in the style the AR9300 HAL * wants. */ void ar9300_config_defaults_freebsd(struct ath_hal *ah, HAL_OPS_CONFIG *ah_config) { /* Until FreeBSD's HAL does this by default - just copy */ OS_MEMCPY(&ah->ah_config, ah_config, sizeof(HAL_OPS_CONFIG)); ah->ah_config.ath_hal_enable_ani = AH_TRUE; } HAL_BOOL ar9300_stop_dma_receive_freebsd(struct ath_hal *ah) { return ar9300_stop_dma_receive(ah, 1000); } HAL_BOOL ar9300_get_pending_interrupts_freebsd(struct ath_hal *ah, HAL_INT *masked) { /* Non-MSI, so no MSI vector; and 'nortc' = 0 */ return ar9300_get_pending_interrupts(ah, masked, HAL_INT_LINE, 0, 0); } HAL_INT ar9300_set_interrupts_freebsd(struct ath_hal *ah, HAL_INT ints) { /* nortc = 0 */ return ar9300_set_interrupts(ah, ints, 0); } HAL_BOOL ar9300_per_calibration_freebsd(struct ath_hal *ah, struct ieee80211_channel *chan, u_int rxchainmask, HAL_BOOL long_cal, HAL_BOOL *isCalDone) { /* XXX fake scheduled calibrations for now */ u_int32_t sched_cals = 0xfffffff; return ar9300_calibration(ah, chan, AH_PRIVATE(ah)->ah_caps.halRxChainMask, long_cal, isCalDone, 0, /* is_scan */ &sched_cals); } HAL_BOOL ar9300_reset_cal_valid_freebsd(struct ath_hal *ah, const struct ieee80211_channel *chan) { HAL_BOOL is_cal_done = AH_TRUE; ar9300_reset_cal_valid(ah, chan, &is_cal_done, 0xffffffff); return (is_cal_done); } void ar9300_start_pcu_receive_freebsd(struct ath_hal *ah) { /* is_scanning flag == NULL */ ar9300_start_pcu_receive(ah, AH_FALSE); } /* * FreeBSD will just pass in the descriptor value as 'pa'. * The Atheros HAL treats 'pa' as the physical address of the RX * descriptor and 'bufaddr' as the physical address of the RX buffer. * I'm not sure why they didn't collapse them - the AR9300 RX descriptor * routine doesn't check 'pa'. */ HAL_STATUS ar9300_proc_rx_desc_freebsd(struct ath_hal *ah, struct ath_desc *ds, uint32_t pa, struct ath_desc *ds_next, uint64_t tsf, struct ath_rx_status *rxs) { return (ar9300_proc_rx_desc_fast(ah, ds, 0, ds_next, rxs, (void *) ds)); } void ar9300_ani_rxmonitor_freebsd(struct ath_hal *ah, const HAL_NODE_STATS *stats, const struct ieee80211_channel *chan) { } void ar9300_freebsd_get_desc_link(struct ath_hal *ah, void *ds, uint32_t *link) { struct ar9300_txc *ads = AR9300TXC(ds); (*link) = ads->ds_link; } /* * TX descriptor field setting wrappers - eek. 
*/ HAL_BOOL ar9300_freebsd_setup_tx_desc(struct ath_hal *ah, struct ath_desc *ds, u_int pktLen, u_int hdrLen, HAL_PKT_TYPE type, u_int txPower, u_int txRate0, u_int txTries0, u_int keyIx, u_int antMode, u_int flags, u_int rtsctsRate, u_int rtsCtsDuration, u_int compicvLen, u_int compivLen, u_int comp) { struct ath_hal_9300 *ahp = AH9300(ah); HAL_KEY_TYPE keyType = 0; /* XXX No padding */ if (keyIx != HAL_TXKEYIX_INVALID) keyType = ahp->ah_keytype[keyIx]; /* XXX bounds check keyix */ ar9300_set_11n_tx_desc(ah, ds, pktLen, type, txPower, keyIx, keyType, flags); return AH_TRUE; } HAL_BOOL ar9300_freebsd_setup_x_tx_desc(struct ath_hal *ah, struct ath_desc *ds, u_int txRate1, u_int txTries1, u_int txRate2, u_int txTries2, u_int txRate3, u_int txTries3) { #if 0 ath_hal_printf(ah, "%s: called, 0x%x/%d, 0x%x/%d, 0x%x/%d\n", __func__, txRate1, txTries1, txRate2, txTries2, txRate3, txTries3); #endif /* XXX should only be called during probe */ return (AH_TRUE); } HAL_BOOL ar9300_freebsd_fill_tx_desc(struct ath_hal *ah, struct ath_desc *ds, HAL_DMA_ADDR *bufListPtr, uint32_t *segLenPtr, u_int descId, u_int qid, HAL_BOOL firstSeg, HAL_BOOL lastSeg, const struct ath_desc *ds0) { HAL_KEY_TYPE keyType = 0; const struct ar9300_txc *ads = AR9300TXC_CONST(ds0); /* * FreeBSD's HAL doesn't pass the keytype to fill_tx_desc(); * it's copied as part of the descriptor chaining. * * So, extract it from ds0. */ keyType = MS(ads->ds_ctl17, AR_encr_type); return ar9300_fill_tx_desc(ah, ds, bufListPtr, segLenPtr, descId, qid, keyType, firstSeg, lastSeg, ds0); } HAL_BOOL ar9300_freebsd_get_tx_completion_rates(struct ath_hal *ah, const struct ath_desc *ds0, int *rates, int *tries) { ath_hal_printf(ah, "%s: called\n", __func__); return AH_FALSE; /* XXX for now */ } /* * 802.11n TX descriptor wrappers */ void ar9300_freebsd_set_11n_rate_scenario(struct ath_hal *ah, struct ath_desc *ds, u_int durUpdateEn, u_int rtsctsRate, HAL_11N_RATE_SERIES series[], u_int nseries, u_int flags) { /* lastds=NULL, rtscts_duration is 0, smart antenna is 0 */ ar9300_set_11n_rate_scenario(ah, (void *) ds, (void *)ds, durUpdateEn, rtsctsRate, 0, series, nseries, flags, 0); } /* chaintxdesc */ HAL_BOOL ar9300_freebsd_chain_tx_desc(struct ath_hal *ah, struct ath_desc *ds, HAL_DMA_ADDR *bufLenList, uint32_t *segLenList, u_int pktLen, u_int hdrLen, HAL_PKT_TYPE type, u_int keyIx, HAL_CIPHER cipher, uint8_t numDelims, HAL_BOOL firstSeg, HAL_BOOL lastSeg, HAL_BOOL lastAggr) { ath_hal_printf(ah, "%s: called\n", __func__); return AH_FALSE; } /* setupfirsttxdesc */ HAL_BOOL ar9300_freebsd_setup_first_tx_desc(struct ath_hal *ah, struct ath_desc *ds, u_int aggrLen, u_int flags, u_int txPower, u_int txRate0, u_int txTries0, u_int antMode, u_int rtsctsRate, u_int rtsctsDuration) { ath_hal_printf(ah, "%s: called\n", __func__); return AH_FALSE; } /* setuplasttxdesc */ /* * This gets called but for now let's not log anything; * it's only used to update the rate control information. 
*/
HAL_BOOL
ar9300_freebsd_setup_last_tx_desc(struct ath_hal *ah, struct ath_desc *ds,
    const struct ath_desc *ds0)
{
    // ath_hal_printf(ah, "%s: called\n", __func__);
    return AH_FALSE;
}

void
ar9300_freebsd_setup_11n_desc(struct ath_hal *ah, void *ds, u_int pktLen,
    HAL_PKT_TYPE type, u_int txPower, u_int keyIx, u_int flags)
{
    ath_hal_printf(ah, "%s: called\n", __func__);
#if 0
    struct ath_hal_9300 *ahp = AH9300(ah);
    HAL_KEY_TYPE keyType = 0;   /* XXX No padding */

    if (keyIx != HAL_TXKEYIX_INVALID)
        keyType = ahp->ah_keytype[keyIx];   /* XXX bounds check keyix */

    ar9300_set_11n_tx_desc(ah, ds, pktLen, type, txPower, keyIx,
        keyType, flags);
#endif
}

HAL_STATUS
ar9300_freebsd_proc_tx_desc(struct ath_hal *ah, struct ath_desc *ds,
    struct ath_tx_status *ts)
{
    return ar9300_proc_tx_desc(ah, ts);
}

void
ar9300_freebsd_beacon_init(struct ath_hal *ah, uint32_t next_beacon,
    uint32_t beacon_period)
{
    ar9300_beacon_init(ah, next_beacon, beacon_period, 0,
        AH_PRIVATE(ah)->ah_opmode);
}

HAL_BOOL
ar9300_freebsd_get_mib_cycle_counts(struct ath_hal *ah, HAL_SURVEY_SAMPLE *hs)
{
    return (AH_FALSE);
}

/*
 * Clear multicast filter by index - from FreeBSD ar5212_recv.c
 */
static HAL_BOOL
ar9300ClrMulticastFilterIndex(struct ath_hal *ah, uint32_t ix)
{
    uint32_t val;

    if (ix >= 64)
        return (AH_FALSE);
    if (ix >= 32) {
        val = OS_REG_READ(ah, AR_MCAST_FIL1);
        OS_REG_WRITE(ah, AR_MCAST_FIL1, (val &~ (1<<(ix-32))));
    } else {
        val = OS_REG_READ(ah, AR_MCAST_FIL0);
        OS_REG_WRITE(ah, AR_MCAST_FIL0, (val &~ (1<<ix)));
    }
    return (AH_TRUE);
}

/*
 * Set multicast filter by index - from FreeBSD ar5212_recv.c
 */
static HAL_BOOL
ar9300SetMulticastFilterIndex(struct ath_hal *ah, uint32_t ix)
{
    uint32_t val;

    if (ix >= 64)
        return (AH_FALSE);
    if (ix >= 32) {
        val = OS_REG_READ(ah, AR_MCAST_FIL1);
        OS_REG_WRITE(ah, AR_MCAST_FIL1, (val | (1<<(ix-32))));
    } else {
        val = OS_REG_READ(ah, AR_MCAST_FIL0);
        OS_REG_WRITE(ah, AR_MCAST_FIL0, (val | (1<<ix)));
    }
    return (AH_TRUE);
}

#define TU_TO_USEC(_tu)             ((_tu) << 10)
#define ONE_EIGHTH_TU_TO_USEC(_tu8) ((_tu8) << 7)

static void
ar9300_beacon_set_beacon_timers(struct ath_hal *ah,
    const HAL_BEACON_TIMERS *bt)
{
    uint32_t bperiod;

    OS_REG_WRITE(ah, AR_NEXT_TBTT_TIMER, TU_TO_USEC(bt->bt_nexttbtt));
    OS_REG_WRITE(ah, AR_NEXT_DMA_BEACON_ALERT,
        ONE_EIGHTH_TU_TO_USEC(bt->bt_nextdba));
    OS_REG_WRITE(ah, AR_NEXT_SWBA, ONE_EIGHTH_TU_TO_USEC(bt->bt_nextswba));
    OS_REG_WRITE(ah, AR_NEXT_NDP_TIMER, TU_TO_USEC(bt->bt_nextatim));

    bperiod = TU_TO_USEC(bt->bt_intval & HAL_BEACON_PERIOD);
    AH9300(ah)->ah_beaconInterval = bt->bt_intval & HAL_BEACON_PERIOD;
    OS_REG_WRITE(ah, AR_BEACON_PERIOD, bperiod);
    OS_REG_WRITE(ah, AR_DMA_BEACON_PERIOD, bperiod);
    OS_REG_WRITE(ah, AR_SWBA_PERIOD, bperiod);
    OS_REG_WRITE(ah, AR_NDP_PERIOD, bperiod);

    /*
     * Reset TSF if required.
     */
    if (bt->bt_intval & HAL_BEACON_RESET_TSF)
        ar9300_reset_tsf(ah);

    /* enable timers */
    /* NB: flags == 0 handled specially for backwards compatibility */
    OS_REG_SET_BIT(ah, AR_TIMER_MODE,
        bt->bt_flags != 0 ? bt->bt_flags :
        AR_TBTT_TIMER_EN | AR_DBA_TIMER_EN | AR_SWBA_TIMER_EN);
}

/*
 * RF attach stubs
 */
static HAL_BOOL
rf9330_attach(struct ath_hal *ah, HAL_STATUS *status)
{
    (*status) = HAL_EINVAL;
    return (AH_FALSE);
}

static HAL_BOOL
rf9330_probe(struct ath_hal *ah)
{
    return (AH_FALSE);
}

AH_RF(RF9330, rf9330_probe, rf9330_attach);

Index: projects/ipsec/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_misc.c
===================================================================
--- projects/ipsec/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_misc.c	(revision 313186)
+++ projects/ipsec/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_misc.c	(revision 313187)
@@ -1,3925 +1,3926 @@
/* * Copyright (c) 2013 Qualcomm Atheros, Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
 * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */

#include "opt_ah.h"
#include "ah.h"
#include "ah_internal.h"
#include "ah_devid.h"
#ifdef AH_DEBUG
#include "ah_desc.h"    /* NB: for HAL_PHYERR* */
#endif
#include "ar9300/ar9300.h"
#include "ar9300/ar9300reg.h"
#include "ar9300/ar9300phy.h"
#include "ar9300/ar9300desc.h"

static u_int32_t ar9300_read_loc_timer(struct ath_hal *ah);

void
ar9300_get_hw_hangs(struct ath_hal *ah, hal_hw_hangs_t *hangs)
{
    struct ath_hal_9300 *ahp = AH9300(ah);

    *hangs = 0;

    if (ar9300_get_capability(ah, HAL_CAP_BB_RIFS_HANG, 0, AH_NULL) == HAL_OK) {
        *hangs |= HAL_RIFS_BB_HANG_WAR;
    }
    if (ar9300_get_capability(ah, HAL_CAP_BB_DFS_HANG, 0, AH_NULL) == HAL_OK) {
        *hangs |= HAL_DFS_BB_HANG_WAR;
    }
    if (ar9300_get_capability(ah, HAL_CAP_BB_RX_CLEAR_STUCK_HANG, 0, AH_NULL)
        == HAL_OK) {
        *hangs |= HAL_RX_STUCK_LOW_BB_HANG_WAR;
    }
    if (ar9300_get_capability(ah, HAL_CAP_MAC_HANG, 0, AH_NULL) == HAL_OK) {
        *hangs |= HAL_MAC_HANG_WAR;
    }
    if (ar9300_get_capability(ah, HAL_CAP_PHYRESTART_CLR_WAR, 0, AH_NULL)
        == HAL_OK) {
        *hangs |= HAL_PHYRESTART_CLR_WAR;
    }

    ahp->ah_hang_wars = *hangs;
}

/*
 * XXX FreeBSD: the HAL version of ath_hal_mac_usec() knows about
 * HT20, HT40, fast-clock, turbo mode, etc.
 */
static u_int
ar9300_mac_to_usec(struct ath_hal *ah, u_int clks)
{
#if 0
    const struct ieee80211_channel *chan = AH_PRIVATE(ah)->ah_curchan;

    if (chan && IEEE80211_IS_CHAN_HT40(chan)) {
        return (ath_hal_mac_usec(ah, clks) / 2);
    } else {
        return (ath_hal_mac_usec(ah, clks));
    }
#endif
    return (ath_hal_mac_usec(ah, clks));
}

u_int
ar9300_mac_to_clks(struct ath_hal *ah, u_int usecs)
{
#if 0
    const struct ieee80211_channel *chan = AH_PRIVATE(ah)->ah_curchan;

    if (chan && IEEE80211_IS_CHAN_HT40(chan)) {
        return (ath_hal_mac_clks(ah, usecs) * 2);
    } else {
        return (ath_hal_mac_clks(ah, usecs));
    }
#endif
    return (ath_hal_mac_clks(ah, usecs));
}

void
ar9300_get_mac_address(struct ath_hal *ah, u_int8_t *mac)
{
    struct ath_hal_9300 *ahp = AH9300(ah);

    OS_MEMCPY(mac, ahp->ah_macaddr, IEEE80211_ADDR_LEN);
}

HAL_BOOL
ar9300_set_mac_address(struct ath_hal *ah, const u_int8_t *mac)
{
    struct ath_hal_9300 *ahp = AH9300(ah);

    OS_MEMCPY(ahp->ah_macaddr, mac, IEEE80211_ADDR_LEN);
    return AH_TRUE;
}

void
ar9300_get_bss_id_mask(struct ath_hal *ah, u_int8_t *mask)
{
    struct ath_hal_9300 *ahp = AH9300(ah);

    OS_MEMCPY(mask, ahp->ah_bssid_mask, IEEE80211_ADDR_LEN);
}

HAL_BOOL
ar9300_set_bss_id_mask(struct ath_hal *ah, const u_int8_t *mask)
{
    struct ath_hal_9300 *ahp = AH9300(ah);

    /* save it since it must be rewritten on reset */
    OS_MEMCPY(ahp->ah_bssid_mask, mask, IEEE80211_ADDR_LEN);

    OS_REG_WRITE(ah, AR_BSSMSKL, LE_READ_4(ahp->ah_bssid_mask));
    OS_REG_WRITE(ah, AR_BSSMSKU, LE_READ_2(ahp->ah_bssid_mask + 4));
    return AH_TRUE;
}

/*
 * Attempt to change the card's operating regulatory domain to the given value
 * Returns: A_EINVAL for an unsupported regulatory domain.
* A_HARDWARE for an unwritable EEPROM or bad EEPROM version */ HAL_BOOL ar9300_set_regulatory_domain(struct ath_hal *ah, u_int16_t reg_domain, HAL_STATUS *status) { HAL_STATUS ecode; if (AH_PRIVATE(ah)->ah_currentRD == 0) { AH_PRIVATE(ah)->ah_currentRD = reg_domain; return AH_TRUE; } ecode = HAL_EIO; #if 0 bad: #endif if (status) { *status = ecode; } return AH_FALSE; } /* * Return the wireless modes (a,b,g,t) supported by hardware. * * This value is what is actually supported by the hardware * and is unaffected by regulatory/country code settings. * */ u_int ar9300_get_wireless_modes(struct ath_hal *ah) { return AH_PRIVATE(ah)->ah_caps.halWirelessModes; } /* * Set the interrupt and GPIO values so the ISR can disable RF * on a switch signal. Assumes GPIO port and interrupt polarity * are set prior to call. */ void ar9300_enable_rf_kill(struct ath_hal *ah) { /* TODO - can this really be above the hal on the GPIO interface for * TODO - the client only? */ struct ath_hal_9300 *ahp = AH9300(ah); if (AR_SREV_JUPITER(ah) || AR_SREV_APHRODITE(ah)) { /* Check RF kill GPIO before set/clear RFSILENT bits. */ if (ar9300_gpio_get(ah, ahp->ah_gpio_select) == ahp->ah_polarity) { OS_REG_SET_BIT(ah, AR_HOSTIF_REG(ah, AR_RFSILENT), AR_RFSILENT_FORCE); OS_REG_SET_BIT(ah, AR_PHY_TEST, RFSILENT_BB); } else { OS_REG_CLR_BIT(ah, AR_HOSTIF_REG(ah, AR_RFSILENT), AR_RFSILENT_FORCE); OS_REG_CLR_BIT(ah, AR_PHY_TEST, RFSILENT_BB); } } else { /* Connect rfsilent_bb_l to baseband */ OS_REG_SET_BIT(ah, AR_HOSTIF_REG(ah, AR_GPIO_INPUT_EN_VAL), AR_GPIO_INPUT_EN_VAL_RFSILENT_BB); /* Set input mux for rfsilent_bb_l to GPIO #0 */ OS_REG_CLR_BIT(ah, AR_HOSTIF_REG(ah, AR_GPIO_INPUT_MUX2), AR_GPIO_INPUT_MUX2_RFSILENT); OS_REG_SET_BIT(ah, AR_HOSTIF_REG(ah, AR_GPIO_INPUT_MUX2), (ahp->ah_gpio_select & 0x0f) << 4); /* * Configure the desired GPIO port for input and * enable baseband rf silence */ ath_hal_gpioCfgInput(ah, ahp->ah_gpio_select); OS_REG_SET_BIT(ah, AR_PHY_TEST, RFSILENT_BB); } /* * If radio disable switch connection to GPIO bit x is enabled * program GPIO interrupt. * If rfkill bit on eeprom is 1, setupeeprommap routine has already * verified that it is a later version of eeprom, it has a place for * rfkill bit and it is set to 1, indicating that GPIO bit x hardware * connection is present. */ /* * RFKill uses polling not interrupt, * disable interrupt to avoid Eee PC 2.6.21.4 hang up issue */ if (ath_hal_hasrfkill_int(ah)) { if (ahp->ah_gpio_bit == ar9300_gpio_get(ah, ahp->ah_gpio_select)) { /* switch already closed, set to interrupt upon open */ ar9300_gpio_set_intr(ah, ahp->ah_gpio_select, !ahp->ah_gpio_bit); } else { ar9300_gpio_set_intr(ah, ahp->ah_gpio_select, ahp->ah_gpio_bit); } } } /* * Change the LED blinking pattern to correspond to the connectivity */ void ar9300_set_led_state(struct ath_hal *ah, HAL_LED_STATE state) { static const u_int32_t ledbits[8] = { AR_CFG_LED_ASSOC_NONE, /* HAL_LED_RESET */ AR_CFG_LED_ASSOC_PENDING, /* HAL_LED_INIT */ AR_CFG_LED_ASSOC_PENDING, /* HAL_LED_READY */ AR_CFG_LED_ASSOC_PENDING, /* HAL_LED_SCAN */ AR_CFG_LED_ASSOC_PENDING, /* HAL_LED_AUTH */ AR_CFG_LED_ASSOC_ACTIVE, /* HAL_LED_ASSOC */ AR_CFG_LED_ASSOC_ACTIVE, /* HAL_LED_RUN */ AR_CFG_LED_ASSOC_NONE, }; OS_REG_RMW_FIELD(ah, AR_CFG_LED, AR_CFG_LED_ASSOC_CTL, ledbits[state]); } /* * Sets the Power LED on the cardbus without affecting the Network LED. */ void ar9300_set_power_led_state(struct ath_hal *ah, u_int8_t enabled) { u_int32_t val; val = enabled ? 
AR_CFG_LED_MODE_POWER_ON : AR_CFG_LED_MODE_POWER_OFF; OS_REG_RMW_FIELD(ah, AR_CFG_LED, AR_CFG_LED_POWER, val); } /* * Sets the Network LED on the cardbus without affecting the Power LED. */ void ar9300_set_network_led_state(struct ath_hal *ah, u_int8_t enabled) { u_int32_t val; val = enabled ? AR_CFG_LED_MODE_NETWORK_ON : AR_CFG_LED_MODE_NETWORK_OFF; OS_REG_RMW_FIELD(ah, AR_CFG_LED, AR_CFG_LED_NETWORK, val); } /* * Change association related fields programmed into the hardware. * Writing a valid BSSID to the hardware effectively enables the hardware * to synchronize its TSF to the correct beacons and receive frames coming * from that BSSID. It is called by the SME JOIN operation. */ void ar9300_write_associd(struct ath_hal *ah, const u_int8_t *bssid, u_int16_t assoc_id) { struct ath_hal_9300 *ahp = AH9300(ah); /* save bssid and assoc_id for restore on reset */ OS_MEMCPY(ahp->ah_bssid, bssid, IEEE80211_ADDR_LEN); ahp->ah_assoc_id = assoc_id; OS_REG_WRITE(ah, AR_BSS_ID0, LE_READ_4(ahp->ah_bssid)); OS_REG_WRITE(ah, AR_BSS_ID1, LE_READ_2(ahp->ah_bssid + 4) | ((assoc_id & 0x3fff) << AR_BSS_ID1_AID_S)); } /* * Get the current hardware tsf for stamlme */ u_int64_t ar9300_get_tsf64(struct ath_hal *ah) { u_int64_t tsf; /* XXX sync multi-word read? */ tsf = OS_REG_READ(ah, AR_TSF_U32); tsf = (tsf << 32) | OS_REG_READ(ah, AR_TSF_L32); return tsf; } void ar9300_set_tsf64(struct ath_hal *ah, u_int64_t tsf) { OS_REG_WRITE(ah, AR_TSF_L32, (tsf & 0xffffffff)); OS_REG_WRITE(ah, AR_TSF_U32, ((tsf >> 32) & 0xffffffff)); } /* * Get the current hardware tsf for stamlme */ u_int32_t ar9300_get_tsf32(struct ath_hal *ah) { return OS_REG_READ(ah, AR_TSF_L32); } u_int32_t ar9300_get_tsf2_32(struct ath_hal *ah) { return OS_REG_READ(ah, AR_TSF2_L32); } /* * Reset the current hardware tsf for stamlme. */ void ar9300_reset_tsf(struct ath_hal *ah) { int count; count = 0; while (OS_REG_READ(ah, AR_SLP32_MODE) & AR_SLP32_TSF_WRITE_STATUS) { count++; if (count > 10) { HALDEBUG(ah, HAL_DEBUG_RESET, "%s: AR_SLP32_TSF_WRITE_STATUS limit exceeded\n", __func__); break; } OS_DELAY(10); } OS_REG_WRITE(ah, AR_RESET_TSF, AR_RESET_TSF_ONCE); } /* * Set or clear hardware basic rate bit * Set hardware basic rate set if basic rate is found * and basic rate is equal or less than 2Mbps */ void ar9300_set_basic_rate(struct ath_hal *ah, HAL_RATE_SET *rs) { const struct ieee80211_channel *chan = AH_PRIVATE(ah)->ah_curchan; u_int32_t reg; u_int8_t xset; int i; if (chan == AH_NULL || !IEEE80211_IS_CHAN_CCK(chan)) { return; } xset = 0; for (i = 0; i < rs->rs_count; i++) { u_int8_t rset = rs->rs_rates[i]; /* Basic rate defined? */ if ((rset & 0x80) && (rset &= 0x7f) >= xset) { xset = rset; } } /* * Set the h/w bit to reflect whether or not the basic * rate is found to be equal or less than 2Mbps. */ reg = OS_REG_READ(ah, AR_STA_ID1); if (xset && xset / 2 <= 2) { OS_REG_WRITE(ah, AR_STA_ID1, reg | AR_STA_ID1_BASE_RATE_11B); } else { OS_REG_WRITE(ah, AR_STA_ID1, reg &~ AR_STA_ID1_BASE_RATE_11B); } } /* * Grab a semi-random value from hardware registers - may not * change often */ u_int32_t ar9300_get_random_seed(struct ath_hal *ah) { u_int32_t nf; nf = (OS_REG_READ(ah, AR_PHY(25)) >> 19) & 0x1ff; if (nf & 0x100) { nf = 0 - ((nf ^ 0x1ff) + 1); } return (OS_REG_READ(ah, AR_TSF_U32) ^ OS_REG_READ(ah, AR_TSF_L32) ^ nf); } /* * Detect if our card is present */ HAL_BOOL ar9300_detect_card_present(struct ath_hal *ah) { u_int16_t mac_version, mac_rev; u_int32_t v; /* * Read the Silicon Revision register and compare that * to what we read at attach time. 
 * If the same, we say
 * a card/device is present.
 */
    v = OS_REG_READ(ah, AR_HOSTIF_REG(ah, AR_SREV)) & AR_SREV_ID;
    if (v == 0xFF) {
        /* new SREV format */
        v = OS_REG_READ(ah, AR_HOSTIF_REG(ah, AR_SREV));
        /*
         * Include 6-bit Chip Type (masked to 0) to differentiate
         * from pre-Sowl versions
         */
        mac_version = (v & AR_SREV_VERSION2) >> AR_SREV_TYPE2_S;
        mac_rev = MS(v, AR_SREV_REVISION2);
    } else {
        mac_version = MS(v, AR_SREV_VERSION);
        mac_rev = v & AR_SREV_REVISION;
    }
    return (AH_PRIVATE(ah)->ah_macVersion == mac_version &&
        AH_PRIVATE(ah)->ah_macRev == mac_rev);
}

/*
 * Update MIB Counters
 */
void
ar9300_update_mib_mac_stats(struct ath_hal *ah)
{
    struct ath_hal_9300 *ahp = AH9300(ah);
    HAL_MIB_STATS* stats = &ahp->ah_stats.ast_mibstats;

    stats->ackrcv_bad += OS_REG_READ(ah, AR_ACK_FAIL);
    stats->rts_bad    += OS_REG_READ(ah, AR_RTS_FAIL);
    stats->fcs_bad    += OS_REG_READ(ah, AR_FCS_FAIL);
    stats->rts_good   += OS_REG_READ(ah, AR_RTS_OK);
    stats->beacons    += OS_REG_READ(ah, AR_BEACON_CNT);
}

void
ar9300_get_mib_mac_stats(struct ath_hal *ah, HAL_MIB_STATS* stats)
{
    struct ath_hal_9300 *ahp = AH9300(ah);
    HAL_MIB_STATS* istats = &ahp->ah_stats.ast_mibstats;

    stats->ackrcv_bad = istats->ackrcv_bad;
    stats->rts_bad    = istats->rts_bad;
    stats->fcs_bad    = istats->fcs_bad;
    stats->rts_good   = istats->rts_good;
    stats->beacons    = istats->beacons;
}

/*
 * Detect if the HW supports spreading a CCK signal on channel 14
 */
HAL_BOOL
ar9300_is_japan_channel_spread_supported(struct ath_hal *ah)
{
    return AH_TRUE;
}

/*
 * Get the rssi of the frame currently being received.
 */
u_int32_t
ar9300_get_cur_rssi(struct ath_hal *ah)
{
    /* XXX return (OS_REG_READ(ah, AR_PHY_CURRENT_RSSI) & 0xff); */
    /* get combined RSSI */
    return (OS_REG_READ(ah, AR_PHY_RSSI_3) & 0xff);
}

#if ATH_GEN_RANDOMNESS
/*
 * Get the rssi value from BB on ctl chain0.
 */
u_int32_t
ar9300_get_rssi_chain0(struct ath_hal *ah)
{
    /* get ctl chain0 RSSI */
    return OS_REG_READ(ah, AR_PHY_RSSI_0) & 0xff;
}
#endif

u_int
ar9300_get_def_antenna(struct ath_hal *ah)
{
    return (OS_REG_READ(ah, AR_DEF_ANTENNA) & 0x7);
}

/* Setup coverage class */
void
ar9300_set_coverage_class(struct ath_hal *ah, u_int8_t coverageclass, int now)
{
}

void
ar9300_set_def_antenna(struct ath_hal *ah, u_int antenna)
{
    OS_REG_WRITE(ah, AR_DEF_ANTENNA, (antenna & 0x7));
}

HAL_BOOL
ar9300_set_antenna_switch(struct ath_hal *ah, HAL_ANT_SETTING settings,
    const struct ieee80211_channel *chan, u_int8_t *tx_chainmask,
    u_int8_t *rx_chainmask, u_int8_t *antenna_cfgd)
{
    struct ath_hal_9300 *ahp = AH9300(ah);

    /*
     * Owl does not support diversity or changing antennas.
     *
     * Instead this API and function are defined differently for AR9300.
     * To support Tablet PCs, this interface allows the system
     * to dramatically reduce the TX power on a particular chain.
     *
     * Based on the value of (redefined) diversity_control, the
     * reset code will decrease power on chain 0 or chain 1/2.
     *
     * Based on the value of bit 0 of antenna_switch_swap,
     * the mapping between OID call and chain is defined as:
     *  0: map A -> 0, B -> 1;
     *  1: map A -> 1, B -> 0;
     *
     * NOTE:
     *  The devices that use this OID should use a tx_chain_mask and
     *  tx_chain_select_legacy setting of 5 or 3 if ANTENNA_FIXED_B is
     *  used in order to ensure an active transmit antenna.  This
     *  API will allow the host to turn off the only transmitting
     *  antenna to ensure the antenna closest to the user's body is
     *  powered-down.
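 *
 * Illustrative sketch (hypothetical helper, not part of this HAL) of the
 * A/B-to-chain mapping described above, keyed off bit 0 of an
 * antenna_switch_swap word:
 */
#if 0
static int
example_ant_setting_to_chain(HAL_ANT_SETTING settings, u_int32_t swap)
{
    int chain_a = (swap & 0x1) ? 1 : 0;    /* chain used for "antenna A" */
    int chain_b = (swap & 0x1) ? 0 : 1;    /* chain used for "antenna B" */

    switch (settings) {
    case HAL_ANT_FIXED_A:
        return (chain_a);
    case HAL_ANT_FIXED_B:
        return (chain_b);
    default:
        return (-1);    /* variable/diversity: no single fixed chain */
    }
}
#endif
/*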
*/ /* * Set antenna control for use during reset sequence by * ar9300_decrease_chain_power() */ ahp->ah_diversity_control = settings; return AH_TRUE; } HAL_BOOL ar9300_is_sleep_after_beacon_broken(struct ath_hal *ah) { return AH_TRUE; } HAL_BOOL ar9300_set_slot_time(struct ath_hal *ah, u_int us) { struct ath_hal_9300 *ahp = AH9300(ah); if (us < HAL_SLOT_TIME_9 || us > ar9300_mac_to_usec(ah, 0xffff)) { HALDEBUG(ah, HAL_DEBUG_RESET, "%s: bad slot time %u\n", __func__, us); ahp->ah_slot_time = (u_int) -1; /* restore default handling */ return AH_FALSE; } else { /* convert to system clocks */ OS_REG_WRITE(ah, AR_D_GBL_IFS_SLOT, ar9300_mac_to_clks(ah, us)); ahp->ah_slot_time = us; return AH_TRUE; } } HAL_BOOL ar9300_set_ack_timeout(struct ath_hal *ah, u_int us) { struct ath_hal_9300 *ahp = AH9300(ah); if (us > ar9300_mac_to_usec(ah, MS(0xffffffff, AR_TIME_OUT_ACK))) { HALDEBUG(ah, HAL_DEBUG_RESET, "%s: bad ack timeout %u\n", __func__, us); ahp->ah_ack_timeout = (u_int) -1; /* restore default handling */ return AH_FALSE; } else { /* convert to system clocks */ OS_REG_RMW_FIELD(ah, AR_TIME_OUT, AR_TIME_OUT_ACK, ar9300_mac_to_clks(ah, us)); ahp->ah_ack_timeout = us; return AH_TRUE; } } u_int ar9300_get_ack_timeout(struct ath_hal *ah) { u_int clks = MS(OS_REG_READ(ah, AR_TIME_OUT), AR_TIME_OUT_ACK); return ar9300_mac_to_usec(ah, clks); /* convert from system clocks */ } HAL_STATUS ar9300_set_quiet(struct ath_hal *ah, u_int32_t period, u_int32_t duration, u_int32_t next_start, HAL_QUIET_FLAG flag) { #define TU_TO_USEC(_tu) ((_tu) << 10) HAL_STATUS status = HAL_EIO; u_int32_t tsf = 0, j, next_start_us = 0; if (flag & HAL_QUIET_ENABLE) { for (j = 0; j < 2; j++) { next_start_us = TU_TO_USEC(next_start); tsf = OS_REG_READ(ah, AR_TSF_L32); if ((!next_start) || (flag & HAL_QUIET_ADD_CURRENT_TSF)) { next_start_us += tsf; } if (flag & HAL_QUIET_ADD_SWBA_RESP_TIME) { next_start_us += ah->ah_config.ah_sw_beacon_response_time; } OS_REG_RMW_FIELD(ah, AR_QUIET1, AR_QUIET1_QUIET_ACK_CTS_ENABLE, 1); OS_REG_WRITE(ah, AR_QUIET2, SM(duration, AR_QUIET2_QUIET_DUR)); OS_REG_WRITE(ah, AR_QUIET_PERIOD, TU_TO_USEC(period)); OS_REG_WRITE(ah, AR_NEXT_QUIET_TIMER, next_start_us); OS_REG_SET_BIT(ah, AR_TIMER_MODE, AR_QUIET_TIMER_EN); if ((OS_REG_READ(ah, AR_TSF_L32) >> 10) == tsf >> 10) { status = HAL_OK; break; } HALDEBUG(ah, HAL_DEBUG_QUEUE, "%s: TSF have moved " "while trying to set quiet time TSF: 0x%08x\n", __func__, tsf); /* TSF shouldn't count twice or reg access is taking forever */ HALASSERT(j < 1); } } else { OS_REG_CLR_BIT(ah, AR_TIMER_MODE, AR_QUIET_TIMER_EN); status = HAL_OK; } return status; #undef TU_TO_USEC } -#ifdef ATH_SUPPORT_DFS + +//#ifdef ATH_SUPPORT_DFS void ar9300_cac_tx_quiet(struct ath_hal *ah, HAL_BOOL enable) { - u32 reg1, reg2; + uint32_t reg1, reg2; reg1 = OS_REG_READ(ah, AR_MAC_PCU_OFFSET(MAC_PCU_MISC_MODE)); reg2 = OS_REG_READ(ah, AR_MAC_PCU_OFFSET(MAC_PCU_QUIET_TIME_1)); AH9300(ah)->ah_cac_quiet_enabled = enable; if (enable) { OS_REG_WRITE(ah, AR_MAC_PCU_OFFSET(MAC_PCU_MISC_MODE), reg1 | AR_PCU_FORCE_QUIET_COLL); OS_REG_WRITE(ah, AR_MAC_PCU_OFFSET(MAC_PCU_QUIET_TIME_1), reg2 & ~AR_QUIET1_QUIET_ACK_CTS_ENABLE); } else { OS_REG_WRITE(ah, AR_MAC_PCU_OFFSET(MAC_PCU_MISC_MODE), reg1 & ~AR_PCU_FORCE_QUIET_COLL); OS_REG_WRITE(ah, AR_MAC_PCU_OFFSET(MAC_PCU_QUIET_TIME_1), reg2 | AR_QUIET1_QUIET_ACK_CTS_ENABLE); } } -#endif /* ATH_SUPPORT_DFS */ +//#endif /* ATH_SUPPORT_DFS */ void ar9300_set_pcu_config(struct ath_hal *ah) { ar9300_set_operating_mode(ah, AH_PRIVATE(ah)->ah_opmode); } HAL_STATUS 
ar9300_get_capability(struct ath_hal *ah, HAL_CAPABILITY_TYPE type, u_int32_t capability, u_int32_t *result) { struct ath_hal_9300 *ahp = AH9300(ah); const HAL_CAPABILITIES *p_cap = &AH_PRIVATE(ah)->ah_caps; struct ar9300_ani_state *ani; switch (type) { case HAL_CAP_CIPHER: /* cipher handled in hardware */ switch (capability) { case HAL_CIPHER_AES_CCM: case HAL_CIPHER_AES_OCB: case HAL_CIPHER_TKIP: case HAL_CIPHER_WEP: case HAL_CIPHER_MIC: case HAL_CIPHER_CLR: return HAL_OK; default: return HAL_ENOTSUPP; } case HAL_CAP_TKIP_MIC: /* handle TKIP MIC in hardware */ switch (capability) { case 0: /* hardware capability */ return HAL_OK; case 1: return (ahp->ah_sta_id1_defaults & AR_STA_ID1_CRPT_MIC_ENABLE) ? HAL_OK : HAL_ENXIO; default: return HAL_ENOTSUPP; } case HAL_CAP_TKIP_SPLIT: /* hardware TKIP uses split keys */ switch (capability) { case 0: /* hardware capability */ return p_cap->halTkipMicTxRxKeySupport ? HAL_ENXIO : HAL_OK; case 1: /* current setting */ return (ahp->ah_misc_mode & AR_PCU_MIC_NEW_LOC_ENA) ? HAL_ENXIO : HAL_OK; default: return HAL_ENOTSUPP; } case HAL_CAP_WME_TKIPMIC: /* hardware can do TKIP MIC when WMM is turned on */ return HAL_OK; case HAL_CAP_PHYCOUNTERS: /* hardware PHY error counters */ return HAL_OK; case HAL_CAP_DIVERSITY: /* hardware supports fast diversity */ switch (capability) { case 0: /* hardware capability */ return HAL_OK; case 1: /* current setting */ return (OS_REG_READ(ah, AR_PHY_CCK_DETECT) & AR_PHY_CCK_DETECT_BB_ENABLE_ANT_FAST_DIV) ? HAL_OK : HAL_ENXIO; } return HAL_EINVAL; case HAL_CAP_TPC: switch (capability) { case 0: /* hardware capability */ return HAL_OK; case 1: return ah->ah_config.ath_hal_desc_tpc ? HAL_OK : HAL_ENXIO; } return HAL_OK; case HAL_CAP_PHYDIAG: /* radar pulse detection capability */ return HAL_OK; case HAL_CAP_MCAST_KEYSRCH: /* multicast frame keycache search */ switch (capability) { case 0: /* hardware capability */ return HAL_OK; case 1: if (OS_REG_READ(ah, AR_STA_ID1) & AR_STA_ID1_ADHOC) { /* * Owl and Merlin have problems in mcast key search. * Disable this cap. in Ad-hoc mode. see Bug 25776 and * 26802 */ return HAL_ENXIO; } else { return (ahp->ah_sta_id1_defaults & AR_STA_ID1_MCAST_KSRCH) ? HAL_OK : HAL_ENXIO; } } return HAL_EINVAL; case HAL_CAP_TSF_ADJUST: /* hardware has beacon tsf adjust */ switch (capability) { case 0: /* hardware capability */ return p_cap->halTsfAddSupport ? HAL_OK : HAL_ENOTSUPP; case 1: return (ahp->ah_misc_mode & AR_PCU_TX_ADD_TSF) ? HAL_OK : HAL_ENXIO; } return HAL_EINVAL; case HAL_CAP_RFSILENT: /* rfsilent support */ if (capability == 3) { /* rfkill interrupt */ /* * XXX: Interrupt-based notification of RF Kill state * changes not working yet. Report that this feature * is not supported so that polling is used instead. */ return (HAL_ENOTSUPP); } return ath_hal_getcapability(ah, type, capability, result); case HAL_CAP_4ADDR_AGGR: return HAL_OK; case HAL_CAP_BB_RIFS_HANG: return HAL_ENOTSUPP; case HAL_CAP_BB_DFS_HANG: return HAL_ENOTSUPP; case HAL_CAP_BB_RX_CLEAR_STUCK_HANG: /* Track chips that are known to have BB hangs related * to rx_clear stuck low. */ return HAL_ENOTSUPP; case HAL_CAP_MAC_HANG: /* Track chips that are known to have MAC hangs. */ return HAL_OK; case HAL_CAP_RIFS_RX_ENABLED: /* Is RIFS RX currently enabled */ return (ahp->ah_rifs_enabled == AH_TRUE) ? 
            HAL_OK : HAL_ENOTSUPP;
#if 0
    case HAL_CAP_ANT_CFG_2GHZ:
        *result = p_cap->halNumAntCfg2Ghz;
        return HAL_OK;
    case HAL_CAP_ANT_CFG_5GHZ:
        *result = p_cap->halNumAntCfg5Ghz;
        return HAL_OK;
    case HAL_CAP_RX_STBC:
        *result = p_cap->hal_rx_stbc_support;
        return HAL_OK;
    case HAL_CAP_TX_STBC:
        *result = p_cap->hal_tx_stbc_support;
        return HAL_OK;
#endif
    case HAL_CAP_LDPC:
        *result = p_cap->halLDPCSupport;
        return HAL_OK;
    case HAL_CAP_DYNAMIC_SMPS:
        return HAL_OK;
    case HAL_CAP_DS:
        return (AR_SREV_HORNET(ah) || AR_SREV_POSEIDON(ah) ||
            AR_SREV_APHRODITE(ah) ||
            (p_cap->halTxChainMask & 0x3) != 0x3 ||
            (p_cap->halRxChainMask & 0x3) != 0x3) ?
                HAL_ENOTSUPP : HAL_OK;
    case HAL_CAP_TS:
        return (AR_SREV_HORNET(ah) || AR_SREV_POSEIDON(ah) ||
            AR_SREV_APHRODITE(ah) ||
            (p_cap->halTxChainMask & 0x7) != 0x7 ||
            (p_cap->halRxChainMask & 0x7) != 0x7) ?
                HAL_ENOTSUPP : HAL_OK;
    case HAL_CAP_OL_PWRCTRL:
        return (ar9300_eeprom_get(ahp, EEP_OL_PWRCTRL)) ?
            HAL_OK : HAL_ENOTSUPP;
    case HAL_CAP_CRDC:
#if ATH_SUPPORT_CRDC
        return (AR_SREV_WASP(ah) &&
            ah->ah_config.ath_hal_crdc_enable) ?
                HAL_OK : HAL_ENOTSUPP;
#else
        return HAL_ENOTSUPP;
#endif
#if 0
    case HAL_CAP_MAX_WEP_TKIP_HT20_TX_RATEKBPS:
        *result = (u_int32_t)(-1);
        return HAL_OK;
    case HAL_CAP_MAX_WEP_TKIP_HT40_TX_RATEKBPS:
        *result = (u_int32_t)(-1);
        return HAL_OK;
#endif
    case HAL_CAP_BB_PANIC_WATCHDOG:
        return HAL_OK;
    case HAL_CAP_PHYRESTART_CLR_WAR:
        if ((AH_PRIVATE((ah))->ah_macVersion == AR_SREV_VERSION_OSPREY) &&
            (AH_PRIVATE((ah))->ah_macRev < AR_SREV_REVISION_AR9580_10))
        {
            return HAL_OK;
        } else {
            return HAL_ENOTSUPP;
        }
    case HAL_CAP_ENTERPRISE_MODE:
        *result = ahp->ah_enterprise_mode >> 16;
        /*
         * WAR for EV 77658 - Add delimiters to first sub-frame when using
         * RTS/CTS with aggregation and non-enterprise Osprey.
         *
         * Bug fixed in AR9580/Peacock, Wasp1.1 and later
         */
        if ((ahp->ah_enterprise_mode & AR_ENT_OTP_MIN_PKT_SIZE_DISABLE) &&
            !AR_SREV_AR9580_10_OR_LATER(ah) && (!AR_SREV_WASP(ah) ||
            AR_SREV_WASP_10(ah))) {
            *result |= AH_ENT_RTSCTS_DELIM_WAR;
        }
        return HAL_OK;
    case HAL_CAP_LDPCWAR:
        /* WAR for RIFS+LDPC issue is required for all chips currently
         * supported by the ar9300 HAL.
         */
        return HAL_OK;
    case HAL_CAP_ENABLE_APM:
        *result = p_cap->halApmEnable;
        return HAL_OK;
    case HAL_CAP_PCIE_LCR_EXTSYNC_EN:
        return (p_cap->hal_pcie_lcr_extsync_en == AH_TRUE) ?
            HAL_OK : HAL_ENOTSUPP;
    case HAL_CAP_PCIE_LCR_OFFSET:
        *result = p_cap->hal_pcie_lcr_offset;
        return HAL_OK;
    case HAL_CAP_SMARTANTENNA:
        /* FIXME: a request is pending with the h/w team to add a feature
         * bit in caldata to detect whether a board has a smart antenna;
         * once added, fix this piece of code to read and return the value
         * without any compile flags */
#if UMAC_SUPPORT_SMARTANTENNA
        /* enable smart antenna for Peacock, Wasp and Scorpion; for future
         * chips this needs to be modified */
        if (AR_SREV_AR9580_10(ah) || (AR_SREV_WASP(ah)) ||
            AR_SREV_SCORPION(ah)) {
            return HAL_OK;
        } else {
            return HAL_ENOTSUPP;
        }
#else
        return HAL_ENOTSUPP;
#endif
#ifdef ATH_TRAFFIC_FAST_RECOVER
    case HAL_CAP_TRAFFIC_FAST_RECOVER:
        if (AR_SREV_HORNET(ah) || AR_SREV_POSEIDON(ah) ||
            AR_SREV_WASP_11(ah)) {
            return HAL_OK;
        } else {
            return HAL_ENOTSUPP;
        }
#endif

    /* FreeBSD ANI */
    case HAL_CAP_INTMIT:        /* interference mitigation */
        switch (capability) {
        case HAL_CAP_INTMIT_PRESENT:        /* hardware capability */
            return HAL_OK;
        case HAL_CAP_INTMIT_ENABLE:
            return (ahp->ah_proc_phy_err & HAL_PROCESS_ANI) ?
HAL_OK : HAL_ENXIO; case HAL_CAP_INTMIT_NOISE_IMMUNITY_LEVEL: case HAL_CAP_INTMIT_OFDM_WEAK_SIGNAL_LEVEL: // case HAL_CAP_INTMIT_CCK_WEAK_SIGNAL_THR: case HAL_CAP_INTMIT_FIRSTEP_LEVEL: case HAL_CAP_INTMIT_SPUR_IMMUNITY_LEVEL: ani = ar9300_ani_get_current_state(ah); if (ani == AH_NULL) return HAL_ENXIO; switch (capability) { /* XXX AR9300 HAL has OFDM/CCK noise immunity level params? */ case 2: *result = ani->ofdm_noise_immunity_level; break; case 3: *result = !ani->ofdm_weak_sig_detect_off; break; // case 4: *result = ani->cck_weak_sig_threshold; break; case 5: *result = ani->firstep_level; break; case 6: *result = ani->spur_immunity_level; break; } return HAL_OK; } return HAL_EINVAL; case HAL_CAP_ENFORCE_TXOP: if (capability == 0) return (HAL_OK); if (capability != 1) return (HAL_ENOTSUPP); (*result) = !! (ahp->ah_misc_mode & AR_PCU_TXOP_TBTT_LIMIT_ENA); return (HAL_OK); case HAL_CAP_TOA_LOCATIONING: if (capability == 0) return HAL_OK; if (capability == 2) { *result = ar9300_read_loc_timer(ah); return (HAL_OK); } return HAL_ENOTSUPP; default: return ath_hal_getcapability(ah, type, capability, result); } } HAL_BOOL ar9300_set_capability(struct ath_hal *ah, HAL_CAPABILITY_TYPE type, u_int32_t capability, u_int32_t setting, HAL_STATUS *status) { struct ath_hal_9300 *ahp = AH9300(ah); const HAL_CAPABILITIES *p_cap = &AH_PRIVATE(ah)->ah_caps; u_int32_t v; switch (type) { case HAL_CAP_TKIP_SPLIT: /* hardware TKIP uses split keys */ if (! p_cap->halTkipMicTxRxKeySupport) return AH_FALSE; if (setting) ahp->ah_misc_mode &= ~AR_PCU_MIC_NEW_LOC_ENA; else ahp->ah_misc_mode |= AR_PCU_MIC_NEW_LOC_ENA; OS_REG_WRITE(ah, AR_PCU_MISC, ahp->ah_misc_mode); return AH_TRUE; case HAL_CAP_TKIP_MIC: /* handle TKIP MIC in hardware */ if (setting) { ahp->ah_sta_id1_defaults |= AR_STA_ID1_CRPT_MIC_ENABLE; } else { ahp->ah_sta_id1_defaults &= ~AR_STA_ID1_CRPT_MIC_ENABLE; } return AH_TRUE; case HAL_CAP_DIVERSITY: v = OS_REG_READ(ah, AR_PHY_CCK_DETECT); if (setting) { v |= AR_PHY_CCK_DETECT_BB_ENABLE_ANT_FAST_DIV; } else { v &= ~AR_PHY_CCK_DETECT_BB_ENABLE_ANT_FAST_DIV; } OS_REG_WRITE(ah, AR_PHY_CCK_DETECT, v); return AH_TRUE; case HAL_CAP_DIAG: /* hardware diagnostic support */ /* * NB: could split this up into virtual capabilities, * (e.g. 1 => ACK, 2 => CTS, etc.) but it hardly * seems worth the additional complexity. 
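 *
 * For reference, the rejected split would look roughly like the sketch
 * below (illustrative only; the 0x2/0x4 ACK/CTS masks here are
 * assumptions for the example, not the real AR_DIAG_SW field values):
 */
#if 0
switch (capability) {
case 1:     /* ACK handling */
    diag = (diag & ~0x2) | (setting ? 0x2 : 0);
    break;
case 2:     /* CTS handling */
    diag = (diag & ~0x4) | (setting ? 0x4 : 0);
    break;
}
#endif
/*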
*/ #ifdef AH_DEBUG AH_PRIVATE(ah)->ah_diagreg = setting; #else AH_PRIVATE(ah)->ah_diagreg = setting & 0x6; /* ACK+CTS */ #endif OS_REG_WRITE(ah, AR_DIAG_SW, AH_PRIVATE(ah)->ah_diagreg); return AH_TRUE; case HAL_CAP_TPC: ah->ah_config.ath_hal_desc_tpc = (setting != 0); return AH_TRUE; case HAL_CAP_MCAST_KEYSRCH: /* multicast frame keycache search */ if (setting) { ahp->ah_sta_id1_defaults |= AR_STA_ID1_MCAST_KSRCH; } else { ahp->ah_sta_id1_defaults &= ~AR_STA_ID1_MCAST_KSRCH; } return AH_TRUE; case HAL_CAP_TSF_ADJUST: /* hardware has beacon tsf adjust */ if (p_cap->halTsfAddSupport) { if (setting) { ahp->ah_misc_mode |= AR_PCU_TX_ADD_TSF; } else { ahp->ah_misc_mode &= ~AR_PCU_TX_ADD_TSF; } return AH_TRUE; } return AH_FALSE; /* FreeBSD interrupt mitigation / ANI */ case HAL_CAP_INTMIT: { /* interference mitigation */ /* This maps the public ANI commands to the internal ANI commands */ /* Private: HAL_ANI_CMD; Public: HAL_CAP_INTMIT_CMD */ static const HAL_ANI_CMD cmds[] = { HAL_ANI_PRESENT, HAL_ANI_MODE, HAL_ANI_NOISE_IMMUNITY_LEVEL, HAL_ANI_OFDM_WEAK_SIGNAL_DETECTION, HAL_ANI_CCK_WEAK_SIGNAL_THR, HAL_ANI_FIRSTEP_LEVEL, HAL_ANI_SPUR_IMMUNITY_LEVEL, }; #define N(a) (sizeof(a) / sizeof(a[0])) return capability < N(cmds) ? ar9300_ani_control(ah, cmds[capability], setting) : AH_FALSE; #undef N } case HAL_CAP_RXBUFSIZE: /* set MAC receive buffer size */ ahp->rx_buf_size = setting & AR_DATABUF_MASK; OS_REG_WRITE(ah, AR_DATABUF, ahp->rx_buf_size); return AH_TRUE; case HAL_CAP_ENFORCE_TXOP: if (capability != 1) return AH_FALSE; if (setting) { ahp->ah_misc_mode |= AR_PCU_TXOP_TBTT_LIMIT_ENA; OS_REG_SET_BIT(ah, AR_PCU_MISC, AR_PCU_TXOP_TBTT_LIMIT_ENA); } else { ahp->ah_misc_mode &= ~AR_PCU_TXOP_TBTT_LIMIT_ENA; OS_REG_CLR_BIT(ah, AR_PCU_MISC, AR_PCU_TXOP_TBTT_LIMIT_ENA); } return AH_TRUE; case HAL_CAP_TOA_LOCATIONING: if (capability == 0) return AH_TRUE; if (capability == 1) { ar9300_update_loc_ctl_reg(ah, setting); return AH_TRUE; } return AH_FALSE; /* fall thru... 
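 *
 * Usage note (illustrative): with the HAL_CAP_INTMIT mapping above, a
 * set-capability call such as the sketch below lands on HAL_ANI_MODE,
 * since HAL_CAP_INTMIT_ENABLE selects cmds[1]:
 */
#if 0
HAL_STATUS st;
ath_hal_setcapability(ah, HAL_CAP_INTMIT, HAL_CAP_INTMIT_ENABLE, 1, &st);
#endif
/*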
 */
    default:
        return ath_hal_setcapability(ah, type, capability, setting, status);
    }
}

#ifdef AH_DEBUG
static void
ar9300_print_reg(struct ath_hal *ah, u_int32_t args)
{
    u_int32_t i = 0;

    /* Read 0x80d0 to trigger pcie analyzer */
    HALDEBUG(ah, HAL_DEBUG_PRINT_REG,
        "0x%04x 0x%08x\n", 0x80d0, OS_REG_READ(ah, 0x80d0));

    if (args & HAL_DIAG_PRINT_REG_COUNTER) {
        struct ath_hal_9300 *ahp = AH9300(ah);
        u_int32_t tf, rf, rc, cc;

        tf = OS_REG_READ(ah, AR_TFCNT);
        rf = OS_REG_READ(ah, AR_RFCNT);
        rc = OS_REG_READ(ah, AR_RCCNT);
        cc = OS_REG_READ(ah, AR_CCCNT);
        HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "AR_TFCNT Diff= 0x%x\n",
            tf - ahp->last_tf);
        HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "AR_RFCNT Diff= 0x%x\n",
            rf - ahp->last_rf);
        HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "AR_RCCNT Diff= 0x%x\n",
            rc - ahp->last_rc);
        HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "AR_CCCNT Diff= 0x%x\n",
            cc - ahp->last_cc);
        ahp->last_tf = tf;
        ahp->last_rf = rf;
        ahp->last_rc = rc;
        ahp->last_cc = cc;
        HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "DMADBG0 = 0x%x\n",
            OS_REG_READ(ah, AR_DMADBG_0));
        HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "DMADBG1 = 0x%x\n",
            OS_REG_READ(ah, AR_DMADBG_1));
        HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "DMADBG2 = 0x%x\n",
            OS_REG_READ(ah, AR_DMADBG_2));
        HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "DMADBG3 = 0x%x\n",
            OS_REG_READ(ah, AR_DMADBG_3));
        HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "DMADBG4 = 0x%x\n",
            OS_REG_READ(ah, AR_DMADBG_4));
        HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "DMADBG5 = 0x%x\n",
            OS_REG_READ(ah, AR_DMADBG_5));
        HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "DMADBG6 = 0x%x\n",
            OS_REG_READ(ah, AR_DMADBG_6));
        HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "DMADBG7 = 0x%x\n",
            OS_REG_READ(ah, AR_DMADBG_7));
    }
    if (args & HAL_DIAG_PRINT_REG_ALL) {
        for (i = 0x8; i <= 0xB8; i += sizeof(u_int32_t)) {
            HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "0x%04x 0x%08x\n",
                i, OS_REG_READ(ah, i));
        }
        for (i = 0x800; i <= (0x800 + (10 << 2)); i += sizeof(u_int32_t)) {
            HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "0x%04x 0x%08x\n",
                i, OS_REG_READ(ah, i));
        }
        HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "0x%04x 0x%08x\n",
            0x840, OS_REG_READ(ah, 0x840));
        HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "0x%04x 0x%08x\n",
            0x880, OS_REG_READ(ah, 0x880));
        for (i = 0x8C0; i <= (0x8C0 + (10 << 2)); i += sizeof(u_int32_t)) {
            HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "0x%04x 0x%08x\n",
                i, OS_REG_READ(ah, i));
        }
        for (i = 0x1F00; i <= 0x1F04; i += sizeof(u_int32_t)) {
            HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "0x%04x 0x%08x\n",
                i, OS_REG_READ(ah, i));
        }
        for (i = 0x4000; i <= 0x408C; i += sizeof(u_int32_t)) {
            HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "0x%04x 0x%08x\n",
                i, OS_REG_READ(ah, i));
        }
        for (i = 0x5000; i <= 0x503C; i += sizeof(u_int32_t)) {
            HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "0x%04x 0x%08x\n",
                i, OS_REG_READ(ah, i));
        }
        for (i = 0x7040; i <= 0x7058; i += sizeof(u_int32_t)) {
            HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "0x%04x 0x%08x\n",
                i, OS_REG_READ(ah, i));
        }
        for (i = 0x8000; i <= 0x8098; i += sizeof(u_int32_t)) {
            HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "0x%04x 0x%08x\n",
                i, OS_REG_READ(ah, i));
        }
        for (i = 0x80D4; i <= 0x8200; i += sizeof(u_int32_t)) {
            HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "0x%04x 0x%08x\n",
                i, OS_REG_READ(ah, i));
        }
        for (i = 0x8240; i <= 0x97FC; i += sizeof(u_int32_t)) {
            HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "0x%04x 0x%08x\n",
                i, OS_REG_READ(ah, i));
        }
        for (i = 0x9800; i <= 0x99f0; i += sizeof(u_int32_t)) {
            HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "0x%04x 0x%08x\n",
                i, OS_REG_READ(ah, i));
        }
        for (i = 0x9c10; i <= 0x9CFC; i += sizeof(u_int32_t)) {
            HALDEBUG(ah, HAL_DEBUG_PRINT_REG, "0x%04x 0x%08x\n",
                i, OS_REG_READ(ah, i));
        }
        for (i = 0xA200; i <= 0xA26C; i += sizeof(u_int32_t)) {
            HALDEBUG(ah, HAL_DEBUG_PRINT_REG,
"0x%04x 0x%08x\n", i, OS_REG_READ(ah, i)); } } } #endif HAL_BOOL ar9300_get_diag_state(struct ath_hal *ah, int request, const void *args, u_int32_t argsize, void **result, u_int32_t *resultsize) { struct ath_hal_9300 *ahp = AH9300(ah); struct ar9300_ani_state *ani; (void) ahp; if (ath_hal_getdiagstate(ah, request, args, argsize, result, resultsize)) { return AH_TRUE; } switch (request) { #ifdef AH_PRIVATE_DIAG case HAL_DIAG_EEPROM: *result = &ahp->ah_eeprom; *resultsize = sizeof(ar9300_eeprom_t); return AH_TRUE; #if 0 /* XXX - TODO */ case HAL_DIAG_EEPROM_EXP_11A: case HAL_DIAG_EEPROM_EXP_11B: case HAL_DIAG_EEPROM_EXP_11G: pe = &ahp->ah_mode_power_array2133[request - HAL_DIAG_EEPROM_EXP_11A]; *result = pe->p_channels; *resultsize = (*result == AH_NULL) ? 0 : roundup(sizeof(u_int16_t) * pe->num_channels, sizeof(u_int32_t)) + sizeof(EXPN_DATA_PER_CHANNEL_2133) * pe->num_channels; return AH_TRUE; #endif case HAL_DIAG_RFGAIN: *result = &ahp->ah_gain_values; *resultsize = sizeof(GAIN_VALUES); return AH_TRUE; case HAL_DIAG_RFGAIN_CURSTEP: *result = (void *) ahp->ah_gain_values.curr_step; *resultsize = (*result == AH_NULL) ? 0 : sizeof(GAIN_OPTIMIZATION_STEP); return AH_TRUE; #if 0 /* XXX - TODO */ case HAL_DIAG_PCDAC: *result = ahp->ah_pcdac_table; *resultsize = ahp->ah_pcdac_table_size; return AH_TRUE; #endif case HAL_DIAG_ANI_CURRENT: ani = ar9300_ani_get_current_state(ah); if (ani == AH_NULL) return AH_FALSE; /* Convert ar9300 HAL to FreeBSD HAL ANI state */ /* XXX TODO: add all of these to the HAL ANI state structure */ bzero(&ahp->ext_ani_state, sizeof(ahp->ext_ani_state)); /* XXX should this be OFDM or CCK noise immunity level? */ ahp->ext_ani_state.noiseImmunityLevel = ani->ofdm_noise_immunity_level; ahp->ext_ani_state.spurImmunityLevel = ani->spur_immunity_level; ahp->ext_ani_state.firstepLevel = ani->firstep_level; ahp->ext_ani_state.ofdmWeakSigDetectOff = ani->ofdm_weak_sig_detect_off; /* mrc_cck_off */ /* cck_noise_immunity_level */ ahp->ext_ani_state.listenTime = ani->listen_time; *result = &ahp->ext_ani_state; *resultsize = sizeof(ahp->ext_ani_state); #if 0 *result = ar9300_ani_get_current_state(ah); *resultsize = (*result == AH_NULL) ? 0 : sizeof(struct ar9300_ani_state); #endif return AH_TRUE; case HAL_DIAG_ANI_STATS: *result = ar9300_ani_get_current_stats(ah); *resultsize = (*result == AH_NULL) ? 
0 : sizeof(HAL_ANI_STATS); return AH_TRUE; case HAL_DIAG_ANI_CMD: if (argsize != 2*sizeof(u_int32_t)) { return AH_FALSE; } ar9300_ani_control( ah, ((const u_int32_t *)args)[0], ((const u_int32_t *)args)[1]); return AH_TRUE; #if 0 case HAL_DIAG_TXCONT: /*AR9300_CONTTXMODE(ah, (struct ath_desc *)args, argsize );*/ return AH_TRUE; #endif /* 0 */ #endif /* AH_PRIVATE_DIAG */ case HAL_DIAG_CHANNELS: #if 0 *result = &(ahp->ah_priv.ah_channels[0]); *resultsize = sizeof(ahp->ah_priv.ah_channels[0]) * ahp->ah_priv.priv.ah_nchan; #endif return AH_TRUE; #ifdef AH_DEBUG case HAL_DIAG_PRINT_REG: ar9300_print_reg(ah, *((const u_int32_t *)args)); return AH_TRUE; #endif default: break; } return AH_FALSE; } void ar9300_dma_reg_dump(struct ath_hal *ah) { #ifdef AH_DEBUG #define NUM_DMA_DEBUG_REGS 8 #define NUM_QUEUES 10 u_int32_t val[NUM_DMA_DEBUG_REGS]; int qcu_offset = 0, dcu_offset = 0; u_int32_t *qcu_base = &val[0], *dcu_base = &val[4], reg; int i, j, k; int16_t nfarray[HAL_NUM_NF_READINGS]; #ifdef ATH_NF_PER_CHAN HAL_CHANNEL_INTERNAL *ichan = ath_hal_checkchannel(ah, AH_PRIVATE(ah)->ah_curchan); #endif /* ATH_NF_PER_CHAN */ HAL_NFCAL_HIST_FULL *h = AH_HOME_CHAN_NFCAL_HIST(ah, ichan); /* selecting DMA OBS 8 */ OS_REG_WRITE(ah, AR_MACMISC, ((AR_MACMISC_DMA_OBS_LINE_8 << AR_MACMISC_DMA_OBS_S) | (AR_MACMISC_MISC_OBS_BUS_1 << AR_MACMISC_MISC_OBS_BUS_MSB_S))); ath_hal_printf(ah, "Raw DMA Debug values:\n"); for (i = 0; i < NUM_DMA_DEBUG_REGS; i++) { if (i % 4 == 0) { ath_hal_printf(ah, "\n"); } val[i] = OS_REG_READ(ah, AR_DMADBG_0 + (i * sizeof(u_int32_t))); ath_hal_printf(ah, "%d: %08x ", i, val[i]); } ath_hal_printf(ah, "\n\n"); ath_hal_printf(ah, "Num QCU: chain_st fsp_ok fsp_st DCU: chain_st\n"); for (i = 0; i < NUM_QUEUES; i++, qcu_offset += 4, dcu_offset += 5) { if (i == 8) { /* only 8 QCU entries in val[0] */ qcu_offset = 0; qcu_base++; } if (i == 6) { /* only 6 DCU entries in val[4] */ dcu_offset = 0; dcu_base++; } ath_hal_printf(ah, "%2d %2x %1x %2x %2x\n", i, (*qcu_base & (0x7 << qcu_offset)) >> qcu_offset, (*qcu_base & (0x8 << qcu_offset)) >> (qcu_offset + 3), val[2] & (0x7 << (i * 3)) >> (i * 3), (*dcu_base & (0x1f << dcu_offset)) >> dcu_offset); } ath_hal_printf(ah, "\n"); ath_hal_printf(ah, "qcu_stitch state: %2x qcu_fetch state: %2x\n", (val[3] & 0x003c0000) >> 18, (val[3] & 0x03c00000) >> 22); ath_hal_printf(ah, "qcu_complete state: %2x dcu_complete state: %2x\n", (val[3] & 0x1c000000) >> 26, (val[6] & 0x3)); ath_hal_printf(ah, "dcu_arb state: %2x dcu_fp state: %2x\n", (val[5] & 0x06000000) >> 25, (val[5] & 0x38000000) >> 27); ath_hal_printf(ah, "chan_idle_dur: %3d chan_idle_dur_valid: %1d\n", (val[6] & 0x000003fc) >> 2, (val[6] & 0x00000400) >> 10); ath_hal_printf(ah, "txfifo_valid_0: %1d txfifo_valid_1: %1d\n", (val[6] & 0x00000800) >> 11, (val[6] & 0x00001000) >> 12); ath_hal_printf(ah, "txfifo_dcu_num_0: %2d txfifo_dcu_num_1: %2d\n", (val[6] & 0x0001e000) >> 13, (val[6] & 0x001e0000) >> 17); ath_hal_printf(ah, "pcu observe 0x%x \n", OS_REG_READ(ah, AR_OBS_BUS_1)); ath_hal_printf(ah, "AR_CR 0x%x \n", OS_REG_READ(ah, AR_CR)); ar9300_upload_noise_floor(ah, 1, nfarray); ath_hal_printf(ah, "2G:\n"); ath_hal_printf(ah, "Min CCA Out:\n"); ath_hal_printf(ah, "\t\tChain 0\t\tChain 1\t\tChain 2\n"); ath_hal_printf(ah, "Control:\t%8d\t%8d\t%8d\n", nfarray[0], nfarray[1], nfarray[2]); ath_hal_printf(ah, "Extension:\t%8d\t%8d\t%8d\n\n", nfarray[3], nfarray[4], nfarray[5]); ar9300_upload_noise_floor(ah, 0, nfarray); ath_hal_printf(ah, "5G:\n"); ath_hal_printf(ah, "Min CCA Out:\n"); ath_hal_printf(ah, 
"\t\tChain 0\t\tChain 1\t\tChain 2\n"); ath_hal_printf(ah, "Control:\t%8d\t%8d\t%8d\n", nfarray[0], nfarray[1], nfarray[2]); ath_hal_printf(ah, "Extension:\t%8d\t%8d\t%8d\n\n", nfarray[3], nfarray[4], nfarray[5]); for (i = 0; i < HAL_NUM_NF_READINGS; i++) { ath_hal_printf(ah, "%s Chain %d NF History:\n", ((i < 3) ? "Control " : "Extension "), i%3); for (j = 0, k = h->base.curr_index; j < HAL_NF_CAL_HIST_LEN_FULL; j++, k++) { ath_hal_printf(ah, "Element %d: %d\n", j, h->nf_cal_buffer[k % HAL_NF_CAL_HIST_LEN_FULL][i]); } ath_hal_printf(ah, "Last Programmed NF: %d\n\n", h->base.priv_nf[i]); } reg = OS_REG_READ(ah, AR_PHY_FIND_SIG_LOW); ath_hal_printf(ah, "FIRStep Low = 0x%x (%d)\n", MS(reg, AR_PHY_FIND_SIG_LOW_FIRSTEP_LOW), MS(reg, AR_PHY_FIND_SIG_LOW_FIRSTEP_LOW)); reg = OS_REG_READ(ah, AR_PHY_DESIRED_SZ); ath_hal_printf(ah, "Total Desired = 0x%x (%d)\n", MS(reg, AR_PHY_DESIRED_SZ_TOT_DES), MS(reg, AR_PHY_DESIRED_SZ_TOT_DES)); ath_hal_printf(ah, "ADC Desired = 0x%x (%d)\n", MS(reg, AR_PHY_DESIRED_SZ_ADC), MS(reg, AR_PHY_DESIRED_SZ_ADC)); reg = OS_REG_READ(ah, AR_PHY_FIND_SIG); ath_hal_printf(ah, "FIRStep = 0x%x (%d)\n", MS(reg, AR_PHY_FIND_SIG_FIRSTEP), MS(reg, AR_PHY_FIND_SIG_FIRSTEP)); reg = OS_REG_READ(ah, AR_PHY_AGC); ath_hal_printf(ah, "Coarse High = 0x%x (%d)\n", MS(reg, AR_PHY_AGC_COARSE_HIGH), MS(reg, AR_PHY_AGC_COARSE_HIGH)); ath_hal_printf(ah, "Coarse Low = 0x%x (%d)\n", MS(reg, AR_PHY_AGC_COARSE_LOW), MS(reg, AR_PHY_AGC_COARSE_LOW)); ath_hal_printf(ah, "Coarse Power Constant = 0x%x (%d)\n", MS(reg, AR_PHY_AGC_COARSE_PWR_CONST), MS(reg, AR_PHY_AGC_COARSE_PWR_CONST)); reg = OS_REG_READ(ah, AR_PHY_TIMING5); ath_hal_printf(ah, "Enable Cyclic Power Thresh = %d\n", MS(reg, AR_PHY_TIMING5_CYCPWR_THR1_ENABLE)); ath_hal_printf(ah, "Cyclic Power Thresh = 0x%x (%d)\n", MS(reg, AR_PHY_TIMING5_CYCPWR_THR1), MS(reg, AR_PHY_TIMING5_CYCPWR_THR1)); ath_hal_printf(ah, "Cyclic Power Thresh 1A= 0x%x (%d)\n", MS(reg, AR_PHY_TIMING5_CYCPWR_THR1A), MS(reg, AR_PHY_TIMING5_CYCPWR_THR1A)); reg = OS_REG_READ(ah, AR_PHY_DAG_CTRLCCK); ath_hal_printf(ah, "Barker RSSI Thresh Enable = %d\n", MS(reg, AR_PHY_DAG_CTRLCCK_EN_RSSI_THR)); ath_hal_printf(ah, "Barker RSSI Thresh = 0x%x (%d)\n", MS(reg, AR_PHY_DAG_CTRLCCK_RSSI_THR), MS(reg, AR_PHY_DAG_CTRLCCK_RSSI_THR)); /* Step 1a: Set bit 23 of register 0xa360 to 0 */ reg = OS_REG_READ(ah, 0xa360); reg &= ~0x00800000; OS_REG_WRITE(ah, 0xa360, reg); /* Step 2a: Set register 0xa364 to 0x1000 */ reg = 0x1000; OS_REG_WRITE(ah, 0xa364, reg); /* Step 3a: Read bits 17:0 of register 0x9c20 */ reg = OS_REG_READ(ah, 0x9c20); reg &= 0x0003ffff; ath_hal_printf(ah, "%s: Test Control Status [0x1000] 0x9c20[17:0] = 0x%x\n", __func__, reg); /* Step 1b: Set bit 23 of register 0xa360 to 0 */ reg = OS_REG_READ(ah, 0xa360); reg &= ~0x00800000; OS_REG_WRITE(ah, 0xa360, reg); /* Step 2b: Set register 0xa364 to 0x1400 */ reg = 0x1400; OS_REG_WRITE(ah, 0xa364, reg); /* Step 3b: Read bits 17:0 of register 0x9c20 */ reg = OS_REG_READ(ah, 0x9c20); reg &= 0x0003ffff; ath_hal_printf(ah, "%s: Test Control Status [0x1400] 0x9c20[17:0] = 0x%x\n", __func__, reg); /* Step 1c: Set bit 23 of register 0xa360 to 0 */ reg = OS_REG_READ(ah, 0xa360); reg &= ~0x00800000; OS_REG_WRITE(ah, 0xa360, reg); /* Step 2c: Set register 0xa364 to 0x3C00 */ reg = 0x3c00; OS_REG_WRITE(ah, 0xa364, reg); /* Step 3c: Read bits 17:0 of register 0x9c20 */ reg = OS_REG_READ(ah, 0x9c20); reg &= 0x0003ffff; ath_hal_printf(ah, "%s: Test Control Status [0x3C00] 0x9c20[17:0] = 0x%x\n", __func__, reg); /* Step 1d: Set bit 24 of 
register 0xa360 to 0 */
    reg = OS_REG_READ(ah, 0xa360);
    reg &= ~0x001040000;
    OS_REG_WRITE(ah, 0xa360, reg);

    /* Step 2d: Set register 0xa364 to 0x5005D */
    reg = 0x5005D;
    OS_REG_WRITE(ah, 0xa364, reg);

    /* Step 3d: Read bits 17:0 of register 0xa368 */
    reg = OS_REG_READ(ah, 0xa368);
    reg &= 0x0003ffff;
    ath_hal_printf(ah,
        "%s: Test Control Status [0x5005D] 0xa368[17:0] = 0x%x\n",
        __func__, reg);

    /* Step 1e: Set bit 24 of register 0xa360 to 0 */
    reg = OS_REG_READ(ah, 0xa360);
    reg &= ~0x001040000;
    OS_REG_WRITE(ah, 0xa360, reg);

    /* Step 2e: Set register 0xa364 to 0x7005D */
    reg = 0x7005D;
    OS_REG_WRITE(ah, 0xa364, reg);

    /* Step 3e: Read bits 17:0 of register 0xa368 */
    reg = OS_REG_READ(ah, 0xa368);
    reg &= 0x0003ffff;
    ath_hal_printf(ah,
        "%s: Test Control Status [0x7005D] 0xa368[17:0] = 0x%x\n",
        __func__, reg);

    /* Step 1f: Set bit 24 of register 0xa360 to 0 */
    reg = OS_REG_READ(ah, 0xa360);
    reg &= ~0x001000000;
    reg |= 0x40000;
    OS_REG_WRITE(ah, 0xa360, reg);

    /* Step 2f: Set register 0xa364 to 0x3005D */
    reg = 0x3005D;
    OS_REG_WRITE(ah, 0xa364, reg);

    /* Step 3f: Read bits 17:0 of register 0xa368 */
    reg = OS_REG_READ(ah, 0xa368);
    reg &= 0x0003ffff;
    ath_hal_printf(ah,
        "%s: Test Control Status [0x3005D] 0xa368[17:0] = 0x%x\n",
        __func__, reg);

    /* Step 1g: Set bit 24 of register 0xa360 to 0 */
    reg = OS_REG_READ(ah, 0xa360);
    reg &= ~0x001000000;
    reg |= 0x40000;
    OS_REG_WRITE(ah, 0xa360, reg);

    /* Step 2g: Set register 0xa364 to 0x6005D */
    reg = 0x6005D;
    OS_REG_WRITE(ah, 0xa364, reg);

    /* Step 3g: Read bits 17:0 of register 0xa368 */
    reg = OS_REG_READ(ah, 0xa368);
    reg &= 0x0003ffff;
    ath_hal_printf(ah,
        "%s: Test Control Status [0x6005D] 0xa368[17:0] = 0x%x\n",
        __func__, reg);
#endif /* AH_DEBUG */
}

/*
 * Return the busy percentages for rx_frame, rx_clear, and tx_frame
 */
u_int32_t
ar9300_get_mib_cycle_counts_pct(struct ath_hal *ah, u_int32_t *rxc_pcnt,
    u_int32_t *rxf_pcnt, u_int32_t *txf_pcnt)
{
    struct ath_hal_9300 *ahp = AH9300(ah);
    u_int32_t good = 1;
    u_int32_t rc = OS_REG_READ(ah, AR_RCCNT);
    u_int32_t rf = OS_REG_READ(ah, AR_RFCNT);
    u_int32_t tf = OS_REG_READ(ah, AR_TFCNT);
    u_int32_t cc = OS_REG_READ(ah, AR_CCCNT); /* read cycles last */

    if (ahp->ah_cycles == 0 || ahp->ah_cycles > cc) {
        /*
         * Cycle counter wrap (or initial call); it's not possible
         * to accurately calculate a value because the registers
         * right shift rather than wrap--so punt and return 0.
         */
        HALDEBUG(ah, HAL_DEBUG_CHANNEL,
            "%s: cycle counter wrap. ExtBusy = 0\n", __func__);
        good = 0;
    } else {
        u_int32_t cc_d = cc - ahp->ah_cycles;
        u_int32_t rc_d = rc - ahp->ah_rx_clear;
        u_int32_t rf_d = rf - ahp->ah_rx_frame;
        u_int32_t tf_d = tf - ahp->ah_tx_frame;

        if (cc_d != 0) {
            *rxc_pcnt = rc_d * 100 / cc_d;
            *rxf_pcnt = rf_d * 100 / cc_d;
            *txf_pcnt = tf_d * 100 / cc_d;
        } else {
            good = 0;
        }
    }
    ahp->ah_cycles = cc;
    ahp->ah_rx_frame = rf;
    ahp->ah_rx_clear = rc;
    ahp->ah_tx_frame = tf;
    return good;
}

/*
 * Return approximation of extension channel busy over a time interval
 * 0% (clear) -> 100% (busy)
 * -1 for invalid estimate
 */
uint32_t
ar9300_get_11n_ext_busy(struct ath_hal *ah)
{
    /*
     * Overflow condition to check before multiplying to get %
     * (x * 100 > 0xFFFFFFFF ) => (x > 0x28F5C28)
     */
#define OVERFLOW_LIMIT  0x28F5C28
#define ERROR_CODE      -1

    struct ath_hal_9300 *ahp = AH9300(ah);
    u_int32_t busy = 0; /* percentage */
    int8_t busyper = 0;
    u_int32_t cycle_count, ctl_busy, ext_busy;

    /* cycle_count will always be the first to wrap; therefore, read it last.
     * This sequence of reads is not atomic, and MIB counter wrap
     * could happen during it?
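 *
 * Worked example of the OVERFLOW_LIMIT guard used below: in 32-bit
 * arithmetic, delta * 100 only stays below 0xFFFFFFFF while
 * delta <= 0xFFFFFFFF / 100 = 0x28F5C28 (42949672).  Past that, both
 * deltas are pre-shifted by 7, so the ratio (and thus the percentage)
 * is preserved:
 */
#if 0
/* Illustrative only: the two scaling paths used in the code below. */
busy = (ext_busy_delta * 100) / cycle_delta;               /* small deltas */
busy = ((ext_busy_delta >> 7) * 100) / (cycle_delta >> 7); /* large deltas */
#endif
/*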
*/ ctl_busy = OS_REG_READ(ah, AR_RCCNT); ext_busy = OS_REG_READ(ah, AR_EXTRCCNT); cycle_count = OS_REG_READ(ah, AR_CCCNT); if ((ahp->ah_cycle_count == 0) || (ahp->ah_cycle_count > cycle_count) || (ahp->ah_ctl_busy > ctl_busy) || (ahp->ah_ext_busy > ext_busy)) { /* * Cycle counter wrap (or initial call); it's not possible * to accurately calculate a value because the registers * right shift rather than wrap--so punt and return 0. */ busyper = ERROR_CODE; HALDEBUG(ah, HAL_DEBUG_CHANNEL, "%s: cycle counter wrap. ExtBusy = 0\n", __func__); } else { u_int32_t cycle_delta = cycle_count - ahp->ah_cycle_count; u_int32_t ext_busy_delta = ext_busy - ahp->ah_ext_busy; /* * Compute extension channel busy percentage * Overflow condition: 0xFFFFFFFF < ext_busy_delta * 100 * Underflow condition/Divide-by-zero: check that cycle_delta >> 7 != 0 * Will never happen, since (ext_busy_delta < cycle_delta) always, * and shift necessitated by large ext_busy_delta. * Due to timing difference to read the registers and counter overflow, * it may still happen that cycle_delta >> 7 = 0. * */ if (cycle_delta) { if (ext_busy_delta > OVERFLOW_LIMIT) { if (cycle_delta >> 7) { busy = ((ext_busy_delta >> 7) * 100) / (cycle_delta >> 7); } else { busyper = ERROR_CODE; } } else { busy = (ext_busy_delta * 100) / cycle_delta; } } else { busyper = ERROR_CODE; } if (busy > 100) { busy = 100; } if ( busyper != ERROR_CODE ) { busyper = busy; } } ahp->ah_cycle_count = cycle_count; ahp->ah_ctl_busy = ctl_busy; ahp->ah_ext_busy = ext_busy; return busyper; #undef OVERFLOW_LIMIT #undef ERROR_CODE } /* BB Panic Watchdog declarations */ #define HAL_BB_PANIC_WD_HT20_FACTOR 74 /* 0.74 */ #define HAL_BB_PANIC_WD_HT40_FACTOR 37 /* 0.37 */ void ar9300_config_bb_panic_watchdog(struct ath_hal *ah) { #define HAL_BB_PANIC_IDLE_TIME_OUT 0x0a8c0000 const struct ieee80211_channel *chan = AH_PRIVATE(ah)->ah_curchan; u_int32_t idle_tmo_ms = AH9300(ah)->ah_bb_panic_timeout_ms; u_int32_t val, idle_count; if (idle_tmo_ms != 0) { /* enable IRQ, disable chip-reset for BB panic */ val = OS_REG_READ(ah, AR_PHY_PANIC_WD_CTL_2) & AR_PHY_BB_PANIC_CNTL2_MASK; OS_REG_WRITE(ah, AR_PHY_PANIC_WD_CTL_2, (val | AR_PHY_BB_PANIC_IRQ_ENABLE) & ~AR_PHY_BB_PANIC_RST_ENABLE); /* bound limit to 10 secs */ if (idle_tmo_ms > 10000) { idle_tmo_ms = 10000; } if (chan != AH_NULL && IEEE80211_IS_CHAN_HT40(chan)) { idle_count = (100 * idle_tmo_ms) / HAL_BB_PANIC_WD_HT40_FACTOR; } else { idle_count = (100 * idle_tmo_ms) / HAL_BB_PANIC_WD_HT20_FACTOR; } /* * enable panic in non-IDLE mode, * disable in IDLE mode, * set idle time-out */ // EV92527 : Enable IDLE mode panic OS_REG_WRITE(ah, AR_PHY_PANIC_WD_CTL_1, AR_PHY_BB_PANIC_NON_IDLE_ENABLE | AR_PHY_BB_PANIC_IDLE_ENABLE | (AR_PHY_BB_PANIC_IDLE_MASK & HAL_BB_PANIC_IDLE_TIME_OUT) | (AR_PHY_BB_PANIC_NON_IDLE_MASK & (idle_count << 2))); } else { /* disable IRQ, disable chip-reset for BB panic */ OS_REG_WRITE(ah, AR_PHY_PANIC_WD_CTL_2, OS_REG_READ(ah, AR_PHY_PANIC_WD_CTL_2) & ~(AR_PHY_BB_PANIC_RST_ENABLE | AR_PHY_BB_PANIC_IRQ_ENABLE)); /* disable panic in non-IDLE mode, disable in IDLE mode */ OS_REG_WRITE(ah, AR_PHY_PANIC_WD_CTL_1, OS_REG_READ(ah, AR_PHY_PANIC_WD_CTL_1) & ~(AR_PHY_BB_PANIC_NON_IDLE_ENABLE | AR_PHY_BB_PANIC_IDLE_ENABLE)); } HALDEBUG(ah, HAL_DEBUG_RFPARAM, "%s: %s BB Panic Watchdog tmo=%ums\n", __func__, idle_tmo_ms ? 
"Enabled" : "Disabled", idle_tmo_ms); #undef HAL_BB_PANIC_IDLE_TIME_OUT } void ar9300_handle_bb_panic(struct ath_hal *ah) { u_int32_t status; /* * we want to avoid printing in ISR context so we save * panic watchdog status to be printed later in DPC context */ AH9300(ah)->ah_bb_panic_last_status = status = OS_REG_READ(ah, AR_PHY_PANIC_WD_STATUS); /* * panic watchdog timer should reset on status read * but to make sure we write 0 to the watchdog status bit */ OS_REG_WRITE(ah, AR_PHY_PANIC_WD_STATUS, status & ~AR_PHY_BB_WD_STATUS_CLR); } int ar9300_get_bb_panic_info(struct ath_hal *ah, struct hal_bb_panic_info *bb_panic) { bb_panic->status = AH9300(ah)->ah_bb_panic_last_status; /* * For signature 04000539 do not print anything. * This is a very common occurence as a compromise between * BB Panic and AH_FALSE detects (EV71009). It indicates * radar hang, which can be cleared by reprogramming * radar related register and does not requre a chip reset */ /* Suppress BB Status mesg following signature */ switch (bb_panic->status) { case 0x04000539: case 0x04008009: case 0x04000b09: case 0x1300000a: return -1; } bb_panic->tsf = ar9300_get_tsf32(ah); bb_panic->wd = MS(bb_panic->status, AR_PHY_BB_WD_STATUS); bb_panic->det = MS(bb_panic->status, AR_PHY_BB_WD_DET_HANG); bb_panic->rdar = MS(bb_panic->status, AR_PHY_BB_WD_RADAR_SM); bb_panic->r_odfm = MS(bb_panic->status, AR_PHY_BB_WD_RX_OFDM_SM); bb_panic->r_cck = MS(bb_panic->status, AR_PHY_BB_WD_RX_CCK_SM); bb_panic->t_odfm = MS(bb_panic->status, AR_PHY_BB_WD_TX_OFDM_SM); bb_panic->t_cck = MS(bb_panic->status, AR_PHY_BB_WD_TX_CCK_SM); bb_panic->agc = MS(bb_panic->status, AR_PHY_BB_WD_AGC_SM); bb_panic->src = MS(bb_panic->status, AR_PHY_BB_WD_SRCH_SM); bb_panic->phy_panic_wd_ctl1 = OS_REG_READ(ah, AR_PHY_PANIC_WD_CTL_1); bb_panic->phy_panic_wd_ctl2 = OS_REG_READ(ah, AR_PHY_PANIC_WD_CTL_2); bb_panic->phy_gen_ctrl = OS_REG_READ(ah, AR_PHY_GEN_CTRL); bb_panic->rxc_pcnt = bb_panic->rxf_pcnt = bb_panic->txf_pcnt = 0; bb_panic->cycles = ar9300_get_mib_cycle_counts_pct(ah, &bb_panic->rxc_pcnt, &bb_panic->rxf_pcnt, &bb_panic->txf_pcnt); if (ah->ah_config.ath_hal_show_bb_panic) { ath_hal_printf(ah, "\n==== BB update: BB status=0x%08x, " "tsf=0x%08x ====\n", bb_panic->status, bb_panic->tsf); ath_hal_printf(ah, "** BB state: wd=%u det=%u rdar=%u rOFDM=%d " "rCCK=%u tOFDM=%u tCCK=%u agc=%u src=%u **\n", bb_panic->wd, bb_panic->det, bb_panic->rdar, bb_panic->r_odfm, bb_panic->r_cck, bb_panic->t_odfm, bb_panic->t_cck, bb_panic->agc, bb_panic->src); ath_hal_printf(ah, "** BB WD cntl: cntl1=0x%08x cntl2=0x%08x **\n", bb_panic->phy_panic_wd_ctl1, bb_panic->phy_panic_wd_ctl2); ath_hal_printf(ah, "** BB mode: BB_gen_controls=0x%08x **\n", bb_panic->phy_gen_ctrl); if (bb_panic->cycles) { ath_hal_printf(ah, "** BB busy times: rx_clear=%d%%, " "rx_frame=%d%%, tx_frame=%d%% **\n", bb_panic->rxc_pcnt, bb_panic->rxf_pcnt, bb_panic->txf_pcnt); } ath_hal_printf(ah, "==== BB update: done ====\n\n"); } return 0; //The returned data will be stored for athstats to retrieve it } /* set the reason for HAL reset */ void ar9300_set_hal_reset_reason(struct ath_hal *ah, u_int8_t resetreason) { AH9300(ah)->ah_reset_reason = resetreason; } /* * Configure 20/40 operation * * 20/40 = joint rx clear (control and extension) * 20 = rx clear (control) * * - NOTE: must stop MAC (tx) and requeue 40 MHz packets as 20 MHz * when changing from 20/40 => 20 only */ void ar9300_set_11n_mac2040(struct ath_hal *ah, HAL_HT_MACMODE mode) { u_int32_t macmode; /* Configure MAC for 20/40 operation */ if (mode 
 == HAL_HT_MACMODE_2040 &&
        !ah->ah_config.ath_hal_cwm_ignore_ext_cca) {
        macmode = AR_2040_JOINED_RX_CLEAR;
    } else {
        macmode = 0;
    }
    OS_REG_WRITE(ah, AR_2040_MODE, macmode);
}

/*
 * Get Rx clear (control/extension channel)
 *
 * Returns active low (busy) for ctrl/ext channel
 * Owl 2.0
 */
HAL_HT_RXCLEAR
ar9300_get_11n_rx_clear(struct ath_hal *ah)
{
    HAL_HT_RXCLEAR rxclear = 0;
    u_int32_t val;

    val = OS_REG_READ(ah, AR_DIAG_SW);

    /* control channel */
    if (val & AR_DIAG_RX_CLEAR_CTL_LOW) {
        rxclear |= HAL_RX_CLEAR_CTL_LOW;
    }
    /* extension channel */
    if (val & AR_DIAG_RX_CLEAR_EXT_LOW) {
        rxclear |= HAL_RX_CLEAR_EXT_LOW;
    }
    return rxclear;
}

/*
 * Set Rx clear (control/extension channel)
 *
 * Useful for forcing the channel to appear busy for
 * debugging/diagnostics
 * Owl 2.0
 */
void
ar9300_set_11n_rx_clear(struct ath_hal *ah, HAL_HT_RXCLEAR rxclear)
{
    /* control channel */
    if (rxclear & HAL_RX_CLEAR_CTL_LOW) {
        OS_REG_SET_BIT(ah, AR_DIAG_SW, AR_DIAG_RX_CLEAR_CTL_LOW);
    } else {
        OS_REG_CLR_BIT(ah, AR_DIAG_SW, AR_DIAG_RX_CLEAR_CTL_LOW);
    }
    /* extension channel */
    if (rxclear & HAL_RX_CLEAR_EXT_LOW) {
        OS_REG_SET_BIT(ah, AR_DIAG_SW, AR_DIAG_RX_CLEAR_EXT_LOW);
    } else {
        OS_REG_CLR_BIT(ah, AR_DIAG_SW, AR_DIAG_RX_CLEAR_EXT_LOW);
    }
}

/*
 * HAL support code for force ppm tracking workaround.
 */
u_int32_t
ar9300_ppm_get_rssi_dump(struct ath_hal *ah)
{
    u_int32_t retval;
    u_int32_t off1;
    u_int32_t off2;

    if (OS_REG_READ(ah, AR_PHY_ANALOG_SWAP) & AR_PHY_SWAP_ALT_CHAIN) {
        off1 = 0x2000;
        off2 = 0x1000;
    } else {
        off1 = 0x1000;
        off2 = 0x2000;
    }

    retval = ((0xff & OS_REG_READ(ah, AR_PHY_CHAN_INFO_GAIN_0)) << 0) |
        ((0xff & OS_REG_READ(ah, AR_PHY_CHAN_INFO_GAIN_0 + off1)) << 8) |
        ((0xff & OS_REG_READ(ah, AR_PHY_CHAN_INFO_GAIN_0 + off2)) << 16);

    return retval;
}

u_int32_t
ar9300_ppm_force(struct ath_hal *ah)
{
    u_int32_t data_fine;
    u_int32_t data4;
    //u_int32_t off1;
    //u_int32_t off2;
    HAL_BOOL signed_val = AH_FALSE;

    // if (OS_REG_READ(ah, AR_PHY_ANALOG_SWAP) & AR_PHY_SWAP_ALT_CHAIN) {
    //     off1 = 0x2000;
    //     off2 = 0x1000;
    // } else {
    //     off1 = 0x1000;
    //     off2 = 0x2000;
    // }
    data_fine = AR_PHY_CHAN_INFO_GAIN_DIFF_PPM_MASK &
        OS_REG_READ(ah, AR_PHY_CHNINFO_GAINDIFF);

    /*
     * bit[11:0] is the new ppm value; bit 11 is the sign bit.
     * So check the value in bit[10:0].
     * Get the absolute value of the ppm value read in bit[11:0],
     * then bound-check that absolute value.
     * If the value is off limit, cap the value and restore the sign bit.
     */
    if (data_fine & AR_PHY_CHAN_INFO_GAIN_DIFF_PPM_SIGNED_BIT) {
        /* get the positive value */
        data_fine = (~data_fine + 1) & AR_PHY_CHAN_INFO_GAIN_DIFF_PPM_MASK;
        signed_val = AH_TRUE;
    }
    if (data_fine > AR_PHY_CHAN_INFO_GAIN_DIFF_UPPER_LIMIT) {
        HALDEBUG(ah, HAL_DEBUG_REGIO,
            "%s Correcting ppm out of range %x\n",
            __func__, (data_fine & 0x7ff));
        data_fine = AR_PHY_CHAN_INFO_GAIN_DIFF_UPPER_LIMIT;
    }

    /*
     * Restore signed value if changed above.
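 *
 * Worked example (hypothetical values, assuming a 12-bit 0xfff mask):
 * a raw reading of 0x812 has the sign bit (bit 11) set, so its
 * magnitude is (~0x812 + 1) & 0xfff = 0x7ee (2030); after the bound
 * check, the negation below yields (-(int32_t)0x7ee) & 0xfff = 0x812
 * again when no clamping occurred.
 */
#if 0
uint32_t raw = 0x812;                   /* sign bit set */
uint32_t mag = (~raw + 1) & 0xfff;      /* 0x7ee == 2030 */
uint32_t out = (-(int32_t)mag) & 0xfff; /* back to 0x812 */
#endif
/*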
* Use typecast to avoid compilation errors */ if (signed_val) { data_fine = (-(int32_t)data_fine) & AR_PHY_CHAN_INFO_GAIN_DIFF_PPM_MASK; } /* write value */ data4 = OS_REG_READ(ah, AR_PHY_TIMING2) & ~(AR_PHY_TIMING2_USE_FORCE_PPM | AR_PHY_TIMING2_FORCE_PPM_VAL); OS_REG_WRITE(ah, AR_PHY_TIMING2, data4 | data_fine | AR_PHY_TIMING2_USE_FORCE_PPM); return data_fine; } void ar9300_ppm_un_force(struct ath_hal *ah) { u_int32_t data4; data4 = OS_REG_READ(ah, AR_PHY_TIMING2) & ~AR_PHY_TIMING2_USE_FORCE_PPM; OS_REG_WRITE(ah, AR_PHY_TIMING2, data4); } u_int32_t ar9300_ppm_arm_trigger(struct ath_hal *ah) { u_int32_t val; u_int32_t ret; val = OS_REG_READ(ah, AR_PHY_CHAN_INFO_MEMORY); ret = OS_REG_READ(ah, AR_TSF_L32); OS_REG_WRITE(ah, AR_PHY_CHAN_INFO_MEMORY, val | AR_PHY_CHAN_INFO_MEMORY_CAPTURE_MASK); /* return low word of TSF at arm time */ return ret; } int ar9300_ppm_get_trigger(struct ath_hal *ah) { if (OS_REG_READ(ah, AR_PHY_CHAN_INFO_MEMORY) & AR_PHY_CHAN_INFO_MEMORY_CAPTURE_MASK) { /* has not triggered yet, return AH_FALSE */ return 0; } /* else triggered, return AH_TRUE */ return 1; } void ar9300_mark_phy_inactive(struct ath_hal *ah) { OS_REG_WRITE(ah, AR_PHY_ACTIVE, AR_PHY_ACTIVE_DIS); } /* DEBUG */ u_int32_t ar9300_ppm_get_force_state(struct ath_hal *ah) { return OS_REG_READ(ah, AR_PHY_TIMING2) & (AR_PHY_TIMING2_USE_FORCE_PPM | AR_PHY_TIMING2_FORCE_PPM_VAL); } /* * Return the Cycle counts for rx_frame, rx_clear, and tx_frame */ HAL_BOOL ar9300_get_mib_cycle_counts(struct ath_hal *ah, HAL_SURVEY_SAMPLE *hs) { /* * XXX FreeBSD todo: reimplement this */ #if 0 p_cnts->tx_frame_count = OS_REG_READ(ah, AR_TFCNT); p_cnts->rx_frame_count = OS_REG_READ(ah, AR_RFCNT); p_cnts->rx_clear_count = OS_REG_READ(ah, AR_RCCNT); p_cnts->cycle_count = OS_REG_READ(ah, AR_CCCNT); p_cnts->is_tx_active = (OS_REG_READ(ah, AR_TFCNT) == p_cnts->tx_frame_count) ? AH_FALSE : AH_TRUE; p_cnts->is_rx_active = (OS_REG_READ(ah, AR_RFCNT) == p_cnts->rx_frame_count) ? 
AH_FALSE : AH_TRUE; #endif return AH_FALSE; } void ar9300_clear_mib_counters(struct ath_hal *ah) { u_int32_t reg_val; reg_val = OS_REG_READ(ah, AR_MIBC); OS_REG_WRITE(ah, AR_MIBC, reg_val | AR_MIBC_CMC); OS_REG_WRITE(ah, AR_MIBC, reg_val & ~AR_MIBC_CMC); } /* Enable or Disable RIFS Rx capability as part of SW WAR for Bug 31602 */ HAL_BOOL ar9300_set_rifs_delay(struct ath_hal *ah, HAL_BOOL enable) { struct ath_hal_9300 *ahp = AH9300(ah); HAL_CHANNEL_INTERNAL *ichan = ath_hal_checkchannel(ah, AH_PRIVATE(ah)->ah_curchan); HAL_BOOL is_chan_2g = IS_CHAN_2GHZ(ichan); u_int32_t tmp = 0; if (enable) { if (ahp->ah_rifs_enabled == AH_TRUE) { return AH_TRUE; } OS_REG_WRITE(ah, AR_PHY_SEARCH_START_DELAY, ahp->ah_rifs_reg[0]); OS_REG_WRITE(ah, AR_PHY_RIFS_SRCH, ahp->ah_rifs_reg[1]); ahp->ah_rifs_enabled = AH_TRUE; OS_MEMZERO(ahp->ah_rifs_reg, sizeof(ahp->ah_rifs_reg)); } else { if (ahp->ah_rifs_enabled == AH_TRUE) { ahp->ah_rifs_reg[0] = OS_REG_READ(ah, AR_PHY_SEARCH_START_DELAY); ahp->ah_rifs_reg[1] = OS_REG_READ(ah, AR_PHY_RIFS_SRCH); } /* Change rifs init delay to 0 */ OS_REG_WRITE(ah, AR_PHY_RIFS_SRCH, (ahp->ah_rifs_reg[1] & ~(AR_PHY_RIFS_INIT_DELAY))); tmp = 0xfffff000 & OS_REG_READ(ah, AR_PHY_SEARCH_START_DELAY); if (is_chan_2g) { if (IEEE80211_IS_CHAN_HT40(AH_PRIVATE(ah)->ah_curchan)) { OS_REG_WRITE(ah, AR_PHY_SEARCH_START_DELAY, tmp | 500); } else { /* Sowl 2G HT-20 default is 0x134 for search start delay */ OS_REG_WRITE(ah, AR_PHY_SEARCH_START_DELAY, tmp | 250); } } else { if (IEEE80211_IS_CHAN_HT40(AH_PRIVATE(ah)->ah_curchan)) { OS_REG_WRITE(ah, AR_PHY_SEARCH_START_DELAY, tmp | 0x370); } else { /* Sowl 5G HT-20 default is 0x1b8 for search start delay */ OS_REG_WRITE(ah, AR_PHY_SEARCH_START_DELAY, tmp | 0x1b8); } } ahp->ah_rifs_enabled = AH_FALSE; } return AH_TRUE; } /* ar9300_set_rifs_delay () */ /* Set the current RIFS Rx setting */ HAL_BOOL ar9300_set_11n_rx_rifs(struct ath_hal *ah, HAL_BOOL enable) { /* Non-Owl 11n chips */ if ((ath_hal_getcapability(ah, HAL_CAP_RIFS_RX, 0, AH_NULL) == HAL_OK)) { if (ar9300_get_capability(ah, HAL_CAP_LDPCWAR, 0, AH_NULL) == HAL_OK) { return ar9300_set_rifs_delay(ah, enable); } return AH_FALSE; } return AH_TRUE; } /* ar9300_set_11n_rx_rifs () */ static hal_mac_hangs_t ar9300_compare_dbg_hang(struct ath_hal *ah, mac_dbg_regs_t mac_dbg, hal_mac_hang_check_t hang_check, hal_mac_hangs_t hangs, u_int8_t *dcu_chain) { int i = 0; hal_mac_hangs_t found_hangs = 0; if (hangs & dcu_chain_state) { for (i = 0; i < 6; i++) { if (((mac_dbg.dma_dbg_4 >> (5 * i)) & 0x1f) == hang_check.dcu_chain_state) { found_hangs |= dcu_chain_state; *dcu_chain = i; } } for (i = 0; i < 4; i++) { if (((mac_dbg.dma_dbg_5 >> (5 * i)) & 0x1f) == hang_check.dcu_chain_state) { found_hangs |= dcu_chain_state; *dcu_chain = i + 6; } } } if (hangs & dcu_complete_state) { if ((mac_dbg.dma_dbg_6 & 0x3) == hang_check.dcu_complete_state) { found_hangs |= dcu_complete_state; } } return found_hangs; } /* end - ar9300_compare_dbg_hang */ #define NUM_STATUS_READS 50 HAL_BOOL ar9300_detect_mac_hang(struct ath_hal *ah) { struct ath_hal_9300 *ahp = AH9300(ah); mac_dbg_regs_t mac_dbg; hal_mac_hang_check_t hang_sig1_val = {0x6, 0x1, 0, 0, 0, 0, 0, 0}; hal_mac_hangs_t hang_sig1 = (dcu_chain_state | dcu_complete_state); int i = 0; u_int8_t dcu_chain = 0, current_dcu_chain_state, shift_val; if (!(ahp->ah_hang_wars & HAL_MAC_HANG_WAR)) { return AH_FALSE; } OS_MEMZERO(&mac_dbg, sizeof(mac_dbg)); mac_dbg.dma_dbg_4 = OS_REG_READ(ah, AR_DMADBG_4); mac_dbg.dma_dbg_5 = OS_REG_READ(ah, AR_DMADBG_5); mac_dbg.dma_dbg_6 = 
OS_REG_READ(ah, AR_DMADBG_6); HALDEBUG(ah, HAL_DEBUG_DFS, " dma regs: %X %X %X \n", mac_dbg.dma_dbg_4, mac_dbg.dma_dbg_5, mac_dbg.dma_dbg_6); if (hang_sig1 != ar9300_compare_dbg_hang(ah, mac_dbg, hang_sig1_val, hang_sig1, &dcu_chain)) { HALDEBUG(ah, HAL_DEBUG_DFS, " hang sig1 not found \n"); return AH_FALSE; } shift_val = (dcu_chain >= 6) ? (dcu_chain-6) : (dcu_chain); shift_val *= 5; for (i = 1; i <= NUM_STATUS_READS; i++) { if (dcu_chain < 6) { mac_dbg.dma_dbg_4 = OS_REG_READ(ah, AR_DMADBG_4); current_dcu_chain_state = ((mac_dbg.dma_dbg_4 >> shift_val) & 0x1f); } else { mac_dbg.dma_dbg_5 = OS_REG_READ(ah, AR_DMADBG_5); current_dcu_chain_state = ((mac_dbg.dma_dbg_5 >> shift_val) & 0x1f); } mac_dbg.dma_dbg_6 = OS_REG_READ(ah, AR_DMADBG_6); if (((mac_dbg.dma_dbg_6 & 0x3) != hang_sig1_val.dcu_complete_state) || (current_dcu_chain_state != hang_sig1_val.dcu_chain_state)) { return AH_FALSE; } } HALDEBUG(ah, HAL_DEBUG_DFS, "%s sig5count=%d sig6count=%d ", __func__, ahp->ah_hang[MAC_HANG_SIG1], ahp->ah_hang[MAC_HANG_SIG2]); ahp->ah_hang[MAC_HANG_SIG1]++; return AH_TRUE; } /* end - ar9300_detect_mac_hang */ /* Determine if the baseband is hung by reading the Observation Bus Register */ HAL_BOOL ar9300_detect_bb_hang(struct ath_hal *ah) { #define N(a) (sizeof(a) / sizeof(a[0])) struct ath_hal_9300 *ahp = AH9300(ah); u_int32_t hang_sig = 0; int i = 0; /* Check the PCU Observation Bus 1 register (0x806c) NUM_STATUS_READS times * * 4 known BB hang signatures - * [1] bits 8,9,11 are 0. State machine state (bits 25-31) is 0x1E * [2] bits 8,9 are 1, bit 11 is 0. State machine state (bits 25-31) is 0x52 * [3] bits 8,9 are 1, bit 11 is 0. State machine state (bits 25-31) is 0x18 * [4] bit 10 is 1, bit 11 is 0. WEP state (bits 12-17) is 0x2, * Rx State (bits 20-24) is 0x7. */ hal_hw_hang_check_t hang_list [] = { /* Offset Reg Value Reg Mask Hang Offset */ {AR_OBS_BUS_1, 0x1E000000, 0x7E000B00, BB_HANG_SIG1}, {AR_OBS_BUS_1, 0x52000B00, 0x7E000B00, BB_HANG_SIG2}, {AR_OBS_BUS_1, 0x18000B00, 0x7E000B00, BB_HANG_SIG3}, {AR_OBS_BUS_1, 0x00702400, 0x7E7FFFEF, BB_HANG_SIG4} }; if (!(ahp->ah_hang_wars & (HAL_RIFS_BB_HANG_WAR | HAL_DFS_BB_HANG_WAR | HAL_RX_STUCK_LOW_BB_HANG_WAR))) { return AH_FALSE; } hang_sig = OS_REG_READ(ah, AR_OBS_BUS_1); for (i = 1; i <= NUM_STATUS_READS; i++) { if (hang_sig != OS_REG_READ(ah, AR_OBS_BUS_1)) { return AH_FALSE; } } for (i = 0; i < N(hang_list); i++) { if ((hang_sig & hang_list[i].hang_mask) == hang_list[i].hang_val) { ahp->ah_hang[hang_list[i].hang_offset]++; HALDEBUG(ah, HAL_DEBUG_DFS, "%s sig1count=%d sig2count=%d " "sig3count=%d sig4count=%d\n", __func__, ahp->ah_hang[BB_HANG_SIG1], ahp->ah_hang[BB_HANG_SIG2], ahp->ah_hang[BB_HANG_SIG3], ahp->ah_hang[BB_HANG_SIG4]); return AH_TRUE; } } HALDEBUG(ah, HAL_DEBUG_DFS, "%s Found an unknown BB hang signature! " "<0x806c>=0x%x\n", __func__, hang_sig); return AH_FALSE; #undef N } /* end - ar9300_detect_bb_hang () */ #undef NUM_STATUS_READS HAL_STATUS ar9300_select_ant_config(struct ath_hal *ah, u_int32_t cfg) { struct ath_hal_9300 *ahp = AH9300(ah); const struct ieee80211_channel *chan = AH_PRIVATE(ah)->ah_curchan; HAL_CHANNEL_INTERNAL *ichan = ath_hal_checkchannel(ah, chan); const HAL_CAPABILITIES *p_cap = &AH_PRIVATE(ah)->ah_caps; u_int16_t ant_config; u_int32_t hal_num_ant_config; hal_num_ant_config = IS_CHAN_2GHZ(ichan) ? 
p_cap->halNumAntCfg2GHz: p_cap->halNumAntCfg5GHz; if (cfg < hal_num_ant_config) { if (HAL_OK == ar9300_eeprom_get_ant_cfg(ahp, chan, cfg, &ant_config)) { OS_REG_WRITE(ah, AR_PHY_SWITCH_COM, ant_config); return HAL_OK; } } return HAL_EINVAL; } /* * Functions to get/set DCS mode */ void ar9300_set_dcs_mode(struct ath_hal *ah, u_int32_t mode) { AH9300(ah)->ah_dcs_enable = mode; } u_int32_t ar9300_get_dcs_mode(struct ath_hal *ah) { return AH9300(ah)->ah_dcs_enable; } #if ATH_BT_COEX void ar9300_set_bt_coex_info(struct ath_hal *ah, HAL_BT_COEX_INFO *btinfo) { struct ath_hal_9300 *ahp = AH9300(ah); ahp->ah_bt_module = btinfo->bt_module; ahp->ah_bt_coex_config_type = btinfo->bt_coex_config; ahp->ah_bt_active_gpio_select = btinfo->bt_gpio_bt_active; ahp->ah_bt_priority_gpio_select = btinfo->bt_gpio_bt_priority; ahp->ah_wlan_active_gpio_select = btinfo->bt_gpio_wlan_active; ahp->ah_bt_active_polarity = btinfo->bt_active_polarity; ahp->ah_bt_coex_single_ant = btinfo->bt_single_ant; ahp->ah_bt_wlan_isolation = btinfo->bt_isolation; } void ar9300_bt_coex_config(struct ath_hal *ah, HAL_BT_COEX_CONFIG *btconf) { struct ath_hal_9300 *ahp = AH9300(ah); HAL_BOOL rx_clear_polarity; /* * For Kiwi and Osprey, the polarity of rx_clear is active high. * The bt_rxclear_polarity flag from ath_dev needs to be inverted. */ rx_clear_polarity = !btconf->bt_rxclear_polarity; ahp->ah_bt_coex_mode = (ahp->ah_bt_coex_mode & AR_BT_QCU_THRESH) | SM(btconf->bt_time_extend, AR_BT_TIME_EXTEND) | SM(btconf->bt_txstate_extend, AR_BT_TXSTATE_EXTEND) | SM(btconf->bt_txframe_extend, AR_BT_TX_FRAME_EXTEND) | SM(btconf->bt_mode, AR_BT_MODE) | SM(btconf->bt_quiet_collision, AR_BT_QUIET) | SM(rx_clear_polarity, AR_BT_RX_CLEAR_POLARITY) | SM(btconf->bt_priority_time, AR_BT_PRIORITY_TIME) | SM(btconf->bt_first_slot_time, AR_BT_FIRST_SLOT_TIME); ahp->ah_bt_coex_mode2 |= SM(btconf->bt_hold_rxclear, AR_BT_HOLD_RX_CLEAR); if (ahp->ah_bt_coex_single_ant == AH_FALSE) { /* Enable ACK to go out even though BT has higher priority. 
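* (Packing note, a sketch only: the SM() macro used to assemble ah_bt_coex_mode above is the usual HAL shift-and-mask idiom, roughly SM(v, FIELD) == ((v) << FIELD_S) & FIELD, so each bt_* parameter lands in its own bit field; see the HAL headers for the authoritative definition.)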
*/ ahp->ah_bt_coex_mode2 |= AR_BT_DISABLE_BT_ANT; } } void ar9300_bt_coex_set_qcu_thresh(struct ath_hal *ah, int qnum) { struct ath_hal_9300 *ahp = AH9300(ah); /* clear the old value, then set the new value */ ahp->ah_bt_coex_mode &= ~AR_BT_QCU_THRESH; ahp->ah_bt_coex_mode |= SM(qnum, AR_BT_QCU_THRESH); } void ar9300_bt_coex_set_weights(struct ath_hal *ah, u_int32_t stomp_type) { struct ath_hal_9300 *ahp = AH9300(ah); ahp->ah_bt_coex_bt_weight[0] = AR9300_BT_WGHT; ahp->ah_bt_coex_bt_weight[1] = AR9300_BT_WGHT; ahp->ah_bt_coex_bt_weight[2] = AR9300_BT_WGHT; ahp->ah_bt_coex_bt_weight[3] = AR9300_BT_WGHT; switch (stomp_type) { case HAL_BT_COEX_STOMP_ALL: ahp->ah_bt_coex_wlan_weight[0] = AR9300_STOMP_ALL_WLAN_WGHT0; ahp->ah_bt_coex_wlan_weight[1] = AR9300_STOMP_ALL_WLAN_WGHT1; break; case HAL_BT_COEX_STOMP_LOW: ahp->ah_bt_coex_wlan_weight[0] = AR9300_STOMP_LOW_WLAN_WGHT0; ahp->ah_bt_coex_wlan_weight[1] = AR9300_STOMP_LOW_WLAN_WGHT1; break; case HAL_BT_COEX_STOMP_ALL_FORCE: ahp->ah_bt_coex_wlan_weight[0] = AR9300_STOMP_ALL_FORCE_WLAN_WGHT0; ahp->ah_bt_coex_wlan_weight[1] = AR9300_STOMP_ALL_FORCE_WLAN_WGHT1; break; case HAL_BT_COEX_STOMP_LOW_FORCE: ahp->ah_bt_coex_wlan_weight[0] = AR9300_STOMP_LOW_FORCE_WLAN_WGHT0; ahp->ah_bt_coex_wlan_weight[1] = AR9300_STOMP_LOW_FORCE_WLAN_WGHT1; break; case HAL_BT_COEX_STOMP_NONE: case HAL_BT_COEX_NO_STOMP: ahp->ah_bt_coex_wlan_weight[0] = AR9300_STOMP_NONE_WLAN_WGHT0; ahp->ah_bt_coex_wlan_weight[1] = AR9300_STOMP_NONE_WLAN_WGHT1; break; default: /* There is a force_weight from registry */ ahp->ah_bt_coex_wlan_weight[0] = stomp_type; ahp->ah_bt_coex_wlan_weight[1] = stomp_type; break; } } void ar9300_bt_coex_setup_bmiss_thresh(struct ath_hal *ah, u_int32_t thresh) { struct ath_hal_9300 *ahp = AH9300(ah); /* clear the old value, then set the new value */ ahp->ah_bt_coex_mode2 &= ~AR_BT_BCN_MISS_THRESH; ahp->ah_bt_coex_mode2 |= SM(thresh, AR_BT_BCN_MISS_THRESH); } static void ar9300_bt_coex_antenna_diversity(struct ath_hal *ah, u_int32_t value) { struct ath_hal_9300 *ahp = AH9300(ah); #if ATH_ANT_DIV_COMB //struct ath_hal_private *ahpriv = AH_PRIVATE(ah); const struct ieee80211_channel *chan = AH_PRIVATE(ah)->ah_curchan; #endif HALDEBUG(ah, HAL_DEBUG_BT_COEX, "%s: called, value=%d\n", __func__, value); if (ahp->ah_bt_coex_flag & HAL_BT_COEX_FLAG_ANT_DIV_ALLOW) { if (ahp->ah_diversity_control == HAL_ANT_VARIABLE) { /* Config antenna diversity */ #if ATH_ANT_DIV_COMB ar9300_ant_ctrl_set_lna_div_use_bt_ant(ah, value, chan); #endif } } } void ar9300_bt_coex_set_parameter(struct ath_hal *ah, u_int32_t type, u_int32_t value) { struct ath_hal_9300 *ahp = AH9300(ah); struct ath_hal_private *ahpriv = AH_PRIVATE(ah); switch (type) { case HAL_BT_COEX_SET_ACK_PWR: if (value) { ahp->ah_bt_coex_flag |= HAL_BT_COEX_FLAG_LOW_ACK_PWR; } else { ahp->ah_bt_coex_flag &= ~HAL_BT_COEX_FLAG_LOW_ACK_PWR; } ar9300_set_tx_power_limit(ah, ahpriv->ah_powerLimit, ahpriv->ah_extraTxPow, 0); break; case HAL_BT_COEX_ANTENNA_DIVERSITY: if (AR_SREV_POSEIDON(ah) || AR_SREV_APHRODITE(ah)) { ahp->ah_bt_coex_flag |= HAL_BT_COEX_FLAG_ANT_DIV_ALLOW; if (value) { ahp->ah_bt_coex_flag |= HAL_BT_COEX_FLAG_ANT_DIV_ENABLE; } else { ahp->ah_bt_coex_flag &= ~HAL_BT_COEX_FLAG_ANT_DIV_ENABLE; } ar9300_bt_coex_antenna_diversity(ah, value); } break; case HAL_BT_COEX_LOWER_TX_PWR: if (value) { ahp->ah_bt_coex_flag |= HAL_BT_COEX_FLAG_LOWER_TX_PWR; } else { ahp->ah_bt_coex_flag &= ~HAL_BT_COEX_FLAG_LOWER_TX_PWR; } ar9300_set_tx_power_limit(ah, ahpriv->ah_powerLimit, ahpriv->ah_extraTxPow, 0); break; #if 
ATH_SUPPORT_MCI case HAL_BT_COEX_MCI_MAX_TX_PWR: if ((ah->ah_config.ath_hal_mci_config & ATH_MCI_CONFIG_CONCUR_TX) == ATH_MCI_CONCUR_TX_SHARED_CHN) { if (value) { ahp->ah_bt_coex_flag |= HAL_BT_COEX_FLAG_MCI_MAX_TX_PWR; ahp->ah_mci_concur_tx_en = AH_TRUE; } else { ahp->ah_bt_coex_flag &= ~HAL_BT_COEX_FLAG_MCI_MAX_TX_PWR; ahp->ah_mci_concur_tx_en = AH_FALSE; } ar9300_set_tx_power_limit(ah, ahpriv->ah_powerLimit, ahpriv->ah_extraTxPow, 0); } HALDEBUG(ah, HAL_DEBUG_BT_COEX, "(MCI) concur_tx_en = %d\n", ahp->ah_mci_concur_tx_en); break; case HAL_BT_COEX_MCI_FTP_STOMP_RX: if (value) { ahp->ah_bt_coex_flag |= HAL_BT_COEX_FLAG_MCI_FTP_STOMP_RX; } else { ahp->ah_bt_coex_flag &= ~HAL_BT_COEX_FLAG_MCI_FTP_STOMP_RX; } break; #endif default: break; } } void ar9300_bt_coex_disable(struct ath_hal *ah) { struct ath_hal_9300 *ahp = AH9300(ah); /* Always drive rx_clear_external output as 0 */ ath_hal_gpioCfgOutput(ah, ahp->ah_wlan_active_gpio_select, HAL_GPIO_OUTPUT_MUX_AS_OUTPUT); if (ahp->ah_bt_coex_single_ant == AH_TRUE) { OS_REG_RMW_FIELD(ah, AR_QUIET1, AR_QUIET1_QUIET_ACK_CTS_ENABLE, 1); OS_REG_RMW_FIELD(ah, AR_PCU_MISC, AR_PCU_BT_ANT_PREVENT_RX, 0); } OS_REG_WRITE(ah, AR_BT_COEX_MODE, AR_BT_QUIET | AR_BT_MODE); OS_REG_WRITE(ah, AR_BT_COEX_MODE2, 0); OS_REG_WRITE(ah, AR_BT_COEX_WL_WEIGHTS0, 0); OS_REG_WRITE(ah, AR_BT_COEX_WL_WEIGHTS1, 0); OS_REG_WRITE(ah, AR_BT_COEX_BT_WEIGHTS0, 0); OS_REG_WRITE(ah, AR_BT_COEX_BT_WEIGHTS1, 0); OS_REG_WRITE(ah, AR_BT_COEX_BT_WEIGHTS2, 0); OS_REG_WRITE(ah, AR_BT_COEX_BT_WEIGHTS3, 0); ahp->ah_bt_coex_enabled = AH_FALSE; } int ar9300_bt_coex_enable(struct ath_hal *ah) { struct ath_hal_9300 *ahp = AH9300(ah); /* Program coex mode and weight registers to actually enable coex */ OS_REG_WRITE(ah, AR_BT_COEX_MODE, ahp->ah_bt_coex_mode); OS_REG_WRITE(ah, AR_BT_COEX_MODE2, ahp->ah_bt_coex_mode2); OS_REG_WRITE(ah, AR_BT_COEX_WL_WEIGHTS0, ahp->ah_bt_coex_wlan_weight[0]); OS_REG_WRITE(ah, AR_BT_COEX_WL_WEIGHTS1, ahp->ah_bt_coex_wlan_weight[1]); OS_REG_WRITE(ah, AR_BT_COEX_BT_WEIGHTS0, ahp->ah_bt_coex_bt_weight[0]); OS_REG_WRITE(ah, AR_BT_COEX_BT_WEIGHTS1, ahp->ah_bt_coex_bt_weight[1]); OS_REG_WRITE(ah, AR_BT_COEX_BT_WEIGHTS2, ahp->ah_bt_coex_bt_weight[2]); OS_REG_WRITE(ah, AR_BT_COEX_BT_WEIGHTS3, ahp->ah_bt_coex_bt_weight[3]); if (ahp->ah_bt_coex_flag & HAL_BT_COEX_FLAG_LOW_ACK_PWR) { OS_REG_WRITE(ah, AR_TPC, HAL_BT_COEX_LOW_ACK_POWER); } else { OS_REG_WRITE(ah, AR_TPC, HAL_BT_COEX_HIGH_ACK_POWER); } OS_REG_RMW_FIELD(ah, AR_QUIET1, AR_QUIET1_QUIET_ACK_CTS_ENABLE, 1); if (ahp->ah_bt_coex_single_ant == AH_TRUE) { OS_REG_RMW_FIELD(ah, AR_PCU_MISC, AR_PCU_BT_ANT_PREVENT_RX, 1); } else { OS_REG_RMW_FIELD(ah, AR_PCU_MISC, AR_PCU_BT_ANT_PREVENT_RX, 0); } if (ahp->ah_bt_coex_config_type == HAL_BT_COEX_CFG_3WIRE) { /* For 3-wire, configure the desired GPIO port for rx_clear */ ath_hal_gpioCfgOutput(ah, ahp->ah_wlan_active_gpio_select, HAL_GPIO_OUTPUT_MUX_AS_WLAN_ACTIVE); } else if ((ahp->ah_bt_coex_config_type >= HAL_BT_COEX_CFG_2WIRE_2CH) && (ahp->ah_bt_coex_config_type <= HAL_BT_COEX_CFG_2WIRE_CH0)) { /* For 2-wire, configure the desired GPIO port for TX_FRAME output */ ath_hal_gpioCfgOutput(ah, ahp->ah_wlan_active_gpio_select, HAL_GPIO_OUTPUT_MUX_AS_TX_FRAME); } /* * Enable a weak pull down on BT_ACTIVE. * When BT device is disabled, BT_ACTIVE might be floating. 
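* The AR_GPIO_PDPU register allocates a 2-bit pull-up/pull-down field per GPIO, hence the "gpio * 2" shifts below: e.g. with BT_ACTIVE on GPIO 5, the RMW would set AR_GPIO_PULL_DOWN in bits [11:10], with AR_GPIO_PDPU_OPTION shifted to the same position as the mask. (The 2-bits-per-pin layout is inferred from the scaling in the code; treat it as an assumption.)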
*/ OS_REG_RMW(ah, AR_HOSTIF_REG(ah, AR_GPIO_PDPU), (AR_GPIO_PULL_DOWN << (ahp->ah_bt_active_gpio_select * 2)), (AR_GPIO_PDPU_OPTION << (ahp->ah_bt_active_gpio_select * 2))); ahp->ah_bt_coex_enabled = AH_TRUE; return 0; } u_int32_t ar9300_get_bt_active_gpio(struct ath_hal *ah, u_int32_t reg) { return 0; } u_int32_t ar9300_get_wlan_active_gpio(struct ath_hal *ah, u_int32_t reg, u_int32_t bOn) { return bOn; } void ar9300_init_bt_coex(struct ath_hal *ah) { struct ath_hal_9300 *ahp = AH9300(ah); if (ahp->ah_bt_coex_config_type == HAL_BT_COEX_CFG_3WIRE) { OS_REG_SET_BIT(ah, AR_HOSTIF_REG(ah, AR_GPIO_INPUT_EN_VAL), (AR_GPIO_INPUT_EN_VAL_BT_PRIORITY_BB | AR_GPIO_INPUT_EN_VAL_BT_ACTIVE_BB)); /* * Set input mux for bt_priority_async and * bt_active_async to GPIO pins */ OS_REG_RMW_FIELD(ah, AR_HOSTIF_REG(ah, AR_GPIO_INPUT_MUX1), AR_GPIO_INPUT_MUX1_BT_ACTIVE, ahp->ah_bt_active_gpio_select); OS_REG_RMW_FIELD(ah, AR_HOSTIF_REG(ah, AR_GPIO_INPUT_MUX1), AR_GPIO_INPUT_MUX1_BT_PRIORITY, ahp->ah_bt_priority_gpio_select); /* Configure the desired GPIO ports for input */ ath_hal_gpioCfgInput(ah, ahp->ah_bt_active_gpio_select); ath_hal_gpioCfgInput(ah, ahp->ah_bt_priority_gpio_select); if (ahp->ah_bt_coex_enabled) { ar9300_bt_coex_enable(ah); } else { ar9300_bt_coex_disable(ah); } } else if ((ahp->ah_bt_coex_config_type >= HAL_BT_COEX_CFG_2WIRE_2CH) && (ahp->ah_bt_coex_config_type <= HAL_BT_COEX_CFG_2WIRE_CH0)) { /* 2-wire */ if (ahp->ah_bt_coex_enabled) { /* Connect bt_active_async to baseband */ OS_REG_CLR_BIT(ah, AR_HOSTIF_REG(ah, AR_GPIO_INPUT_EN_VAL), (AR_GPIO_INPUT_EN_VAL_BT_PRIORITY_DEF | AR_GPIO_INPUT_EN_VAL_BT_FREQUENCY_DEF)); OS_REG_SET_BIT(ah, AR_HOSTIF_REG(ah, AR_GPIO_INPUT_EN_VAL), AR_GPIO_INPUT_EN_VAL_BT_ACTIVE_BB); /* * Set input mux for bt_priority_async and * bt_active_async to GPIO pins */ OS_REG_RMW_FIELD(ah, AR_HOSTIF_REG(ah, AR_GPIO_INPUT_MUX1), AR_GPIO_INPUT_MUX1_BT_ACTIVE, ahp->ah_bt_active_gpio_select); /* Configure the desired GPIO ports for input */ ath_hal_gpioCfgInput(ah, ahp->ah_bt_active_gpio_select); /* Enable coexistence on initialization */ ar9300_bt_coex_enable(ah); } } #if ATH_SUPPORT_MCI else if (ahp->ah_bt_coex_config_type == HAL_BT_COEX_CFG_MCI) { if (ahp->ah_bt_coex_enabled) { ar9300_mci_bt_coex_enable(ah); } else { ar9300_mci_bt_coex_disable(ah); } } #endif /* ATH_SUPPORT_MCI */ } #endif /* ATH_BT_COEX */ HAL_STATUS ar9300_set_proxy_sta(struct ath_hal *ah, HAL_BOOL enable) { u_int32_t val; int wasp_mm_rev; #define AR_SOC_RST_REVISION_ID 0xB8060090 #define REG_READ(_reg) *((volatile u_int32_t *)(_reg)) wasp_mm_rev = (REG_READ(AR_SOC_RST_REVISION_ID) & AR_SREV_REVISION_WASP_MINOR_MINOR_MASK) >> AR_SREV_REVISION_WASP_MINOR_MINOR_SHIFT; #undef AR_SOC_RST_REVISION_ID #undef REG_READ /* * Azimuth (ProxySTA) Mode is only supported correctly by * Peacock or WASP 1.3.0.1 or later (hopefully) chips. * * Enable this feature for Scorpion at this time. The silicon * still needs to be validated.
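* In other words, the check below allows AR9580 (Peacock) and Scorpion outright, allows Wasp only at rev 1.3 with a new enough minor-minor revision (or any rev above 1.3), and fails everything else with HAL_ENOTSUPP. */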
if (!(AH_PRIVATE((ah))->ah_macVersion == AR_SREV_VERSION_AR9580) && !(AH_PRIVATE((ah))->ah_macVersion == AR_SREV_VERSION_SCORPION) && !((AH_PRIVATE((ah))->ah_macVersion == AR_SREV_VERSION_WASP) && ((AH_PRIVATE((ah))->ah_macRev > AR_SREV_REVISION_WASP_13) || (AH_PRIVATE((ah))->ah_macRev == AR_SREV_REVISION_WASP_13 && wasp_mm_rev >= 0 /* 1 */)))) { HALDEBUG(ah, HAL_DEBUG_UNMASKABLE, "%s error: current chip (ver 0x%x, " "rev 0x%x, minor minor rev 0x%x) cannot support Azimuth Mode\n", __func__, AH_PRIVATE((ah))->ah_macVersion, AH_PRIVATE((ah))->ah_macRev, wasp_mm_rev); return HAL_ENOTSUPP; } OS_REG_WRITE(ah, AR_MAC_PCU_LOGIC_ANALYZER, AR_MAC_PCU_LOGIC_ANALYZER_PSTABUG75996); /* turn on mode bit[24] for proxy sta */ OS_REG_WRITE(ah, AR_PCU_MISC_MODE2, OS_REG_READ(ah, AR_PCU_MISC_MODE2) | AR_PCU_MISC_MODE2_PROXY_STA); val = OS_REG_READ(ah, AR_AZIMUTH_MODE); if (enable) { val |= AR_AZIMUTH_KEY_SEARCH_AD1 | AR_AZIMUTH_CTS_MATCH_TX_AD2 | AR_AZIMUTH_BA_USES_AD1; /* turn off filter pass hold (bit 9) */ val &= ~AR_AZIMUTH_FILTER_PASS_HOLD; } else { val &= ~(AR_AZIMUTH_KEY_SEARCH_AD1 | AR_AZIMUTH_CTS_MATCH_TX_AD2 | AR_AZIMUTH_BA_USES_AD1); } OS_REG_WRITE(ah, AR_AZIMUTH_MODE, val); /* enable promiscuous mode */ OS_REG_WRITE(ah, AR_RX_FILTER, OS_REG_READ(ah, AR_RX_FILTER) | HAL_RX_FILTER_PROM); /* enable promiscuous mode in azimuth mode */ OS_REG_WRITE(ah, AR_PCU_MISC_MODE2, AR_PCU_MISC_MODE2_PROM_VC_MODE); OS_REG_WRITE(ah, AR_MAC_PCU_LOGIC_ANALYZER, AR_MAC_PCU_LOGIC_ANALYZER_VC_MODE); /* turn on filter pass hold (bit 9) */ OS_REG_WRITE(ah, AR_AZIMUTH_MODE, OS_REG_READ(ah, AR_AZIMUTH_MODE) | AR_AZIMUTH_FILTER_PASS_HOLD); return HAL_OK; } #if 0 void ar9300_mat_enable(struct ath_hal *ah, int enable) { /* * The MAT (s/w ProxySTA) implementation requires turning off interrupt * mitigation and turning on key search always for better performance. */ struct ath_hal_9300 *ahp = AH9300(ah); struct ath_hal_private *ap = AH_PRIVATE(ah); ahp->ah_intr_mitigation_rx = !enable; if (ahp->ah_intr_mitigation_rx) { /* * Enable Interrupt Mitigation for Rx. * If no build-specific limits for the rx interrupt mitigation * timer have been specified, use conservative defaults.
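* (A rough reading of the defaults below, inferred from the register field names and not verified against the datasheet: AR_RIMT_FIRST bounds the delay after the first received frame and AR_RIMT_LAST extends the window per subsequent frame, in microseconds, so 2000/500 trades a little latency for fewer rx interrupts.) */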
#ifndef AH_RIMT_LAST_MICROSEC #define AH_RIMT_LAST_MICROSEC 500 #endif #ifndef AH_RIMT_FIRST_MICROSEC #define AH_RIMT_FIRST_MICROSEC 2000 #endif OS_REG_RMW_FIELD(ah, AR_RIMT, AR_RIMT_LAST, AH_RIMT_LAST_MICROSEC); OS_REG_RMW_FIELD(ah, AR_RIMT, AR_RIMT_FIRST, AH_RIMT_FIRST_MICROSEC); } else { OS_REG_WRITE(ah, AR_RIMT, 0); } ahp->ah_enable_keysearch_always = !!enable; ar9300_enable_keysearch_always(ah, ahp->ah_enable_keysearch_always); } #endif void ar9300_enable_tpc(struct ath_hal *ah) { u_int32_t val = 0; ah->ah_config.ath_hal_desc_tpc = 1; /* Enable TPC */ OS_REG_RMW_FIELD(ah, AR_PHY_PWRTX_MAX, AR_PHY_PER_PACKET_POWERTX_MAX, 1); /* * Disable per-chain power reduction since we are already * accounting for this in our calculations */ val = OS_REG_READ(ah, AR_PHY_POWER_TX_SUB); if (AR_SREV_WASP(ah)) { OS_REG_WRITE(ah, AR_PHY_POWER_TX_SUB, val & AR_PHY_POWER_TX_SUB_2_DISABLE); } else { OS_REG_WRITE(ah, AR_PHY_POWER_TX_SUB, val & AR_PHY_POWER_TX_SUB_3_DISABLE); } } /* * ar9300_force_tsf_sync * This function forces the TSF to sync to the given bssid; it is implemented * as a temporary hack for the AoW demo, and is primarily used in the WDS client * mode of operation, where we sync the TSF to the RootAP's TSF values */ void ar9300_force_tsf_sync(struct ath_hal *ah, const u_int8_t *bssid, u_int16_t assoc_id) { ar9300_set_operating_mode(ah, HAL_M_STA); ar9300_write_associd(ah, bssid, assoc_id); } void ar9300_chk_rssi_update_tx_pwr(struct ath_hal *ah, int rssi) { struct ath_hal_9300 *ahp = AH9300(ah); u_int32_t temp_obdb_reg_val = 0, temp_tcp_reg_val; u_int32_t temp_powertx_rate9_reg_val; int8_t olpc_power_offset = 0; int8_t tmp_olpc_val = 0; HAL_RSSI_TX_POWER old_greentx_status; u_int8_t target_power_val_t[ar9300_rate_size]; int8_t tmp_rss1_thr1, tmp_rss1_thr2; if ((AH_PRIVATE(ah)->ah_opmode != HAL_M_STA) || !ah->ah_config.ath_hal_sta_update_tx_pwr_enable) { return; } old_greentx_status = AH9300(ah)->green_tx_status; if (ahp->ah_hw_green_tx_enable) { tmp_rss1_thr1 = AR9485_HW_GREEN_TX_THRES1_DB; tmp_rss1_thr2 = AR9485_HW_GREEN_TX_THRES2_DB; } else { tmp_rss1_thr1 = WB225_SW_GREEN_TX_THRES1_DB; tmp_rss1_thr2 = WB225_SW_GREEN_TX_THRES2_DB; } if ((ah->ah_config.ath_hal_sta_update_tx_pwr_enable_S1) && (rssi > tmp_rss1_thr1)) { if (old_greentx_status != HAL_RSSI_TX_POWER_SHORT) { AH9300(ah)->green_tx_status = HAL_RSSI_TX_POWER_SHORT; } } else if (ah->ah_config.ath_hal_sta_update_tx_pwr_enable_S2 && (rssi > tmp_rss1_thr2)) { if (old_greentx_status != HAL_RSSI_TX_POWER_MIDDLE) { AH9300(ah)->green_tx_status = HAL_RSSI_TX_POWER_MIDDLE; } } else if (ah->ah_config.ath_hal_sta_update_tx_pwr_enable_S3) { if (old_greentx_status != HAL_RSSI_TX_POWER_LONG) { AH9300(ah)->green_tx_status = HAL_RSSI_TX_POWER_LONG; } } /* If the status has not changed, don't do anything */ if (old_greentx_status == AH9300(ah)->green_tx_status) { return; } /* for Poseidon, when ath_hal_sta_update_tx_pwr_enable is enabled */ if ((AH9300(ah)->green_tx_status != HAL_RSSI_TX_POWER_NONE) && AR_SREV_POSEIDON(ah)) { if (ahp->ah_hw_green_tx_enable) { switch (AH9300(ah)->green_tx_status) { case HAL_RSSI_TX_POWER_SHORT: /* 1. TxPower Config */ OS_MEMCPY(target_power_val_t, ar9485_hw_gtx_tp_distance_short, sizeof(target_power_val_t)); /* 1.1 Store OLPC Delta Calibration Offset */ olpc_power_offset = 0; /* 2. Store OB/DB */ /* 3. Store TPC setting */ temp_tcp_reg_val = (SM(14, AR_TPC_ACK) | SM(14, AR_TPC_CTS) | SM(14, AR_TPC_CHIRP) | SM(14, AR_TPC_RPT)); /* 4.
Store BB_powertx_rate9 value */ temp_powertx_rate9_reg_val = AR9485_BBPWRTXRATE9_HW_GREEN_TX_SHORT_VALUE; break; case HAL_RSSI_TX_POWER_MIDDLE: /* 1. TxPower Config */ OS_MEMCPY(target_power_val_t, ar9485_hw_gtx_tp_distance_middle, sizeof(target_power_val_t)); /* 1.1 Store OLPC Delta Calibration Offset */ olpc_power_offset = 0; /* 2. Store OB/DB */ /* 3. Store TPC setting */ temp_tcp_reg_val = (SM(18, AR_TPC_ACK) | SM(18, AR_TPC_CTS) | SM(18, AR_TPC_CHIRP) | SM(18, AR_TPC_RPT)); /* 4. Store BB_powertx_rate9 value */ temp_powertx_rate9_reg_val = AR9485_BBPWRTXRATE9_HW_GREEN_TX_MIDDLE_VALUE; break; case HAL_RSSI_TX_POWER_LONG: default: /* 1. TxPower Config */ OS_MEMCPY(target_power_val_t, ahp->ah_default_tx_power, sizeof(target_power_val_t)); /* 1.1 Store OLPC Delta Calibration Offset */ olpc_power_offset = 0; /* 2. Store OB/DB1/DB2 */ /* 3. Store TPC setting */ temp_tcp_reg_val = AH9300(ah)->ah_ob_db1[POSEIDON_STORED_REG_TPC]; /* 4. Store BB_powertx_rate9 value */ temp_powertx_rate9_reg_val = AH9300(ah)->ah_ob_db1[POSEIDON_STORED_REG_BB_PWRTX_RATE9]; break; } } else { switch (AH9300(ah)->green_tx_status) { case HAL_RSSI_TX_POWER_SHORT: /* 1. TxPower Config */ OS_MEMCPY(target_power_val_t, wb225_sw_gtx_tp_distance_short, sizeof(target_power_val_t)); /* 1.1 Store OLPC Delta Calibration Offset */ olpc_power_offset = wb225_gtx_olpc_cal_offset[WB225_OB_GREEN_TX_SHORT_VALUE] - wb225_gtx_olpc_cal_offset[WB225_OB_CALIBRATION_VALUE]; /* 2. Store OB/DB */ temp_obdb_reg_val = AH9300(ah)->ah_ob_db1[POSEIDON_STORED_REG_OBDB]; temp_obdb_reg_val &= ~(AR_PHY_65NM_CH0_TXRF2_DB2G | AR_PHY_65NM_CH0_TXRF2_OB2G_CCK | AR_PHY_65NM_CH0_TXRF2_OB2G_PSK | AR_PHY_65NM_CH0_TXRF2_OB2G_QAM); temp_obdb_reg_val |= (SM(5, AR_PHY_65NM_CH0_TXRF2_DB2G) | SM(WB225_OB_GREEN_TX_SHORT_VALUE, AR_PHY_65NM_CH0_TXRF2_OB2G_CCK) | SM(WB225_OB_GREEN_TX_SHORT_VALUE, AR_PHY_65NM_CH0_TXRF2_OB2G_PSK) | SM(WB225_OB_GREEN_TX_SHORT_VALUE, AR_PHY_65NM_CH0_TXRF2_OB2G_QAM)); /* 3. Store TPC setting */ temp_tcp_reg_val = (SM(6, AR_TPC_ACK) | SM(6, AR_TPC_CTS) | SM(6, AR_TPC_CHIRP) | SM(6, AR_TPC_RPT)); /* 4. Store BB_powertx_rate9 value */ temp_powertx_rate9_reg_val = WB225_BBPWRTXRATE9_SW_GREEN_TX_SHORT_VALUE; break; case HAL_RSSI_TX_POWER_MIDDLE: /* 1. TxPower Config */ OS_MEMCPY(target_power_val_t, wb225_sw_gtx_tp_distance_middle, sizeof(target_power_val_t)); /* 1.1 Store OLPC Delta Calibration Offset */ olpc_power_offset = wb225_gtx_olpc_cal_offset[WB225_OB_GREEN_TX_MIDDLE_VALUE] - wb225_gtx_olpc_cal_offset[WB225_OB_CALIBRATION_VALUE]; /* 2. Store OB/DB */ temp_obdb_reg_val = AH9300(ah)->ah_ob_db1[POSEIDON_STORED_REG_OBDB]; temp_obdb_reg_val &= ~(AR_PHY_65NM_CH0_TXRF2_DB2G | AR_PHY_65NM_CH0_TXRF2_OB2G_CCK | AR_PHY_65NM_CH0_TXRF2_OB2G_PSK | AR_PHY_65NM_CH0_TXRF2_OB2G_QAM); temp_obdb_reg_val |= (SM(5, AR_PHY_65NM_CH0_TXRF2_DB2G) | SM(WB225_OB_GREEN_TX_MIDDLE_VALUE, AR_PHY_65NM_CH0_TXRF2_OB2G_CCK) | SM(WB225_OB_GREEN_TX_MIDDLE_VALUE, AR_PHY_65NM_CH0_TXRF2_OB2G_PSK) | SM(WB225_OB_GREEN_TX_MIDDLE_VALUE, AR_PHY_65NM_CH0_TXRF2_OB2G_QAM)); /* 3. Store TPC setting */ temp_tcp_reg_val = (SM(14, AR_TPC_ACK) | SM(14, AR_TPC_CTS) | SM(14, AR_TPC_CHIRP) | SM(14, AR_TPC_RPT)); /* 4. Store BB_powertx_rate9 value */ temp_powertx_rate9_reg_val = WB225_BBPWRTXRATE9_SW_GREEN_TX_MIDDLE_VALUE; break; case HAL_RSSI_TX_POWER_LONG: default: /* 1.
TxPower Config */ OS_MEMCPY(target_power_val_t, ahp->ah_default_tx_power, sizeof(target_power_val_t)); /* 1.1 Store OLPC Delta Calibration Offset */ olpc_power_offset = wb225_gtx_olpc_cal_offset[WB225_OB_GREEN_TX_LONG_VALUE] - wb225_gtx_olpc_cal_offset[WB225_OB_CALIBRATION_VALUE]; /* 2. Store OB/DB1/DB2 */ temp_obdb_reg_val = AH9300(ah)->ah_ob_db1[POSEIDON_STORED_REG_OBDB]; /* 3. Store TPC setting */ temp_tcp_reg_val = AH9300(ah)->ah_ob_db1[POSEIDON_STORED_REG_TPC]; /* 4. Store BB_powertx_rate9 value */ temp_powertx_rate9_reg_val = AH9300(ah)->ah_ob_db1[POSEIDON_STORED_REG_BB_PWRTX_RATE9]; break; } } /* 1.1 Do OLPC Delta Calibration Offset */ tmp_olpc_val = (int8_t) AH9300(ah)->ah_db2[POSEIDON_STORED_REG_G2_OLPC_OFFSET]; tmp_olpc_val += olpc_power_offset; OS_REG_RMW(ah, AR_PHY_TPC_11_B0, (tmp_olpc_val << AR_PHY_TPC_OLPC_GAIN_DELTA_S), AR_PHY_TPC_OLPC_GAIN_DELTA); /* 1.2 TxPower Config */ ar9300_transmit_power_reg_write(ah, target_power_val_t); /* 2. Config OB/DB */ if (!ahp->ah_hw_green_tx_enable) { OS_REG_WRITE(ah, AR_PHY_65NM_CH0_TXRF2, temp_obdb_reg_val); } /* 3. config TPC setting */ OS_REG_WRITE(ah, AR_TPC, temp_tcp_reg_val); /* 4. config BB_powertx_rate9 value */ OS_REG_WRITE(ah, AR_PHY_BB_POWERTX_RATE9, temp_powertx_rate9_reg_val); } } #if 0 void ar9300_get_vow_stats( struct ath_hal *ah, HAL_VOWSTATS* p_stats, u_int8_t vow_reg_flags) { if (vow_reg_flags & AR_REG_TX_FRM_CNT) { p_stats->tx_frame_count = OS_REG_READ(ah, AR_TFCNT); } if (vow_reg_flags & AR_REG_RX_FRM_CNT) { p_stats->rx_frame_count = OS_REG_READ(ah, AR_RFCNT); } if (vow_reg_flags & AR_REG_RX_CLR_CNT) { p_stats->rx_clear_count = OS_REG_READ(ah, AR_RCCNT); } if (vow_reg_flags & AR_REG_CYCLE_CNT) { p_stats->cycle_count = OS_REG_READ(ah, AR_CCCNT); } if (vow_reg_flags & AR_REG_EXT_CYCLE_CNT) { p_stats->ext_cycle_count = OS_REG_READ(ah, AR_EXTRCCNT); } } #endif /* * ar9300_is_skip_paprd_by_greentx * * This function checks whether we need to skip PAPRD tuning * while GreenTx is in a specific state.
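* Concretely: on Poseidon, with ath_hal_sta_update_tx_pwr_enable set, PAPRD is skipped while green-TX is holding the reduced SHORT or MIDDLE power state, presumably because training predistortion against a lowered target power is not useful (the rationale is inferred from the condition below, not stated by the vendor). */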
HAL_BOOL ar9300_is_skip_paprd_by_greentx(struct ath_hal *ah) { if (AR_SREV_POSEIDON(ah) && ah->ah_config.ath_hal_sta_update_tx_pwr_enable && ((AH9300(ah)->green_tx_status == HAL_RSSI_TX_POWER_SHORT) || (AH9300(ah)->green_tx_status == HAL_RSSI_TX_POWER_MIDDLE))) { return AH_TRUE; } return AH_FALSE; } void ar9300_control_signals_for_green_tx_mode(struct ath_hal *ah) { unsigned int valid_obdb_0_b0 = 0x2d; // 5,5 - dB[0:2],oB[5:3] unsigned int valid_obdb_1_b0 = 0x25; // 4,5 - dB[0:2],oB[5:3] unsigned int valid_obdb_2_b0 = 0x1d; // 3,5 - dB[0:2],oB[5:3] unsigned int valid_obdb_3_b0 = 0x15; // 2,5 - dB[0:2],oB[5:3] unsigned int valid_obdb_4_b0 = 0xd; // 1,5 - dB[0:2],oB[5:3] struct ath_hal_9300 *ahp = AH9300(ah); if (AR_SREV_POSEIDON(ah) && ahp->ah_hw_green_tx_enable) { OS_REG_RMW_FIELD_ALT(ah, AR_PHY_PAPRD_VALID_OBDB_POSEIDON, AR_PHY_PAPRD_VALID_OBDB_0, valid_obdb_0_b0); OS_REG_RMW_FIELD_ALT(ah, AR_PHY_PAPRD_VALID_OBDB_POSEIDON, AR_PHY_PAPRD_VALID_OBDB_1, valid_obdb_1_b0); OS_REG_RMW_FIELD_ALT(ah, AR_PHY_PAPRD_VALID_OBDB_POSEIDON, AR_PHY_PAPRD_VALID_OBDB_2, valid_obdb_2_b0); OS_REG_RMW_FIELD_ALT(ah, AR_PHY_PAPRD_VALID_OBDB_POSEIDON, AR_PHY_PAPRD_VALID_OBDB_3, valid_obdb_3_b0); OS_REG_RMW_FIELD_ALT(ah, AR_PHY_PAPRD_VALID_OBDB_POSEIDON, AR_PHY_PAPRD_VALID_OBDB_4, valid_obdb_4_b0); } } void ar9300_hwgreentx_set_pal_spare(struct ath_hal *ah, int value) { struct ath_hal_9300 *ahp = AH9300(ah); if (AR_SREV_POSEIDON(ah) && ahp->ah_hw_green_tx_enable) { if ((value == 0) || (value == 1)) { OS_REG_RMW_FIELD(ah, AR_PHY_65NM_CH0_TXRF3, AR_PHY_65NM_CH0_TXRF3_OLD_PAL_SPARE, value); } } } void ar9300_reset_hw_beacon_proc_crc(struct ath_hal *ah) { OS_REG_SET_BIT(ah, AR_HWBCNPROC1, AR_HWBCNPROC1_RESET_CRC); } int32_t ar9300_get_hw_beacon_rssi(struct ath_hal *ah) { int32_t val = OS_REG_READ_FIELD(ah, AR_BCN_RSSI_AVE, AR_BCN_RSSI_AVE_VAL); /* RSSI format is 8.4 fixed point; ignore the lowest four (fractional) bits */ val = val >> 4; return val; } void ar9300_set_hw_beacon_rssi_threshold(struct ath_hal *ah, u_int32_t rssi_threshold) { struct ath_hal_9300 *ahp = AH9300(ah); OS_REG_RMW_FIELD(ah, AR_RSSI_THR, AR_RSSI_THR_VAL, rssi_threshold); /* save value for restoring after chip reset */ ahp->ah_beacon_rssi_threshold = rssi_threshold; } void ar9300_reset_hw_beacon_rssi(struct ath_hal *ah) { OS_REG_SET_BIT(ah, AR_RSSI_THR, AR_RSSI_BCN_RSSI_RST); } void ar9300_set_hw_beacon_proc(struct ath_hal *ah, HAL_BOOL on) { if (on) { OS_REG_SET_BIT(ah, AR_HWBCNPROC1, AR_HWBCNPROC1_CRC_ENABLE | AR_HWBCNPROC1_EXCLUDE_TIM_ELM); } else { OS_REG_CLR_BIT(ah, AR_HWBCNPROC1, AR_HWBCNPROC1_CRC_ENABLE | AR_HWBCNPROC1_EXCLUDE_TIM_ELM); } } /* * Prints the contents of each key cache entry.
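* The MAC address is stored right-shifted by one bit, with the valid flag in bit 15 of the high half. Worked example (values are illustrative only): a stored pair mac_hi = 0x9234, mac_lo = 0x80000001 reports valid = 1, then reconstructs as mac_hi = ((0x9234 & ~0x8000) << 1) | 1 = 0x2469 and mac_lo = 0x00000002 -- exactly the shift-and-carry sequence in the loop below. */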
HAL_BOOL ar9300_print_keycache(struct ath_hal *ah) { const HAL_CAPABILITIES *p_cap = &AH_PRIVATE(ah)->ah_caps; u_int32_t key0, key1, key2, key3, key4; u_int32_t mac_hi, mac_lo; u_int16_t entry = 0; u_int32_t valid = 0; u_int32_t key_type; ath_hal_printf(ah, "Slot Key\t\t\t Valid Type Mac \n"); for (entry = 0 ; entry < p_cap->halKeyCacheSize; entry++) { key0 = OS_REG_READ(ah, AR_KEYTABLE_KEY0(entry)); key1 = OS_REG_READ(ah, AR_KEYTABLE_KEY1(entry)); key2 = OS_REG_READ(ah, AR_KEYTABLE_KEY2(entry)); key3 = OS_REG_READ(ah, AR_KEYTABLE_KEY3(entry)); key4 = OS_REG_READ(ah, AR_KEYTABLE_KEY4(entry)); key_type = OS_REG_READ(ah, AR_KEYTABLE_TYPE(entry)); mac_lo = OS_REG_READ(ah, AR_KEYTABLE_MAC0(entry)); mac_hi = OS_REG_READ(ah, AR_KEYTABLE_MAC1(entry)); if (mac_hi & AR_KEYTABLE_VALID) { valid = 1; } else { valid = 0; } if ((mac_hi != 0) && (mac_lo != 0)) { mac_hi &= ~0x8000; mac_hi <<= 1; mac_hi |= ((mac_lo & (1 << 31) )) >> 31; mac_lo <<= 1; } ath_hal_printf(ah, "%03d " "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x" " %02d %02d " "%02x:%02x:%02x:%02x:%02x:%02x \n", entry, (key0 << 24) >> 24, (key0 << 16) >> 24, (key0 << 8) >> 24, key0 >> 24, (key1 << 24) >> 24, (key1 << 16) >> 24, //(key1 << 8) >> 24, key1 >> 24, (key2 << 24) >> 24, (key2 << 16) >> 24, (key2 << 8) >> 24, key2 >> 24, (key3 << 24) >> 24, (key3 << 16) >> 24, //(key3 << 8) >> 24, key3 >> 24, (key4 << 24) >> 24, (key4 << 16) >> 24, (key4 << 8) >> 24, key4 >> 24, valid, key_type, (mac_lo << 24) >> 24, (mac_lo << 16) >> 24, (mac_lo << 8) >> 24, (mac_lo) >> 24, (mac_hi << 24) >> 24, (mac_hi << 16) >> 24 ); } return AH_TRUE; } /* enable/disable smart antenna mode */ HAL_BOOL ar9300_set_smart_antenna(struct ath_hal *ah, HAL_BOOL enable) { struct ath_hal_9300 *ahp = AH9300(ah); if (enable) { OS_REG_SET_BIT(ah, AR_XRTO, AR_ENABLE_SMARTANTENNA); } else { OS_REG_CLR_BIT(ah, AR_XRTO, AR_ENABLE_SMARTANTENNA); } /* if Scorpion and smart antenna are enabled, write swcom1 with 0x440 * and swcom2 with 0. * FIXME: Ideally these registers need to be read from caldata. * Until the calibration team provides them, keep them along with the board * configuration.
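* (cf. ar9300_select_ant_config() earlier in this file, which derives its AR_PHY_SWITCH_COM value from the EEPROM antenna configuration; once caldata carries the smart-antenna settings, the hard-coded 0x440/0 writes below could plausibly be sourced the same way.)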
*/ if (enable && AR_SREV_SCORPION(ah) && (HAL_OK == ar9300_get_capability(ah, HAL_CAP_SMARTANTENNA, 0,0))) { OS_REG_WRITE(ah, AR_PHY_SWITCH_COM, 0x440); OS_REG_WRITE(ah, AR_PHY_SWITCH_COM_2, 0); } ahp->ah_smartantenna_enable = enable; return 1; } #ifdef ATH_TX99_DIAG #ifndef ATH_SUPPORT_HTC void ar9300_tx99_channel_pwr_update(struct ath_hal *ah, HAL_CHANNEL *c, u_int32_t txpower) { #define PWR_MAS(_r, _s) (((_r) & 0x3f) << (_s)) static int16_t p_pwr_array[ar9300_rate_size] = { 0 }; int32_t i; /* The max power is limited to 63 */ if (txpower <= AR9300_MAX_RATE_POWER) { for (i = 0; i < ar9300_rate_size; i++) { p_pwr_array[i] = txpower; } } else { for (i = 0; i < ar9300_rate_size; i++) { p_pwr_array[i] = AR9300_MAX_RATE_POWER; } } OS_REG_WRITE(ah, 0xa458, 0); /* Write the OFDM power per rate set */ /* 6 (LSB), 9, 12, 18 (MSB) */ OS_REG_WRITE(ah, 0xa3c0, PWR_MAS(p_pwr_array[ALL_TARGET_LEGACY_6_24], 24) | PWR_MAS(p_pwr_array[ALL_TARGET_LEGACY_6_24], 16) | PWR_MAS(p_pwr_array[ALL_TARGET_LEGACY_6_24], 8) | PWR_MAS(p_pwr_array[ALL_TARGET_LEGACY_6_24], 0) ); /* 24 (LSB), 36, 48, 54 (MSB) */ OS_REG_WRITE(ah, 0xa3c4, PWR_MAS(p_pwr_array[ALL_TARGET_LEGACY_54], 24) | PWR_MAS(p_pwr_array[ALL_TARGET_LEGACY_48], 16) | PWR_MAS(p_pwr_array[ALL_TARGET_LEGACY_36], 8) | PWR_MAS(p_pwr_array[ALL_TARGET_LEGACY_6_24], 0) ); /* Write the CCK power per rate set */ /* 1L (LSB), reserved, 2L, 2S (MSB) */ OS_REG_WRITE(ah, 0xa3c8, PWR_MAS(p_pwr_array[ALL_TARGET_LEGACY_1L_5L], 24) | PWR_MAS(p_pwr_array[ALL_TARGET_LEGACY_1L_5L], 16) /* | PWR_MAS(txPowerTimes2, 8) */ /* this is reserved for Osprey */ | PWR_MAS(p_pwr_array[ALL_TARGET_LEGACY_1L_5L], 0) ); /* 5.5L (LSB), 5.5S, 11L, 11S (MSB) */ OS_REG_WRITE(ah, 0xa3cc, PWR_MAS(p_pwr_array[ALL_TARGET_LEGACY_11S], 24) | PWR_MAS(p_pwr_array[ALL_TARGET_LEGACY_11L], 16) | PWR_MAS(p_pwr_array[ALL_TARGET_LEGACY_5S], 8) | PWR_MAS(p_pwr_array[ALL_TARGET_LEGACY_1L_5L], 0) ); /* Write the HT20 power per rate set */ /* 0/8/16 (LSB), 1-3/9-11/17-19, 4, 5 (MSB) */ OS_REG_WRITE(ah, 0xa3d0, PWR_MAS(p_pwr_array[ALL_TARGET_HT20_5], 24) | PWR_MAS(p_pwr_array[ALL_TARGET_HT20_4], 16) | PWR_MAS(p_pwr_array[ALL_TARGET_HT20_1_3_9_11_17_19], 8) | PWR_MAS(p_pwr_array[ALL_TARGET_HT20_0_8_16], 0) ); /* 6 (LSB), 7, 12, 13 (MSB) */ OS_REG_WRITE(ah, 0xa3d4, PWR_MAS(p_pwr_array[ALL_TARGET_HT20_13], 24) | PWR_MAS(p_pwr_array[ALL_TARGET_HT20_12], 16) | PWR_MAS(p_pwr_array[ALL_TARGET_HT20_7], 8) | PWR_MAS(p_pwr_array[ALL_TARGET_HT20_6], 0) ); /* 14 (LSB), 15, 20, 21 */ OS_REG_WRITE(ah, 0xa3e4, PWR_MAS(p_pwr_array[ALL_TARGET_HT20_21], 24) | PWR_MAS(p_pwr_array[ALL_TARGET_HT20_20], 16) | PWR_MAS(p_pwr_array[ALL_TARGET_HT20_15], 8) | PWR_MAS(p_pwr_array[ALL_TARGET_HT20_14], 0) ); /* Mixed HT20 and HT40 rates */ /* HT20 22 (LSB), HT20 23, HT40 22, HT40 23 (MSB) */ OS_REG_WRITE(ah, 0xa3e8, PWR_MAS(p_pwr_array[ALL_TARGET_HT40_23], 24) | PWR_MAS(p_pwr_array[ALL_TARGET_HT40_22], 16) | PWR_MAS(p_pwr_array[ALL_TARGET_HT20_23], 8) | PWR_MAS(p_pwr_array[ALL_TARGET_HT20_22], 0) ); /* Write the HT40 power per rate set */ /* correct PAR difference between HT40 and HT20/LEGACY */ /* 0/8/16 (LSB), 1-3/9-11/17-19, 4, 5 (MSB) */ OS_REG_WRITE(ah, 0xa3d8, PWR_MAS(p_pwr_array[ALL_TARGET_HT40_5], 24) | PWR_MAS(p_pwr_array[ALL_TARGET_HT40_4], 16) | PWR_MAS(p_pwr_array[ALL_TARGET_HT40_1_3_9_11_17_19], 8) | PWR_MAS(p_pwr_array[ALL_TARGET_HT40_0_8_16], 0) ); /* 6 (LSB), 7, 12, 13 (MSB) */ OS_REG_WRITE(ah, 0xa3dc, PWR_MAS(p_pwr_array[ALL_TARGET_HT40_13], 24) | PWR_MAS(p_pwr_array[ALL_TARGET_HT40_12], 16) | 
PWR_MAS(p_pwr_array[ALL_TARGET_HT40_7], 8) | PWR_MAS(p_pwr_array[ALL_TARGET_HT40_6], 0) ); /* 14 (LSB), 15, 20, 21 */ OS_REG_WRITE(ah, 0xa3ec, PWR_MAS(p_pwr_array[ALL_TARGET_HT40_21], 24) | PWR_MAS(p_pwr_array[ALL_TARGET_HT40_20], 16) | PWR_MAS(p_pwr_array[ALL_TARGET_HT40_15], 8) | PWR_MAS(p_pwr_array[ALL_TARGET_HT40_14], 0) ); #undef PWR_MAS } void ar9300_tx99_chainmsk_setup(struct ath_hal *ah, int tx_chainmask) { if (tx_chainmask == 0x5) { OS_REG_WRITE(ah, AR_PHY_ANALOG_SWAP, OS_REG_READ(ah, AR_PHY_ANALOG_SWAP) | AR_PHY_SWAP_ALT_CHAIN); } OS_REG_WRITE(ah, AR_PHY_RX_CHAINMASK, tx_chainmask); OS_REG_WRITE(ah, AR_PHY_CAL_CHAINMASK, tx_chainmask); OS_REG_WRITE(ah, AR_SELFGEN_MASK, tx_chainmask); if (tx_chainmask == 0x5) { OS_REG_WRITE(ah, AR_PHY_ANALOG_SWAP, OS_REG_READ(ah, AR_PHY_ANALOG_SWAP) | AR_PHY_SWAP_ALT_CHAIN); } } void ar9300_tx99_set_single_carrier(struct ath_hal *ah, int tx_chain_mask, int chtype) { OS_REG_WRITE(ah, 0x98a4, OS_REG_READ(ah, 0x98a4) | (0x7ff << 11) | 0x7ff); OS_REG_WRITE(ah, 0xa364, OS_REG_READ(ah, 0xa364) | (1 << 7) | (1 << 1)); OS_REG_WRITE(ah, 0xa350, (OS_REG_READ(ah, 0xa350) | (1 << 31) | (1 << 15)) & ~(1 << 13)); /* 11G mode */ if (!chtype) { OS_REG_WRITE(ah, AR_PHY_65NM_CH0_RXTX2, OS_REG_READ(ah, AR_PHY_65NM_CH0_RXTX2) | (0x1 << 3) | (0x1 << 2)); if (AR_SREV_OSPREY(ah) || AR_SREV_WASP(ah)) { OS_REG_WRITE(ah, AR_PHY_65NM_CH0_TOP, OS_REG_READ(ah, AR_PHY_65NM_CH0_TOP) & ~(0x1 << 4)); OS_REG_WRITE(ah, AR_PHY_65NM_CH0_TOP2, (OS_REG_READ(ah, AR_PHY_65NM_CH0_TOP2) | (0x1 << 26) | (0x7 << 24)) & ~(0x1 << 22)); } else { OS_REG_WRITE(ah, AR_HORNET_CH0_TOP, OS_REG_READ(ah, AR_HORNET_CH0_TOP) & ~(0x1 << 4)); OS_REG_WRITE(ah, AR_HORNET_CH0_TOP2, (OS_REG_READ(ah, AR_HORNET_CH0_TOP2) | (0x1 << 26) | (0x7 << 24)) & ~(0x1 << 22)); } /* chain zero */ if ((tx_chain_mask & 0x01) == 0x01) { OS_REG_WRITE(ah, AR_PHY_65NM_CH0_RXTX1, (OS_REG_READ(ah, AR_PHY_65NM_CH0_RXTX1) | (0x1 << 31) | (0x5 << 15) | (0x3 << 9)) & ~(0x1 << 27) & ~(0x1 << 12)); OS_REG_WRITE(ah, AR_PHY_65NM_CH0_RXTX2, (OS_REG_READ(ah, AR_PHY_65NM_CH0_RXTX2) | (0x1 << 12) | (0x1 << 10) | (0x1 << 9) | (0x1 << 8) | (0x1 << 7)) & ~(0x1 << 11)); OS_REG_WRITE(ah, AR_PHY_65NM_CH0_RXTX3, (OS_REG_READ(ah, AR_PHY_65NM_CH0_RXTX3) | (0x1 << 29) | (0x1 << 25) | (0x1 << 23) | (0x1 << 19) | (0x1 << 10) | (0x1 << 9) | (0x1 << 8) | (0x1 << 3)) & ~(0x1 << 28)& ~(0x1 << 24) & ~(0x1 << 22)& ~(0x1 << 7)); OS_REG_WRITE(ah, AR_PHY_65NM_CH0_TXRF1, (OS_REG_READ(ah, AR_PHY_65NM_CH0_TXRF1) | (0x1 << 23))& ~(0x1 << 21)); OS_REG_WRITE(ah, AR_PHY_65NM_CH0_BB1, OS_REG_READ(ah, AR_PHY_65NM_CH0_BB1) | (0x1 << 12) | (0x1 << 10) | (0x1 << 9) | (0x1 << 8) | (0x1 << 6) | (0x1 << 5) | (0x1 << 4) | (0x1 << 3) | (0x1 << 2)); OS_REG_WRITE(ah, AR_PHY_65NM_CH0_BB2, OS_REG_READ(ah, AR_PHY_65NM_CH0_BB2) | (0x1 << 31)); } if (AR_SREV_OSPREY(ah) || AR_SREV_WASP(ah)) { /* chain one */ if ((tx_chain_mask & 0x02) == 0x02 ) { OS_REG_WRITE(ah, AR_PHY_65NM_CH1_RXTX1, (OS_REG_READ(ah, AR_PHY_65NM_CH1_RXTX1) | (0x1 << 31) | (0x5 << 15) | (0x3 << 9)) & ~(0x1 << 27) & ~(0x1 << 12)); OS_REG_WRITE(ah, AR_PHY_65NM_CH1_RXTX2, (OS_REG_READ(ah, AR_PHY_65NM_CH1_RXTX2) | (0x1 << 12) | (0x1 << 10) | (0x1 << 9) | (0x1 << 8) | (0x1 << 7)) & ~(0x1 << 11)); OS_REG_WRITE(ah, AR_PHY_65NM_CH1_RXTX3, (OS_REG_READ(ah, AR_PHY_65NM_CH1_RXTX3) | (0x1 << 29) | (0x1 << 25) | (0x1 << 23) | (0x1 << 19) | (0x1 << 10) | (0x1 << 9) | (0x1 << 8) | (0x1 << 3)) & ~(0x1 << 28)& ~(0x1 << 24) & ~(0x1 << 22)& ~(0x1 << 7)); OS_REG_WRITE(ah, AR_PHY_65NM_CH1_TXRF1, (OS_REG_READ(ah, AR_PHY_65NM_CH1_TXRF1) | (0x1 
<< 23))& ~(0x1 << 21)); OS_REG_WRITE(ah, AR_PHY_65NM_CH1_BB1, OS_REG_READ(ah, AR_PHY_65NM_CH1_BB1) | (0x1 << 12) | (0x1 << 10) | (0x1 << 9) | (0x1 << 8) | (0x1 << 6) | (0x1 << 5) | (0x1 << 4) | (0x1 << 3) | (0x1 << 2)); OS_REG_WRITE(ah, AR_PHY_65NM_CH1_BB2, OS_REG_READ(ah, AR_PHY_65NM_CH1_BB2) | (0x1 << 31)); } } if (AR_SREV_OSPREY(ah)) { /* chain two */ if ((tx_chain_mask & 0x04) == 0x04 ) { OS_REG_WRITE(ah, AR_PHY_65NM_CH2_RXTX1, (OS_REG_READ(ah, AR_PHY_65NM_CH2_RXTX1) | (0x1 << 31) | (0x5 << 15) | (0x3 << 9)) & ~(0x1 << 27) & ~(0x1 << 12)); OS_REG_WRITE(ah, AR_PHY_65NM_CH2_RXTX2, (OS_REG_READ(ah, AR_PHY_65NM_CH2_RXTX2) | (0x1 << 12) | (0x1 << 10) | (0x1 << 9) | (0x1 << 8) | (0x1 << 7)) & ~(0x1 << 11)); OS_REG_WRITE(ah, AR_PHY_65NM_CH2_RXTX3, (OS_REG_READ(ah, AR_PHY_65NM_CH2_RXTX3) | (0x1 << 29) | (0x1 << 25) | (0x1 << 23) | (0x1 << 19) | (0x1 << 10) | (0x1 << 9) | (0x1 << 8) | (0x1 << 3)) & ~(0x1 << 28)& ~(0x1 << 24) & ~(0x1 << 22)& ~(0x1 << 7)); OS_REG_WRITE(ah, AR_PHY_65NM_CH2_TXRF1, (OS_REG_READ(ah, AR_PHY_65NM_CH2_TXRF1) | (0x1 << 23))& ~(0x1 << 21)); OS_REG_WRITE(ah, AR_PHY_65NM_CH2_BB1, OS_REG_READ(ah, AR_PHY_65NM_CH2_BB1) | (0x1 << 12) | (0x1 << 10) | (0x1 << 9) | (0x1 << 8) | (0x1 << 6) | (0x1 << 5) | (0x1 << 4) | (0x1 << 3) | (0x1 << 2)); OS_REG_WRITE(ah, AR_PHY_65NM_CH2_BB2, OS_REG_READ(ah, AR_PHY_65NM_CH2_BB2) | (0x1 << 31)); } } OS_REG_WRITE(ah, 0xa28c, 0x11111); OS_REG_WRITE(ah, 0xa288, 0x111); } else { /* chain zero */ if ((tx_chain_mask & 0x01) == 0x01) { OS_REG_WRITE(ah, AR_PHY_65NM_CH0_RXTX1, (OS_REG_READ(ah, AR_PHY_65NM_CH0_RXTX1) | (0x1 << 31) | (0x1 << 27) | (0x3 << 23) | (0x1 << 19) | (0x1 << 15) | (0x3 << 9)) & ~(0x1 << 12)); OS_REG_WRITE(ah, AR_PHY_65NM_CH0_RXTX2, (OS_REG_READ(ah, AR_PHY_65NM_CH0_RXTX2) | (0x1 << 12) | (0x1 << 10) | (0x1 << 9) | (0x1 << 8) | (0x1 << 7) | (0x1 << 3) | (0x1 << 2) | (0x1 << 1)) & ~(0x1 << 11)& ~(0x1 << 0)); OS_REG_WRITE(ah, AR_PHY_65NM_CH0_RXTX3, (OS_REG_READ(ah, AR_PHY_65NM_CH0_RXTX3) | (0x1 << 29) | (0x1 << 25) | (0x1 << 23) | (0x1 << 19) | (0x1 << 10) | (0x1 << 9) | (0x1 << 8) | (0x1 << 3)) & ~(0x1 << 28)& ~(0x1 << 24) & ~(0x1 << 22)& ~(0x1 << 7)); OS_REG_WRITE(ah, AR_PHY_65NM_CH0_TXRF1, (OS_REG_READ(ah, AR_PHY_65NM_CH0_TXRF1) | (0x1 << 23))& ~(0x1 << 21)); OS_REG_WRITE(ah, AR_PHY_65NM_CH0_TXRF2, OS_REG_READ(ah, AR_PHY_65NM_CH0_TXRF2) | (0x3 << 3) | (0x3 << 0)); OS_REG_WRITE(ah, AR_PHY_65NM_CH0_TXRF3, (OS_REG_READ(ah, AR_PHY_65NM_CH0_TXRF3) | (0x3 << 29) | (0x3 << 26) | (0x2 << 23) | (0x2 << 20) | (0x2 << 17))& ~(0x1 << 14)); OS_REG_WRITE(ah, AR_PHY_65NM_CH0_BB1, OS_REG_READ(ah, AR_PHY_65NM_CH0_BB1) | (0x1 << 12) | (0x1 << 10) | (0x1 << 9) | (0x1 << 8) | (0x1 << 6) | (0x1 << 5) | (0x1 << 4) | (0x1 << 3) | (0x1 << 2)); OS_REG_WRITE(ah, AR_PHY_65NM_CH0_BB2, OS_REG_READ(ah, AR_PHY_65NM_CH0_BB2) | (0x1 << 31)); if (AR_SREV_OSPREY(ah) || AR_SREV_WASP(ah)) { OS_REG_WRITE(ah, AR_PHY_65NM_CH0_TOP, OS_REG_READ(ah, AR_PHY_65NM_CH0_TOP) & ~(0x1 << 4)); OS_REG_WRITE(ah, AR_PHY_65NM_CH0_TOP2, OS_REG_READ(ah, AR_PHY_65NM_CH0_TOP2) | (0x1 << 26) | (0x7 << 24) | (0x3 << 22)); } else { OS_REG_WRITE(ah, AR_HORNET_CH0_TOP, OS_REG_READ(ah, AR_HORNET_CH0_TOP) & ~(0x1 << 4)); OS_REG_WRITE(ah, AR_HORNET_CH0_TOP2, OS_REG_READ(ah, AR_HORNET_CH0_TOP2) | (0x1 << 26) | (0x7 << 24) | (0x3 << 22)); } if (AR_SREV_OSPREY(ah) || AR_SREV_WASP(ah)) { OS_REG_WRITE(ah, AR_PHY_65NM_CH1_RXTX2, (OS_REG_READ(ah, AR_PHY_65NM_CH1_RXTX2) | (0x1 << 3) | (0x1 << 2) | (0x1 << 1)) & ~(0x1 << 0)); OS_REG_WRITE(ah, AR_PHY_65NM_CH1_RXTX3, OS_REG_READ(ah, AR_PHY_65NM_CH1_RXTX3) | 
(0x1 << 19) | (0x1 << 3)); OS_REG_WRITE(ah, AR_PHY_65NM_CH1_TXRF1, OS_REG_READ(ah, AR_PHY_65NM_CH1_TXRF1) | (0x1 << 23)); } if (AR_SREV_OSPREY(ah)) { OS_REG_WRITE(ah, AR_PHY_65NM_CH2_RXTX2, (OS_REG_READ(ah, AR_PHY_65NM_CH2_RXTX2) | (0x1 << 3) | (0x1 << 2) | (0x1 << 1)) & ~(0x1 << 0)); OS_REG_WRITE(ah, AR_PHY_65NM_CH2_RXTX3, OS_REG_READ(ah, AR_PHY_65NM_CH2_RXTX3) | (0x1 << 19) | (0x1 << 3)); OS_REG_WRITE(ah, AR_PHY_65NM_CH2_TXRF1, OS_REG_READ(ah, AR_PHY_65NM_CH2_TXRF1) | (0x1 << 23)); } } if (AR_SREV_OSPREY(ah) || AR_SREV_WASP(ah)) { /* chain one */ if ((tx_chain_mask & 0x02) == 0x02 ) { OS_REG_WRITE(ah, AR_PHY_65NM_CH0_RXTX2, (OS_REG_READ(ah, AR_PHY_65NM_CH0_RXTX2) | (0x1 << 3) | (0x1 << 2) | (0x1 << 1)) & ~(0x1 << 0)); OS_REG_WRITE(ah, AR_PHY_65NM_CH0_RXTX3, OS_REG_READ(ah, AR_PHY_65NM_CH0_RXTX3) | (0x1 << 19) | (0x1 << 3)); OS_REG_WRITE(ah, AR_PHY_65NM_CH0_TXRF1, OS_REG_READ(ah, AR_PHY_65NM_CH0_TXRF1) | (0x1 << 23)); if (AR_SREV_OSPREY(ah) || AR_SREV_WASP(ah)) { OS_REG_WRITE(ah, AR_PHY_65NM_CH0_TOP, OS_REG_READ(ah, AR_PHY_65NM_CH0_TOP) & ~(0x1 << 4)); OS_REG_WRITE(ah, AR_PHY_65NM_CH0_TOP2, OS_REG_READ(ah, AR_PHY_65NM_CH0_TOP2) | (0x1 << 26) | (0x7 << 24) | (0x3 << 22)); } else { OS_REG_WRITE(ah, AR_HORNET_CH0_TOP, OS_REG_READ(ah, AR_HORNET_CH0_TOP) & ~(0x1 << 4)); OS_REG_WRITE(ah, AR_HORNET_CH0_TOP2, OS_REG_READ(ah, AR_HORNET_CH0_TOP2) | (0x1 << 26) | (0x7 << 24) | (0x3 << 22)); } OS_REG_WRITE(ah, AR_PHY_65NM_CH1_RXTX1, (OS_REG_READ(ah, AR_PHY_65NM_CH1_RXTX1) | (0x1 << 31) | (0x1 << 27) | (0x3 << 23) | (0x1 << 19) | (0x1 << 15) | (0x3 << 9)) & ~(0x1 << 12)); OS_REG_WRITE(ah, AR_PHY_65NM_CH1_RXTX2, (OS_REG_READ(ah, AR_PHY_65NM_CH1_RXTX2) | (0x1 << 12) | (0x1 << 10) | (0x1 << 9) | (0x1 << 8) | (0x1 << 7) | (0x1 << 3) | (0x1 << 2) | (0x1 << 1)) & ~(0x1 << 11)& ~(0x1 << 0)); OS_REG_WRITE(ah, AR_PHY_65NM_CH1_RXTX3, (OS_REG_READ(ah, AR_PHY_65NM_CH1_RXTX3) | (0x1 << 29) | (0x1 << 25) | (0x1 << 23) | (0x1 << 19) | (0x1 << 10) | (0x1 << 9) | (0x1 << 8) | (0x1 << 3)) & ~(0x1 << 28)& ~(0x1 << 24) & ~(0x1 << 22)& ~(0x1 << 7)); OS_REG_WRITE(ah, AR_PHY_65NM_CH1_TXRF1, (OS_REG_READ(ah, AR_PHY_65NM_CH1_TXRF1) | (0x1 << 23))& ~(0x1 << 21)); OS_REG_WRITE(ah, AR_PHY_65NM_CH1_TXRF2, OS_REG_READ(ah, AR_PHY_65NM_CH1_TXRF2) | (0x3 << 3) | (0x3 << 0)); OS_REG_WRITE(ah, AR_PHY_65NM_CH1_TXRF3, (OS_REG_READ(ah, AR_PHY_65NM_CH1_TXRF3) | (0x3 << 29) | (0x3 << 26) | (0x2 << 23) | (0x2 << 20) | (0x2 << 17))& ~(0x1 << 14)); OS_REG_WRITE(ah, AR_PHY_65NM_CH1_BB1, OS_REG_READ(ah, AR_PHY_65NM_CH1_BB1) | (0x1 << 12) | (0x1 << 10) | (0x1 << 9) | (0x1 << 8) | (0x1 << 6) | (0x1 << 5) | (0x1 << 4) | (0x1 << 3) | (0x1 << 2)); OS_REG_WRITE(ah, AR_PHY_65NM_CH1_BB2, OS_REG_READ(ah, AR_PHY_65NM_CH1_BB2) | (0x1 << 31)); if (AR_SREV_OSPREY(ah)) { OS_REG_WRITE(ah, AR_PHY_65NM_CH2_RXTX2, (OS_REG_READ(ah, AR_PHY_65NM_CH2_RXTX2) | (0x1 << 3) | (0x1 << 2) | (0x1 << 1)) & ~(0x1 << 0)); OS_REG_WRITE(ah, AR_PHY_65NM_CH2_RXTX3, OS_REG_READ(ah, AR_PHY_65NM_CH2_RXTX3) | (0x1 << 19) | (0x1 << 3)); OS_REG_WRITE(ah, AR_PHY_65NM_CH2_TXRF1, OS_REG_READ(ah, AR_PHY_65NM_CH2_TXRF1) | (0x1 << 23)); } } } if (AR_SREV_OSPREY(ah)) { /* chain two */ if ((tx_chain_mask & 0x04) == 0x04 ) { OS_REG_WRITE(ah, AR_PHY_65NM_CH0_RXTX2, (OS_REG_READ(ah, AR_PHY_65NM_CH0_RXTX2) | (0x1 << 3) | (0x1 << 2) | (0x1 << 1)) & ~(0x1 << 0)); OS_REG_WRITE(ah, AR_PHY_65NM_CH0_RXTX3, OS_REG_READ(ah, AR_PHY_65NM_CH0_RXTX3) | (0x1 << 19) | (0x1 << 3)); OS_REG_WRITE(ah, AR_PHY_65NM_CH0_TXRF1, OS_REG_READ(ah, AR_PHY_65NM_CH0_TXRF1) | (0x1 << 23)); if (AR_SREV_OSPREY(ah) || 
AR_SREV_WASP(ah)) { OS_REG_WRITE(ah, AR_PHY_65NM_CH0_TOP, OS_REG_READ(ah, AR_PHY_65NM_CH0_TOP) & ~(0x1 << 4)); OS_REG_WRITE(ah, AR_PHY_65NM_CH0_TOP2, OS_REG_READ(ah, AR_PHY_65NM_CH0_TOP2) | (0x1 << 26) | (0x7 << 24) | (0x3 << 22)); } else { OS_REG_WRITE(ah, AR_HORNET_CH0_TOP, OS_REG_READ(ah, AR_HORNET_CH0_TOP) & ~(0x1 << 4)); OS_REG_WRITE(ah, AR_HORNET_CH0_TOP2, OS_REG_READ(ah, AR_HORNET_CH0_TOP2) | (0x1 << 26) | (0x7 << 24) | (0x3 << 22)); } OS_REG_WRITE(ah, AR_PHY_65NM_CH1_RXTX2, (OS_REG_READ(ah, AR_PHY_65NM_CH1_RXTX2) | (0x1 << 3) | (0x1 << 2) | (0x1 << 1)) & ~(0x1 << 0)); OS_REG_WRITE(ah, AR_PHY_65NM_CH1_RXTX3, OS_REG_READ(ah, AR_PHY_65NM_CH1_RXTX3) | (0x1 << 19) | (0x1 << 3)); OS_REG_WRITE(ah, AR_PHY_65NM_CH1_TXRF1, OS_REG_READ(ah, AR_PHY_65NM_CH1_TXRF1) | (0x1 << 23)); OS_REG_WRITE(ah, AR_PHY_65NM_CH2_RXTX1, (OS_REG_READ(ah, AR_PHY_65NM_CH2_RXTX1) | (0x1 << 31) | (0x1 << 27) | (0x3 << 23) | (0x1 << 19) | (0x1 << 15) | (0x3 << 9)) & ~(0x1 << 12)); OS_REG_WRITE(ah, AR_PHY_65NM_CH2_RXTX2, (OS_REG_READ(ah, AR_PHY_65NM_CH2_RXTX2) | (0x1 << 12) | (0x1 << 10) | (0x1 << 9) | (0x1 << 8) | (0x1 << 7) | (0x1 << 3) | (0x1 << 2) | (0x1 << 1)) & ~(0x1 << 11)& ~(0x1 << 0)); OS_REG_WRITE(ah, AR_PHY_65NM_CH2_RXTX3, (OS_REG_READ(ah, AR_PHY_65NM_CH2_RXTX3) | (0x1 << 29) | (0x1 << 25) | (0x1 << 23) | (0x1 << 19) | (0x1 << 10) | (0x1 << 9) | (0x1 << 8) | (0x1 << 3)) & ~(0x1 << 28)& ~(0x1 << 24) & ~(0x1 << 22)& ~(0x1 << 7)); OS_REG_WRITE(ah, AR_PHY_65NM_CH2_TXRF1, (OS_REG_READ(ah, AR_PHY_65NM_CH2_TXRF1) | (0x1 << 23))& ~(0x1 << 21)); OS_REG_WRITE(ah, AR_PHY_65NM_CH2_TXRF2, OS_REG_READ(ah, AR_PHY_65NM_CH2_TXRF2) | (0x3 << 3) | (0x3 << 0)); OS_REG_WRITE(ah, AR_PHY_65NM_CH2_TXRF3, (OS_REG_READ(ah, AR_PHY_65NM_CH2_TXRF3) | (0x3 << 29) | (0x3 << 26) | (0x2 << 23) | (0x2 << 20) | (0x2 << 17))& ~(0x1 << 14)); OS_REG_WRITE(ah, AR_PHY_65NM_CH2_BB1, OS_REG_READ(ah, AR_PHY_65NM_CH2_BB1) | (0x1 << 12) | (0x1 << 10) | (0x1 << 9) | (0x1 << 8) | (0x1 << 6) | (0x1 << 5) | (0x1 << 4) | (0x1 << 3) | (0x1 << 2)); OS_REG_WRITE(ah, AR_PHY_65NM_CH2_BB2, OS_REG_READ(ah, AR_PHY_65NM_CH2_BB2) | (0x1 << 31)); } } OS_REG_WRITE(ah, 0xa28c, 0x22222); OS_REG_WRITE(ah, 0xa288, 0x222); } } void ar9300_tx99_start(struct ath_hal *ah, u_int8_t *data) { u_int32_t val; u_int32_t qnum = (u_int32_t)data; /* Disable AGC to A2 */ OS_REG_WRITE(ah, AR_PHY_TEST, (OS_REG_READ(ah, AR_PHY_TEST) | PHY_AGC_CLR)); OS_REG_WRITE(ah, AR_DIAG_SW, OS_REG_READ(ah, AR_DIAG_SW) &~ AR_DIAG_RX_DIS); OS_REG_WRITE(ah, AR_CR, AR_CR_RXD); /* set receive disable */ /* set CW_MIN and CW_MAX both to 0, AIFS=2 */ OS_REG_WRITE(ah, AR_DLCL_IFS(qnum), 0); OS_REG_WRITE(ah, AR_D_GBL_IFS_SIFS, 20); /* 50 OK */ OS_REG_WRITE(ah, AR_D_GBL_IFS_EIFS, 20); /* 200 ok for HT20, 400 ok for HT40 */ OS_REG_WRITE(ah, AR_TIME_OUT, 0x00000400); OS_REG_WRITE(ah, AR_DRETRY_LIMIT(qnum), 0xffffffff); /* set QCU modes to early termination */ val = OS_REG_READ(ah, AR_QMISC(qnum)); OS_REG_WRITE(ah, AR_QMISC(qnum), val | AR_Q_MISC_DCU_EARLY_TERM_REQ); } void ar9300_tx99_stop(struct ath_hal *ah) { /* this should undo the corresponding settings made in ar9300_tx99_start */ OS_REG_WRITE(ah, AR_PHY_TEST, OS_REG_READ(ah, AR_PHY_TEST) &~ PHY_AGC_CLR); OS_REG_WRITE(ah, AR_DIAG_SW, OS_REG_READ(ah, AR_DIAG_SW) | AR_DIAG_RX_DIS); } #endif /* ATH_SUPPORT_HTC */ #endif /* ATH_TX99_DIAG */ HAL_BOOL ar9300Get3StreamSignature(struct ath_hal *ah) { return AH_FALSE; } HAL_BOOL ar9300ForceVCS(struct ath_hal *ah) { return AH_FALSE; } HAL_BOOL ar9300SetDfs3StreamFix(struct ath_hal *ah, u_int32_t val) { return AH_FALSE; } static u_int32_t
ar9300_read_loc_timer(struct ath_hal *ah) { return OS_REG_READ(ah, AR_LOC_TIMER_REG); } HAL_BOOL ar9300_set_ctl_pwr(struct ath_hal *ah, u_int8_t *ctl_array) { struct ath_hal_9300 *ahp = AH9300(ah); ar9300_eeprom_t *p_eep_data = &ahp->ah_eeprom; u_int8_t *ctl_index; u_int32_t offset = 0; if (!ctl_array) return AH_FALSE; /* copy 2G ctl freqbin and power data */ ctl_index = p_eep_data->ctl_index_2g; OS_MEMCPY(ctl_index + OSPREY_NUM_CTLS_2G, ctl_array, OSPREY_NUM_CTLS_2G * OSPREY_NUM_BAND_EDGES_2G + /* ctl_freqbin_2G */ OSPREY_NUM_CTLS_2G * sizeof(OSP_CAL_CTL_DATA_2G)); /* ctl_power_data_2g */ offset = (OSPREY_NUM_CTLS_2G * OSPREY_NUM_BAND_EDGES_2G) + ( OSPREY_NUM_CTLS_2G * sizeof(OSP_CAL_CTL_DATA_2G)); /* copy 5G ctl freqbin and power data */ ctl_index = p_eep_data->ctl_index_5g; OS_MEMCPY(ctl_index + OSPREY_NUM_CTLS_5G, ctl_array + offset, OSPREY_NUM_CTLS_5G * OSPREY_NUM_BAND_EDGES_5G + /* ctl_freqbin_5G */ OSPREY_NUM_CTLS_5G * sizeof(OSP_CAL_CTL_DATA_5G)); /* ctl_power_data_5g */ return AH_FALSE; } void ar9300_set_txchainmaskopt(struct ath_hal *ah, u_int8_t mask) { struct ath_hal_9300 *ahp = AH9300(ah); /* optional txchainmask should be a subset of the primary txchainmask */ if ((mask & ahp->ah_tx_chainmask) != mask) { ahp->ah_tx_chainmaskopt = 0; ath_hal_printf(ah, "Error: ah_tx_chainmask=%d, mask=%d\n", ahp->ah_tx_chainmask, mask); return; } ahp->ah_tx_chainmaskopt = mask; } Index: projects/ipsec/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_reset.c =================================================================== --- projects/ipsec/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_reset.c (revision 313186) +++ projects/ipsec/sys/contrib/dev/ath/ath_hal/ar9300/ar9300_reset.c (revision 313187) @@ -1,6515 +1,6515 @@ /* * Copyright (c) 2013 Qualcomm Atheros, Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR * PERFORMANCE OF THIS SOFTWARE. */ #include "opt_ah.h" #include "ah.h" #include "ah_internal.h" #include "ah_devid.h" #include "ah_desc.h" #include "ar9300.h" #include "ar9300reg.h" #include "ar9300phy.h" #include "ar9300desc.h" #define FIX_NOISE_FLOOR 1 /* Additional time delay to wait after activating the baseband */ #define BASE_ACTIVATE_DELAY 100 /* usec */ #define RTC_PLL_SETTLE_DELAY 100 /* usec */ #define COEF_SCALE_S 24 #define HT40_CHANNEL_CENTER_SHIFT 10 /* MHz */ #define DELPT 32 /* XXX Duplicates!
(in ar9300desc.h) */ #if 0 extern HAL_BOOL ar9300_reset_tx_queue(struct ath_hal *ah, u_int q); extern u_int32_t ar9300_num_tx_pending(struct ath_hal *ah, u_int q); #endif #define MAX_MEASUREMENT 8 #define MAXIQCAL 3 struct coeff_t { int32_t mag_coeff[AR9300_MAX_CHAINS][MAX_MEASUREMENT][MAXIQCAL]; int32_t phs_coeff[AR9300_MAX_CHAINS][MAX_MEASUREMENT][MAXIQCAL]; int32_t iqc_coeff[2]; int last_nmeasurement; HAL_BOOL last_cal; }; static HAL_BOOL ar9300_tx_iq_cal_hw_run(struct ath_hal *ah); static void ar9300_tx_iq_cal_post_proc(struct ath_hal *ah,HAL_CHANNEL_INTERNAL *ichan, int iqcal_idx, int max_iqcal, HAL_BOOL is_cal_reusable, HAL_BOOL apply_last_corr); static void ar9300_tx_iq_cal_outlier_detection(struct ath_hal *ah,HAL_CHANNEL_INTERNAL *ichan, u_int32_t num_chains, struct coeff_t *coeff, HAL_BOOL is_cal_reusable); #if ATH_SUPPORT_CAL_REUSE static void ar9300_tx_iq_cal_apply(struct ath_hal *ah, HAL_CHANNEL_INTERNAL *ichan); #endif static inline void ar9300_prog_ini(struct ath_hal *ah, struct ar9300_ini_array *ini_arr, int column); static inline void ar9300_set_rf_mode(struct ath_hal *ah, struct ieee80211_channel *chan); static inline HAL_BOOL ar9300_init_cal(struct ath_hal *ah, struct ieee80211_channel *chan, HAL_BOOL skip_if_none, HAL_BOOL apply_last_corr); static inline void ar9300_init_user_settings(struct ath_hal *ah); #ifdef HOST_OFFLOAD /* * For usb offload solution, some USB registers must be tuned * to gain better stability/performance but these registers * might be changed while doing wlan reset so do this here */ #define WAR_USB_DISABLE_PLL_LOCK_DETECT(__ah) \ do { \ if (AR_SREV_HORNET(__ah) || AR_SREV_WASP(__ah)) { \ volatile u_int32_t *usb_ctrl_r1 = (u_int32_t *) 0xb8116c84; \ volatile u_int32_t *usb_ctrl_r2 = (u_int32_t *) 0xb8116c88; \ *usb_ctrl_r1 = (*usb_ctrl_r1 & 0xffefffff); \ *usb_ctrl_r2 = (*usb_ctrl_r2 & 0xfc1fffff) | (1 << 21) | (3 << 22); \ } \ } while (0) #else #define WAR_USB_DISABLE_PLL_LOCK_DETECT(__ah) #endif static inline void ar9300_attach_hw_platform(struct ath_hal *ah) { struct ath_hal_9300 *ahp = AH9300(ah); ahp->ah_hwp = HAL_TRUE_CHIP; return; } /* Adjust various register settings based on half/quarter rate clock setting. * This includes: +USEC, TX/RX latency, * + IFS params: slot, eifs, misc etc. * SIFS stays the same. 
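 */

/*
 * Editor's note (added): SM() and MS(), used heavily in the function
 * below, follow the usual ath_hal convention of pairing a field mask
 * FOO with a shift FOO_S.  A minimal sketch of that pattern, using
 * hypothetical names (EX_FIELD and friends are not real registers):
 */
#if 0	/* illustrative sketch only; not part of the driver build */
#define EX_FIELD	0x00003f00	/* hypothetical field mask */
#define EX_FIELD_S	8		/* hypothetical field shift */
#define EX_SM(v)	(((v) << EX_FIELD_S) & EX_FIELD)	/* value into field */
#define EX_MS(r)	(((r) & EX_FIELD) >> EX_FIELD_S)	/* field out of register */
#endif

/*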
*/ static void ar9300_set_ifs_timing(struct ath_hal *ah, struct ieee80211_channel *chan) { u_int32_t tx_lat, rx_lat, usec, slot, regval, eifs; regval = OS_REG_READ(ah, AR_USEC); regval &= ~(AR_USEC_RX_LATENCY | AR_USEC_TX_LATENCY | AR_USEC_USEC); if (IEEE80211_IS_CHAN_HALF(chan)) { /* half rates */ slot = ar9300_mac_to_clks(ah, AR_SLOT_HALF); eifs = ar9300_mac_to_clks(ah, AR_EIFS_HALF); if (IS_5GHZ_FAST_CLOCK_EN(ah, chan)) { /* fast clock */ rx_lat = SM(AR_RX_LATENCY_HALF_FAST_CLOCK, AR_USEC_RX_LATENCY); tx_lat = SM(AR_TX_LATENCY_HALF_FAST_CLOCK, AR_USEC_TX_LATENCY); usec = SM(AR_USEC_HALF_FAST_CLOCK, AR_USEC_USEC); } else { rx_lat = SM(AR_RX_LATENCY_HALF, AR_USEC_RX_LATENCY); tx_lat = SM(AR_TX_LATENCY_HALF, AR_USEC_TX_LATENCY); usec = SM(AR_USEC_HALF, AR_USEC_USEC); } } else { /* quarter rate */ slot = ar9300_mac_to_clks(ah, AR_SLOT_QUARTER); eifs = ar9300_mac_to_clks(ah, AR_EIFS_QUARTER); if (IS_5GHZ_FAST_CLOCK_EN(ah, chan)) { /* fast clock */ rx_lat = SM(AR_RX_LATENCY_QUARTER_FAST_CLOCK, AR_USEC_RX_LATENCY); tx_lat = SM(AR_TX_LATENCY_QUARTER_FAST_CLOCK, AR_USEC_TX_LATENCY); usec = SM(AR_USEC_QUARTER_FAST_CLOCK, AR_USEC_USEC); } else { rx_lat = SM(AR_RX_LATENCY_QUARTER, AR_USEC_RX_LATENCY); tx_lat = SM(AR_TX_LATENCY_QUARTER, AR_USEC_TX_LATENCY); usec = SM(AR_USEC_QUARTER, AR_USEC_USEC); } } OS_REG_WRITE(ah, AR_USEC, (usec | regval | tx_lat | rx_lat)); OS_REG_WRITE(ah, AR_D_GBL_IFS_SLOT, slot); OS_REG_WRITE(ah, AR_D_GBL_IFS_EIFS, eifs); } /* * This inline function configures the chip either * to encrypt/decrypt management frames or pass thru */ static inline void ar9300_init_mfp(struct ath_hal * ah) { u_int32_t mfpcap, mfp_qos; ath_hal_getcapability(ah, HAL_CAP_MFP, 0, &mfpcap); if (mfpcap == HAL_MFP_QOSDATA) { /* Treat like legacy hardware. Do not touch the MFP registers. */ HALDEBUG(ah, HAL_DEBUG_RESET, "%s forced to use QOSDATA\n", __func__); return; } /* MFP support (Sowl 1.0 or greater) */ if (mfpcap == HAL_MFP_HW_CRYPTO) { /* configure hardware MFP support */ HALDEBUG(ah, HAL_DEBUG_RESET, "%s using HW crypto\n", __func__); OS_REG_RMW_FIELD(ah, AR_AES_MUTE_MASK1, AR_AES_MUTE_MASK1_FC_MGMT, AR_AES_MUTE_MASK1_FC_MGMT_MFP); OS_REG_RMW(ah, AR_PCU_MISC_MODE2, AR_PCU_MISC_MODE2_MGMT_CRYPTO_ENABLE, AR_PCU_MISC_MODE2_NO_CRYPTO_FOR_NON_DATA_PKT); /* * Mask used to construct AAD for CCMP-AES * Cisco spec defined bits 0-3 as mask * IEEE802.11w defined as bit 4. */ if (ath_hal_get_mfp_qos(ah)) { mfp_qos = AR_MFP_QOS_MASK_IEEE; } else { mfp_qos = AR_MFP_QOS_MASK_CISCO; } OS_REG_RMW_FIELD(ah, AR_PCU_MISC_MODE2, AR_PCU_MISC_MODE2_MGMT_QOS, mfp_qos); } else if (mfpcap == HAL_MFP_PASSTHRU) { /* Disable en/decrypt by hardware */ HALDEBUG(ah, HAL_DEBUG_RESET, "%s using passthru\n", __func__); OS_REG_RMW(ah, AR_PCU_MISC_MODE2, AR_PCU_MISC_MODE2_NO_CRYPTO_FOR_NON_DATA_PKT, AR_PCU_MISC_MODE2_MGMT_CRYPTO_ENABLE); } } void ar9300_get_channel_centers(struct ath_hal *ah, const struct ieee80211_channel *chan, CHAN_CENTERS *centers) { int8_t extoff; struct ath_hal_9300 *ahp = AH9300(ah); HAL_CHANNEL_INTERNAL *ichan = ath_hal_checkchannel(ah, chan); if (!IEEE80211_IS_CHAN_HT40(chan)) { centers->ctl_center = centers->ext_center = centers->synth_center = ichan->channel; return; } HALASSERT(IEEE80211_IS_CHAN_HT40(chan)); /* * In 20/40 phy mode, the center frequency is * "between" the primary and extension channels. 
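 *
 * Editor's worked example (added for illustration): for an HT40U
 * channel whose control channel is 5180 MHz, synth_center becomes
 * 5180 + 10 = 5190 MHz, ctl_center = 5190 - 10 = 5180 MHz, and with
 * 20 MHz extension protection spacing ext_center = 5190 + 10 =
 * 5200 MHz.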
*/ if (IEEE80211_IS_CHAN_HT40U(chan)) { centers->synth_center = ichan->channel + HT40_CHANNEL_CENTER_SHIFT; extoff = 1; } else { centers->synth_center = ichan->channel - HT40_CHANNEL_CENTER_SHIFT; extoff = -1; } centers->ctl_center = centers->synth_center - (extoff * HT40_CHANNEL_CENTER_SHIFT); centers->ext_center = centers->synth_center + (extoff * ((ahp->ah_ext_prot_spacing == HAL_HT_EXTPROTSPACING_20) ? HT40_CHANNEL_CENTER_SHIFT : 15)); } /* * Read the noise-floor values from the HW. * Specifically, read the minimum clear-channel assessment value for * each chain, for both the control and extension channels. * (The received power level during clear-channel periods is the * noise floor.) * These noise floor values computed by the HW will be stored in the * NF history buffer. * The HW sometimes produces bogus NF values. To avoid using these * bogus values, the NF data is (a) range-limited, and (b) filtered. * However, this data-processing is done when reading the NF values * out of the history buffer. The history buffer stores the raw values. * This allows the NF history buffer to be used to check for interference. * A single high NF reading might be a bogus HW value, but if the NF * readings are consistently high, it must be due to interference. * This is the purpose of storing raw NF values in the history buffer, * rather than processed values. By looking at a history of NF values * that have not been range-limited, we can check if they are consistently * high (due to interference). */ #define AH_NF_SIGN_EXTEND(nf) \ ((nf) & 0x100) ? \ 0 - (((nf) ^ 0x1ff) + 1) : \ (nf) void ar9300_upload_noise_floor(struct ath_hal *ah, int is_2g, int16_t nfarray[HAL_NUM_NF_READINGS]) { int16_t nf; int chan, chain; u_int32_t regs[HAL_NUM_NF_READINGS] = { /* control channel */ AR_PHY_CCA_0, /* chain 0 */ AR_PHY_CCA_1, /* chain 1 */ AR_PHY_CCA_2, /* chain 2 */ /* extension channel */ AR_PHY_EXT_CCA, /* chain 0 */ AR_PHY_EXT_CCA_1, /* chain 1 */ AR_PHY_EXT_CCA_2, /* chain 2 */ }; u_int8_t chainmask; /* * Within a given channel (ctl vs. ext), the CH0, CH1, and CH2 * masks and shifts are the same, though they differ for the * control vs. extension channels. */ u_int32_t masks[2] = { AR_PHY_MINCCA_PWR, /* control channel */ AR_PHY_EXT_MINCCA_PWR, /* extension channel */ }; u_int8_t shifts[2] = { AR_PHY_MINCCA_PWR_S, /* control channel */ AR_PHY_EXT_MINCCA_PWR_S, /* extension channel */ }; /* * Force NF calibration for all chains. */ if (AR_SREV_HORNET(ah) || AR_SREV_POSEIDON(ah) || AR_SREV_APHRODITE(ah)) { chainmask = 0x01; } else if (AR_SREV_WASP(ah) || AR_SREV_JUPITER(ah) || AR_SREV_HONEYBEE(ah)) { chainmask = 0x03; } else { chainmask = 0x07; } for (chan = 0; chan < 2 /*ctl,ext*/; chan++) { for (chain = 0; chain < AR9300_MAX_CHAINS; chain++) { int i; if (!((chainmask >> chain) & 0x1)) { continue; } i = chan * AR9300_MAX_CHAINS + chain; nf = (OS_REG_READ(ah, regs[i]) & masks[chan]) >> shifts[chan]; nfarray[i] = AH_NF_SIGN_EXTEND(nf); } } } /* ar9300_get_min_cca_pwr - * Used by the scan function for a quick read of the noise floor. * This is used to detect presence of CW interference such as video bridge. * The noise floor is assumed to have been already started during reset * called during channel change. The function checks if the noise floor * reading is done. In case it has been done, it reads the noise floor value.
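 *
 * (Editor's worked example of the 9-bit sign extension used by
 * AH_NF_SIGN_EXTEND above: a raw field value of 0x1a6 has bit 8 set,
 * so the result is 0 - ((0x1a6 ^ 0x1ff) + 1) = -90, i.e. -90 dBm.)
 *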
* If the noise floor calibration has not been finished, it assumes this is * due to presence of CW interference and returns a high value for noise floor, * derived from the CW interference threshold + margin fudge factor. */ #define BAD_SCAN_NF_MARGIN (30) int16_t ar9300_get_min_cca_pwr(struct ath_hal *ah) { int16_t nf; // struct ath_hal_private *ahpriv = AH_PRIVATE(ah); if ((OS_REG_READ(ah, AR_PHY_AGC_CONTROL) & AR_PHY_AGC_CONTROL_NF) == 0) { nf = MS(OS_REG_READ(ah, AR_PHY_CCA_0), AR9280_PHY_MINCCA_PWR); if (nf & 0x100) { nf = 0 - ((nf ^ 0x1ff) + 1); } } else { /* NF calibration is not done, assume CW interference */ nf = AH9300(ah)->nfp->nominal + AH9300(ah)->nf_cw_int_delta + BAD_SCAN_NF_MARGIN; } return nf; } /* * Noise Floor values for all chains. * Most recently updated values from the NF history buffer are used. */ void ar9300_chain_noise_floor(struct ath_hal *ah, int16_t *nf_buf, struct ieee80211_channel *chan, int is_scan) { struct ath_hal_9300 *ahp = AH9300(ah); int i, nf_hist_len, recent_nf_index = 0; HAL_NFCAL_HIST_FULL *h; u_int8_t rx_chainmask = ahp->ah_rx_chainmask | (ahp->ah_rx_chainmask << 3); HAL_CHANNEL_INTERNAL *ichan = ath_hal_checkchannel(ah, chan); HALASSERT(ichan); #ifdef ATH_NF_PER_CHAN /* Fill 0 if valid internal channel is not found */ if (ichan == AH_NULL) { OS_MEMZERO(nf_buf, sizeof(nf_buf[0])*HAL_NUM_NF_READINGS); return; } h = &ichan->nf_cal_hist; nf_hist_len = HAL_NF_CAL_HIST_LEN_FULL; #else /* * If a scan is not in progress, then the most recent value goes * into ahpriv->nf_cal_hist. If a scan is in progress, then * the most recent value goes into ichan->nf_cal_hist. * Thus, return the value from ahpriv->nf_cal_hist if there's * no scan, and if the specified channel is the current channel. * Otherwise, return the noise floor from ichan->nf_cal_hist. */ if ((!is_scan) && chan == AH_PRIVATE(ah)->ah_curchan) { h = &AH_PRIVATE(ah)->nf_cal_hist; nf_hist_len = HAL_NF_CAL_HIST_LEN_FULL; } else { /* Fill 0 if valid internal channel is not found */ if (ichan == AH_NULL) { OS_MEMZERO(nf_buf, sizeof(nf_buf[0])*HAL_NUM_NF_READINGS); return; } /* * It is okay to treat a HAL_NFCAL_HIST_SMALL struct as if it were a * HAL_NFCAL_HIST_FULL struct, as long as only the index 0 of the * nf_cal_buffer is used (nf_cal_buffer[0][0:HAL_NUM_NF_READINGS-1]) */ h = (HAL_NFCAL_HIST_FULL *) &ichan->nf_cal_hist; nf_hist_len = HAL_NF_CAL_HIST_LEN_SMALL; } #endif /* Get most recently updated values from nf cal history buffer */ recent_nf_index = (h->base.curr_index) ? h->base.curr_index - 1 : nf_hist_len - 1; for (i = 0; i < HAL_NUM_NF_READINGS; i++) { /* Fill 0 for unsupported chains */ if (!(rx_chainmask & (1 << i))) { nf_buf[i] = 0; continue; } nf_buf[i] = h->nf_cal_buffer[recent_nf_index][i]; } } /* * Return the current NF value in register. * If the current NF cal is not completed, return 0. */ int16_t ar9300_get_nf_from_reg(struct ath_hal *ah, struct ieee80211_channel *chan, int wait_time) { int16_t nfarray[HAL_NUM_NF_READINGS] = {0}; int is_2g = 0; HAL_CHANNEL_INTERNAL *ichan = NULL; ichan = ath_hal_checkchannel(ah, chan); if (ichan == NULL) return (0); if (wait_time <= 0) { return 0; } if (!ath_hal_waitfor(ah, AR_PHY_AGC_CONTROL, AR_PHY_AGC_CONTROL_NF, 0, wait_time)) { ath_hal_printf(ah, "%s: NF cal is not complete in %dus", __func__, wait_time); return 0; } is_2g = !!
(IS_CHAN_2GHZ(ichan)); ar9300_upload_noise_floor(ah, is_2g, nfarray); return nfarray[0]; } /* * Pick the median value from the noise floor buffer and update the * corresponding range for valid noise floor values */ static int16_t ar9300_get_nf_hist_mid(struct ath_hal *ah, HAL_NFCAL_HIST_FULL *h, int reading, int hist_len) { int16_t nfval; int16_t sort[HAL_NF_CAL_HIST_LEN_FULL]; /* upper bound for hist_len */ int i, j; for (i = 0; i < hist_len; i++) { sort[i] = h->nf_cal_buffer[i][reading]; HALDEBUG(ah, HAL_DEBUG_NFCAL, "nf_cal_buffer[%d][%d] = %d\n", i, reading, (int)sort[i]); } for (i = 0; i < hist_len - 1; i++) { for (j = 1; j < hist_len - i; j++) { if (sort[j] > sort[j - 1]) { nfval = sort[j]; sort[j] = sort[j - 1]; sort[j - 1] = nfval; } } } nfval = sort[(hist_len - 1) >> 1]; return nfval; } static int16_t ar9300_limit_nf_range(struct ath_hal *ah, int16_t nf) { if (nf < AH9300(ah)->nfp->min) { return AH9300(ah)->nfp->nominal; } else if (nf > AH9300(ah)->nfp->max) { return AH9300(ah)->nfp->max; } return nf; } #ifndef ATH_NF_PER_CHAN inline static void ar9300_reset_nf_hist_buff(struct ath_hal *ah, HAL_CHANNEL_INTERNAL *ichan) { HAL_CHAN_NFCAL_HIST *h = &ichan->nf_cal_hist; HAL_NFCAL_HIST_FULL *home = &AH_PRIVATE(ah)->nf_cal_hist; int i; /* * Copy the value for the channel in question into the home-channel * NF history buffer. The channel NF is probably a value filled in by * a prior background channel scan, but if no scan has been done then * it is the nominal noise floor filled in by ath_hal_init_NF_buffer * for this chip and the channel's band. * Replicate this channel NF into all entries of the home-channel NF * history buffer. * If the channel NF was filled in by a channel scan, it has not had * bounds limits applied to it yet - do so now. It is important to * apply bounds limits to the priv_nf value that gets loaded into the * WLAN chip's min_cca_pwr register field. It is also necessary to * apply bounds limits to the nf_cal_buffer[] elements. Since we are * replicating a single NF reading into all nf_cal_buffer elements, * if the single reading were above the CW_INT threshold, the CW_INT * check in ar9300_get_nf would immediately conclude that CW interference * is present, even though we're not supposed to set CW_INT unless * NF values are _consistently_ above the CW_INT threshold. * Applying the bounds limits to the nf_cal_buffer contents fixes this * problem. */ for (i = 0; i < HAL_NUM_NF_READINGS; i ++) { int j; int16_t nf; /* * No need to set curr_index, since it already has a value in * the range [0..HAL_NF_CAL_HIST_LEN_FULL), and all nf_cal_buffer * values will be the same.
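 *
 * (Editor's worked example for ar9300_get_nf_hist_mid() above: with
 * five stored readings {-96, -88, -110, -90, -92}, the descending
 * sort yields {-88, -90, -92, -96, -110} and the element at index
 * (5 - 1) >> 1 = 2, i.e. -92, is returned as the median.)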
*/ nf = ar9300_limit_nf_range(ah, h->nf_cal_buffer[0][i]); for (j = 0; j < HAL_NF_CAL_HIST_LEN_FULL; j++) { home->nf_cal_buffer[j][i] = nf; } AH_PRIVATE(ah)->nf_cal_hist.base.priv_nf[i] = nf; } } #endif /* * Update the noise floor buffer as a ring buffer */ static int16_t ar9300_update_nf_hist_buff(struct ath_hal *ah, HAL_NFCAL_HIST_FULL *h, int16_t *nfarray, int hist_len) { int i, nr; int16_t nf_no_lim_chain0; nf_no_lim_chain0 = ar9300_get_nf_hist_mid(ah, h, 0, hist_len); HALDEBUG(ah, HAL_DEBUG_NFCAL, "%s[%d] BEFORE\n", __func__, __LINE__); for (nr = 0; nr < HAL_NF_CAL_HIST_LEN_FULL; nr++) { for (i = 0; i < HAL_NUM_NF_READINGS; i++) { HALDEBUG(ah, HAL_DEBUG_NFCAL, "nf_cal_buffer[%d][%d] = %d\n", nr, i, (int)h->nf_cal_buffer[nr][i]); } } for (i = 0; i < HAL_NUM_NF_READINGS; i++) { h->nf_cal_buffer[h->base.curr_index][i] = nfarray[i]; h->base.priv_nf[i] = ar9300_limit_nf_range( ah, ar9300_get_nf_hist_mid(ah, h, i, hist_len)); } HALDEBUG(ah, HAL_DEBUG_NFCAL, "%s[%d] AFTER\n", __func__, __LINE__); for (nr = 0; nr < HAL_NF_CAL_HIST_LEN_FULL; nr++) { for (i = 0; i < HAL_NUM_NF_READINGS; i++) { HALDEBUG(ah, HAL_DEBUG_NFCAL, "nf_cal_buffer[%d][%d] = %d\n", nr, i, (int)h->nf_cal_buffer[nr][i]); } } if (++h->base.curr_index >= hist_len) { h->base.curr_index = 0; } return nf_no_lim_chain0; } #ifdef UNUSED static HAL_BOOL get_noise_floor_thresh(struct ath_hal *ah, const HAL_CHANNEL_INTERNAL *chan, int16_t *nft) { struct ath_hal_9300 *ahp = AH9300(ah); switch (chan->channel_flags & CHANNEL_ALL_NOTURBO) { case CHANNEL_A: case CHANNEL_A_HT20: case CHANNEL_A_HT40PLUS: case CHANNEL_A_HT40MINUS: *nft = (int8_t)ar9300_eeprom_get(ahp, EEP_NFTHRESH_5); break; case CHANNEL_B: case CHANNEL_G: case CHANNEL_G_HT20: case CHANNEL_G_HT40PLUS: case CHANNEL_G_HT40MINUS: *nft = (int8_t)ar9300_eeprom_get(ahp, EEP_NFTHRESH_2); break; default: HALDEBUG(ah, HAL_DEBUG_CHANNEL, "%s: invalid channel flags 0x%x\n", __func__, chan->channel_flags); return AH_FALSE; } return AH_TRUE; } #endif /* * Read the NF and check it against the noise floor threshold */ #define IS(_c, _f) (((_c)->channel_flags & _f) || 0) static int ar9300_store_new_nf(struct ath_hal *ah, struct ieee80211_channel *chan, int is_scan) { // struct ath_hal_private *ahpriv = AH_PRIVATE(ah); int nf_hist_len; int16_t nf_no_lim; int16_t nfarray[HAL_NUM_NF_READINGS] = {0}; HAL_NFCAL_HIST_FULL *h; int is_2g = 0; HAL_CHANNEL_INTERNAL *ichan = ath_hal_checkchannel(ah, chan); struct ath_hal_9300 *ahp = AH9300(ah); if (OS_REG_READ(ah, AR_PHY_AGC_CONTROL) & AR_PHY_AGC_CONTROL_NF) { u_int32_t tsf32, nf_cal_dur_tsf; /* * The reason the NF calibration did not complete may just be that * not enough time has passed since the NF calibration was started, * because under certain conditions (when first moving to a new * channel) the NF calibration may be checked very repeatedly. * Or, there may be CW interference keeping the NF calibration * from completing. Check the delta time between when the NF * calibration was started and now to see whether the NF calibration * should have already completed (but hasn't, probably due to CW * interference), or hasn't had enough time to finish yet. */ /* * AH_NF_CAL_DUR_TSF - A conservative maximum time that the * HW should need to finish a NF calibration. If the HW * does not complete a NF calibration within this time period, * there must be a problem - probably CW interference. * AH_NF_CAL_PERIOD_MAX_TSF - A conservative maximum time between * checks of the HW's NF calibration being finished.
* If the difference between the current TSF and the TSF * recorded when the NF calibration started is larger than this * value, the TSF must have been reset. * In general, we expect the TSF to only be reset during * regular operation for STAs, not for APs. However, an * AP's TSF could be reset when joining an IBSS. * There's an outside chance that this could result in the * CW_INT flag being erroneously set, if the TSF adjustment * is smaller than AH_NF_CAL_PERIOD_MAX_TSF but larger than * AH_NF_CAL_DUR_TSF. However, even if this does happen, * it shouldn't matter, as the IBSS case shouldn't be * concerned about CW_INT. */ /* AH_NF_CAL_DUR_TSF - 90 sec in usec units */ #define AH_NF_CAL_DUR_TSF (90 * 1000 * 1000) /* AH_NF_CAL_PERIOD_MAX_TSF - 180 sec in usec units */ #define AH_NF_CAL_PERIOD_MAX_TSF (180 * 1000 * 1000) /* wraparound handled by using unsigned values */ tsf32 = ar9300_get_tsf32(ah); nf_cal_dur_tsf = tsf32 - AH9300(ah)->nf_tsf32; if (nf_cal_dur_tsf > AH_NF_CAL_PERIOD_MAX_TSF) { /* * The TSF must have gotten reset during the NF cal - * just reset the NF TSF timestamp, so the next time * this function is called, the timestamp comparison * will be valid. */ AH9300(ah)->nf_tsf32 = tsf32; } else if (nf_cal_dur_tsf > AH_NF_CAL_DUR_TSF) { HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: NF did not complete in calibration window\n", __func__); /* the NF incompletion is probably due to CW interference */ chan->ic_state |= IEEE80211_CHANSTATE_CWINT; } return 0; /* HW's NF measurement not finished */ } HALDEBUG(ah, HAL_DEBUG_NFCAL, "%s[%d] chan %d\n", __func__, __LINE__, ichan->channel); is_2g = !! IS_CHAN_2GHZ(ichan); ar9300_upload_noise_floor(ah, is_2g, nfarray); /* Update the NF buffer for each chain masked by chainmask */ #ifdef ATH_NF_PER_CHAN h = &ichan->nf_cal_hist; nf_hist_len = HAL_NF_CAL_HIST_LEN_FULL; #else if (is_scan) { /* * This channel's NF cal info is just a HAL_NFCAL_HIST_SMALL struct * rather than a HAL_NFCAL_HIST_FULL struct. * As long as we only use the first history element of nf_cal_buffer * (nf_cal_buffer[0][0:HAL_NUM_NF_READINGS-1]), we can use * HAL_NFCAL_HIST_SMALL and HAL_NFCAL_HIST_FULL interchangeably. */ h = (HAL_NFCAL_HIST_FULL *) &ichan->nf_cal_hist; nf_hist_len = HAL_NF_CAL_HIST_LEN_SMALL; } else { h = &AH_PRIVATE(ah)->nf_cal_hist; nf_hist_len = HAL_NF_CAL_HIST_LEN_FULL; } #endif /* * nf_no_lim = median value from NF history buffer without bounds limits, * priv_nf = median value from NF history buffer with bounds limits. */ nf_no_lim = ar9300_update_nf_hist_buff(ah, h, nfarray, nf_hist_len); ichan->rawNoiseFloor = h->base.priv_nf[0]; /* check if there is interference */ // ichan->channel_flags &= (~CHANNEL_CW_INT); /* * Use AR9300_EMULATION to check for emulation purpose as PCIE Device ID * 0xABCD is recognized as valid Osprey as WAR in some EVs. */ if (nf_no_lim > ahp->nfp->nominal + ahp->nf_cw_int_delta) { /* * Since this CW interference check is being applied to the * median element of the NF history buffer, this indicates that * the CW interference is persistent. A single high NF reading * will not show up in the median, and thus will not cause the * CW_INT flag to be set. 
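 *
 * (Editor's illustration with assumed numbers: if the nominal NF is
 * -110 dBm and nf_cw_int_delta is, say, 30, only a *median* reading
 * above -80 dBm sets CW_INT; a single stray -70 dBm sample among
 * otherwise normal readings does not move the median and is ignored.)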
*/ HALDEBUG(ah, HAL_DEBUG_NFCAL, "%s: NF Cal: CW interferer detected through NF: %d\n", __func__, nf_no_lim); chan->ic_state |= IEEE80211_CHANSTATE_CWINT; } return 1; /* HW's NF measurement finished */ } #undef IS static inline void ar9300_get_delta_slope_values(struct ath_hal *ah, u_int32_t coef_scaled, u_int32_t *coef_mantissa, u_int32_t *coef_exponent) { u_int32_t coef_exp, coef_man; /* * ALGO -> coef_exp = 14-floor(log2(coef)); * floor(log2(x)) is the highest set bit position */ for (coef_exp = 31; coef_exp > 0; coef_exp--) { if ((coef_scaled >> coef_exp) & 0x1) { break; } } /* A coef_exp of 0 is a legal bit position but an unexpected coef_exp */ HALASSERT(coef_exp); coef_exp = 14 - (coef_exp - COEF_SCALE_S); /* * ALGO -> coef_man = floor(coef* 2^coef_exp+0.5); * The coefficient is already shifted up for scaling */ coef_man = coef_scaled + (1 << (COEF_SCALE_S - coef_exp - 1)); *coef_mantissa = coef_man >> (COEF_SCALE_S - coef_exp); *coef_exponent = coef_exp - 16; } #define MAX_ANALOG_START 319 /* XXX */ /* * Delta slope coefficient computation. * Required for OFDM operation. */ static void ar9300_set_delta_slope(struct ath_hal *ah, struct ieee80211_channel *chan) { u_int32_t coef_scaled, ds_coef_exp, ds_coef_man; u_int32_t fclk = COEFF; /* clock * 2.5 */ u_int32_t clock_mhz_scaled = 0x1000000 * fclk; CHAN_CENTERS centers; /* * half and quarter rate can divide the scaled clock by 2 or 4 * scale for selected channel bandwidth */ if (IEEE80211_IS_CHAN_HALF(chan)) { clock_mhz_scaled = clock_mhz_scaled >> 1; } else if (IEEE80211_IS_CHAN_QUARTER(chan)) { clock_mhz_scaled = clock_mhz_scaled >> 2; } /* * ALGO -> coef = 1e8/fcarrier*fclock/40; * scaled coef to provide precision for this floating calculation */ ar9300_get_channel_centers(ah, chan, ¢ers); coef_scaled = clock_mhz_scaled / centers.synth_center; ar9300_get_delta_slope_values(ah, coef_scaled, &ds_coef_man, &ds_coef_exp); OS_REG_RMW_FIELD(ah, AR_PHY_TIMING3, AR_PHY_TIMING3_DSC_MAN, ds_coef_man); OS_REG_RMW_FIELD(ah, AR_PHY_TIMING3, AR_PHY_TIMING3_DSC_EXP, ds_coef_exp); /* * For Short GI, * scaled coeff is 9/10 that of normal coeff */ coef_scaled = (9 * coef_scaled) / 10; ar9300_get_delta_slope_values(ah, coef_scaled, &ds_coef_man, &ds_coef_exp); /* for short gi */ OS_REG_RMW_FIELD(ah, AR_PHY_SGI_DELTA, AR_PHY_SGI_DSC_MAN, ds_coef_man); OS_REG_RMW_FIELD(ah, AR_PHY_SGI_DELTA, AR_PHY_SGI_DSC_EXP, ds_coef_exp); } #define IS(_c, _f) (IEEE80211_IS_ ## _f(_c)) /* * XXX FreeBSD: This should be turned into something generic in ath_hal! */ HAL_CHANNEL_INTERNAL * ar9300_check_chan(struct ath_hal *ah, const struct ieee80211_channel *chan) { if (chan == NULL) { return AH_NULL; } if ((IS(chan, CHAN_2GHZ) ^ IS(chan, CHAN_5GHZ)) == 0) { HALDEBUG(ah, HAL_DEBUG_CHANNEL, "%s: invalid channel %u/0x%x; not marked as 2GHz or 5GHz\n", __func__, chan->ic_freq , chan->ic_flags); return AH_NULL; } /* * FreeBSD sets multiple flags, so this will fail. 
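 *
 * (Editor's note: the (IS(chan, CHAN_2GHZ) ^ IS(chan, CHAN_5GHZ))
 * test above relies on the IEEE80211_IS_CHAN_* macros evaluating to
 * 0 or 1, so the XOR is nonzero only when exactly one band flag is
 * set; both-set and neither-set are rejected as invalid.)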
*/ #if 0 if ((IS(chan, CHAN_OFDM) ^ IS(chan, CHAN_CCK) ^ IS(chan, CHAN_DYN) ^ IS(chan, CHAN_HT20) ^ IS(chan, CHAN_HT40U) ^ IS(chan, CHAN_HT40D)) == 0) { HALDEBUG(ah, HAL_DEBUG_CHANNEL, "%s: invalid channel %u/0x%x; not marked as " "OFDM or CCK or DYN or HT20 or HT40PLUS or HT40MINUS\n", __func__, chan->ic_freq , chan->ic_flags); return AH_NULL; } #endif return (ath_hal_checkchannel(ah, chan)); } #undef IS static void ar9300_set_11n_regs(struct ath_hal *ah, struct ieee80211_channel *chan, HAL_HT_MACMODE macmode) { u_int32_t phymode; // struct ath_hal_9300 *ahp = AH9300(ah); u_int32_t enable_dac_fifo; /* XXX */ enable_dac_fifo = OS_REG_READ(ah, AR_PHY_GEN_CTRL) & AR_PHY_GC_ENABLE_DAC_FIFO; /* Enable 11n HT, 20 MHz */ phymode = AR_PHY_GC_HT_EN | AR_PHY_GC_SINGLE_HT_LTF1 | AR_PHY_GC_SHORT_GI_40 | enable_dac_fifo; /* Configure baseband for dynamic 20/40 operation */ if (IEEE80211_IS_CHAN_HT40(chan)) { phymode |= AR_PHY_GC_DYN2040_EN; /* Configure control (primary) channel at +-10MHz */ if (IEEE80211_IS_CHAN_HT40U(chan)) { phymode |= AR_PHY_GC_DYN2040_PRI_CH; } #if 0 /* Configure 20/25 spacing */ if (ahp->ah_ext_prot_spacing == HAL_HT_EXTPROTSPACING_25) { phymode |= AR_PHY_GC_DYN2040_EXT_CH; } #endif } /* make sure we preserve INI settings */ phymode |= OS_REG_READ(ah, AR_PHY_GEN_CTRL); /* EV 62881/64991 - turn off Green Field detection for Maverick STA beta */ phymode &= ~AR_PHY_GC_GF_DETECT_EN; OS_REG_WRITE(ah, AR_PHY_GEN_CTRL, phymode); /* Set IFS timing for half/quarter rates */ if (IEEE80211_IS_CHAN_HALF(chan) || IEEE80211_IS_CHAN_QUARTER(chan)) { u_int32_t modeselect = OS_REG_READ(ah, AR_PHY_MODE); if (IEEE80211_IS_CHAN_HALF(chan)) { modeselect |= AR_PHY_MS_HALF_RATE; } else if (IEEE80211_IS_CHAN_QUARTER(chan)) { modeselect |= AR_PHY_MS_QUARTER_RATE; } OS_REG_WRITE(ah, AR_PHY_MODE, modeselect); ar9300_set_ifs_timing(ah, chan); OS_REG_RMW_FIELD( ah, AR_PHY_FRAME_CTL, AR_PHY_FRAME_CTL_CF_OVERLAP_WINDOW, 0x3); } /* Configure MAC for 20/40 operation */ ar9300_set_11n_mac2040(ah, macmode); /* global transmit timeout (25 TUs default)*/ /* XXX - put this elsewhere??? */ OS_REG_WRITE(ah, AR_GTXTO, 25 << AR_GTXTO_TIMEOUT_LIMIT_S); /* carrier sense timeout */ OS_REG_WRITE(ah, AR_CST, 0xF << AR_CST_TIMEOUT_LIMIT_S); } /* * Spur mitigation for MRC CCK */ static void ar9300_spur_mitigate_mrc_cck(struct ath_hal *ah, struct ieee80211_channel *chan) { int i; /* spur_freq_for_osprey - hardcoded by Systems team for now. */ u_int32_t spur_freq_for_osprey[4] = { 2420, 2440, 2464, 2480 }; u_int32_t spur_freq_for_jupiter[2] = { 2440, 2464}; int cur_bb_spur, negative = 0, cck_spur_freq; u_int8_t* spur_fbin_ptr = NULL; int synth_freq; int range = 10; int max_spurcounts = OSPREY_EEPROM_MODAL_SPURS; HAL_CHANNEL_INTERNAL *ichan = ath_hal_checkchannel(ah, chan); /* * Need to verify range +/- 10 MHz in control channel, otherwise spur * is out-of-band and can be ignored. */ if (AR_SREV_HORNET(ah) || AR_SREV_POSEIDON(ah) || AR_SREV_WASP(ah) || AR_SREV_SCORPION(ah)) { spur_fbin_ptr = ar9300_eeprom_get_spur_chans_ptr(ah, 1); if (spur_fbin_ptr[0] == 0) { return; /* No spur in the mode */ } if (IEEE80211_IS_CHAN_HT40(chan)) { range = 19; if (OS_REG_READ_FIELD(ah, AR_PHY_GEN_CTRL, AR_PHY_GC_DYN2040_PRI_CH) == 0x0) { synth_freq = ichan->channel + 10; } else { synth_freq = ichan->channel - 10; } } else { range = 10; synth_freq = ichan->channel; } } else if(AR_SREV_JUPITER(ah)) { range = 5; max_spurcounts = 2; /* Hardcoded by Jupiter Systems team for now. 
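 *
 * (Editor's worked example for the conversion in the loop below: a
 * spur 4 MHz from the synth frequency gives cck_spur_freq =
 * (4 << 19) / 11 = 190650 = 0x2e8ba before the 20-bit mask.)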
*/ synth_freq = ichan->channel; } else { range = 10; max_spurcounts = 4; /* Hardcoded by Osprey Systems team for now. */ synth_freq = ichan->channel; } for (i = 0; i < max_spurcounts; i++) { negative = 0; if (AR_SREV_HORNET(ah) || AR_SREV_POSEIDON(ah) || AR_SREV_WASP(ah) || AR_SREV_SCORPION(ah)) { cur_bb_spur = FBIN2FREQ(spur_fbin_ptr[i], HAL_FREQ_BAND_2GHZ) - synth_freq; } else if(AR_SREV_JUPITER(ah)) { cur_bb_spur = spur_freq_for_jupiter[i] - synth_freq; } else { cur_bb_spur = spur_freq_for_osprey[i] - synth_freq; } if (cur_bb_spur < 0) { negative = 1; cur_bb_spur = -cur_bb_spur; } if (cur_bb_spur < range) { cck_spur_freq = (int)((cur_bb_spur << 19) / 11); if (negative == 1) { cck_spur_freq = -cck_spur_freq; } cck_spur_freq = cck_spur_freq & 0xfffff; /*OS_REG_WRITE_field(ah, BB_agc_control.ycok_max, 0x7);*/ OS_REG_RMW_FIELD(ah, AR_PHY_AGC_CONTROL, AR_PHY_AGC_CONTROL_YCOK_MAX, 0x7); /*OS_REG_WRITE_field(ah, BB_cck_spur_mit.spur_rssi_thr, 0x7f);*/ OS_REG_RMW_FIELD(ah, AR_PHY_CCK_SPUR_MIT, AR_PHY_CCK_SPUR_MIT_SPUR_RSSI_THR, 0x7f); /*OS_REG_WRITE(ah, BB_cck_spur_mit.spur_filter_type, 0x2);*/ OS_REG_RMW_FIELD(ah, AR_PHY_CCK_SPUR_MIT, AR_PHY_CCK_SPUR_MIT_SPUR_FILTER_TYPE, 0x2); /*OS_REG_WRITE(ah, BB_cck_spur_mit.use_cck_spur_mit, 0x1);*/ OS_REG_RMW_FIELD(ah, AR_PHY_CCK_SPUR_MIT, AR_PHY_CCK_SPUR_MIT_USE_CCK_SPUR_MIT, 0x1); /*OS_REG_WRITE(ah, BB_cck_spur_mit.cck_spur_freq, cck_spur_freq);*/ OS_REG_RMW_FIELD(ah, AR_PHY_CCK_SPUR_MIT, AR_PHY_CCK_SPUR_MIT_CCK_SPUR_FREQ, cck_spur_freq); return; } } /*OS_REG_WRITE(ah, BB_agc_control.ycok_max, 0x5);*/ OS_REG_RMW_FIELD(ah, AR_PHY_AGC_CONTROL, AR_PHY_AGC_CONTROL_YCOK_MAX, 0x5); /*OS_REG_WRITE(ah, BB_cck_spur_mit.use_cck_spur_mit, 0x0);*/ OS_REG_RMW_FIELD(ah, AR_PHY_CCK_SPUR_MIT, AR_PHY_CCK_SPUR_MIT_USE_CCK_SPUR_MIT, 0x0); /*OS_REG_WRITE(ah, BB_cck_spur_mit.cck_spur_freq, 0x0);*/ OS_REG_RMW_FIELD(ah, AR_PHY_CCK_SPUR_MIT, AR_PHY_CCK_SPUR_MIT_CCK_SPUR_FREQ, 0x0); } /* Spur mitigation for OFDM */ static void ar9300_spur_mitigate_ofdm(struct ath_hal *ah, struct ieee80211_channel *chan) { int synth_freq; int range = 10; int freq_offset = 0; int spur_freq_sd = 0; int spur_subchannel_sd = 0; int spur_delta_phase = 0; int mask_index = 0; int i; int mode; u_int8_t* spur_chans_ptr; struct ath_hal_9300 *ahp; ahp = AH9300(ah); HAL_CHANNEL_INTERNAL *ichan = ath_hal_checkchannel(ah, chan); if (IS_CHAN_5GHZ(ichan)) { spur_chans_ptr = ar9300_eeprom_get_spur_chans_ptr(ah, 0); mode = 0; } else { spur_chans_ptr = ar9300_eeprom_get_spur_chans_ptr(ah, 1); mode = 1; } if (IEEE80211_IS_CHAN_HT40(chan)) { range = 19; if (OS_REG_READ_FIELD(ah, AR_PHY_GEN_CTRL, AR_PHY_GC_DYN2040_PRI_CH) == 0x0) { synth_freq = ichan->channel - 10; } else { synth_freq = ichan->channel + 10; } } else { range = 10; synth_freq = ichan->channel; } /* Clean all spur register fields */ OS_REG_RMW_FIELD(ah, AR_PHY_TIMING4, AR_PHY_TIMING4_ENABLE_SPUR_FILTER, 0); OS_REG_RMW_FIELD(ah, AR_PHY_TIMING11, AR_PHY_TIMING11_SPUR_FREQ_SD, 0); OS_REG_RMW_FIELD(ah, AR_PHY_TIMING11, AR_PHY_TIMING11_SPUR_DELTA_PHASE, 0); OS_REG_RMW_FIELD(ah, AR_PHY_SFCORR_EXT, AR_PHY_SFCORR_EXT_SPUR_SUBCHANNEL_SD, 0); OS_REG_RMW_FIELD(ah, AR_PHY_TIMING11, AR_PHY_TIMING11_USE_SPUR_FILTER_IN_AGC, 0); OS_REG_RMW_FIELD(ah, AR_PHY_TIMING11, AR_PHY_TIMING11_USE_SPUR_FILTER_IN_SELFCOR, 0); OS_REG_RMW_FIELD(ah, AR_PHY_TIMING4, AR_PHY_TIMING4_ENABLE_SPUR_RSSI, 0); OS_REG_RMW_FIELD(ah, AR_PHY_SPUR_REG, AR_PHY_SPUR_REG_EN_VIT_SPUR_RSSI, 0); OS_REG_RMW_FIELD(ah, AR_PHY_SPUR_REG, AR_PHY_SPUR_REG_ENABLE_NF_RSSI_SPUR_MIT, 0); OS_REG_RMW_FIELD(ah, 
AR_PHY_SPUR_REG, AR_PHY_SPUR_REG_ENABLE_MASK_PPM, 0); OS_REG_RMW_FIELD(ah, AR_PHY_TIMING4, AR_PHY_TIMING4_ENABLE_PILOT_MASK, 0); OS_REG_RMW_FIELD(ah, AR_PHY_TIMING4, AR_PHY_TIMING4_ENABLE_CHAN_MASK, 0); OS_REG_RMW_FIELD(ah, AR_PHY_PILOT_SPUR_MASK, AR_PHY_PILOT_SPUR_MASK_CF_PILOT_MASK_IDX_A, 0); OS_REG_RMW_FIELD(ah, AR_PHY_SPUR_MASK_A, AR_PHY_SPUR_MASK_A_CF_PUNC_MASK_IDX_A, 0); OS_REG_RMW_FIELD(ah, AR_PHY_CHAN_SPUR_MASK, AR_PHY_CHAN_SPUR_MASK_CF_CHAN_MASK_IDX_A, 0); OS_REG_RMW_FIELD(ah, AR_PHY_PILOT_SPUR_MASK, AR_PHY_PILOT_SPUR_MASK_CF_PILOT_MASK_A, 0); OS_REG_RMW_FIELD(ah, AR_PHY_CHAN_SPUR_MASK, AR_PHY_CHAN_SPUR_MASK_CF_CHAN_MASK_A, 0); OS_REG_RMW_FIELD(ah, AR_PHY_SPUR_MASK_A, AR_PHY_SPUR_MASK_A_CF_PUNC_MASK_A, 0); OS_REG_RMW_FIELD(ah, AR_PHY_SPUR_REG, AR_PHY_SPUR_REG_MASK_RATE_CNTL, 0); i = 0; while (spur_chans_ptr[i] && i < 5) { freq_offset = FBIN2FREQ(spur_chans_ptr[i], mode) - synth_freq; if (abs(freq_offset) < range) { /* printf( "Spur Mitigation for OFDM: Synth Frequency = %d, " "Spur Frequency = %d\n", synth_freq, FBIN2FREQ(spur_chans_ptr[i], mode)); */ if (IEEE80211_IS_CHAN_HT40(chan)) { if (freq_offset < 0) { if (OS_REG_READ_FIELD( ah, AR_PHY_GEN_CTRL, AR_PHY_GC_DYN2040_PRI_CH) == 0x0) { spur_subchannel_sd = 1; } else { spur_subchannel_sd = 0; } spur_freq_sd = ((freq_offset + 10) << 9) / 11; } else { if (OS_REG_READ_FIELD(ah, AR_PHY_GEN_CTRL, AR_PHY_GC_DYN2040_PRI_CH) == 0x0) { spur_subchannel_sd = 0; } else { spur_subchannel_sd = 1; } spur_freq_sd = ((freq_offset - 10) << 9) / 11; } spur_delta_phase = (freq_offset << 17) / 5; } else { spur_subchannel_sd = 0; spur_freq_sd = (freq_offset << 9) / 11; spur_delta_phase = (freq_offset << 18) / 5; } spur_freq_sd = spur_freq_sd & 0x3ff; spur_delta_phase = spur_delta_phase & 0xfffff; /* printf( "spur_subchannel_sd = %d, spur_freq_sd = 0x%x, " "spur_delta_phase = 0x%x\n", spur_subchannel_sd, spur_freq_sd, spur_delta_phase); */ /* OFDM Spur mitigation */ OS_REG_RMW_FIELD(ah, AR_PHY_TIMING4, AR_PHY_TIMING4_ENABLE_SPUR_FILTER, 0x1); OS_REG_RMW_FIELD(ah, AR_PHY_TIMING11, AR_PHY_TIMING11_SPUR_FREQ_SD, spur_freq_sd); OS_REG_RMW_FIELD(ah, AR_PHY_TIMING11, AR_PHY_TIMING11_SPUR_DELTA_PHASE, spur_delta_phase); OS_REG_RMW_FIELD(ah, AR_PHY_SFCORR_EXT, AR_PHY_SFCORR_EXT_SPUR_SUBCHANNEL_SD, spur_subchannel_sd); OS_REG_RMW_FIELD(ah, AR_PHY_TIMING11, AR_PHY_TIMING11_USE_SPUR_FILTER_IN_AGC, 0x1); OS_REG_RMW_FIELD(ah, AR_PHY_TIMING11, AR_PHY_TIMING11_USE_SPUR_FILTER_IN_SELFCOR, 0x1); OS_REG_RMW_FIELD(ah, AR_PHY_TIMING4, AR_PHY_TIMING4_ENABLE_SPUR_RSSI, 0x1); OS_REG_RMW_FIELD(ah, AR_PHY_SPUR_REG, AR_PHY_SPUR_REG_SPUR_RSSI_THRESH, 34); OS_REG_RMW_FIELD(ah, AR_PHY_SPUR_REG, AR_PHY_SPUR_REG_EN_VIT_SPUR_RSSI, 1); /* * Do not subtract spur power from noise floor for wasp. * This causes the maximum client test (on Veriwave) to fail * when run on spur channel (2464 MHz). * Refer to ev#82746 and ev#82744. 
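 *
 * (Editor's worked example for the HT20 conversions earlier in this
 * function: a +3 MHz offset gives spur_freq_sd = (3 << 9) / 11 = 139
 * and spur_delta_phase = (3 << 18) / 5 = 157286, before the 10- and
 * 20-bit masks are applied.)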
*/ if (!AR_SREV_WASP(ah) && (OS_REG_READ_FIELD(ah, AR_PHY_MODE, AR_PHY_MODE_DYNAMIC) == 0x1)) { OS_REG_RMW_FIELD(ah, AR_PHY_SPUR_REG, AR_PHY_SPUR_REG_ENABLE_NF_RSSI_SPUR_MIT, 1); } mask_index = (freq_offset << 4) / 5; if (mask_index < 0) { mask_index = mask_index - 1; } mask_index = mask_index & 0x7f; /*printf("Bin 0x%x\n", mask_index);*/ OS_REG_RMW_FIELD(ah, AR_PHY_SPUR_REG, AR_PHY_SPUR_REG_ENABLE_MASK_PPM, 0x1); OS_REG_RMW_FIELD(ah, AR_PHY_TIMING4, AR_PHY_TIMING4_ENABLE_PILOT_MASK, 0x1); OS_REG_RMW_FIELD(ah, AR_PHY_TIMING4, AR_PHY_TIMING4_ENABLE_CHAN_MASK, 0x1); OS_REG_RMW_FIELD(ah, AR_PHY_PILOT_SPUR_MASK, AR_PHY_PILOT_SPUR_MASK_CF_PILOT_MASK_IDX_A, mask_index); OS_REG_RMW_FIELD(ah, AR_PHY_SPUR_MASK_A, AR_PHY_SPUR_MASK_A_CF_PUNC_MASK_IDX_A, mask_index); OS_REG_RMW_FIELD(ah, AR_PHY_CHAN_SPUR_MASK, AR_PHY_CHAN_SPUR_MASK_CF_CHAN_MASK_IDX_A, mask_index); OS_REG_RMW_FIELD(ah, AR_PHY_PILOT_SPUR_MASK, AR_PHY_PILOT_SPUR_MASK_CF_PILOT_MASK_A, 0xc); OS_REG_RMW_FIELD(ah, AR_PHY_CHAN_SPUR_MASK, AR_PHY_CHAN_SPUR_MASK_CF_CHAN_MASK_A, 0xc); OS_REG_RMW_FIELD(ah, AR_PHY_SPUR_MASK_A, AR_PHY_SPUR_MASK_A_CF_PUNC_MASK_A, 0xa0); OS_REG_RMW_FIELD(ah, AR_PHY_SPUR_REG, AR_PHY_SPUR_REG_MASK_RATE_CNTL, 0xff); /* printf("BB_timing_control_4 = 0x%x\n", OS_REG_READ(ah, AR_PHY_TIMING4)); printf("BB_timing_control_11 = 0x%x\n", OS_REG_READ(ah, AR_PHY_TIMING11)); printf("BB_ext_chan_scorr_thr = 0x%x\n", OS_REG_READ(ah, AR_PHY_SFCORR_EXT)); printf("BB_spur_mask_controls = 0x%x\n", OS_REG_READ(ah, AR_PHY_SPUR_REG)); printf("BB_pilot_spur_mask = 0x%x\n", OS_REG_READ(ah, AR_PHY_PILOT_SPUR_MASK)); printf("BB_chan_spur_mask = 0x%x\n", OS_REG_READ(ah, AR_PHY_CHAN_SPUR_MASK)); printf("BB_vit_spur_mask_A = 0x%x\n", OS_REG_READ(ah, AR_PHY_SPUR_MASK_A)); */ break; } i++; } } /* * Convert to baseband spur frequency given input channel frequency * and compute register settings below. */ static void ar9300_spur_mitigate(struct ath_hal *ah, struct ieee80211_channel *chan) { ar9300_spur_mitigate_ofdm(ah, chan); ar9300_spur_mitigate_mrc_cck(ah, chan); } /************************************************************** * ar9300_channel_change * Assumes caller wants to change channel, and not reset. */ static inline HAL_BOOL ar9300_channel_change(struct ath_hal *ah, struct ieee80211_channel *chan, HAL_CHANNEL_INTERNAL *ichan, HAL_HT_MACMODE macmode) { u_int32_t synth_delay, qnum; struct ath_hal_9300 *ahp = AH9300(ah); /* TX must be stopped by now */ for (qnum = 0; qnum < AR_NUM_QCU; qnum++) { if (ar9300_num_tx_pending(ah, qnum)) { HALDEBUG(ah, HAL_DEBUG_QUEUE, "%s: Transmit frames pending on queue %d\n", __func__, qnum); HALASSERT(0); return AH_FALSE; } } /* * Kill last Baseband Rx Frame - Request analog bus grant */ OS_REG_WRITE(ah, AR_PHY_RFBUS_REQ, AR_PHY_RFBUS_REQ_EN); if (!ath_hal_wait(ah, AR_PHY_RFBUS_GRANT, AR_PHY_RFBUS_GRANT_EN, AR_PHY_RFBUS_GRANT_EN)) { HALDEBUG(ah, HAL_DEBUG_PHYIO, "%s: Could not kill baseband RX\n", __func__); return AH_FALSE; } /* Setup 11n MAC/Phy mode registers */ ar9300_set_11n_regs(ah, chan, macmode); /* * Change the synth */ if (!ahp->ah_rf_hal.set_channel(ah, chan)) { HALDEBUG(ah, HAL_DEBUG_CHANNEL, "%s: failed to set channel\n", __func__); return AH_FALSE; } /* * Some registers get reinitialized during ATH_INI_POST INI programming. */ ar9300_init_user_settings(ah); /* * Setup the transmit power values. * * After the public to private hal channel mapping, ichan contains the * valid regulatory power value. * ath_hal_getctl and ath_hal_getantennaallowed look up ichan from chan. 
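 *
 * (Editor's worked example for the synth-settle wait later in this
 * function: AR_PHY_RX_DELAY is in 100 ns units, so a raw value of
 * 220 waits 220 / 10 = 22 us normally, or (4 * 220) / 22 = 40 us on
 * a CCK channel with its slower baseband clock.)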
*/ if (ar9300_eeprom_set_transmit_power( ah, &ahp->ah_eeprom, chan, ath_hal_getctl(ah, chan), ath_hal_getantennaallowed(ah, chan), ath_hal_get_twice_max_regpower(AH_PRIVATE(ah), ichan, chan), AH_MIN(MAX_RATE_POWER, AH_PRIVATE(ah)->ah_powerLimit)) != HAL_OK) { HALDEBUG(ah, HAL_DEBUG_EEPROM, "%s: error init'ing transmit power\n", __func__); return AH_FALSE; } /* * Release the RFBus Grant. */ OS_REG_WRITE(ah, AR_PHY_RFBUS_REQ, 0); /* * Write spur immunity and delta slope for OFDM enabled modes (A, G, Turbo) */ if (IEEE80211_IS_CHAN_OFDM(chan) || IEEE80211_IS_CHAN_HT(chan)) { ar9300_set_delta_slope(ah, chan); } else { /* Set to Ini default */ OS_REG_WRITE(ah, AR_PHY_TIMING3, 0x9c0a9f6b); OS_REG_WRITE(ah, AR_PHY_SGI_DELTA, 0x00046384); } ar9300_spur_mitigate(ah, chan); /* * Wait for the frequency synth to settle (synth goes on via PHY_ACTIVE_EN). * Read the phy active delay register. Value is in 100ns increments. */ synth_delay = OS_REG_READ(ah, AR_PHY_RX_DELAY) & AR_PHY_RX_DELAY_DELAY; if (IEEE80211_IS_CHAN_CCK(chan)) { synth_delay = (4 * synth_delay) / 22; } else { synth_delay /= 10; } OS_DELAY(synth_delay + BASE_ACTIVATE_DELAY); /* * Do calibration. */ return AH_TRUE; } void ar9300_set_operating_mode(struct ath_hal *ah, int opmode) { u_int32_t val; val = OS_REG_READ(ah, AR_STA_ID1); val &= ~(AR_STA_ID1_STA_AP | AR_STA_ID1_ADHOC); switch (opmode) { case HAL_M_HOSTAP: OS_REG_WRITE(ah, AR_STA_ID1, val | AR_STA_ID1_STA_AP | AR_STA_ID1_KSRCH_MODE); OS_REG_CLR_BIT(ah, AR_CFG, AR_CFG_AP_ADHOC_INDICATION); break; case HAL_M_IBSS: OS_REG_WRITE(ah, AR_STA_ID1, val | AR_STA_ID1_ADHOC | AR_STA_ID1_KSRCH_MODE); OS_REG_SET_BIT(ah, AR_CFG, AR_CFG_AP_ADHOC_INDICATION); break; case HAL_M_STA: case HAL_M_MONITOR: OS_REG_WRITE(ah, AR_STA_ID1, val | AR_STA_ID1_KSRCH_MODE); break; } } /* XXX need the logic for Osprey */ void ar9300_init_pll(struct ath_hal *ah, struct ieee80211_channel *chan) { u_int32_t pll; u_int8_t clk_25mhz = AH9300(ah)->clk_25mhz; HAL_CHANNEL_INTERNAL *ichan = NULL; if (chan) ichan = ath_hal_checkchannel(ah, chan); if (AR_SREV_HORNET(ah)) { if (clk_25mhz) { /* Hornet uses PLL_CONTROL_2. Xtal is 25MHz for Hornet. * REFDIV set to 0x1. * $xtal_freq = 25; * $PLL2_div = (704/$xtal_freq); # 176 * 4 = 704. * MAC and BB run at 176 MHz. 
* $PLL2_divint = int($PLL2_div); * $PLL2_divfrac = $PLL2_div - $PLL2_divint; * $PLL2_divfrac = int($PLL2_divfrac * 0x4000); # 2^14 * $PLL2_Val = ($PLL2_divint & 0x3f) << 19 | (0x1) << 14 | * $PLL2_divfrac & 0x3fff; * Therefore, $PLL2_Val = 0xe04a3d */ #define DPLL2_KD_VAL 0x1D #define DPLL2_KI_VAL 0x06 #define DPLL3_PHASE_SHIFT_VAL 0x1 /* Rewrite DDR PLL2 and PLL3 */ /* program DDR PLL ki and kd value, ki=0x6, kd=0x1d */ OS_REG_WRITE(ah, AR_HORNET_CH0_DDR_DPLL2, 0x18e82f01); /* program DDR PLL phase_shift to 0x1 */ OS_REG_RMW_FIELD(ah, AR_HORNET_CH0_DDR_DPLL3, AR_PHY_BB_DPLL3_PHASE_SHIFT, DPLL3_PHASE_SHIFT_VAL); OS_REG_WRITE(ah, AR_RTC_PLL_CONTROL, 0x1142c); OS_DELAY(1000); /* program refdiv, nint, frac to RTC register */ OS_REG_WRITE(ah, AR_RTC_PLL_CONTROL2, 0xe04a3d); /* program BB PLL ki and kd value, ki=0x6, kd=0x1d */ OS_REG_RMW_FIELD(ah, AR_PHY_BB_DPLL2, AR_PHY_BB_DPLL2_KD, DPLL2_KD_VAL); OS_REG_RMW_FIELD(ah, AR_PHY_BB_DPLL2, AR_PHY_BB_DPLL2_KI, DPLL2_KI_VAL); /* program BB PLL phase_shift to 0x1 */ OS_REG_RMW_FIELD(ah, AR_PHY_BB_DPLL3, AR_PHY_BB_DPLL3_PHASE_SHIFT, DPLL3_PHASE_SHIFT_VAL); } else { /* 40MHz */ #undef DPLL2_KD_VAL #undef DPLL2_KI_VAL #define DPLL2_KD_VAL 0x3D #define DPLL2_KI_VAL 0x06 /* Rewrite DDR PLL2 and PLL3 */ /* program DDR PLL ki and kd value, ki=0x6, kd=0x3d */ OS_REG_WRITE(ah, AR_HORNET_CH0_DDR_DPLL2, 0x19e82f01); /* program DDR PLL phase_shift to 0x1 */ OS_REG_RMW_FIELD(ah, AR_HORNET_CH0_DDR_DPLL3, AR_PHY_BB_DPLL3_PHASE_SHIFT, DPLL3_PHASE_SHIFT_VAL); OS_REG_WRITE(ah, AR_RTC_PLL_CONTROL, 0x1142c); OS_DELAY(1000); /* program refdiv, nint, frac to RTC register */ OS_REG_WRITE(ah, AR_RTC_PLL_CONTROL2, 0x886666); /* program BB PLL ki and kd value, ki=0x6, kd=0x3d */ OS_REG_RMW_FIELD(ah, AR_PHY_BB_DPLL2, AR_PHY_BB_DPLL2_KD, DPLL2_KD_VAL); OS_REG_RMW_FIELD(ah, AR_PHY_BB_DPLL2, AR_PHY_BB_DPLL2_KI, DPLL2_KI_VAL); /* program BB PLL phase_shift to 0x1 */ OS_REG_RMW_FIELD(ah, AR_PHY_BB_DPLL3, AR_PHY_BB_DPLL3_PHASE_SHIFT, DPLL3_PHASE_SHIFT_VAL); } OS_REG_WRITE(ah, AR_RTC_PLL_CONTROL, 0x142c); OS_DELAY(1000); } else if (AR_SREV_POSEIDON(ah) || AR_SREV_APHRODITE(ah)) { OS_REG_RMW_FIELD(ah, AR_PHY_BB_DPLL2, AR_PHY_BB_DPLL2_PLL_PWD, 0x1); /* program BB PLL ki and kd value, ki=0x4, kd=0x40 */ OS_REG_RMW_FIELD(ah, AR_PHY_BB_DPLL2, AR_PHY_BB_DPLL2_KD, 0x40); OS_REG_RMW_FIELD(ah, AR_PHY_BB_DPLL2, AR_PHY_BB_DPLL2_KI, 0x4); OS_REG_RMW_FIELD(ah, AR_PHY_BB_DPLL1, AR_PHY_BB_DPLL1_REFDIV, 0x5); OS_REG_RMW_FIELD(ah, AR_PHY_BB_DPLL1, AR_PHY_BB_DPLL1_NINI, 0x58); OS_REG_RMW_FIELD(ah, AR_PHY_BB_DPLL1, AR_PHY_BB_DPLL1_NFRAC, 0x0); OS_REG_RMW_FIELD(ah, AR_PHY_BB_DPLL2, AR_PHY_BB_DPLL2_OUTDIV, 0x1); OS_REG_RMW_FIELD(ah, AR_PHY_BB_DPLL2, AR_PHY_BB_DPLL2_LOCAL_PLL, 0x1); OS_REG_RMW_FIELD(ah, AR_PHY_BB_DPLL2, AR_PHY_BB_DPLL2_EN_NEGTRIG, 0x1); /* program BB PLL phase_shift to 0x6 */ OS_REG_RMW_FIELD(ah, AR_PHY_BB_DPLL3, AR_PHY_BB_DPLL3_PHASE_SHIFT, 0x6); OS_REG_RMW_FIELD(ah, AR_PHY_BB_DPLL2, AR_PHY_BB_DPLL2_PLL_PWD, 0x0); OS_DELAY(1000); OS_REG_WRITE(ah, AR_RTC_PLL_CONTROL, 0x142c); OS_DELAY(1000); } else if (AR_SREV_WASP(ah) || AR_SREV_SCORPION(ah) || AR_SREV_HONEYBEE(ah)) { #define SRIF_PLL 1 u_int32_t regdata, pll2_divint, pll2_divfrac; #ifndef SRIF_PLL u_int32_t pll2_clkmode; #endif #ifdef SRIF_PLL u_int32_t refdiv; #endif if (clk_25mhz) { #ifndef SRIF_PLL pll2_divint = 0x1c; pll2_divfrac = 0xa3d7; #else if (AR_SREV_HONEYBEE(ah)) { pll2_divint = 0x1c; pll2_divfrac = 0xa3d2; refdiv = 1; } else { pll2_divint = 0x54; pll2_divfrac = 0x1eb85; refdiv = 3; } #endif } else { #ifndef SRIF_PLL 
pll2_divint = 0x11; pll2_divfrac = 0x26666; #else if (AR_SREV_WASP(ah)) { pll2_divint = 88; pll2_divfrac = 0; refdiv = 5; } else { pll2_divint = 0x11; pll2_divfrac = 0x26666; refdiv = 1; } #endif } #ifndef SRIF_PLL pll2_clkmode = 0x3d; #endif /* PLL programming through SRIF Local Mode */ OS_REG_WRITE(ah, AR_RTC_PLL_CONTROL, 0x1142c); /* Bypass mode */ OS_DELAY(1000); do { regdata = OS_REG_READ(ah, AR_PHY_PLL_MODE); if (AR_SREV_HONEYBEE(ah)) { regdata = regdata | (0x1 << 22); } else { regdata = regdata | (0x1 << 16); } OS_REG_WRITE(ah, AR_PHY_PLL_MODE, regdata); /* PWD_PLL set to 1 */ OS_DELAY(100); /* override int, frac, refdiv */ #ifndef SRIF_PLL OS_REG_WRITE(ah, AR_PHY_PLL_CONTROL, ((1 << 27) | (pll2_divint << 18) | pll2_divfrac)); #else OS_REG_WRITE(ah, AR_PHY_PLL_CONTROL, ((refdiv << 27) | (pll2_divint << 18) | pll2_divfrac)); #endif OS_DELAY(100); regdata = OS_REG_READ(ah, AR_PHY_PLL_MODE); #ifndef SRIF_PLL regdata = (regdata & 0x80071fff) | (0x1 << 30) | (0x1 << 13) | (0x6 << 26) | (pll2_clkmode << 19); #else if (AR_SREV_WASP(ah)) { regdata = (regdata & 0x80071fff) | (0x1 << 30) | (0x1 << 13) | (0x4 << 26) | (0x18 << 19); } else if (AR_SREV_HONEYBEE(ah)) { /* * Kd=10, Ki=2, Outdiv=1, Local PLL=0, Phase Shift=4 */ regdata = (regdata & 0x01c00fff) | (0x1 << 31) | (0x2 << 29) | (0xa << 25) | (0x1 << 19) | (0x6 << 12); } else { regdata = (regdata & 0x80071fff) | (0x3 << 30) | (0x1 << 13) | (0x4 << 26) | (0x60 << 19); } #endif /* Ki, Kd, Local PLL, Outdiv */ OS_REG_WRITE(ah, AR_PHY_PLL_MODE, regdata); regdata = OS_REG_READ(ah, AR_PHY_PLL_MODE); if (AR_SREV_HONEYBEE(ah)) { regdata = (regdata & 0xffbfffff); } else { regdata = (regdata & 0xfffeffff); } OS_REG_WRITE(ah, AR_PHY_PLL_MODE, regdata); /* PWD_PLL set to 0 */ OS_DELAY(1000); if (AR_SREV_WASP(ah)) { /* clear do measure */ regdata = OS_REG_READ(ah, AR_PHY_PLL_BB_DPLL3); regdata &= ~(1 << 30); OS_REG_WRITE(ah, AR_PHY_PLL_BB_DPLL3, regdata); OS_DELAY(100); /* set do measure */ regdata = OS_REG_READ(ah, AR_PHY_PLL_BB_DPLL3); regdata |= (1 << 30); OS_REG_WRITE(ah, AR_PHY_PLL_BB_DPLL3, regdata); /* wait for measure done */ do { regdata = OS_REG_READ(ah, AR_PHY_PLL_BB_DPLL4); } while ((regdata & (1 << 3)) == 0); /* clear do measure */ regdata = OS_REG_READ(ah, AR_PHY_PLL_BB_DPLL3); regdata &= ~(1 << 30); OS_REG_WRITE(ah, AR_PHY_PLL_BB_DPLL3, regdata); /* get measure sqsum dvc */ regdata = (OS_REG_READ(ah, AR_PHY_PLL_BB_DPLL3) & 0x007FFFF8) >> 3; } else { break; } } while (regdata >= 0x40000); /* Remove from Bypass mode */ OS_REG_WRITE(ah, AR_RTC_PLL_CONTROL, 0x142c); OS_DELAY(1000); } else { pll = SM(0x5, AR_RTC_PLL_REFDIV); /* Supposedly not needed on Osprey */ #if 0 if (chan && IS_CHAN_HALF_RATE(chan)) { pll |= SM(0x1, AR_RTC_PLL_CLKSEL); } else if (chan && IS_CHAN_QUARTER_RATE(chan)) { pll |= SM(0x2, AR_RTC_PLL_CLKSEL); } #endif if (ichan && IS_CHAN_5GHZ(ichan)) { pll |= SM(0x28, AR_RTC_PLL_DIV); /* * When doing fast clock, set PLL to 0x142c */ if (IS_5GHZ_FAST_CLOCK_EN(ah, chan)) { pll = 0x142c; } } else { pll |= SM(0x2c, AR_RTC_PLL_DIV); } OS_REG_WRITE(ah, AR_RTC_PLL_CONTROL, pll); } /* TODO: * For multi-band owl, switch between bands by reiniting the PLL. */ OS_DELAY(RTC_PLL_SETTLE_DELAY); OS_REG_WRITE(ah, AR_RTC_SLEEP_CLK, AR_RTC_FORCE_DERIVED_CLK | AR_RTC_PCIE_RST_PWDN_EN); /* XXX TODO: honeybee? 
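 *
 * (Editor's note, with assumed register semantics: the constants
 * written below derive a roughly 32 kHz sleep clock from the
 * reference clock: 0x17c << 1 = 760 and 25 MHz / 760 ~= 32.9 kHz,
 * while 0x261 << 1 = 1218 and 40 MHz / 1218 ~= 32.8 kHz.)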
*/ if (AR_SREV_WASP(ah) || AR_SREV_SCORPION(ah)) { if (clk_25mhz) { OS_REG_WRITE(ah, AR_RTC_DERIVED_RTC_CLK, (0x17c << 1)); /* 32KHz sleep clk */ OS_REG_WRITE(ah, AR_SLP32_MODE, 0x0010f3d7); OS_REG_WRITE(ah, AR_SLP32_INC, 0x0001e7ae); } else { OS_REG_WRITE(ah, AR_RTC_DERIVED_RTC_CLK, (0x261 << 1)); /* 32KHz sleep clk */ OS_REG_WRITE(ah, AR_SLP32_MODE, 0x0010f400); OS_REG_WRITE(ah, AR_SLP32_INC, 0x0001e800); } OS_DELAY(100); } } static inline HAL_BOOL ar9300_set_reset(struct ath_hal *ah, int type) { u_int32_t rst_flags; u_int32_t tmp_reg; struct ath_hal_9300 *ahp = AH9300(ah); HALASSERT(type == HAL_RESET_WARM || type == HAL_RESET_COLD); /* * RTC Force wake should be done before resetting the MAC. * MDK/ART does it that way. */ OS_REG_WRITE(ah, AR_HOSTIF_REG(ah, AR_WA), AH9300(ah)->ah_wa_reg_val); OS_DELAY(10); /* delay to allow AR_WA reg write to kick in */ OS_REG_WRITE(ah, AR_RTC_FORCE_WAKE, AR_RTC_FORCE_WAKE_EN | AR_RTC_FORCE_WAKE_ON_INT); /* Reset AHB */ /* Bug26871 */ tmp_reg = OS_REG_READ(ah, AR_HOSTIF_REG(ah, AR_INTR_SYNC_CAUSE)); if (AR_SREV_WASP(ah)) { if (tmp_reg & (AR9340_INTR_SYNC_LOCAL_TIMEOUT)) { OS_REG_WRITE(ah, AR_HOSTIF_REG(ah, AR_INTR_SYNC_ENABLE), 0); OS_REG_WRITE(ah, AR_HOSTIF_REG(ah, AR_RC), AR_RC_HOSTIF); } } else { if (tmp_reg & (AR9300_INTR_SYNC_LOCAL_TIMEOUT | AR9300_INTR_SYNC_RADM_CPL_TIMEOUT)) { OS_REG_WRITE(ah, AR_HOSTIF_REG(ah, AR_INTR_SYNC_ENABLE), 0); OS_REG_WRITE(ah, AR_HOSTIF_REG(ah, AR_RC), AR_RC_HOSTIF); } else { /* NO AR_RC_AHB in Osprey */ /*OS_REG_WRITE(ah, AR_HOSTIF_REG(ah, AR_RC), AR_RC_AHB);*/ } } rst_flags = AR_RTC_RC_MAC_WARM; if (type == HAL_RESET_COLD) { rst_flags |= AR_RTC_RC_MAC_COLD; } #ifdef AH_SUPPORT_HORNET /* Hornet WAR: trigger SoC to reset WMAC if ... * (1) doing cold reset. Ref: EV 69254 * (2) beacon pending. 
Ref: EV 70983 */ if (AR_SREV_HORNET(ah) && (ar9300_num_tx_pending( ah, AH_PRIVATE(ah)->ah_caps.halTotalQueues - 1) != 0 || type == HAL_RESET_COLD)) { u_int32_t time_out; #define AR_SOC_RST_RESET 0xB806001C #define AR_SOC_BOOT_STRAP 0xB80600AC #define AR_SOC_WLAN_RST 0x00000800 /* WLAN reset */ #define REG_WRITE(_reg, _val) *((volatile u_int32_t *)(_reg)) = (_val); #define REG_READ(_reg) *((volatile u_int32_t *)(_reg)) HALDEBUG(ah, HAL_DEBUG_RESET, "%s: Hornet SoC reset WMAC.\n", __func__); REG_WRITE(AR_SOC_RST_RESET, REG_READ(AR_SOC_RST_RESET) | AR_SOC_WLAN_RST); REG_WRITE(AR_SOC_RST_RESET, REG_READ(AR_SOC_RST_RESET) & (~AR_SOC_WLAN_RST)); time_out = 0; while (1) { tmp_reg = REG_READ(AR_SOC_BOOT_STRAP); if ((tmp_reg & 0x10) == 0) { break; } if (time_out > 20) { break; } OS_DELAY(10000); time_out++; } OS_REG_WRITE(ah, AR_RTC_RESET, 1); #undef REG_READ #undef REG_WRITE #undef AR_SOC_WLAN_RST #undef AR_SOC_RST_RESET #undef AR_SOC_BOOT_STRAP } #endif /* AH_SUPPORT_HORNET */ #ifdef AH_SUPPORT_SCORPION if (AR_SREV_SCORPION(ah)) { #define DDR_CTL_CONFIG_ADDRESS 0xb8000000 #define DDR_CTL_CONFIG_OFFSET 0x0108 #define DDR_CTL_CONFIG_CLIENT_ACTIVITY_MSB 29 #define DDR_CTL_CONFIG_CLIENT_ACTIVITY_LSB 21 #define DDR_CTL_CONFIG_CLIENT_ACTIVITY_MASK 0x3fe00000 #define DDR_CTL_CONFIG_CLIENT_ACTIVITY_GET(x) (((x) & DDR_CTL_CONFIG_CLIENT_ACTIVITY_MASK) >> DDR_CTL_CONFIG_CLIENT_ACTIVITY_LSB) #define DDR_CTL_CONFIG_CLIENT_ACTIVITY_SET(x) (((x) << DDR_CTL_CONFIG_CLIENT_ACTIVITY_LSB) & DDR_CTL_CONFIG_CLIENT_ACTIVITY_MASK) #define MAC_DMA_CFG_ADDRESS 0xb8100000 #define MAC_DMA_CFG_OFFSET 0x0014 #define MAC_DMA_CFG_HALT_REQ_MSB 11 #define MAC_DMA_CFG_HALT_REQ_LSB 11 #define MAC_DMA_CFG_HALT_REQ_MASK 0x00000800 #define MAC_DMA_CFG_HALT_REQ_GET(x) (((x) & MAC_DMA_CFG_HALT_REQ_MASK) >> MAC_DMA_CFG_HALT_REQ_LSB) #define MAC_DMA_CFG_HALT_REQ_SET(x) (((x) << MAC_DMA_CFG_HALT_REQ_LSB) & MAC_DMA_CFG_HALT_REQ_MASK) #define MAC_DMA_CFG_HALT_ACK_MSB 12 #define MAC_DMA_CFG_HALT_ACK_LSB 12 #define MAC_DMA_CFG_HALT_ACK_MASK 0x00001000 #define MAC_DMA_CFG_HALT_ACK_GET(x) (((x) & MAC_DMA_CFG_HALT_ACK_MASK) >> MAC_DMA_CFG_HALT_ACK_LSB) #define MAC_DMA_CFG_HALT_ACK_SET(x) (((x) << MAC_DMA_CFG_HALT_ACK_LSB) & MAC_DMA_CFG_HALT_ACK_MASK) #define RST_RESET 0xB806001c #define RTC_RESET (1<<27) #define REG_READ(_reg) *((volatile u_int32_t *)(_reg)) #define REG_WRITE(_reg, _val) *((volatile u_int32_t *)(_reg)) = (_val); #define DDR_REG_READ(_ah, _reg) \ *((volatile u_int32_t *)( DDR_CTL_CONFIG_ADDRESS + (_reg))) #define DDR_REG_WRITE(_ah, _reg, _val) \ *((volatile u_int32_t *)(DDR_CTL_CONFIG_ADDRESS + (_reg))) = (_val) OS_REG_WRITE(ah,MAC_DMA_CFG_OFFSET, (OS_REG_READ(ah,MAC_DMA_CFG_OFFSET) & ~MAC_DMA_CFG_HALT_REQ_MASK) | MAC_DMA_CFG_HALT_REQ_SET(1)); { int count; u_int32_t data; count = 0; while (!MAC_DMA_CFG_HALT_ACK_GET(OS_REG_READ(ah, MAC_DMA_CFG_OFFSET) )) { count++; if (count > 10) { ath_hal_printf(ah, "Halt ACK timeout\n"); break; } OS_DELAY(10); } data = DDR_REG_READ(ah,DDR_CTL_CONFIG_OFFSET); HALDEBUG(ah, HAL_DEBUG_RESET, "check DDR Activity - HIGH\n"); count = 0; while (DDR_CTL_CONFIG_CLIENT_ACTIVITY_GET(data)) { // AVE_DEBUG(0,"DDR Activity - HIGH\n"); HALDEBUG(ah, HAL_DEBUG_RESET, "DDR Activity - HIGH\n"); count++; OS_DELAY(10); data = DDR_REG_READ(ah,DDR_CTL_CONFIG_OFFSET); if (count > 10) { ath_hal_printf(ah, "DDR Activity timeout\n"); break; } } } { //Force RTC reset REG_WRITE(RST_RESET, (REG_READ(RST_RESET) | RTC_RESET)); OS_DELAY(10); REG_WRITE(RST_RESET, (REG_READ(RST_RESET) & ~RTC_RESET)); OS_DELAY(10); OS_REG_WRITE(ah, 
AR_RTC_RESET, 0); OS_DELAY(10); OS_REG_WRITE(ah, AR_RTC_RESET, 1); OS_DELAY(10); HALDEBUG(ah, HAL_DEBUG_RESET, "%s: Scorpion SoC RTC reset done.\n", __func__); } #undef REG_READ #undef REG_WRITE } #endif /* AH_SUPPORT_SCORPION */ /* * Set Mac(BB,Phy) Warm Reset */ OS_REG_WRITE(ah, AR_RTC_RC, rst_flags); OS_DELAY(50); /* XXX 50 usec */ /* * Clear resets and force wakeup */ OS_REG_WRITE(ah, AR_RTC_RC, 0); if (!ath_hal_wait(ah, AR_RTC_RC, AR_RTC_RC_M, 0)) { HALDEBUG(ah, HAL_DEBUG_UNMASKABLE, "%s: RTC stuck in MAC reset\n", __FUNCTION__); HALDEBUG(ah, HAL_DEBUG_UNMASKABLE, "%s: AR_RTC_RC = 0x%x\n", __func__, OS_REG_READ(ah, AR_RTC_RC)); return AH_FALSE; } /* Clear AHB reset */ OS_REG_WRITE(ah, AR_HOSTIF_REG(ah, AR_RC), 0); ar9300_attach_hw_platform(ah); ahp->ah_chip_reset_done = 1; return AH_TRUE; } static inline HAL_BOOL ar9300_set_reset_power_on(struct ath_hal *ah) { /* Force wake */ OS_REG_WRITE(ah, AR_HOSTIF_REG(ah, AR_WA), AH9300(ah)->ah_wa_reg_val); OS_DELAY(10); /* delay to allow AR_WA reg write to kick in */ OS_REG_WRITE(ah, AR_RTC_FORCE_WAKE, AR_RTC_FORCE_WAKE_EN | AR_RTC_FORCE_WAKE_ON_INT); /* * RTC reset and clear. Some delay in between is needed * to give the chip time to settle. */ OS_REG_WRITE(ah, AR_RTC_RESET, 0); OS_DELAY(2); OS_REG_WRITE(ah, AR_RTC_RESET, 1); /* * Poll till RTC is ON */ if (!ath_hal_wait(ah, AR_RTC_STATUS, AR_RTC_STATUS_M, AR_RTC_STATUS_ON)) { HALDEBUG(ah, HAL_DEBUG_UNMASKABLE, "%s: RTC not waking up for %d\n", __FUNCTION__, 1000); return AH_FALSE; } /* * Read Revisions from Chip right after RTC is on for the first time. * This helps us detect the chip type early and initialize it accordingly. */ ar9300_read_revisions(ah); /* * Warm reset if we aren't really powering on, * just restarting the driver. */ return ar9300_set_reset(ah, HAL_RESET_WARM); } /* * Write the given reset bit mask into the reset register */ HAL_BOOL ar9300_set_reset_reg(struct ath_hal *ah, u_int32_t type) { HAL_BOOL ret = AH_FALSE; /* * Set force wake */ OS_REG_WRITE(ah, AR_HOSTIF_REG(ah, AR_WA), AH9300(ah)->ah_wa_reg_val); OS_DELAY(10); /* delay to allow AR_WA reg write to kick in */ OS_REG_WRITE(ah, AR_RTC_FORCE_WAKE, AR_RTC_FORCE_WAKE_EN | AR_RTC_FORCE_WAKE_ON_INT); switch (type) { case HAL_RESET_POWER_ON: ret = ar9300_set_reset_power_on(ah); break; case HAL_RESET_WARM: case HAL_RESET_COLD: ret = ar9300_set_reset(ah, type); break; default: break; } #if ATH_SUPPORT_MCI if (AH_PRIVATE(ah)->ah_caps.halMciSupport) { OS_REG_WRITE(ah, AR_RTC_KEEP_AWAKE, 0x2); } #endif return ret; } /* * Places the PHY and Radio chips into reset. A full reset * must be called to leave this state. The PCI/MAC/PCU are * not placed into reset as we must receive interrupt to * re-enable the hardware. 
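 */

/*
 * Editor's note (added): the reset and wakeup paths in this file
 * repeatedly poll a register until masked bits reach a target value,
 * via ath_hal_wait().  A minimal sketch of that pattern, using a
 * hypothetical helper (example_poll is not a real HAL function):
 */
#if 0	/* illustrative sketch only; not part of the driver build */
static HAL_BOOL
example_poll(struct ath_hal *ah, u_int32_t reg, u_int32_t mask,
    u_int32_t want, int tries)
{
	while (tries-- > 0) {
		if ((OS_REG_READ(ah, reg) & mask) == want)
			return AH_TRUE;		/* reached desired state */
		OS_DELAY(10);			/* wait 10 us between polls */
	}
	return AH_FALSE;			/* timed out */
}
#endif

/*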
*/ HAL_BOOL ar9300_phy_disable(struct ath_hal *ah) { if (!ar9300_set_reset_reg(ah, HAL_RESET_WARM)) { return AH_FALSE; } #ifdef ATH_SUPPORT_LED #define REG_READ(_reg) *((volatile u_int32_t *)(_reg)) #define REG_WRITE(_reg, _val) *((volatile u_int32_t *)(_reg)) = (_val); #define ATH_GPIO_OE 0xB8040000 #define ATH_GPIO_OUT 0xB8040008 /* GPIO Output Value reg. */ if (AR_SREV_WASP(ah)) { if (IS_CHAN_2GHZ((AH_PRIVATE(ah)->ah_curchan))) { REG_WRITE(ATH_GPIO_OE, (REG_READ(ATH_GPIO_OE) | (0x1 << 13))); } else { REG_WRITE(ATH_GPIO_OE, (REG_READ(ATH_GPIO_OE) | (0x1 << 12))); } } else if (AR_SREV_SCORPION(ah)) { if (IS_CHAN_2GHZ((AH_PRIVATE(ah)->ah_curchan))) { REG_WRITE(ATH_GPIO_OE, (REG_READ(ATH_GPIO_OE) | (0x1 << 13))); } else { REG_WRITE(ATH_GPIO_OE, (REG_READ(ATH_GPIO_OE) | (0x1 << 12))); } /* Turn off JMPST led */ REG_WRITE(ATH_GPIO_OUT, (REG_READ(ATH_GPIO_OUT) | (0x1 << 15))); } else if (AR_SREV_HONEYBEE(ah)) { REG_WRITE(ATH_GPIO_OE, (REG_READ(ATH_GPIO_OE) | (0x1 << 12))); } #undef REG_READ #undef REG_WRITE #endif if ( AR_SREV_OSPREY(ah) ) { OS_REG_RMW(ah, AR_HOSTIF_REG(ah, AR_GPIO_OUTPUT_MUX1), 0x0, 0x1f); } ar9300_init_pll(ah, AH_NULL); return AH_TRUE; } /* * Places all of the hardware into reset */ HAL_BOOL ar9300_disable(struct ath_hal *ah) { if (!ar9300_set_power_mode(ah, HAL_PM_AWAKE, AH_TRUE)) { return AH_FALSE; } if (!ar9300_set_reset_reg(ah, HAL_RESET_COLD)) { return AH_FALSE; } ar9300_init_pll(ah, AH_NULL); return AH_TRUE; } /* * TODO: Only write the PLL if we're changing to or from CCK mode * * WARNING: The order of the PLL and mode registers must be correct. */ static inline void ar9300_set_rf_mode(struct ath_hal *ah, struct ieee80211_channel *chan) { u_int32_t rf_mode = 0; if (chan == AH_NULL) { return; } switch (AH9300(ah)->ah_hwp) { case HAL_TRUE_CHIP: rf_mode |= (IEEE80211_IS_CHAN_B(chan) || IEEE80211_IS_CHAN_G(chan)) ? AR_PHY_MODE_DYNAMIC : AR_PHY_MODE_OFDM; break; default: HALASSERT(0); break; } /* Phy mode bits for 5GHz channels requiring Fast Clock */ if ( IS_5GHZ_FAST_CLOCK_EN(ah, chan)) { rf_mode |= (AR_PHY_MODE_DYNAMIC | AR_PHY_MODE_DYN_CCK_DISABLE); } OS_REG_WRITE(ah, AR_PHY_MODE, rf_mode); } /* * Places the hardware into reset and then pulls it out of reset */ HAL_BOOL ar9300_chip_reset(struct ath_hal *ah, struct ieee80211_channel *chan) { struct ath_hal_9300 *ahp = AH9300(ah); int type = HAL_RESET_WARM; OS_MARK(ah, AH_MARK_CHIPRESET, chan ? chan->ic_freq : 0); /* * Warm reset is optimistic. * * If the TX/RX DMA engines aren't shut down (eg, they're * wedged) then we're better off doing a full cold reset * to try and shake that condition. */ if (ahp->ah_chip_full_sleep || (ah->ah_config.ah_force_full_reset == 1) || OS_REG_READ(ah, AR_Q_TXE) || (OS_REG_READ(ah, AR_CR) & AR_CR_RXE)) { type = HAL_RESET_COLD; } if (!ar9300_set_reset_reg(ah, type)) { return AH_FALSE; } /* Bring out of sleep mode (AGAIN) */ if (!ar9300_set_power_mode(ah, HAL_PM_AWAKE, AH_TRUE)) { return AH_FALSE; } ahp->ah_chip_full_sleep = AH_FALSE; if (AR_SREV_HORNET(ah)) { ar9300_internal_regulator_apply(ah); } ar9300_init_pll(ah, chan); /* * Perform warm reset before the mode/PLL/turbo registers * are changed in order to deactivate the radio. Mode changes * with an active radio can result in corrupted shifts to the * radio device.
*/ ar9300_set_rf_mode(ah, chan); return AH_TRUE; } /* ar9300_setup_calibration * Setup HW to collect samples used for current cal */ inline static void ar9300_setup_calibration(struct ath_hal *ah, HAL_CAL_LIST *curr_cal) { /* Select calibration to run */ switch (curr_cal->cal_data->cal_type) { case IQ_MISMATCH_CAL: /* Start calibration w/ 2^(INIT_IQCAL_LOG_COUNT_MAX+1) samples */ OS_REG_RMW_FIELD(ah, AR_PHY_TIMING4, AR_PHY_TIMING4_IQCAL_LOG_COUNT_MAX, curr_cal->cal_data->cal_count_max); OS_REG_WRITE(ah, AR_PHY_CALMODE, AR_PHY_CALMODE_IQ); HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: starting IQ Mismatch Calibration\n", __func__); /* Kick-off cal */ OS_REG_SET_BIT(ah, AR_PHY_TIMING4, AR_PHY_TIMING4_DO_CAL); break; case TEMP_COMP_CAL: if (AR_SREV_HORNET(ah) || AR_SREV_POSEIDON(ah) || AR_SREV_WASP(ah) || AR_SREV_SCORPION(ah)) { OS_REG_RMW_FIELD(ah, AR_HORNET_CH0_THERM, AR_PHY_65NM_CH0_THERM_LOCAL, 1); OS_REG_RMW_FIELD(ah, AR_HORNET_CH0_THERM, AR_PHY_65NM_CH0_THERM_START, 1); } else if (AR_SREV_JUPITER(ah) || AR_SREV_APHRODITE(ah)) { OS_REG_RMW_FIELD(ah, AR_PHY_65NM_CH0_THERM_JUPITER, AR_PHY_65NM_CH0_THERM_LOCAL, 1); OS_REG_RMW_FIELD(ah, AR_PHY_65NM_CH0_THERM_JUPITER, AR_PHY_65NM_CH0_THERM_START, 1); } else { OS_REG_RMW_FIELD(ah, AR_PHY_65NM_CH0_THERM, AR_PHY_65NM_CH0_THERM_LOCAL, 1); OS_REG_RMW_FIELD(ah, AR_PHY_65NM_CH0_THERM, AR_PHY_65NM_CH0_THERM_START, 1); } HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: starting Temperature Compensation Calibration\n", __func__); break; default: HALDEBUG(ah, HAL_DEBUG_UNMASKABLE, "%s called with incorrect calibration type.\n", __func__); } } /* ar9300_reset_calibration * Initialize shared data structures and prepare a cal to be run. */ inline static void ar9300_reset_calibration(struct ath_hal *ah, HAL_CAL_LIST *curr_cal) { struct ath_hal_9300 *ahp = AH9300(ah); int i; /* Setup HW for new calibration */ ar9300_setup_calibration(ah, curr_cal); /* Change SW state to RUNNING for this calibration */ curr_cal->cal_state = CAL_RUNNING; /* Reset data structures shared between different calibrations */ for (i = 0; i < AR9300_MAX_CHAINS; i++) { ahp->ah_meas0.sign[i] = 0; ahp->ah_meas1.sign[i] = 0; ahp->ah_meas2.sign[i] = 0; ahp->ah_meas3.sign[i] = 0; } ahp->ah_cal_samples = 0; } #ifdef XXX_UNUSED_FUNCTION /* * Find out which of the RX chains are enabled */ static u_int32_t ar9300_get_rx_chain_mask(struct ath_hal *ah) { u_int32_t ret_val = OS_REG_READ(ah, AR_PHY_RX_CHAINMASK); /* The bits [2:0] indicate the rx chain mask and are to be * interpreted as follows: * 00x => Only chain 0 is enabled * 01x => Chain 1 and 0 enabled * 1xx => Chain 2,1 and 0 enabled */ return (ret_val & 0x7); } #endif static void ar9300_get_nf_hist_base(struct ath_hal *ah, HAL_CHANNEL_INTERNAL *chan, int is_scan, int16_t nf[]) { HAL_NFCAL_BASE *h_base; #ifdef ATH_NF_PER_CHAN h_base = &chan->nf_cal_hist.base; #else if (is_scan) { /* * The channel we are currently on is not the home channel, * so we shouldn't use the home channel NF buffer's values on * this channel. Instead, use the NF single value already * read for this channel. (Or, if we haven't read the NF for * this channel yet, the SW default for this chip/band will * be used.) 
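 *
 * Selection summary (as implemented below):
 *
 *   ATH_NF_PER_CHAN:         h_base = &chan->nf_cal_hist.base
 *   !ATH_NF_PER_CHAN, scan:  h_base = &chan->nf_cal_hist.base
 *   !ATH_NF_PER_CHAN, home:  h_base = &AH_PRIVATE(ah)->nf_cal_hist.base
 *
 * i.e. only the home channel accumulates the shared filtered history;
 * a scanned channel uses whatever single reading it already has.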
*/ h_base = &chan->nf_cal_hist.base; } else { /* use the home channel NF info */ h_base = &AH_PRIVATE(ah)->nf_cal_hist.base; } #endif OS_MEMCPY(nf, h_base->priv_nf, sizeof(h_base->priv_nf)); } HAL_BOOL ar9300_load_nf(struct ath_hal *ah, int16_t nf[]) { int i, j; int32_t val; /* XXX where are EXT regs defined */ const u_int32_t ar9300_cca_regs[] = { AR_PHY_CCA_0, AR_PHY_CCA_1, AR_PHY_CCA_2, AR_PHY_EXT_CCA, AR_PHY_EXT_CCA_1, AR_PHY_EXT_CCA_2, }; u_int8_t chainmask; /* * Force NF calibration for all chains, otherwise Vista station * would conduct a bad performance */ if (AR_SREV_HORNET(ah) || AR_SREV_POSEIDON(ah) || AR_SREV_APHRODITE(ah)) { chainmask = 0x9; } else if (AR_SREV_WASP(ah) || AR_SREV_JUPITER(ah) || AR_SREV_HONEYBEE(ah)) { chainmask = 0x1b; } else { chainmask = 0x3F; } /* * Write filtered NF values into max_cca_pwr register parameter * so we can load below. */ for (i = 0; i < HAL_NUM_NF_READINGS; i++) { if (chainmask & (1 << i)) { val = OS_REG_READ(ah, ar9300_cca_regs[i]); val &= 0xFFFFFE00; val |= (((u_int32_t)(nf[i]) << 1) & 0x1ff); OS_REG_WRITE(ah, ar9300_cca_regs[i], val); } } HALDEBUG(ah, HAL_DEBUG_NFCAL, "%s: load %d %d %d %d %d %d\n", __func__, nf[0], nf[1], nf[2], nf[3], nf[4], nf[5]); /* * Load software filtered NF value into baseband internal min_cca_pwr * variable. */ OS_REG_CLR_BIT(ah, AR_PHY_AGC_CONTROL, AR_PHY_AGC_CONTROL_ENABLE_NF); OS_REG_CLR_BIT(ah, AR_PHY_AGC_CONTROL, AR_PHY_AGC_CONTROL_NO_UPDATE_NF); OS_REG_SET_BIT(ah, AR_PHY_AGC_CONTROL, AR_PHY_AGC_CONTROL_NF); /* Wait for load to complete, should be fast, a few 10s of us. */ /* Changed the max delay 250us back to 10000us, since 250us often * results in NF load timeout and causes deaf condition * during stress testing 12/12/2009 */ for (j = 0; j < 10000; j++) { if ((OS_REG_READ(ah, AR_PHY_AGC_CONTROL) & AR_PHY_AGC_CONTROL_NF) == 0){ break; } OS_DELAY(10); } if (j == 10000) { /* * We timed out waiting for the noisefloor to load, probably * due to an in-progress rx. Simply return here and allow * the load plenty of time to complete before the next * calibration interval. We need to avoid trying to load -50 * (which happens below) while the previous load is still in * progress as this can cause rx deafness (see EV 66368,62830). * Instead by returning here, the baseband nf cal will * just be capped by our present noisefloor until the next * calibration timer. */ HALDEBUG(AH_NULL, HAL_DEBUG_UNMASKABLE, "%s: *** TIMEOUT while waiting for nf to load: " "AR_PHY_AGC_CONTROL=0x%x ***\n", __func__, OS_REG_READ(ah, AR_PHY_AGC_CONTROL)); return AH_FALSE; } /* * Restore max_cca_power register parameter again so that we're not capped * by the median we just loaded. This will be initial (and max) value * of next noise floor calibration the baseband does. */ for (i = 0; i < HAL_NUM_NF_READINGS; i++) { if (chainmask & (1 << i)) { val = OS_REG_READ(ah, ar9300_cca_regs[i]); val &= 0xFFFFFE00; val |= (((u_int32_t)(-50) << 1) & 0x1ff); OS_REG_WRITE(ah, ar9300_cca_regs[i], val); } } return AH_TRUE; } /* ar9300_per_calibration * Generic calibration routine. * Recalibrate the lower PHY chips to account for temperature/environment * changes. */ inline static void ar9300_per_calibration(struct ath_hal *ah, HAL_CHANNEL_INTERNAL *ichan, u_int8_t rxchainmask, HAL_CAL_LIST *curr_cal, HAL_BOOL *is_cal_done) { struct ath_hal_9300 *ahp = AH9300(ah); /* Cal is assumed not done until explicitly set below */ *is_cal_done = AH_FALSE; /* Calibration in progress. */ if (curr_cal->cal_state == CAL_RUNNING) { /* Check to see if it has finished. 
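 *
 * State machine implemented by this routine (sketch):
 *
 *   CAL_WAITING  --ar9300_reset_calibration-->           CAL_RUNNING
 *   CAL_RUNNING  --samples collected, post-processed-->  CAL_DONE
 *
 * AR_PHY_TIMING4_DO_CAL self-clears when the hardware finishes one
 * measurement, which is what the register test below keys off.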
*/ if (!(OS_REG_READ(ah, AR_PHY_TIMING4) & AR_PHY_TIMING4_DO_CAL)) { int i, num_chains = 0; for (i = 0; i < AR9300_MAX_CHAINS; i++) { if (rxchainmask & (1 << i)) { num_chains++; } } /* * Accumulate cal measures for active chains */ curr_cal->cal_data->cal_collect(ah, num_chains); ahp->ah_cal_samples++; if (ahp->ah_cal_samples >= curr_cal->cal_data->cal_num_samples) { /* * Process accumulated data */ curr_cal->cal_data->cal_post_proc(ah, num_chains); /* Calibration has finished. */ ichan->calValid |= curr_cal->cal_data->cal_type; curr_cal->cal_state = CAL_DONE; *is_cal_done = AH_TRUE; } else { /* Set-up collection of another sub-sample until we * get desired number */ ar9300_setup_calibration(ah, curr_cal); } } } else if (!(ichan->calValid & curr_cal->cal_data->cal_type)) { /* If current cal is marked invalid in channel, kick it off */ ar9300_reset_calibration(ah, curr_cal); } } static void ar9300_start_nf_cal(struct ath_hal *ah) { struct ath_hal_9300 *ahp = AH9300(ah); OS_REG_SET_BIT(ah, AR_PHY_AGC_CONTROL, AR_PHY_AGC_CONTROL_ENABLE_NF); OS_REG_SET_BIT(ah, AR_PHY_AGC_CONTROL, AR_PHY_AGC_CONTROL_NO_UPDATE_NF); OS_REG_SET_BIT(ah, AR_PHY_AGC_CONTROL, AR_PHY_AGC_CONTROL_NF); AH9300(ah)->nf_tsf32 = ar9300_get_tsf32(ah); /* * We are reading the NF values before we start the NF operation, because * of that we are getting very high values like -45. * This triggers the CW_INT detected and EACS module triggers the channel change * chip_reset_done value is used to fix this issue. * chip_reset_flag is set during the RTC reset. * chip_reset_flag is cleared during the starting NF operation. * if flag is set we will clear the flag and will not read the NF values. */ ahp->ah_chip_reset_done = 0; } /* ar9300_calibration * Wrapper for a more generic Calibration routine. Primarily to abstract to * upper layers whether there is 1 or more calibrations to be run. */ HAL_BOOL ar9300_calibration(struct ath_hal *ah, struct ieee80211_channel *chan, u_int8_t rxchainmask, HAL_BOOL do_nf_cal, HAL_BOOL *is_cal_done, int is_scan, u_int32_t *sched_cals) { struct ath_hal_9300 *ahp = AH9300(ah); HAL_CAL_LIST *curr_cal = ahp->ah_cal_list_curr; HAL_CHANNEL_INTERNAL *ichan = ath_hal_checkchannel(ah, chan); int16_t nf_buf[HAL_NUM_NF_READINGS]; *is_cal_done = AH_TRUE; /* XXX: For initial wasp bringup - disable periodic calibration */ /* Invalid channel check */ if (ichan == AH_NULL) { HALDEBUG(ah, HAL_DEBUG_CHANNEL, "%s: invalid channel %u/0x%x; no mapping\n", __func__, chan->ic_freq, chan->ic_flags); return AH_FALSE; } HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: Entering, Doing NF Cal = %d\n", __func__, do_nf_cal); HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: Chain 0 Rx IQ Cal Correction 0x%08x\n", __func__, OS_REG_READ(ah, AR_PHY_RX_IQCAL_CORR_B0)); if (!AR_SREV_HORNET(ah) && !AR_SREV_POSEIDON(ah) && !AR_SREV_APHRODITE(ah)) { HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: Chain 1 Rx IQ Cal Correction 0x%08x\n", __func__, OS_REG_READ(ah, AR_PHY_RX_IQCAL_CORR_B1)); if (!AR_SREV_WASP(ah) && !AR_SREV_JUPITER(ah) && !AR_SREV_HONEYBEE(ah)) { HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: Chain 2 Rx IQ Cal Correction 0x%08x\n", __func__, OS_REG_READ(ah, AR_PHY_RX_IQCAL_CORR_B2)); } } OS_MARK(ah, AH_MARK_PERCAL, chan->ic_freq); /* For given calibration: * 1. Call generic cal routine * 2. When this cal is done (is_cal_done) if we have more cals waiting * (eg after reset), mask this to upper layers by not propagating * is_cal_done if it is set to TRUE. * Instead, change is_cal_done to FALSE and setup the waiting cal(s) * to be run. 
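 *
 * In effect (sketch of the logic below):
 *
 *   run curr_cal via ar9300_per_calibration();
 *   if (it completed) {
 *       advance ah_cal_list_curr to cal_next;
 *       if (the next cal is CAL_WAITING)
 *           report *is_cal_done = AH_FALSE and reset it to run;
 *   }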
*/ if (curr_cal && (curr_cal->cal_data->cal_type & *sched_cals) && (curr_cal->cal_state == CAL_RUNNING || curr_cal->cal_state == CAL_WAITING)) { ar9300_per_calibration(ah, ichan, rxchainmask, curr_cal, is_cal_done); if (*is_cal_done == AH_TRUE) { ahp->ah_cal_list_curr = curr_cal = curr_cal->cal_next; if (curr_cal && curr_cal->cal_state == CAL_WAITING) { *is_cal_done = AH_FALSE; ar9300_reset_calibration(ah, curr_cal); } else { *sched_cals &= ~IQ_MISMATCH_CAL; } } } /* Do NF cal only at longer intervals */ if (do_nf_cal) { int nf_done; /* Get the value from the previous NF cal and update history buffer */ nf_done = ar9300_store_new_nf(ah, chan, is_scan); #if 0 if (ichan->channel_flags & CHANNEL_CW_INT) { chan->channel_flags |= CHANNEL_CW_INT; } #endif chan->ic_state &= ~IEEE80211_CHANSTATE_CWINT; if (nf_done) { /* * Load the NF from history buffer of the current channel. * NF is slow time-variant, so it is OK to use a historical value. */ ar9300_get_nf_hist_base(ah, ichan, is_scan, nf_buf); ar9300_load_nf(ah, nf_buf); /* start NF calibration, without updating BB NF register*/ ar9300_start_nf_cal(ah); } } return AH_TRUE; } /* ar9300_iq_cal_collect * Collect data from HW to later perform IQ Mismatch Calibration */ void ar9300_iq_cal_collect(struct ath_hal *ah, u_int8_t num_chains) { struct ath_hal_9300 *ahp = AH9300(ah); int i; /* * Accumulate IQ cal measures for active chains */ for (i = 0; i < num_chains; i++) { ahp->ah_total_power_meas_i[i] = OS_REG_READ(ah, AR_PHY_CAL_MEAS_0(i)); ahp->ah_total_power_meas_q[i] = OS_REG_READ(ah, AR_PHY_CAL_MEAS_1(i)); ahp->ah_total_iq_corr_meas[i] = (int32_t) OS_REG_READ(ah, AR_PHY_CAL_MEAS_2(i)); HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%d: Chn %d " "Reg Offset(0x%04x)pmi=0x%08x; " "Reg Offset(0x%04x)pmq=0x%08x; " "Reg Offset (0x%04x)iqcm=0x%08x;\n", ahp->ah_cal_samples, i, (unsigned) AR_PHY_CAL_MEAS_0(i), ahp->ah_total_power_meas_i[i], (unsigned) AR_PHY_CAL_MEAS_1(i), ahp->ah_total_power_meas_q[i], (unsigned) AR_PHY_CAL_MEAS_2(i), ahp->ah_total_iq_corr_meas[i]); } } /* ar9300_iq_calibration * Use HW data to perform IQ Mismatch Calibration */ void ar9300_iq_calibration(struct ath_hal *ah, u_int8_t num_chains) { struct ath_hal_9300 *ahp = AH9300(ah); u_int32_t power_meas_q, power_meas_i, iq_corr_meas; u_int32_t q_coff_denom, i_coff_denom; int32_t q_coff, i_coff; int iq_corr_neg, i; HAL_CHANNEL_INTERNAL *ichan; static const u_int32_t offset_array[3] = { AR_PHY_RX_IQCAL_CORR_B0, AR_PHY_RX_IQCAL_CORR_B1, AR_PHY_RX_IQCAL_CORR_B2, }; ichan = ath_hal_checkchannel(ah, AH_PRIVATE(ah)->ah_curchan); for (i = 0; i < num_chains; i++) { power_meas_i = ahp->ah_total_power_meas_i[i]; power_meas_q = ahp->ah_total_power_meas_q[i]; iq_corr_meas = ahp->ah_total_iq_corr_meas[i]; HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "Starting IQ Cal and Correction for Chain %d\n", i); HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "Orignal: Chn %diq_corr_meas = 0x%08x\n", i, ahp->ah_total_iq_corr_meas[i]); iq_corr_neg = 0; /* iq_corr_meas is always negative. 
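 *
 * The register value is 32-bit two's complement, so the magnitude is
 * recovered as (0xffffffff - x) + 1, i.e. 2^32 - x. For example,
 * x = 0xffffff00 yields magnitude 0x100 (256) with iq_corr_neg = 1.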
*/ if (iq_corr_meas > 0x80000000) { iq_corr_meas = (0xffffffff - iq_corr_meas) + 1; iq_corr_neg = 1; } HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "Chn %d pwr_meas_i = 0x%08x\n", i, power_meas_i); HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "Chn %d pwr_meas_q = 0x%08x\n", i, power_meas_q); HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "iq_corr_neg is 0x%08x\n", iq_corr_neg); i_coff_denom = (power_meas_i / 2 + power_meas_q / 2) / 256; q_coff_denom = power_meas_q / 64; /* Protect against divide-by-0 */ if ((i_coff_denom != 0) && (q_coff_denom != 0)) { /* IQ corr_meas is already negated if iqcorr_neg == 1 */ i_coff = iq_corr_meas / i_coff_denom; q_coff = power_meas_i / q_coff_denom - 64; HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "Chn %d i_coff = 0x%08x\n", i, i_coff); HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "Chn %d q_coff = 0x%08x\n", i, q_coff); /* Force bounds on i_coff */ if (i_coff >= 63) { i_coff = 63; } else if (i_coff <= -63) { i_coff = -63; } /* Negate i_coff if iq_corr_neg == 0 */ if (iq_corr_neg == 0x0) { i_coff = -i_coff; } /* Force bounds on q_coff */ if (q_coff >= 63) { q_coff = 63; } else if (q_coff <= -63) { q_coff = -63; } i_coff = i_coff & 0x7f; q_coff = q_coff & 0x7f; HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "Chn %d : i_coff = 0x%x q_coff = 0x%x\n", i, i_coff, q_coff); HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "Register offset (0x%04x) before update = 0x%x\n", offset_array[i], OS_REG_READ(ah, offset_array[i])); OS_REG_RMW_FIELD(ah, offset_array[i], AR_PHY_RX_IQCAL_CORR_IQCORR_Q_I_COFF, i_coff); OS_REG_RMW_FIELD(ah, offset_array[i], AR_PHY_RX_IQCAL_CORR_IQCORR_Q_Q_COFF, q_coff); /* store the RX cal results */ if (ichan != NULL) { ahp->ah_rx_cal_corr[i] = OS_REG_READ(ah, offset_array[i]) & 0x7fff; ahp->ah_rx_cal_complete = AH_TRUE; ahp->ah_rx_cal_chan = ichan->channel; // ahp->ah_rx_cal_chan_flag = ichan->channel_flags &~ CHANNEL_PASSIVE; ahp->ah_rx_cal_chan_flag = 0; /* XXX */ } else { /* XXX? Is this what I should do? */ ahp->ah_rx_cal_complete = AH_FALSE; } HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "Register offset (0x%04x) QI COFF (bitfields 0x%08x) " "after update = 0x%x\n", offset_array[i], AR_PHY_RX_IQCAL_CORR_IQCORR_Q_I_COFF, OS_REG_READ(ah, offset_array[i])); HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "Register offset (0x%04x) QQ COFF (bitfields 0x%08x) " "after update = 0x%x\n", offset_array[i], AR_PHY_RX_IQCAL_CORR_IQCORR_Q_Q_COFF, OS_REG_READ(ah, offset_array[i])); HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "IQ Cal and Correction done for Chain %d\n", i); } } OS_REG_SET_BIT(ah, AR_PHY_RX_IQCAL_CORR_B0, AR_PHY_RX_IQCAL_CORR_IQCORR_ENABLE); HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "IQ Cal and Correction (offset 0x%04x) enabled " "(bit position 0x%08x). New Value 0x%08x\n", (unsigned) (AR_PHY_RX_IQCAL_CORR_B0), AR_PHY_RX_IQCAL_CORR_IQCORR_ENABLE, OS_REG_READ(ah, AR_PHY_RX_IQCAL_CORR_B0)); } /* * When coming back from offchan, we do not perform RX IQ Cal. * But the chip reset will clear all previous results * We store the previous results and restore here. 
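 *
 * Pairing (sketch): ar9300_iq_calibration() latches the low 15 bits of
 * each AR_PHY_RX_IQCAL_CORR_Bx register into ahp->ah_rx_cal_corr[];
 * this routine extracts the I/Q coefficient fields from those saved
 * words, writes them back after the chip reset, and re-enables IQ
 * correction if anything was restored.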
 */
static void
ar9300_rx_iq_cal_restore(struct ath_hal *ah)
{
    struct ath_hal_9300 *ahp = AH9300(ah);
    u_int32_t i_coff, q_coff;
    HAL_BOOL is_restore = AH_FALSE;
    int i;
    static const u_int32_t offset_array[3] = {
        AR_PHY_RX_IQCAL_CORR_B0,
        AR_PHY_RX_IQCAL_CORR_B1,
        AR_PHY_RX_IQCAL_CORR_B2,
    };

    for (i = 0; i < AR9300_MAX_CHAINS; i++) {
        if (ahp->ah_rx_cal_corr[i]) {
            i_coff = (ahp->ah_rx_cal_corr[i] &
                AR_PHY_RX_IQCAL_CORR_IQCORR_Q_I_COFF) >>
                AR_PHY_RX_IQCAL_CORR_IQCORR_Q_I_COFF_S;
            q_coff = (ahp->ah_rx_cal_corr[i] &
                AR_PHY_RX_IQCAL_CORR_IQCORR_Q_Q_COFF) >>
                AR_PHY_RX_IQCAL_CORR_IQCORR_Q_Q_COFF_S;
            OS_REG_RMW_FIELD(ah, offset_array[i],
                AR_PHY_RX_IQCAL_CORR_IQCORR_Q_I_COFF, i_coff);
            OS_REG_RMW_FIELD(ah, offset_array[i],
                AR_PHY_RX_IQCAL_CORR_IQCORR_Q_Q_COFF, q_coff);
            is_restore = AH_TRUE;
        }
    }

    if (is_restore)
        OS_REG_SET_BIT(ah, AR_PHY_RX_IQCAL_CORR_B0,
            AR_PHY_RX_IQCAL_CORR_IQCORR_ENABLE);

    HALDEBUG(ah, HAL_DEBUG_CALIBRATE,
        "%s: IQ Cal and Correction (offset 0x%04x) enabled "
        "(bit position 0x%08x). New Value 0x%08x\n",
        __func__, (unsigned) (AR_PHY_RX_IQCAL_CORR_B0),
        AR_PHY_RX_IQCAL_CORR_IQCORR_ENABLE,
        OS_REG_READ(ah, AR_PHY_RX_IQCAL_CORR_B0));
}

/*
 * Set a limit on the overall output power. Used for dynamic
 * transmit power control and the like.
 *
 * NB: limit is in units of 0.5 dBm.
 */
HAL_BOOL
ar9300_set_tx_power_limit(struct ath_hal *ah, u_int32_t limit,
    u_int16_t extra_txpow, u_int16_t tpc_in_db)
{
    struct ath_hal_9300 *ahp = AH9300(ah);
    struct ath_hal_private *ahpriv = AH_PRIVATE(ah);
    const struct ieee80211_channel *chan = ahpriv->ah_curchan;
    HAL_CHANNEL_INTERNAL *ichan;

    if (chan == NULL) {
        return AH_FALSE;
    }
    ichan = ath_hal_checkchannel(ah, chan);

    ahpriv->ah_powerLimit = AH_MIN(limit, MAX_RATE_POWER);
    ahpriv->ah_extraTxPow = extra_txpow;

    if (ar9300_eeprom_set_transmit_power(ah, &ahp->ah_eeprom, chan,
        ath_hal_getctl(ah, chan), ath_hal_getantennaallowed(ah, chan),
        ath_hal_get_twice_max_regpower(ahpriv, ichan, chan),
        AH_MIN(MAX_RATE_POWER, ahpriv->ah_powerLimit)) != HAL_OK)
    {
        return AH_FALSE;
    }
    return AH_TRUE;
}

/*
 * Exported call to check for a recent gain reading and return
 * the current state of the thermal calibration gain engine.
 */
HAL_RFGAIN
ar9300_get_rfgain(struct ath_hal *ah)
{
    return HAL_RFGAIN_INACTIVE;
}

#define HAL_GREEN_AP_RX_MASK 0x1

static inline void
ar9300_init_chain_masks(struct ath_hal *ah, int rx_chainmask, int tx_chainmask)
{
    if (AH9300(ah)->green_ap_ps_on) {
        rx_chainmask = HAL_GREEN_AP_RX_MASK;
    }
    if (rx_chainmask == 0x5) {
        OS_REG_SET_BIT(ah, AR_PHY_ANALOG_SWAP, AR_PHY_SWAP_ALT_CHAIN);
    }
    OS_REG_WRITE(ah, AR_PHY_RX_CHAINMASK, rx_chainmask);
    OS_REG_WRITE(ah, AR_PHY_CAL_CHAINMASK, rx_chainmask);

    /*
     * Adaptive Power Management:
     * Some 3-stream chips exceed the PCIe power requirements.
     * This workaround reduces power consumption by using 2 tx chains
     * for 1 and 2 stream rates (5 GHz only).
     *
     * Set the self gen mask to 2 tx chains when APM is enabled.
     */
    if (AH_PRIVATE(ah)->ah_caps.halApmEnable && (tx_chainmask == 0x7)) {
        OS_REG_WRITE(ah, AR_SELFGEN_MASK, 0x3);
    } else {
        OS_REG_WRITE(ah, AR_SELFGEN_MASK, tx_chainmask);
    }

    if (tx_chainmask == 0x5) {
        OS_REG_SET_BIT(ah, AR_PHY_ANALOG_SWAP, AR_PHY_SWAP_ALT_CHAIN);
    }
}

/*
 * Override INI values with chip specific configuration.
 */
static inline void
ar9300_override_ini(struct ath_hal *ah, struct ieee80211_channel *chan)
{
    u_int32_t val;
    HAL_CAPABILITIES *p_cap = &AH_PRIVATE(ah)->ah_caps;

    /*
     * Set the RX_ABORT and RX_DIS and clear it only after
     * RXE is set for MAC. This prevents frames with
     * corrupted descriptor status.
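     *
     * Ordering sketch (illustrative): receive stays held off between
     * this point and the RX start path, which is expected to write
     * AR_CR_RXE first and only then clear AR_DIAG_RX_DIS and
     * AR_DIAG_RX_ABORT again.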
*/ OS_REG_SET_BIT(ah, AR_DIAG_SW, (AR_DIAG_RX_DIS | AR_DIAG_RX_ABORT)); /* * For Merlin and above, there is a new feature that allows Multicast * search based on both MAC Address and Key ID. * By default, this feature is enabled. * But since the driver is not using this feature, we switch it off; * otherwise multicast search based on MAC addr only will fail. */ val = OS_REG_READ(ah, AR_PCU_MISC_MODE2) & (~AR_ADHOC_MCAST_KEYID_ENABLE); OS_REG_WRITE(ah, AR_PCU_MISC_MODE2, val | AR_BUG_58603_FIX_ENABLE | AR_AGG_WEP_ENABLE); /* Osprey revision specific configuration */ /* Osprey 2.0+ - if SW RAC support is disabled, must also disable * the Osprey 2.0 hardware RAC fix. */ if (p_cap->halIsrRacSupport == AH_FALSE) { OS_REG_CLR_BIT(ah, AR_CFG, AR_CFG_MISSING_TX_INTR_FIX_ENABLE); } /* try to enable old pal if it is needed for h/w green tx */ ar9300_hwgreentx_set_pal_spare(ah, 1); } static inline void ar9300_prog_ini(struct ath_hal *ah, struct ar9300_ini_array *ini_arr, int column) { int i, reg_writes = 0; /* New INI format: Array may be undefined (pre, core, post arrays) */ if (ini_arr->ia_array == NULL) { return; } /* * New INI format: Pre, core, and post arrays for a given subsystem may be * modal (> 2 columns) or non-modal (2 columns). * Determine if the array is non-modal and force the column to 1. */ if (column >= ini_arr->ia_columns) { column = 1; } for (i = 0; i < ini_arr->ia_rows; i++) { u_int32_t reg = INI_RA(ini_arr, i, 0); u_int32_t val = INI_RA(ini_arr, i, column); /* ** Determine if this is a shift register value ** (reg >= 0x16000 && reg < 0x17000 for Osprey) , ** and insert the configured delay if so. ** -this delay is not required for Osprey (EV#71410) */ OS_REG_WRITE(ah, reg, val); WAR_6773(reg_writes); } } static inline HAL_STATUS ar9300_process_ini(struct ath_hal *ah, struct ieee80211_channel *chan, HAL_CHANNEL_INTERNAL *ichan, HAL_HT_MACMODE macmode) { int reg_writes = 0; struct ath_hal_9300 *ahp = AH9300(ah); u_int modes_index, modes_txgaintable_index = 0; int i; HAL_STATUS status; struct ath_hal_private *ahpriv = AH_PRIVATE(ah); /* Setup the indices for the next set of register array writes */ /* TODO: * If the channel marker is indicative of the current mode rather * than capability, we do not need to check the phy mode below. 
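 *
 * For reference, the modes_index values chosen below are:
 *
 *   1 = 5 GHz legacy A / HT20     2 = 5 GHz HT40
 *   3 = 2 GHz HT40                4 = 2 GHz HT20 / legacy B/G
 *   5 = 108G (turbo G)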
*/ #if 0 switch (chan->channel_flags & CHANNEL_ALL) { case CHANNEL_A: case CHANNEL_A_HT20: if (AR_SREV_SCORPION(ah)){ if (chan->channel <= 5350){ modes_txgaintable_index = 1; }else if ((chan->channel > 5350) && (chan->channel <= 5600)){ modes_txgaintable_index = 3; }else if (chan->channel > 5600){ modes_txgaintable_index = 5; } } modes_index = 1; freq_index = 1; break; case CHANNEL_A_HT40PLUS: case CHANNEL_A_HT40MINUS: if (AR_SREV_SCORPION(ah)){ if (chan->channel <= 5350){ modes_txgaintable_index = 2; }else if ((chan->channel > 5350) && (chan->channel <= 5600)){ modes_txgaintable_index = 4; }else if (chan->channel > 5600){ modes_txgaintable_index = 6; } } modes_index = 2; freq_index = 1; break; case CHANNEL_PUREG: case CHANNEL_G_HT20: case CHANNEL_B: if (AR_SREV_SCORPION(ah)){ modes_txgaintable_index = 8; }else if (AR_SREV_HONEYBEE(ah)){ modes_txgaintable_index = 1; } modes_index = 4; freq_index = 2; break; case CHANNEL_G_HT40PLUS: case CHANNEL_G_HT40MINUS: if (AR_SREV_SCORPION(ah)){ modes_txgaintable_index = 7; }else if (AR_SREV_HONEYBEE(ah)){ modes_txgaintable_index = 1; } modes_index = 3; freq_index = 2; break; case CHANNEL_108G: modes_index = 5; freq_index = 2; break; default: HALASSERT(0); return HAL_EINVAL; } #endif /* FreeBSD */ if (IS_CHAN_5GHZ(ichan)) { if (IEEE80211_IS_CHAN_HT40U(chan) || IEEE80211_IS_CHAN_HT40D(chan)) { if (AR_SREV_SCORPION(ah)){ if (ichan->channel <= 5350){ modes_txgaintable_index = 2; }else if ((ichan->channel > 5350) && (ichan->channel <= 5600)){ modes_txgaintable_index = 4; }else if (ichan->channel > 5600){ modes_txgaintable_index = 6; } } modes_index = 2; } else if (IEEE80211_IS_CHAN_A(chan) || IEEE80211_IS_CHAN_HT20(chan)) { if (AR_SREV_SCORPION(ah)){ if (ichan->channel <= 5350){ modes_txgaintable_index = 1; }else if ((ichan->channel > 5350) && (ichan->channel <= 5600)){ modes_txgaintable_index = 3; }else if (ichan->channel > 5600){ modes_txgaintable_index = 5; } } modes_index = 1; } else return HAL_EINVAL; } else if (IS_CHAN_2GHZ(ichan)) { if (IEEE80211_IS_CHAN_108G(chan)) { modes_index = 5; } else if (IEEE80211_IS_CHAN_HT40U(chan) || IEEE80211_IS_CHAN_HT40D(chan)) { if (AR_SREV_SCORPION(ah)){ modes_txgaintable_index = 7; } else if (AR_SREV_HONEYBEE(ah)){ modes_txgaintable_index = 1; } modes_index = 3; } else if (IEEE80211_IS_CHAN_HT20(chan) || IEEE80211_IS_CHAN_G(chan) || IEEE80211_IS_CHAN_B(chan) || IEEE80211_IS_CHAN_PUREG(chan)) { if (AR_SREV_SCORPION(ah)){ modes_txgaintable_index = 8; } else if (AR_SREV_HONEYBEE(ah)){ modes_txgaintable_index = 1; } modes_index = 4; } else return HAL_EINVAL; } else return HAL_EINVAL; #if 0 /* Set correct Baseband to analog shift setting to access analog chips. */ OS_REG_WRITE(ah, AR_PHY(0), 0x00000007); #endif HALDEBUG(ah, HAL_DEBUG_RESET, "ar9300_process_ini: " "Skipping OS-REG-WRITE(ah, AR-PHY(0), 0x00000007)\n"); HALDEBUG(ah, HAL_DEBUG_RESET, "ar9300_process_ini: no ADDac programming\n"); /* * Osprey 2.0+ - new INI format. * Each subsystem has a pre, core, and post array. */ for (i = 0; i < ATH_INI_NUM_SPLIT; i++) { ar9300_prog_ini(ah, &ahp->ah_ini_soc[i], modes_index); ar9300_prog_ini(ah, &ahp->ah_ini_mac[i], modes_index); ar9300_prog_ini(ah, &ahp->ah_ini_bb[i], modes_index); ar9300_prog_ini(ah, &ahp->ah_ini_radio[i], modes_index); if ((i == ATH_INI_POST) && (AR_SREV_JUPITER_20_OR_LATER(ah) || AR_SREV_APHRODITE(ah))) { ar9300_prog_ini(ah, &ahp->ah_ini_radio_post_sys2ant, modes_index); } } if (!(AR_SREV_SOC(ah))) { /* Doubler issue : Some board doesn't work well with MCS15. 
Turn off doubler after freq locking is complete*/ //ath_hal_printf(ah, "%s[%d] ==== before reg[0x%08x] = 0x%08x\n", __func__, __LINE__, AR_PHY_65NM_CH0_RXTX2, OS_REG_READ(ah, AR_PHY_65NM_CH0_RXTX2)); OS_REG_RMW(ah, AR_PHY_65NM_CH0_RXTX2, 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK_S | 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK_S, 0); /*Set synthon, synthover */ //ath_hal_printf(ah, "%s[%d] ==== after reg[0x%08x] = 0x%08x\n", __func__, __LINE__, AR_PHY_65NM_CH0_RXTX2, OS_REG_READ(ah, AR_PHY_65NM_CH0_RXTX2)); OS_REG_RMW(ah, AR_PHY_65NM_CH1_RXTX2, 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK_S | 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK_S, 0); /*Set synthon, synthover */ OS_REG_RMW(ah, AR_PHY_65NM_CH2_RXTX2, 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK_S | 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK_S, 0); /*Set synthon, synthover */ OS_DELAY(200); //ath_hal_printf(ah, "%s[%d] ==== before reg[0x%08x] = 0x%08x\n", __func__, __LINE__, AR_PHY_65NM_CH0_RXTX2, OS_REG_READ(ah, AR_PHY_65NM_CH0_RXTX2)); OS_REG_CLR_BIT(ah, AR_PHY_65NM_CH0_RXTX2, AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK); /* clr synthon */ OS_REG_CLR_BIT(ah, AR_PHY_65NM_CH1_RXTX2, AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK); /* clr synthon */ OS_REG_CLR_BIT(ah, AR_PHY_65NM_CH2_RXTX2, AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK); /* clr synthon */ //ath_hal_printf(ah, "%s[%d] ==== after reg[0x%08x] = 0x%08x\n", __func__, __LINE__, AR_PHY_65NM_CH0_RXTX2, OS_REG_READ(ah, AR_PHY_65NM_CH0_RXTX2)); OS_DELAY(1); //ath_hal_printf(ah, "%s[%d] ==== before reg[0x%08x] = 0x%08x\n", __func__, __LINE__, AR_PHY_65NM_CH0_RXTX2, OS_REG_READ(ah, AR_PHY_65NM_CH0_RXTX2)); OS_REG_RMW_FIELD(ah, AR_PHY_65NM_CH0_RXTX2, AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK, 1); /* set synthon */ OS_REG_RMW_FIELD(ah, AR_PHY_65NM_CH1_RXTX2, AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK, 1); /* set synthon */ OS_REG_RMW_FIELD(ah, AR_PHY_65NM_CH2_RXTX2, AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK, 1); /* set synthon */ //ath_hal_printf(ah, "%s[%d] ==== after reg[0x%08x] = 0x%08x\n", __func__, __LINE__, AR_PHY_65NM_CH0_RXTX2, OS_REG_READ(ah, AR_PHY_65NM_CH0_RXTX2)); OS_DELAY(200); //ath_hal_printf(ah, "%s[%d] ==== before reg[0x%08x] = 0x%08x\n", __func__, __LINE__, AR_PHY_65NM_CH0_SYNTH12, OS_REG_READ(ah, AR_PHY_65NM_CH0_SYNTH12)); OS_REG_RMW_FIELD(ah, AR_PHY_65NM_CH0_SYNTH12, AR_PHY_65NM_CH0_SYNTH12_VREFMUL3, 0xf); //OS_REG_CLR_BIT(ah, AR_PHY_65NM_CH0_SYNTH12, 1<< 16); /* clr charge pump */ //ath_hal_printf(ah, "%s[%d] ==== After reg[0x%08x] = 0x%08x\n", __func__, __LINE__, AR_PHY_65NM_CH0_SYNTH12, OS_REG_READ(ah, AR_PHY_65NM_CH0_SYNTH12)); OS_REG_RMW(ah, AR_PHY_65NM_CH0_RXTX2, 0, 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK_S | 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK_S); /*Clr synthon, synthover */ OS_REG_RMW(ah, AR_PHY_65NM_CH1_RXTX2, 0, 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK_S | 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK_S); /*Clr synthon, synthover */ OS_REG_RMW(ah, AR_PHY_65NM_CH2_RXTX2, 0, 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHON_MASK_S | 1 << AR_PHY_65NM_CH0_RXTX2_SYNTHOVR_MASK_S); /*Clr synthon, synthover */ //ath_hal_printf(ah, "%s[%d] ==== after reg[0x%08x] = 0x%08x\n", __func__, __LINE__, AR_PHY_65NM_CH0_RXTX2, OS_REG_READ(ah, AR_PHY_65NM_CH0_RXTX2)); } /* Write rxgain Array Parameters */ REG_WRITE_ARRAY(&ahp->ah_ini_modes_rxgain, 1, reg_writes); HALDEBUG(ah, HAL_DEBUG_RESET, "ar9300_process_ini: Rx Gain programming\n"); if (AR_SREV_JUPITER_20_OR_LATER(ah)) { /* * CUS217 mix LNA mode. 
*/ if (ar9300_rx_gain_index_get(ah) == 2) { REG_WRITE_ARRAY(&ahp->ah_ini_modes_rxgain_bb_core, 1, reg_writes); REG_WRITE_ARRAY(&ahp->ah_ini_modes_rxgain_bb_postamble, modes_index, reg_writes); } /* * 5G-XLNA */ if ((ar9300_rx_gain_index_get(ah) == 2) || (ar9300_rx_gain_index_get(ah) == 3)) { REG_WRITE_ARRAY(&ahp->ah_ini_modes_rxgain_xlna, modes_index, reg_writes); } } if (AR_SREV_SCORPION(ah)) { /* Write rxgain bounds Array */ REG_WRITE_ARRAY(&ahp->ah_ini_modes_rxgain_bounds, modes_index, reg_writes); HALDEBUG(ah, HAL_DEBUG_RESET, "ar9300_process_ini: Rx Gain table bounds programming\n"); } /* UB124 xLNA settings */ if (AR_SREV_WASP(ah) && ar9300_rx_gain_index_get(ah) == 2) { #define REG_WRITE(_reg,_val) *((volatile u_int32_t *)(_reg)) = (_val); #define REG_READ(_reg) *((volatile u_int32_t *)(_reg)) u_int32_t val; /* B8040000: bit[0]=0, bit[3]=0; */ val = REG_READ(0xB8040000); val &= 0xfffffff6; REG_WRITE(0xB8040000, val); /* B804002c: bit[31:24]=0x2e; bit[7:0]=0x2f; */ val = REG_READ(0xB804002c); val &= 0x00ffff00; val |= 0x2e00002f; REG_WRITE(0xB804002c, val); /* B804006c: bit[1]=1; */ val = REG_READ(0xB804006c); val |= 0x2; REG_WRITE(0xB804006c, val); #undef REG_READ #undef REG_WRITE } /* Write txgain Array Parameters */ if (AR_SREV_SCORPION(ah) || AR_SREV_HONEYBEE(ah)) { REG_WRITE_ARRAY(&ahp->ah_ini_modes_txgain, modes_txgaintable_index, reg_writes); }else{ REG_WRITE_ARRAY(&ahp->ah_ini_modes_txgain, modes_index, reg_writes); } HALDEBUG(ah, HAL_DEBUG_RESET, "ar9300_process_ini: Tx Gain programming\n"); /* For 5GHz channels requiring Fast Clock, apply different modal values */ if (IS_5GHZ_FAST_CLOCK_EN(ah, chan)) { HALDEBUG(ah, HAL_DEBUG_RESET, "%s: Fast clock enabled, use special ini values\n", __func__); REG_WRITE_ARRAY(&ahp->ah_ini_modes_additional, modes_index, reg_writes); } if (AR_SREV_HORNET(ah) || AR_SREV_POSEIDON(ah)) { HALDEBUG(ah, HAL_DEBUG_RESET, "%s: use xtal ini for AH9300(ah)->clk_25mhz: %d\n", __func__, AH9300(ah)->clk_25mhz); REG_WRITE_ARRAY( &ahp->ah_ini_modes_additional, 1/*modes_index*/, reg_writes); } if (AR_SREV_WASP(ah) && (AH9300(ah)->clk_25mhz == 0)) { HALDEBUG(ah, HAL_DEBUG_RESET, "%s: Apply 40MHz ini settings\n", __func__); REG_WRITE_ARRAY( &ahp->ah_ini_modes_additional_40mhz, 1/*modesIndex*/, reg_writes); } /* Handle Japan Channel 14 channel spreading */ if (2484 == ichan->channel) { ar9300_prog_ini(ah, &ahp->ah_ini_japan2484, 1); } #if 0 /* XXX TODO! */ if (AR_SREV_JUPITER_20_OR_LATER(ah) || AR_SREV_APHRODITE(ah)) { ar9300_prog_ini(ah, &ahp->ah_ini_BTCOEX_MAX_TXPWR, 1); } #endif /* Override INI with chip specific configuration */ ar9300_override_ini(ah, chan); /* Setup 11n MAC/Phy mode registers */ ar9300_set_11n_regs(ah, chan, macmode); /* * Moved ar9300_init_chain_masks() here to ensure the swap bit is set before * the pdadc table is written. Swap must occur before any radio dependent * replicated register access. The pdadc curve addressing in particular * depends on the consistent setting of the swap bit. */ ar9300_init_chain_masks(ah, ahp->ah_rx_chainmask, ahp->ah_tx_chainmask); /* * Setup the transmit power values. * * After the public to private hal channel mapping, ichan contains the * valid regulatory power value. * ath_hal_getctl and ath_hal_getantennaallowed look up ichan from chan. 
*/ status = ar9300_eeprom_set_transmit_power(ah, &ahp->ah_eeprom, chan, ath_hal_getctl(ah, chan), ath_hal_getantennaallowed(ah, chan), ath_hal_get_twice_max_regpower(ahpriv, ichan, chan), AH_MIN(MAX_RATE_POWER, ahpriv->ah_powerLimit)); if (status != HAL_OK) { HALDEBUG(ah, HAL_DEBUG_POWER_MGMT, "%s: error init'ing transmit power\n", __func__); return HAL_EIO; } return HAL_OK; #undef N } /* ar9300_is_cal_supp * Determine if calibration is supported by device and channel flags */ inline static HAL_BOOL ar9300_is_cal_supp(struct ath_hal *ah, const struct ieee80211_channel *chan, HAL_CAL_TYPES cal_type) { struct ath_hal_9300 *ahp = AH9300(ah); HAL_BOOL retval = AH_FALSE; switch (cal_type & ahp->ah_supp_cals) { case IQ_MISMATCH_CAL: /* Run IQ Mismatch for non-CCK only */ if (!IEEE80211_IS_CHAN_B(chan)) { retval = AH_TRUE; } break; case TEMP_COMP_CAL: retval = AH_TRUE; break; } return retval; } #if 0 /* ar9285_pa_cal * PA Calibration for Kite 1.1 and later versions of Kite. * - from system's team. */ static inline void ar9285_pa_cal(struct ath_hal *ah) { u_int32_t reg_val; int i, lo_gn, offs_6_1, offs_0; u_int8_t reflo; u_int32_t phy_test2_reg_val, phy_adc_ctl_reg_val; u_int32_t an_top2_reg_val, phy_tst_dac_reg_val; /* Kite 1.1 WAR for Bug 35666 * Increase the LDO value to 1.28V before accessing analog Reg */ if (AR_SREV_KITE_11(ah)) { OS_REG_WRITE(ah, AR9285_AN_TOP4, (AR9285_AN_TOP4_DEFAULT | 0x14) ); } an_top2_reg_val = OS_REG_READ(ah, AR9285_AN_TOP2); /* set pdv2i pdrxtxbb */ reg_val = OS_REG_READ(ah, AR9285_AN_RXTXBB1); reg_val |= ((0x1 << 5) | (0x1 << 7)); OS_REG_WRITE(ah, AR9285_AN_RXTXBB1, reg_val); /* clear pwddb */ reg_val = OS_REG_READ(ah, AR9285_AN_RF2G7); reg_val &= 0xfffffffd; OS_REG_WRITE(ah, AR9285_AN_RF2G7, reg_val); /* clear enpacal */ reg_val = OS_REG_READ(ah, AR9285_AN_RF2G1); reg_val &= 0xfffff7ff; OS_REG_WRITE(ah, AR9285_AN_RF2G1, reg_val); /* set offcal */ reg_val = OS_REG_READ(ah, AR9285_AN_RF2G2); reg_val |= (0x1 << 12); OS_REG_WRITE(ah, AR9285_AN_RF2G2, reg_val); /* set pdpadrv1=pdpadrv2=pdpaout=1 */ reg_val = OS_REG_READ(ah, AR9285_AN_RF2G1); reg_val |= (0x7 << 23); OS_REG_WRITE(ah, AR9285_AN_RF2G1, reg_val); /* Read back reflo, increase it by 1 and write it. */ reg_val = OS_REG_READ(ah, AR9285_AN_RF2G3); reflo = ((reg_val >> 26) & 0x7); if (reflo < 0x7) { reflo++; } reg_val = ((reg_val & 0xe3ffffff) | (reflo << 26)); OS_REG_WRITE(ah, AR9285_AN_RF2G3, reg_val); reg_val = OS_REG_READ(ah, AR9285_AN_RF2G3); reflo = ((reg_val >> 26) & 0x7); /* use TX single carrier to transmit * dac const * reg. 15 */ phy_tst_dac_reg_val = OS_REG_READ(ah, AR_PHY_TSTDAC_CONST); OS_REG_WRITE(ah, AR_PHY_TSTDAC_CONST, ((0x7ff << 11) | 0x7ff)); reg_val = OS_REG_READ(ah, AR_PHY_TSTDAC_CONST); /* source is dac const * reg. 2 */ phy_test2_reg_val = OS_REG_READ(ah, AR_PHY_TEST2); OS_REG_WRITE(ah, AR_PHY_TEST2, ((0x1 << 7) | (0x1 << 1))); reg_val = OS_REG_READ(ah, AR_PHY_TEST2); /* set dac on * reg. 
11 */ phy_adc_ctl_reg_val = OS_REG_READ(ah, AR_PHY_ADC_CTL); OS_REG_WRITE(ah, AR_PHY_ADC_CTL, 0x80008000); reg_val = OS_REG_READ(ah, AR_PHY_ADC_CTL); OS_REG_WRITE(ah, AR9285_AN_TOP2, (0x1 << 27) | (0x1 << 17) | (0x1 << 16) | (0x1 << 14) | (0x1 << 12) | (0x1 << 11) | (0x1 << 7) | (0x1 << 5)); OS_DELAY(10); /* 10 usec */ /* clear off[6:0] */ reg_val = OS_REG_READ(ah, AR9285_AN_RF2G6); reg_val &= 0xfc0fffff; OS_REG_WRITE(ah, AR9285_AN_RF2G6, reg_val); reg_val = OS_REG_READ(ah, AR9285_AN_RF2G3); reg_val &= 0xfdffffff; OS_REG_WRITE(ah, AR9285_AN_RF2G3, reg_val); offs_6_1 = 0; for (i = 6; i > 0; i--) { /* sef off[$k]==1 */ reg_val = OS_REG_READ(ah, AR9285_AN_RF2G6); reg_val &= 0xfc0fffff; reg_val = reg_val | (0x1 << (19 + i)) | ((offs_6_1) << 20); OS_REG_WRITE(ah, AR9285_AN_RF2G6, reg_val); lo_gn = (OS_REG_READ(ah, AR9285_AN_RF2G9)) & 0x1; offs_6_1 = offs_6_1 | (lo_gn << (i - 1)); } reg_val = OS_REG_READ(ah, AR9285_AN_RF2G6); reg_val &= 0xfc0fffff; reg_val = reg_val | ((offs_6_1 - 1) << 20); OS_REG_WRITE(ah, AR9285_AN_RF2G6, reg_val); /* set off_0=1; */ reg_val = OS_REG_READ(ah, AR9285_AN_RF2G3); reg_val &= 0xfdffffff; reg_val = reg_val | (0x1 << 25); OS_REG_WRITE(ah, AR9285_AN_RF2G3, reg_val); lo_gn = OS_REG_READ(ah, AR9285_AN_RF2G9) & 0x1; offs_0 = lo_gn; reg_val = OS_REG_READ(ah, AR9285_AN_RF2G3); reg_val &= 0xfdffffff; reg_val = reg_val | (offs_0 << 25); OS_REG_WRITE(ah, AR9285_AN_RF2G3, reg_val); /* clear pdv2i */ reg_val = OS_REG_READ(ah, AR9285_AN_RXTXBB1); reg_val &= 0xffffff5f; OS_REG_WRITE(ah, AR9285_AN_RXTXBB1, reg_val); /* set enpacal */ reg_val = OS_REG_READ(ah, AR9285_AN_RF2G1); reg_val |= (0x1 << 11); OS_REG_WRITE(ah, AR9285_AN_RF2G1, reg_val); /* clear offcal */ reg_val = OS_REG_READ(ah, AR9285_AN_RF2G2); reg_val &= 0xffffefff; OS_REG_WRITE(ah, AR9285_AN_RF2G2, reg_val); /* set pdpadrv1=pdpadrv2=pdpaout=0 */ reg_val = OS_REG_READ(ah, AR9285_AN_RF2G1); reg_val &= 0xfc7fffff; OS_REG_WRITE(ah, AR9285_AN_RF2G1, reg_val); /* Read back reflo, decrease it by 1 and write it. 
*/ reg_val = OS_REG_READ(ah, AR9285_AN_RF2G3); reflo = (reg_val >> 26) & 0x7; if (reflo) { reflo--; } reg_val = ((reg_val & 0xe3ffffff) | (reflo << 26)); OS_REG_WRITE(ah, AR9285_AN_RF2G3, reg_val); reg_val = OS_REG_READ(ah, AR9285_AN_RF2G3); reflo = (reg_val >> 26) & 0x7; /* write back registers */ OS_REG_WRITE(ah, AR_PHY_TSTDAC_CONST, phy_tst_dac_reg_val); OS_REG_WRITE(ah, AR_PHY_TEST2, phy_test2_reg_val); OS_REG_WRITE(ah, AR_PHY_ADC_CTL, phy_adc_ctl_reg_val); OS_REG_WRITE(ah, AR9285_AN_TOP2, an_top2_reg_val); /* Kite 1.1 WAR for Bug 35666 * Decrease the LDO value back to 1.20V */ if (AR_SREV_KITE_11(ah)) { OS_REG_WRITE(ah, AR9285_AN_TOP4, AR9285_AN_TOP4_DEFAULT); } } #endif /* ar9300_run_init_cals * Runs non-periodic calibrations */ inline static HAL_BOOL ar9300_run_init_cals(struct ath_hal *ah, int init_cal_count) { struct ath_hal_9300 *ahp = AH9300(ah); HAL_CHANNEL_INTERNAL ichan; /* bogus */ HAL_BOOL is_cal_done; HAL_CAL_LIST *curr_cal; const HAL_PERCAL_DATA *cal_data; int i; curr_cal = ahp->ah_cal_list_curr; if (curr_cal == AH_NULL) { return AH_FALSE; } cal_data = curr_cal->cal_data; ichan.calValid = 0; for (i = 0; i < init_cal_count; i++) { /* Reset this Cal */ ar9300_reset_calibration(ah, curr_cal); /* Poll for offset calibration complete */ if (!ath_hal_wait( ah, AR_PHY_TIMING4, AR_PHY_TIMING4_DO_CAL, 0)) { HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: Cal %d failed to complete in 100ms.\n", __func__, curr_cal->cal_data->cal_type); /* Re-initialize list pointers for periodic cals */ ahp->ah_cal_list = ahp->ah_cal_list_last = ahp->ah_cal_list_curr = AH_NULL; return AH_FALSE; } /* Run this cal */ ar9300_per_calibration( ah, &ichan, ahp->ah_rx_chainmask, curr_cal, &is_cal_done); if (is_cal_done == AH_FALSE) { HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: Not able to run Init Cal %d.\n", __func__, curr_cal->cal_data->cal_type); } if (curr_cal->cal_next) { curr_cal = curr_cal->cal_next; } } /* Re-initialize list pointers for periodic cals */ ahp->ah_cal_list = ahp->ah_cal_list_last = ahp->ah_cal_list_curr = AH_NULL; return AH_TRUE; } #if 0 static void ar9300_tx_carrier_leak_war(struct ath_hal *ah) { unsigned long tx_gain_table_max; unsigned long reg_bb_cl_map_0_b0 = 0xffffffff; unsigned long reg_bb_cl_map_1_b0 = 0xffffffff; unsigned long reg_bb_cl_map_2_b0 = 0xffffffff; unsigned long reg_bb_cl_map_3_b0 = 0xffffffff; unsigned long tx_gain, cal_run = 0; unsigned long cal_gain[AR_PHY_TPC_7_TX_GAIN_TABLE_MAX + 1]; unsigned long cal_gain_index[AR_PHY_TPC_7_TX_GAIN_TABLE_MAX + 1]; unsigned long new_gain[AR_PHY_TPC_7_TX_GAIN_TABLE_MAX + 1]; int i, j; OS_MEMSET(new_gain, 0, sizeof(new_gain)); /*printf(" Running TxCarrierLeakWAR\n");*/ /* process tx gain table, we use cl_map_hw_gen=0. */ OS_REG_RMW_FIELD(ah, AR_PHY_CL_CAL_CTL, AR_PHY_CL_MAP_HW_GEN, 0); //the table we used is txbb_gc[2:0], 1dB[2:1]. tx_gain_table_max = OS_REG_READ_FIELD(ah, AR_PHY_TPC_7, AR_PHY_TPC_7_TX_GAIN_TABLE_MAX); for (i = 0; i <= tx_gain_table_max; i++) { tx_gain = OS_REG_READ(ah, AR_PHY_TXGAIN_TAB(1) + i * 4); cal_gain[i] = (((tx_gain >> 5)& 0x7) << 2) | (((tx_gain >> 1) & 0x3) << 0); if (i == 0) { cal_gain_index[i] = cal_run; new_gain[i] = 1; cal_run++; } else { new_gain[i] = 1; for (j = 0; j < i; j++) { /* printf("i=%d, j=%d cal_gain[$i]=0x%04x\n", i, j, cal_gain[i]); */ if (new_gain[i]) { if ((cal_gain[i] != cal_gain[j])) { new_gain[i] = 1; } else { /* if old gain found, use old cal_run value. 
*/ new_gain[i] = 0; cal_gain_index[i] = cal_gain_index[j]; } } } /* if new gain found, increase cal_run */ if (new_gain[i] == 1) { cal_gain_index[i] = cal_run; cal_run++; } } reg_bb_cl_map_0_b0 = (reg_bb_cl_map_0_b0 & ~(0x1 << i)) | ((cal_gain_index[i] >> 0 & 0x1) << i); reg_bb_cl_map_1_b0 = (reg_bb_cl_map_1_b0 & ~(0x1 << i)) | ((cal_gain_index[i] >> 1 & 0x1) << i); reg_bb_cl_map_2_b0 = (reg_bb_cl_map_2_b0 & ~(0x1 << i)) | ((cal_gain_index[i] >> 2 & 0x1) << i); reg_bb_cl_map_3_b0 = (reg_bb_cl_map_3_b0 & ~(0x1 << i)) | ((cal_gain_index[i] >> 3 & 0x1) << i); /* printf("i=%2d, cal_gain[$i]= 0x%04x, cal_run= %d, " "cal_gain_index[i]=%d, new_gain[i] = %d\n", i, cal_gain[i], cal_run, cal_gain_index[i], new_gain[i]); */ } OS_REG_WRITE(ah, AR_PHY_CL_MAP_0_B0, reg_bb_cl_map_0_b0); OS_REG_WRITE(ah, AR_PHY_CL_MAP_1_B0, reg_bb_cl_map_1_b0); OS_REG_WRITE(ah, AR_PHY_CL_MAP_2_B0, reg_bb_cl_map_2_b0); OS_REG_WRITE(ah, AR_PHY_CL_MAP_3_B0, reg_bb_cl_map_3_b0); if (AR_SREV_WASP(ah)) { OS_REG_WRITE(ah, AR_PHY_CL_MAP_0_B1, reg_bb_cl_map_0_b0); OS_REG_WRITE(ah, AR_PHY_CL_MAP_1_B1, reg_bb_cl_map_1_b0); OS_REG_WRITE(ah, AR_PHY_CL_MAP_2_B1, reg_bb_cl_map_2_b0); OS_REG_WRITE(ah, AR_PHY_CL_MAP_3_B1, reg_bb_cl_map_3_b0); } } #endif static inline void ar9300_invalidate_saved_cals(struct ath_hal *ah, HAL_CHANNEL_INTERNAL *ichan) { #if ATH_SUPPORT_CAL_REUSE if (AH_PRIVATE(ah)->ah_config.ath_hal_cal_reuse & ATH_CAL_REUSE_REDO_IN_FULL_RESET) { ichan->one_time_txiqcal_done = AH_FALSE; ichan->one_time_txclcal_done = AH_FALSE; } #endif } static inline HAL_BOOL ar9300_restore_rtt_cals(struct ath_hal *ah, HAL_CHANNEL_INTERNAL *ichan) { HAL_BOOL restore_status = AH_FALSE; return restore_status; } /* ar9300_init_cal * Initialize Calibration infrastructure */ static inline HAL_BOOL ar9300_init_cal_internal(struct ath_hal *ah, struct ieee80211_channel *chan, HAL_CHANNEL_INTERNAL *ichan, HAL_BOOL enable_rtt, HAL_BOOL do_rtt_cal, HAL_BOOL skip_if_none, HAL_BOOL apply_last_iqcorr) { struct ath_hal_9300 *ahp = AH9300(ah); HAL_BOOL txiqcal_success_flag = AH_FALSE; HAL_BOOL cal_done = AH_FALSE; int iqcal_idx = 0; HAL_BOOL do_sep_iq_cal = AH_FALSE; HAL_BOOL do_agc_cal = do_rtt_cal; HAL_BOOL is_cal_reusable = AH_TRUE; #if ATH_SUPPORT_CAL_REUSE HAL_BOOL cal_reuse_enable = AH_PRIVATE(ah)->ah_config.ath_hal_cal_reuse & ATH_CAL_REUSE_ENABLE; HAL_BOOL clc_success = AH_FALSE; int32_t ch_idx, j, cl_tab_reg; u_int32_t BB_cl_tab_entry = MAX_BB_CL_TABLE_ENTRY; u_int32_t BB_cl_tab_b[AR9300_MAX_CHAINS] = { AR_PHY_CL_TAB_0, AR_PHY_CL_TAB_1, AR_PHY_CL_TAB_2 }; #endif if (AR_SREV_HORNET(ah) || AR_SREV_POSEIDON(ah) || AR_SREV_APHRODITE(ah)) { /* Hornet: 1 x 1 */ ahp->ah_rx_cal_chainmask = 0x1; ahp->ah_tx_cal_chainmask = 0x1; } else if (AR_SREV_WASP(ah) || AR_SREV_JUPITER(ah) || AR_SREV_HONEYBEE(ah)) { /* Wasp/Jupiter: 2 x 2 */ ahp->ah_rx_cal_chainmask = 0x3; ahp->ah_tx_cal_chainmask = 0x3; } else { /* * Osprey needs to be configured for the correct chain mode * before running AGC/TxIQ cals. 
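     *
     * Cal chainmask selection (summary of the branches below):
     *
     *   Hornet / Poseidon / Aphrodite (1x1):      0x1
     *   Wasp / Jupiter / Honeybee     (2x2):      0x3
     *   Osprey, chain 2 fused off     (2-chain):  0x3, else (3x3) 0x7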
*/ if (ahp->ah_enterprise_mode & AR_ENT_OTP_CHAIN2_DISABLE) { /* chain 2 disabled - 2 chain mode */ ahp->ah_rx_cal_chainmask = 0x3; ahp->ah_tx_cal_chainmask = 0x3; } else { ahp->ah_rx_cal_chainmask = 0x7; ahp->ah_tx_cal_chainmask = 0x7; } } ar9300_init_chain_masks(ah, ahp->ah_rx_cal_chainmask, ahp->ah_tx_cal_chainmask); if (ahp->tx_cl_cal_enable) { #if ATH_SUPPORT_CAL_REUSE /* disable Carrie Leak or set do_agc_cal accordingly */ if (cal_reuse_enable && ichan->one_time_txclcal_done) { OS_REG_CLR_BIT(ah, AR_PHY_CL_CAL_CTL, AR_PHY_CL_CAL_ENABLE); } else #endif /* ATH_SUPPORT_CAL_REUSE */ { OS_REG_SET_BIT(ah, AR_PHY_CL_CAL_CTL, AR_PHY_CL_CAL_ENABLE); do_agc_cal = AH_TRUE; } } /* Do Tx IQ Calibration here for osprey hornet and wasp */ /* XXX: For initial wasp bringup - check and enable this */ /* EV 74233: Tx IQ fails to complete for half/quarter rates */ if (!(IEEE80211_IS_CHAN_HALF(chan) || IEEE80211_IS_CHAN_QUARTER(chan))) { if (ahp->tx_iq_cal_enable) { /* this should be eventually moved to INI file */ OS_REG_RMW_FIELD(ah, AR_PHY_TX_IQCAL_CONTROL_1(ah), AR_PHY_TX_IQCAL_CONTROL_1_IQCORR_I_Q_COFF_DELPT, DELPT); /* * For poseidon and later chips, * Tx IQ cal HW run will be a part of AGC calibration */ if (ahp->tx_iq_cal_during_agc_cal) { /* * txiqcal_success_flag always set to 1 to run * ar9300_tx_iq_cal_post_proc * if following AGC cal passes */ #if ATH_SUPPORT_CAL_REUSE if (!cal_reuse_enable || !ichan->one_time_txiqcal_done) { txiqcal_success_flag = AH_TRUE; OS_REG_WRITE(ah, AR_PHY_TX_IQCAL_CONTROL_0(ah), OS_REG_READ(ah, AR_PHY_TX_IQCAL_CONTROL_0(ah)) | AR_PHY_TX_IQCAL_CONTROL_0_ENABLE_TXIQ_CAL); } else { OS_REG_WRITE(ah, AR_PHY_TX_IQCAL_CONTROL_0(ah), OS_REG_READ(ah, AR_PHY_TX_IQCAL_CONTROL_0(ah)) & (~AR_PHY_TX_IQCAL_CONTROL_0_ENABLE_TXIQ_CAL)); } #else if (OS_REG_READ_FIELD(ah, AR_PHY_TX_IQCAL_CONTROL_0(ah), AR_PHY_TX_IQCAL_CONTROL_0_ENABLE_TXIQ_CAL)){ if (apply_last_iqcorr == AH_TRUE) { OS_REG_CLR_BIT(ah, AR_PHY_TX_IQCAL_CONTROL_0(ah), AR_PHY_TX_IQCAL_CONTROL_0_ENABLE_TXIQ_CAL); txiqcal_success_flag = AH_FALSE; } else { txiqcal_success_flag = AH_TRUE; } }else{ txiqcal_success_flag = AH_FALSE; } #endif if (txiqcal_success_flag) { do_agc_cal = AH_TRUE; } } else #if ATH_SUPPORT_CAL_REUSE if (!cal_reuse_enable || !ichan->one_time_txiqcal_done) #endif { do_sep_iq_cal = AH_TRUE; do_agc_cal = AH_TRUE; } } } #if ATH_SUPPORT_MCI if (AH_PRIVATE(ah)->ah_caps.halMciSupport && IS_CHAN_2GHZ(ichan) && (ahp->ah_mci_bt_state == MCI_BT_AWAKE) && do_agc_cal && !(ah->ah_config.ath_hal_mci_config & ATH_MCI_CONFIG_DISABLE_MCI_CAL)) { u_int32_t payload[4] = {0, 0, 0, 0}; /* Send CAL_REQ only when BT is AWAKE. 
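         *
         * MCI calibration handshake implemented here and at the end of
         * this routine (sketch):
         *
         *   WLAN -> BT : MCI_GPM_WLAN_CAL_REQ   (ah_mci_wlan_cal_seq++)
         *   BT -> WLAN : MCI_GPM_BT_CAL_GRANT   (polled for up to 50ms)
         *   ... AGC / Tx IQ calibration runs ...
         *   WLAN -> BT : MCI_GPM_WLAN_CAL_DONE  (ah_mci_wlan_cal_done++)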
*/ HALDEBUG(ah, HAL_DEBUG_BT_COEX, "(MCI) %s: Send WLAN_CAL_REQ 0x%X\n", __func__, ahp->ah_mci_wlan_cal_seq); MCI_GPM_SET_CAL_TYPE(payload, MCI_GPM_WLAN_CAL_REQ); payload[MCI_GPM_WLAN_CAL_W_SEQUENCE] = ahp->ah_mci_wlan_cal_seq++; ar9300_mci_send_message(ah, MCI_GPM, 0, payload, 16, AH_TRUE, AH_FALSE); /* Wait BT_CAL_GRANT for 50ms */ HALDEBUG(ah, HAL_DEBUG_BT_COEX, "(MCI) %s: Wait for BT_CAL_GRANT\n", __func__); if (ar9300_mci_wait_for_gpm(ah, MCI_GPM_BT_CAL_GRANT, 0, 50000)) { HALDEBUG(ah, HAL_DEBUG_BT_COEX, "(MCI) %s: Got BT_CAL_GRANT.\n", __func__); } else { is_cal_reusable = AH_FALSE; HALDEBUG(ah, HAL_DEBUG_BT_COEX, "(MCI) %s: BT is not responding.\n", __func__); } } #endif /* ATH_SUPPORT_MCI */ if (do_sep_iq_cal) { /* enable Tx IQ Calibration HW for osprey/hornet/wasp */ txiqcal_success_flag = ar9300_tx_iq_cal_hw_run(ah); OS_REG_WRITE(ah, AR_PHY_ACTIVE, AR_PHY_ACTIVE_DIS); OS_DELAY(5); OS_REG_WRITE(ah, AR_PHY_ACTIVE, AR_PHY_ACTIVE_EN); } #if 0 if (AR_SREV_HORNET(ah) || AR_SREV_POSEIDON(ah)) { ar9300_tx_carrier_leak_war(ah); } #endif /* * Calibrate the AGC * * Tx IQ cal is a part of AGC cal for Jupiter/Poseidon, etc. * please enable the bit of txiqcal_control_0[31] in INI file * for Jupiter/Poseidon/etc. */ if(!AR_SREV_SCORPION(ah)) { if (do_agc_cal || !skip_if_none) { OS_REG_WRITE(ah, AR_PHY_AGC_CONTROL, OS_REG_READ(ah, AR_PHY_AGC_CONTROL) | AR_PHY_AGC_CONTROL_CAL); /* Poll for offset calibration complete */ cal_done = ath_hal_wait(ah, AR_PHY_AGC_CONTROL, AR_PHY_AGC_CONTROL_CAL, 0); if (!cal_done) { HALDEBUG(ah, HAL_DEBUG_FCS_RTT, "(FCS) CAL NOT DONE!!! - %d\n", ichan->channel); } } else { cal_done = AH_TRUE; } /* * Tx IQ cal post-processing in SW * This part of code should be common to all chips, * no chip specific code for Jupiter/Posdeion except for register names. 
         */
        if (txiqcal_success_flag) {
            ar9300_tx_iq_cal_post_proc(ah, ichan, 1, 1, is_cal_reusable,
                AH_FALSE);
        }
    } else {
        if (!txiqcal_success_flag) {
            OS_REG_WRITE(ah, AR_PHY_AGC_CONTROL,
                OS_REG_READ(ah, AR_PHY_AGC_CONTROL) | AR_PHY_AGC_CONTROL_CAL);
            if (!ath_hal_wait(ah, AR_PHY_AGC_CONTROL, AR_PHY_AGC_CONTROL_CAL,
                    0)) {
                HALDEBUG(ah, HAL_DEBUG_CALIBRATE,
                    "%s: offset calibration failed to complete in 1ms; "
                    "noisy environment?\n", __func__);
                return AH_FALSE;
            }
            if (apply_last_iqcorr == AH_TRUE) {
                ar9300_tx_iq_cal_post_proc(ah, ichan, 0, 0, is_cal_reusable,
                    AH_TRUE);
            }
        } else {
            for (iqcal_idx = 0; iqcal_idx < MAXIQCAL; iqcal_idx++) {
                OS_REG_WRITE(ah, AR_PHY_AGC_CONTROL,
                    OS_REG_READ(ah, AR_PHY_AGC_CONTROL) |
                    AR_PHY_AGC_CONTROL_CAL);
                /* Poll for offset calibration complete */
                if (!ath_hal_wait(ah, AR_PHY_AGC_CONTROL,
                        AR_PHY_AGC_CONTROL_CAL, 0)) {
                    HALDEBUG(ah, HAL_DEBUG_CALIBRATE,
                        "%s: offset calibration failed to complete in 1ms; "
                        "noisy environment?\n", __func__);
                    return AH_FALSE;
                }
                /* Tx IQ cal post-processing in SW, once per attempt */
                ar9300_tx_iq_cal_post_proc(ah, ichan, iqcal_idx + 1, MAXIQCAL,
                    is_cal_reusable, AH_FALSE);
            }
        }
    }

#if ATH_SUPPORT_MCI
    if (AH_PRIVATE(ah)->ah_caps.halMciSupport &&
        IS_CHAN_2GHZ(ichan) &&
        (ahp->ah_mci_bt_state == MCI_BT_AWAKE) &&
        do_agc_cal &&
        !(ah->ah_config.ath_hal_mci_config &
        ATH_MCI_CONFIG_DISABLE_MCI_CAL))
    {
        u_int32_t payload[4] = {0, 0, 0, 0};

        HALDEBUG(ah, HAL_DEBUG_BT_COEX,
            "(MCI) %s: Send WLAN_CAL_DONE 0x%X\n",
            __func__, ahp->ah_mci_wlan_cal_done);
        MCI_GPM_SET_CAL_TYPE(payload, MCI_GPM_WLAN_CAL_DONE);
        payload[MCI_GPM_WLAN_CAL_W_SEQUENCE] = ahp->ah_mci_wlan_cal_done++;
        ar9300_mci_send_message(ah, MCI_GPM, 0, payload, 16, AH_TRUE, AH_FALSE);
    }
#endif /* ATH_SUPPORT_MCI */

    if (!cal_done && !AR_SREV_SCORPION(ah)) {
        HALDEBUG(ah, HAL_DEBUG_CALIBRATE,
            "%s: offset calibration failed to complete in 1ms; "
            "noisy environment?\n", __func__);
        return AH_FALSE;
    }

#if 0
    /* Beacon stuck fix, refer to EV 120056 */
    if (IS_CHAN_2GHZ(chan) && AR_SREV_SCORPION(ah))
        OS_REG_WRITE(ah, AR_PHY_TIMING5,
            OS_REG_READ(ah, AR_PHY_TIMING5) &
            ~AR_PHY_TIMING5_CYCPWR_THR1_ENABLE);
#endif

#if 0
    /* Do PA Calibration */
    if (AR_SREV_KITE(ah) && AR_SREV_KITE_11_OR_LATER(ah)) {
        ar9285_pa_cal(ah);
    }
#endif

#if ATH_SUPPORT_CAL_REUSE
    if (ichan->one_time_txiqcal_done) {
        ar9300_tx_iq_cal_apply(ah, ichan);
        HALDEBUG(ah, HAL_DEBUG_FCS_RTT,
            "(FCS) TXIQCAL applied - %d\n", ichan->channel);
    }
#endif /* ATH_SUPPORT_CAL_REUSE */

#if ATH_SUPPORT_CAL_REUSE
    if (cal_reuse_enable && ahp->tx_cl_cal_enable) {
        clc_success = (OS_REG_READ(ah, AR_PHY_AGC_CONTROL) &
            AR_PHY_AGC_CONTROL_CLC_SUCCESS) ? 1 : 0;

        if (ichan->one_time_txclcal_done) {
            /* reapply CL cal results */
            for (ch_idx = 0; ch_idx < AR9300_MAX_CHAINS; ch_idx++) {
                if ((ahp->ah_tx_cal_chainmask & (1 << ch_idx)) == 0) {
                    continue;
                }
                cl_tab_reg = BB_cl_tab_b[ch_idx];
                for (j = 0; j < BB_cl_tab_entry; j++) {
                    OS_REG_WRITE(ah, cl_tab_reg, ichan->tx_clcal[ch_idx][j]);
                    cl_tab_reg += 4;
                }
            }
            HALDEBUG(ah, HAL_DEBUG_FCS_RTT,
                "(FCS) TX CL CAL applied - %d\n", ichan->channel);
        } else if (is_cal_reusable && clc_success) {
            /* save CL cal results */
            for (ch_idx = 0; ch_idx < AR9300_MAX_CHAINS; ch_idx++) {
                if ((ahp->ah_tx_cal_chainmask & (1 << ch_idx)) == 0) {
                    continue;
                }
                cl_tab_reg = BB_cl_tab_b[ch_idx];
                for (j = 0; j < BB_cl_tab_entry; j++) {
                    ichan->tx_clcal[ch_idx][j] = OS_REG_READ(ah, cl_tab_reg);
                    cl_tab_reg += 4;
                }
            }
            ichan->one_time_txclcal_done = AH_TRUE;
            HALDEBUG(ah, HAL_DEBUG_FCS_RTT,
                "(FCS) TX CL CAL saved - %d\n", ichan->channel);
        }
    }
#endif /* ATH_SUPPORT_CAL_REUSE */

    /* Revert chainmasks to their original values before NF cal */
    ar9300_init_chain_masks(ah, ahp->ah_rx_chainmask, ahp->ah_tx_chainmask);

#if !FIX_NOISE_FLOOR
    /*
     * Do NF calibration after DC offset and other CALs.
     * Per system engineers, the noise floor value can sometimes be 20 dB
     * higher than normal if DC offset and noise floor cal are
     * triggered at the same time.
*/ OS_REG_WRITE(ah, AR_PHY_AGC_CONTROL, OS_REG_READ(ah, AR_PHY_AGC_CONTROL) | AR_PHY_AGC_CONTROL_NF); #endif /* Initialize list pointers */ ahp->ah_cal_list = ahp->ah_cal_list_last = ahp->ah_cal_list_curr = AH_NULL; /* * Enable IQ, ADC Gain, ADC DC Offset Cals */ /* Setup all non-periodic, init time only calibrations */ /* XXX: Init DC Offset not working yet */ #ifdef not_yet if (AH_TRUE == ar9300_is_cal_supp(ah, chan, ADC_DC_INIT_CAL)) { INIT_CAL(&ahp->ah_adc_dc_cal_init_data); INSERT_CAL(ahp, &ahp->ah_adc_dc_cal_init_data); } /* Initialize current pointer to first element in list */ ahp->ah_cal_list_curr = ahp->ah_cal_list; if (ahp->ah_cal_list_curr) { if (ar9300_run_init_cals(ah, 0) == AH_FALSE) { return AH_FALSE; } } #endif /* end - Init time calibrations */ /* Do not do RX cal in case of offchan, or cal data already exists on same channel*/ if (ahp->ah_skip_rx_iq_cal) { HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "Skip RX IQ Cal\n"); return AH_TRUE; } /* If Cals are supported, add them to list via INIT/INSERT_CAL */ if (AH_TRUE == ar9300_is_cal_supp(ah, chan, IQ_MISMATCH_CAL)) { INIT_CAL(&ahp->ah_iq_cal_data); INSERT_CAL(ahp, &ahp->ah_iq_cal_data); HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: enabling IQ Calibration.\n", __func__); } if (AH_TRUE == ar9300_is_cal_supp(ah, chan, TEMP_COMP_CAL)) { INIT_CAL(&ahp->ah_temp_comp_cal_data); INSERT_CAL(ahp, &ahp->ah_temp_comp_cal_data); HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: enabling Temperature Compensation Calibration.\n", __func__); } /* Initialize current pointer to first element in list */ ahp->ah_cal_list_curr = ahp->ah_cal_list; /* Reset state within current cal */ if (ahp->ah_cal_list_curr) { ar9300_reset_calibration(ah, ahp->ah_cal_list_curr); } /* Mark all calibrations on this channel as being invalid */ ichan->calValid = 0; return AH_TRUE; } static inline HAL_BOOL ar9300_init_cal(struct ath_hal *ah, struct ieee80211_channel *chan, HAL_BOOL skip_if_none, HAL_BOOL apply_last_iqcorr) { HAL_CHANNEL_INTERNAL *ichan = ath_hal_checkchannel(ah, chan); HAL_BOOL do_rtt_cal = AH_TRUE; HAL_BOOL enable_rtt = AH_FALSE; HALASSERT(ichan); return ar9300_init_cal_internal(ah, chan, ichan, enable_rtt, do_rtt_cal, skip_if_none, apply_last_iqcorr); } /* ar9300_reset_cal_valid * Entry point for upper layers to restart current cal. * Reset the calibration valid bit in channel. */ void ar9300_reset_cal_valid(struct ath_hal *ah, const struct ieee80211_channel *chan, HAL_BOOL *is_cal_done, u_int32_t cal_type) { struct ath_hal_9300 *ahp = AH9300(ah); HAL_CHANNEL_INTERNAL *ichan = ath_hal_checkchannel(ah, chan); HAL_CAL_LIST *curr_cal = ahp->ah_cal_list_curr; *is_cal_done = AH_TRUE; if (curr_cal == AH_NULL) { return; } if (ichan == AH_NULL) { HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: invalid channel %u/0x%x; no mapping\n", __func__, chan->ic_freq, chan->ic_flags); return; } if (!(cal_type & IQ_MISMATCH_CAL)) { *is_cal_done = AH_FALSE; return; } /* Expected that this calibration has run before, post-reset. 
* Current state should be done */ if (curr_cal->cal_state != CAL_DONE) { HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: Calibration state incorrect, %d\n", __func__, curr_cal->cal_state); return; } /* Verify Cal is supported on this channel */ if (ar9300_is_cal_supp(ah, chan, curr_cal->cal_data->cal_type) == AH_FALSE) { return; } HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: Resetting Cal %d state for channel %u/0x%x\n", __func__, curr_cal->cal_data->cal_type, chan->ic_freq, chan->ic_flags); /* Disable cal validity in channel */ ichan->calValid &= ~curr_cal->cal_data->cal_type; curr_cal->cal_state = CAL_WAITING; /* Indicate to upper layers that we need polling */ *is_cal_done = AH_FALSE; } static inline void ar9300_set_dma(struct ath_hal *ah) { u_int32_t regval; struct ath_hal_9300 *ahp = AH9300(ah); struct ath_hal_private *ahpriv = AH_PRIVATE(ah); HAL_CAPABILITIES *pCap = &ahpriv->ah_caps; #if 0 /* * set AHB_MODE not to do cacheline prefetches */ regval = OS_REG_READ(ah, AR_AHB_MODE); OS_REG_WRITE(ah, AR_AHB_MODE, regval | AR_AHB_PREFETCH_RD_EN); #endif /* * let mac dma reads be in 128 byte chunks */ regval = OS_REG_READ(ah, AR_TXCFG) & ~AR_TXCFG_DMASZ_MASK; OS_REG_WRITE(ah, AR_TXCFG, regval | AR_TXCFG_DMASZ_128B); /* * Restore TX Trigger Level to its pre-reset value. * The initial value depends on whether aggregation is enabled, and is * adjusted whenever underruns are detected. */ /* OS_REG_RMW_FIELD(ah, AR_TXCFG, AR_FTRIG, AH_PRIVATE(ah)->ah_tx_trig_level); */ /* * Osprey 1.0 bug (EV 61936). Don't change trigger level from .ini default. * Osprey 2.0 - hardware recommends using the default INI settings. */ #if 0 OS_REG_RMW_FIELD(ah, AR_TXCFG, AR_FTRIG, 0x3f); #endif /* * let mac dma writes be in 128 byte chunks */ regval = OS_REG_READ(ah, AR_RXCFG) & ~AR_RXCFG_DMASZ_MASK; OS_REG_WRITE(ah, AR_RXCFG, regval | AR_RXCFG_DMASZ_128B); /* * Setup receive FIFO threshold to hold off TX activities */ OS_REG_WRITE(ah, AR_RXFIFO_CFG, 0x200); /* * reduce the number of usable entries in PCU TXBUF to avoid * wrap around bugs. (bug 20428) */ if (AR_SREV_WASP(ah) && (AH_PRIVATE((ah))->ah_macRev > AR_SREV_REVISION_WASP_12)) { /* Wasp 1.3 fix for EV#85395 requires usable entries * to be set to 0x500 */ OS_REG_WRITE(ah, AR_PCU_TXBUF_CTRL, 0x500); } else { OS_REG_WRITE(ah, AR_PCU_TXBUF_CTRL, AR_PCU_TXBUF_CTRL_USABLE_SIZE); } /* * Enable HPQ for UAPSD */ if (pCap->halHwUapsdTrig == AH_TRUE) { /* Only enable this if HAL capabilities says it is OK */ if (AH_PRIVATE(ah)->ah_opmode == HAL_M_HOSTAP) { OS_REG_WRITE(ah, AR_HP_Q_CONTROL, AR_HPQ_ENABLE | AR_HPQ_UAPSD | AR_HPQ_UAPSD_TRIGGER_EN); } } else { /* use default value from ini file - which disable HPQ queue usage */ } /* * set the transmit status ring */ ar9300_reset_tx_status_ring(ah); /* * set rxbp threshold. Must be non-zero for RX_EOL to occur. * For Osprey 2.0+, keep the original thresholds * otherwise performance is lost due to excessive RX EOL interrupts. */ OS_REG_RMW_FIELD(ah, AR_RXBP_THRESH, AR_RXBP_THRESH_HP, 0x1); OS_REG_RMW_FIELD(ah, AR_RXBP_THRESH, AR_RXBP_THRESH_LP, 0x1); /* * set receive buffer size. */ if (ahp->rx_buf_size) { OS_REG_WRITE(ah, AR_DATABUF, ahp->rx_buf_size); } } static inline void ar9300_init_bb(struct ath_hal *ah, struct ieee80211_channel *chan) { u_int32_t synth_delay; /* * Wait for the frequency synth to settle (synth goes on * via AR_PHY_ACTIVE_EN). Read the phy active delay register. * Value is in 100ns increments. 
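     *
     * Worked example (illustrative): a delay field of 110 means 11us in
     * 100ns units; the OFDM wait below is 110 / 10 = 11us, while CCK is
     * scaled for its slower 22 MHz baseband clock to (4 * 110) / 22 =
     * 20us. BASE_ACTIVATE_DELAY is added on top of either value.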
*/ synth_delay = OS_REG_READ(ah, AR_PHY_RX_DELAY) & AR_PHY_RX_DELAY_DELAY; if (IEEE80211_IS_CHAN_CCK(chan)) { synth_delay = (4 * synth_delay) / 22; } else { synth_delay /= 10; } /* Activate the PHY (includes baseband activate + synthesizer on) */ OS_REG_WRITE(ah, AR_PHY_ACTIVE, AR_PHY_ACTIVE_EN); /* * There is an issue if the AP starts the calibration before * the base band timeout completes. This could result in the * rx_clear false triggering. As a workaround we add delay an * extra BASE_ACTIVATE_DELAY usecs to ensure this condition * does not happen. */ OS_DELAY(synth_delay + BASE_ACTIVATE_DELAY); } static inline void ar9300_init_interrupt_masks(struct ath_hal *ah, HAL_OPMODE opmode) { struct ath_hal_9300 *ahp = AH9300(ah); u_int32_t msi_cfg = 0; u_int32_t sync_en_def = AR9300_INTR_SYNC_DEFAULT; /* * Setup interrupt handling. Note that ar9300_reset_tx_queue * manipulates the secondary IMR's as queues are enabled * and disabled. This is done with RMW ops to insure the * settings we make here are preserved. */ ahp->ah_mask_reg = AR_IMR_TXERR | AR_IMR_TXURN | AR_IMR_RXERR | AR_IMR_RXORN | AR_IMR_BCNMISC; if (ahp->ah_intr_mitigation_rx) { /* enable interrupt mitigation for rx */ ahp->ah_mask_reg |= AR_IMR_RXINTM | AR_IMR_RXMINTR | AR_IMR_RXOK_HP; msi_cfg |= AR_INTCFG_MSI_RXINTM | AR_INTCFG_MSI_RXMINTR; } else { ahp->ah_mask_reg |= AR_IMR_RXOK_LP | AR_IMR_RXOK_HP; msi_cfg |= AR_INTCFG_MSI_RXOK; } if (ahp->ah_intr_mitigation_tx) { /* enable interrupt mitigation for tx */ ahp->ah_mask_reg |= AR_IMR_TXINTM | AR_IMR_TXMINTR; msi_cfg |= AR_INTCFG_MSI_TXINTM | AR_INTCFG_MSI_TXMINTR; } else { ahp->ah_mask_reg |= AR_IMR_TXOK; msi_cfg |= AR_INTCFG_MSI_TXOK; } if (opmode == HAL_M_HOSTAP) { ahp->ah_mask_reg |= AR_IMR_MIB; } OS_REG_WRITE(ah, AR_IMR, ahp->ah_mask_reg); OS_REG_WRITE(ah, AR_IMR_S2, OS_REG_READ(ah, AR_IMR_S2) | AR_IMR_S2_GTT); ahp->ah_mask2Reg = OS_REG_READ(ah, AR_IMR_S2); if (ah->ah_config.ath_hal_enable_msi) { /* Cache MSI register value */ ahp->ah_msi_reg = OS_REG_READ(ah, AR_HOSTIF_REG(ah, AR_PCIE_MSI)); ahp->ah_msi_reg |= AR_PCIE_MSI_HW_DBI_WR_EN; if (AR_SREV_POSEIDON(ah)) { ahp->ah_msi_reg &= AR_PCIE_MSI_HW_INT_PENDING_ADDR_MSI_64; } else { ahp->ah_msi_reg &= AR_PCIE_MSI_HW_INT_PENDING_ADDR; } /* Program MSI configuration */ OS_REG_WRITE(ah, AR_INTCFG, msi_cfg); } /* * debug - enable to see all synchronous interrupts status */ /* Clear any pending sync cause interrupts */ OS_REG_WRITE(ah, AR_HOSTIF_REG(ah, AR_INTR_SYNC_CAUSE), 0xFFFFFFFF); /* Allow host interface sync interrupt sources to set cause bit */ if (AR_SREV_POSEIDON(ah)) { sync_en_def = AR9300_INTR_SYNC_DEF_NO_HOST1_PERR; } else if (AR_SREV_WASP(ah)) { sync_en_def = AR9340_INTR_SYNC_DEFAULT; } OS_REG_WRITE(ah, AR_HOSTIF_REG(ah, AR_INTR_SYNC_ENABLE), sync_en_def); /* _Disable_ host interface sync interrupt when cause bits set */ OS_REG_WRITE(ah, AR_HOSTIF_REG(ah, AR_INTR_SYNC_MASK), 0); OS_REG_WRITE(ah, AR_HOSTIF_REG(ah, AR_INTR_PRIO_ASYNC_ENABLE), 0); OS_REG_WRITE(ah, AR_HOSTIF_REG(ah, AR_INTR_PRIO_ASYNC_MASK), 0); OS_REG_WRITE(ah, AR_HOSTIF_REG(ah, AR_INTR_PRIO_SYNC_ENABLE), 0); OS_REG_WRITE(ah, AR_HOSTIF_REG(ah, AR_INTR_PRIO_SYNC_MASK), 0); } static inline void ar9300_init_qos(struct ath_hal *ah) { OS_REG_WRITE(ah, AR_MIC_QOS_CONTROL, 0x100aa); /* XXX magic */ OS_REG_WRITE(ah, AR_MIC_QOS_SELECT, 0x3210); /* XXX magic */ /* Turn on NOACK Support for QoS packets */ OS_REG_WRITE(ah, AR_QOS_NO_ACK, SM(2, AR_QOS_NO_ACK_TWO_BIT) | SM(5, AR_QOS_NO_ACK_BIT_OFF) | SM(0, AR_QOS_NO_ACK_BYTE_OFF)); /* * initialize TXOP for all TIDs */ 
OS_REG_WRITE(ah, AR_TXOP_X, AR_TXOP_X_VAL); OS_REG_WRITE(ah, AR_TXOP_0_3, 0xFFFFFFFF); OS_REG_WRITE(ah, AR_TXOP_4_7, 0xFFFFFFFF); OS_REG_WRITE(ah, AR_TXOP_8_11, 0xFFFFFFFF); OS_REG_WRITE(ah, AR_TXOP_12_15, 0xFFFFFFFF); } static inline void ar9300_init_user_settings(struct ath_hal *ah) { struct ath_hal_9300 *ahp = AH9300(ah); /* Restore user-specified settings */ HALDEBUG(ah, HAL_DEBUG_RESET, "--AP %s ahp->ah_misc_mode 0x%x\n", __func__, ahp->ah_misc_mode); if (ahp->ah_misc_mode != 0) { OS_REG_WRITE(ah, AR_PCU_MISC, OS_REG_READ(ah, AR_PCU_MISC) | ahp->ah_misc_mode); } if (ahp->ah_get_plcp_hdr) { OS_REG_CLR_BIT(ah, AR_PCU_MISC, AR_PCU_SEL_EVM); } if (ahp->ah_slot_time != (u_int) -1) { ar9300_set_slot_time(ah, ahp->ah_slot_time); } if (ahp->ah_ack_timeout != (u_int) -1) { ar9300_set_ack_timeout(ah, ahp->ah_ack_timeout); } if (AH_PRIVATE(ah)->ah_diagreg != 0) { OS_REG_SET_BIT(ah, AR_DIAG_SW, AH_PRIVATE(ah)->ah_diagreg); } if (ahp->ah_beacon_rssi_threshold != 0) { ar9300_set_hw_beacon_rssi_threshold(ah, ahp->ah_beacon_rssi_threshold); } -#ifdef ATH_SUPPORT_DFS +//#ifdef ATH_SUPPORT_DFS if (ahp->ah_cac_quiet_enabled) { ar9300_cac_tx_quiet(ah, 1); } -#endif /* ATH_SUPPORT_DFS */ +//#endif /* ATH_SUPPORT_DFS */ } int ar9300_get_spur_info(struct ath_hal * ah, int *enable, int len, u_int16_t *freq) { // struct ath_hal_private *ap = AH_PRIVATE(ah); int i, j; for (i = 0; i < len; i++) { freq[i] = 0; } *enable = ah->ah_config.ath_hal_spur_mode; for (i = 0, j = 0; i < AR_EEPROM_MODAL_SPURS; i++) { if (AH9300(ah)->ath_hal_spur_chans[i][0] != AR_NO_SPUR) { freq[j++] = AH9300(ah)->ath_hal_spur_chans[i][0]; HALDEBUG(ah, HAL_DEBUG_ANI, "1. get spur %d\n", AH9300(ah)->ath_hal_spur_chans[i][0]); } if (AH9300(ah)->ath_hal_spur_chans[i][1] != AR_NO_SPUR) { freq[j++] = AH9300(ah)->ath_hal_spur_chans[i][1]; HALDEBUG(ah, HAL_DEBUG_ANI, "2. 
get spur %d\n", AH9300(ah)->ath_hal_spur_chans[i][1]);
        }
    }
    return 0;
}

#define ATH_HAL_2GHZ_FREQ_MIN   20000
#define ATH_HAL_2GHZ_FREQ_MAX   29999
#define ATH_HAL_5GHZ_FREQ_MIN   50000
#define ATH_HAL_5GHZ_FREQ_MAX   59999

#if 0
int
ar9300_set_spur_info(struct ath_hal * ah, int enable, int len, u_int16_t *freq)
{
    struct ath_hal_private *ap = AH_PRIVATE(ah);
    int i, j, k;

    ap->ah_config.ath_hal_spur_mode = enable;
    if (ap->ah_config.ath_hal_spur_mode == SPUR_ENABLE_IOCTL) {
        for (i = 0; i < AR_EEPROM_MODAL_SPURS; i++) {
            AH9300(ah)->ath_hal_spur_chans[i][0] = AR_NO_SPUR;
            AH9300(ah)->ath_hal_spur_chans[i][1] = AR_NO_SPUR;
        }
        for (i = 0, j = 0, k = 0; i < len; i++) {
            if (freq[i] > ATH_HAL_2GHZ_FREQ_MIN &&
                freq[i] < ATH_HAL_2GHZ_FREQ_MAX)
            {
                /* 2GHz Spur */
                if (j < AR_EEPROM_MODAL_SPURS) {
                    AH9300(ah)->ath_hal_spur_chans[j++][1] = freq[i];
                    HALDEBUG(ah, HAL_DEBUG_ANI, "1 set spur %d\n", freq[i]);
                }
            } else if (freq[i] > ATH_HAL_5GHZ_FREQ_MIN &&
                       freq[i] < ATH_HAL_5GHZ_FREQ_MAX)
            {
                /* 5GHz Spur */
                if (k < AR_EEPROM_MODAL_SPURS) {
                    AH9300(ah)->ath_hal_spur_chans[k++][0] = freq[i];
                    HALDEBUG(ah, HAL_DEBUG_ANI, "2 set spur %d\n", freq[i]);
                }
            }
        }
    }
    return 0;
}
#endif

#define ar9300_check_op_mode(_opmode) \
    ((_opmode == HAL_M_STA) || (_opmode == HAL_M_IBSS) ||\
     (_opmode == HAL_M_HOSTAP) || (_opmode == HAL_M_MONITOR))

#ifndef ATH_NF_PER_CHAN
/*
 * Fix the first noise floor value after reset not being correct;
 * ART also needs this to fix low-rate sensitivity coming out too low.
 */
static int
First_NFCal(struct ath_hal *ah, HAL_CHANNEL_INTERNAL *ichan,
    int is_scan, struct ieee80211_channel *chan)
{
    HAL_NFCAL_HIST_FULL *nfh;
    int i, j, k;
    int16_t nfarray[HAL_NUM_NF_READINGS] = {0};
    int is_2g = 0;
    int nf_hist_len;
    int stats = 0;
    int16_t nf_buf[HAL_NUM_NF_READINGS];
#define IS(_c, _f)       (((_c)->channel_flags & _f) || 0)

    if ((!is_scan) &&
        chan->ic_freq == AH_PRIVATE(ah)->ah_curchan->ic_freq)
    {
        nfh = &AH_PRIVATE(ah)->nf_cal_hist;
    } else {
        nfh = (HAL_NFCAL_HIST_FULL *) &ichan->nf_cal_hist;
    }

    ar9300_start_nf_cal(ah);
    for (j = 0; j < 10000; j++) {
        if ((OS_REG_READ(ah, AR_PHY_AGC_CONTROL) & AR_PHY_AGC_CONTROL_NF) == 0) {
            break;
        }
        OS_DELAY(10);
    }
    if (j < 10000) {
        is_2g = IEEE80211_IS_CHAN_2GHZ(chan);
        ar9300_upload_noise_floor(ah, is_2g, nfarray);

        if (is_scan) {
            /*
             * This channel's NF cal info is just a HAL_NFCAL_HIST_SMALL
             * struct rather than a HAL_NFCAL_HIST_FULL struct.
             * As long as we only use the first history element of
             * nf_cal_buffer (nf_cal_buffer[0][0:HAL_NUM_NF_READINGS-1]),
             * we can use HAL_NFCAL_HIST_SMALL and HAL_NFCAL_HIST_FULL
             * interchangeably.
             */
            nfh = (HAL_NFCAL_HIST_FULL *) &ichan->nf_cal_hist;
            nf_hist_len = HAL_NF_CAL_HIST_LEN_SMALL;
        } else {
            nfh = &AH_PRIVATE(ah)->nf_cal_hist;
            nf_hist_len = HAL_NF_CAL_HIST_LEN_FULL;
        }

        for (i = 0; i < HAL_NUM_NF_READINGS; i++) {
            for (k = 0; k < HAL_NF_CAL_HIST_LEN_FULL; k++) {
                nfh->nf_cal_buffer[k][i] = nfarray[i];
            }
            nfh->base.priv_nf[i] = ar9300_limit_nf_range(ah,
                ar9300_get_nf_hist_mid(ah, nfh, i, nf_hist_len));
        }
        //ar9300StoreNewNf(ah, ichan, is_scan);

        /*
         * See if the NF value from the old channel should be
         * retained when switching to a new channel.
         * TBD: this may need to be changed, as it wipes out the
         * purpose of saving NF values for each channel.
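         *
         * XXX illustrative reading of the loop below: when the fresh
         * chain-0 sample in nf_cal_buffer[0][i] is below the per-band
         * bound (e.g. AR_PHY_CCA_MAX_GOOD_VAL_OSPREY_2GHZ on 2GHz),
         * the channel's buffered entry is overwritten with the
         * home-channel history value, so one cold first-reset sample
         * does not poison the per-channel buffer.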
*/ for (i = 0; i < HAL_NUM_NF_READINGS; i++) { if (IEEE80211_IS_CHAN_2GHZ(chan)) { if (nfh->nf_cal_buffer[0][i] < AR_PHY_CCA_MAX_GOOD_VAL_OSPREY_2GHZ) { ichan->nf_cal_hist.nf_cal_buffer[0][i] = AH_PRIVATE(ah)->nf_cal_hist.nf_cal_buffer[0][i]; } } else { if (AR_SREV_AR9580(ah)) { if (nfh->nf_cal_buffer[0][i] < AR_PHY_CCA_NOM_VAL_PEACOCK_5GHZ) { ichan->nf_cal_hist.nf_cal_buffer[0][i] = AH_PRIVATE(ah)->nf_cal_hist.nf_cal_buffer[0][i]; } } else { if (nfh->nf_cal_buffer[0][i] < AR_PHY_CCA_NOM_VAL_OSPREY_5GHZ) { ichan->nf_cal_hist.nf_cal_buffer[0][i] = AH_PRIVATE(ah)->nf_cal_hist.nf_cal_buffer[0][i]; } } } } /* * Copy the channel's NF buffer, which may have been modified * just above here, to the full NF history buffer. */ ar9300_reset_nf_hist_buff(ah, ichan); ar9300_get_nf_hist_base(ah, ichan, is_scan, nf_buf); ar9300_load_nf(ah, nf_buf); stats = 0; } else { stats = 1; } #undef IS return stats; } #endif /* * Places the device in and out of reset and then places sane * values in the registers based on EEPROM config, initialization * vectors (as determined by the mode), and station configuration * * b_channel_change is used to preserve DMA/PCU registers across * a HW Reset during channel change. */ HAL_BOOL ar9300_reset(struct ath_hal *ah, HAL_OPMODE opmode, struct ieee80211_channel *chan, HAL_HT_MACMODE macmode, u_int8_t txchainmask, u_int8_t rxchainmask, HAL_HT_EXTPROTSPACING extprotspacing, HAL_BOOL b_channel_change, HAL_STATUS *status, int is_scan) { #define FAIL(_code) do { ecode = _code; goto bad; } while (0) u_int32_t save_led_state; struct ath_hal_9300 *ahp = AH9300(ah); struct ath_hal_private *ap = AH_PRIVATE(ah); HAL_CHANNEL_INTERNAL *ichan; //const struct ieee80211_channel *curchan = ap->ah_curchan; #if ATH_SUPPORT_MCI HAL_BOOL save_full_sleep = ahp->ah_chip_full_sleep; #endif u_int32_t save_def_antenna; u_int32_t mac_sta_id1; HAL_STATUS ecode; int i, rx_chainmask; int nf_hist_buff_reset = 0; int16_t nf_buf[HAL_NUM_NF_READINGS]; #ifdef ATH_FORCE_PPM u_int32_t save_force_val, tmp_reg; #endif u_int8_t clk_25mhz = AH9300(ah)->clk_25mhz; HAL_BOOL stopped, cal_ret; HAL_BOOL apply_last_iqcorr = AH_FALSE; if (OS_REG_READ(ah, AR_IER) == AR_IER_ENABLE) { HALDEBUG(AH_NULL, HAL_DEBUG_UNMASKABLE, "** Reset called with WLAN " "interrupt enabled %08x **\n", ar9300_get_interrupts(ah)); } /* * Set the status to "ok" by default to cover the cases * where we return false without going to "bad" */ HALASSERT(status); *status = HAL_OK; if ((ah->ah_config.ath_hal_sta_update_tx_pwr_enable)) { AH9300(ah)->green_tx_status = HAL_RSSI_TX_POWER_NONE; } #if ATH_SUPPORT_MCI if (AH_PRIVATE(ah)->ah_caps.halMciSupport && (AR_SREV_JUPITER_20_OR_LATER(ah) || AR_SREV_APHRODITE(ah))) { ar9300_mci_2g5g_changed(ah, IEEE80211_IS_CHAN_2GHZ(chan)); } #endif ahp->ah_ext_prot_spacing = extprotspacing; ahp->ah_tx_chainmask = txchainmask & ap->ah_caps.halTxChainMask; ahp->ah_rx_chainmask = rxchainmask & ap->ah_caps.halRxChainMask; ahp->ah_tx_cal_chainmask = ap->ah_caps.halTxChainMask; ahp->ah_rx_cal_chainmask = ap->ah_caps.halRxChainMask; /* * Keep the previous optinal txchainmask value */ HALASSERT(ar9300_check_op_mode(opmode)); OS_MARK(ah, AH_MARK_RESET, b_channel_change); /* * Map public channel to private. 
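     *
     * (ar9300_check_chan() resolves the net80211 ieee80211_channel to
     * the HAL's internal HAL_CHANNEL_INTERNAL entry; a NULL result
     * below means the channel was never configured into the HAL and
     * the reset fails with HAL_EINVAL.)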
*/ ichan = ar9300_check_chan(ah, chan); if (ichan == AH_NULL) { HALDEBUG(ah, HAL_DEBUG_CHANNEL, "%s: invalid channel %u/0x%x; no mapping\n", __func__, chan->ic_freq, chan->ic_flags); FAIL(HAL_EINVAL); } ichan->paprd_table_write_done = 0; /* Clear PAPRD table write flag */ #if 0 chan->paprd_table_write_done = 0; /* Clear PAPRD table write flag */ #endif if (ar9300_get_power_mode(ah) != HAL_PM_FULL_SLEEP) { /* Need to stop RX DMA before reset otherwise chip might hang */ stopped = ar9300_set_rx_abort(ah, AH_TRUE); /* abort and disable PCU */ ar9300_set_rx_filter(ah, 0); stopped &= ar9300_stop_dma_receive(ah, 0); /* stop and disable RX DMA */ if (!stopped) { /* * During the transition from full sleep to reset, * recv DMA regs are not available to be read */ HALDEBUG(ah, HAL_DEBUG_UNMASKABLE, "%s[%d]: ar9300_stop_dma_receive failed\n", __func__, __LINE__); b_channel_change = AH_FALSE; } } else { HALDEBUG(ah, HAL_DEBUG_UNMASKABLE, "%s[%d]: Chip is already in full sleep\n", __func__, __LINE__); } #if ATH_SUPPORT_MCI if ((AH_PRIVATE(ah)->ah_caps.halMciSupport) && (ahp->ah_mci_bt_state == MCI_BT_CAL_START)) { u_int32_t payload[4] = {0, 0, 0, 0}; HALDEBUG(ah, HAL_DEBUG_BT_COEX, "(MCI) %s: Stop rx for BT cal.\n", __func__); ahp->ah_mci_bt_state = MCI_BT_CAL; /* * MCIFIX: disable mci interrupt here. This is to avoid SW_MSG_DONE or * RX_MSG bits to trigger MCI_INT and lead to mci_intr reentry. */ ar9300_mci_disable_interrupt(ah); HALDEBUG(ah, HAL_DEBUG_BT_COEX, "(MCI) %s: Send WLAN_CAL_GRANT\n", __func__); MCI_GPM_SET_CAL_TYPE(payload, MCI_GPM_WLAN_CAL_GRANT); ar9300_mci_send_message(ah, MCI_GPM, 0, payload, 16, AH_TRUE, AH_FALSE); /* Wait BT calibration to be completed for 25ms */ HALDEBUG(ah, HAL_DEBUG_BT_COEX, "(MCI) %s: BT is calibrating.\n", __func__); if (ar9300_mci_wait_for_gpm(ah, MCI_GPM_BT_CAL_DONE, 0, 25000)) { HALDEBUG(ah, HAL_DEBUG_BT_COEX, "(MCI) %s: Got BT_CAL_DONE.\n", __func__); } else { HALDEBUG(ah, HAL_DEBUG_BT_COEX, "(MCI) %s: ### BT cal takes too long. Force bt_state to be bt_awake.\n", __func__); } ahp->ah_mci_bt_state = MCI_BT_AWAKE; /* MCIFIX: enable mci interrupt here */ ar9300_mci_enable_interrupt(ah); return AH_TRUE; } #endif /* Bring out of sleep mode */ if (!ar9300_set_power_mode(ah, HAL_PM_AWAKE, AH_TRUE)) { *status = HAL_INV_PMODE; return AH_FALSE; } /* Check the Rx mitigation config again, it might have changed * during attach in ath_vap_attach. */ if (ah->ah_config.ath_hal_intr_mitigation_rx != 0) { ahp->ah_intr_mitigation_rx = AH_TRUE; } else { ahp->ah_intr_mitigation_rx = AH_FALSE; } /* * XXX TODO FreeBSD: * * This is painful because we don't have a non-const channel pointer * at this stage. * * Make sure this gets fixed! */ #if 0 /* Get the value from the previous NF cal and update history buffer */ if (curchan && (ahp->ah_chip_full_sleep != AH_TRUE)) { if(ahp->ah_chip_reset_done){ ahp->ah_chip_reset_done = 0; } else { /* * is_scan controls updating NF for home channel or off channel. * Home -> Off, update home channel * Off -> Home, update off channel * Home -> Home, uppdate home channel */ if (ap->ah_curchan->channel != chan->channel) ar9300_store_new_nf(ah, curchan, !is_scan); else ar9300_store_new_nf(ah, curchan, is_scan); } } #endif /* * Account for the effect of being in either the 2 GHz or 5 GHz band * on the nominal, max allowable, and min allowable noise floor values. */ AH9300(ah)->nfp = IS_CHAN_2GHZ(ichan) ? &ahp->nf_2GHz : &ahp->nf_5GHz; /* * XXX FreeBSD For now, don't apply the last IQ correction. 
* * This should be done when scorpion is enabled on FreeBSD; just be * sure to fix this channel match code so it uses net80211 flags * instead. */ #if 0 if (AR_SREV_SCORPION(ah) && curchan && (chan->channel == curchan->channel) && ((chan->channel_flags & (CHANNEL_ALL|CHANNEL_HALF|CHANNEL_QUARTER)) == (curchan->channel_flags & (CHANNEL_ALL | CHANNEL_HALF | CHANNEL_QUARTER)))) { apply_last_iqcorr = AH_TRUE; } #endif apply_last_iqcorr = AH_FALSE; #ifndef ATH_NF_PER_CHAN /* * If there's only one full-size home-channel NF history buffer * rather than a full-size NF history buffer per channel, decide * whether to (re)initialize the home-channel NF buffer. * If this is just a channel change for a scan, or if the channel * is not being changed, don't mess up the home channel NF history * buffer with NF values from this scanned channel. If we're * changing the home channel to a new channel, reset the home-channel * NF history buffer with the most accurate NF known for the new channel. */ if (!is_scan && (!ap->ah_curchan || ap->ah_curchan->ic_freq != chan->ic_freq)) // || // ap->ah_curchan->channel_flags != chan->channel_flags)) { nf_hist_buff_reset = 1; ar9300_reset_nf_hist_buff(ah, ichan); } #endif /* * In case of * - offchan scan, or * - same channel and RX IQ Cal already available * disable RX IQ Cal. */ if (is_scan) { ahp->ah_skip_rx_iq_cal = AH_TRUE; HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "Skip RX IQ Cal due to scanning\n"); } else { #if 0 /* XXX FreeBSD: always just do the RX IQ cal */ /* XXX I think it's just going to speed things up; I don't think it's to avoid chan bugs */ if (ahp->ah_rx_cal_complete && ahp->ah_rx_cal_chan == ichan->channel && ahp->ah_rx_cal_chan_flag == chan->channel_flags) { ahp->ah_skip_rx_iq_cal = AH_TRUE; HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "Skip RX IQ Cal due to same channel with completed RX IQ Cal\n"); } else #endif ahp->ah_skip_rx_iq_cal = AH_FALSE; } /* FreeBSD: clear the channel survey data */ ath_hal_survey_clear(ah); /* * Fast channel change (Change synthesizer based on channel freq * without resetting chip) * Don't do it when * - Flag is not set * - Chip is just coming out of full sleep * - Channel to be set is same as current channel * - Channel flags are different, like when moving from 2GHz to 5GHz * channels * - Merlin: Switching in/out of fast clock enabled channels * (not currently coded, since fast clock is enabled * across the 5GHz band * and we already do a full reset when switching in/out * of 5GHz channels) */ #if 0 if (b_channel_change && (ahp->ah_chip_full_sleep != AH_TRUE) && (AH_PRIVATE(ah)->ah_curchan != AH_NULL) && ((chan->channel != AH_PRIVATE(ah)->ah_curchan->channel) && (((CHANNEL_ALL|CHANNEL_HALF|CHANNEL_QUARTER) & chan->channel_flags) == ((CHANNEL_ALL|CHANNEL_HALF|CHANNEL_QUARTER) & AH_PRIVATE(ah)->ah_curchan->channel_flags)))) { if (ar9300_channel_change(ah, chan, ichan, macmode)) { chan->channel_flags = ichan->channel_flags; chan->priv_flags = ichan->priv_flags; AH_PRIVATE(ah)->ah_curchan->ah_channel_time = 0; AH_PRIVATE(ah)->ah_curchan->ah_tsf_last = ar9300_get_tsf64(ah); /* * Load the NF from history buffer of the current channel. * NF is slow time-variant, so it is OK to use a historical value. 
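     *
     * The recurring NF pattern (the same calls appear in the live code
     * further below):
     *
     *   ar9300_get_nf_hist_base(ah, ichan, is_scan, nf_buf); // cached per-chain NF
     *   ar9300_load_nf(ah, nf_buf);                          // program it into the BB
     *   ar9300_start_nf_cal(ah);                             // refine in background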
*/ ar9300_get_nf_hist_base(ah, AH_PRIVATE(ah)->ah_curchan, is_scan, nf_buf); ar9300_load_nf(ah, nf_buf); /* start NF calibration, without updating BB NF register*/ ar9300_start_nf_cal(ah); /* * If channel_change completed and DMA was stopped * successfully - skip the rest of reset */ if (AH9300(ah)->ah_dma_stuck != AH_TRUE) { WAR_USB_DISABLE_PLL_LOCK_DETECT(ah); #if ATH_SUPPORT_MCI if (AH_PRIVATE(ah)->ah_caps.halMciSupport && ahp->ah_mci_ready) { ar9300_mci_2g5g_switch(ah, AH_TRUE); } #endif return HAL_OK; } } } #endif /* #if 0 */ #if ATH_SUPPORT_MCI if (AH_PRIVATE(ah)->ah_caps.halMciSupport) { ar9300_mci_disable_interrupt(ah); if (ahp->ah_mci_ready && !save_full_sleep) { ar9300_mci_mute_bt(ah); OS_DELAY(20); OS_REG_WRITE(ah, AR_BTCOEX_CTRL, 0); } ahp->ah_mci_bt_state = MCI_BT_SLEEP; ahp->ah_mci_ready = AH_FALSE; } #endif AH9300(ah)->ah_dma_stuck = AH_FALSE; #ifdef ATH_FORCE_PPM /* Preserve force ppm state */ save_force_val = OS_REG_READ(ah, AR_PHY_TIMING2) & (AR_PHY_TIMING2_USE_FORCE | AR_PHY_TIMING2_FORCE_VAL); #endif /* * Preserve the antenna on a channel change */ save_def_antenna = OS_REG_READ(ah, AR_DEF_ANTENNA); if (0 == ahp->ah_smartantenna_enable ) { if (save_def_antenna == 0) { save_def_antenna = 1; } } /* Save hardware flag before chip reset clears the register */ mac_sta_id1 = OS_REG_READ(ah, AR_STA_ID1) & AR_STA_ID1_BASE_RATE_11B; /* Save led state from pci config register */ save_led_state = OS_REG_READ(ah, AR_CFG_LED) & (AR_CFG_LED_ASSOC_CTL | AR_CFG_LED_MODE_SEL | AR_CFG_LED_BLINK_THRESH_SEL | AR_CFG_LED_BLINK_SLOW); /* Mark PHY inactive prior to reset, to be undone in ar9300_init_bb () */ ar9300_mark_phy_inactive(ah); if (!ar9300_chip_reset(ah, chan)) { HALDEBUG(ah, HAL_DEBUG_RESET, "%s: chip reset failed\n", __func__); FAIL(HAL_EIO); } OS_MARK(ah, AH_MARK_RESET_LINE, __LINE__); /* Disable JTAG */ OS_REG_SET_BIT(ah, AR_HOSTIF_REG(ah, AR_GPIO_INPUT_EN_VAL), AR_GPIO_JTAG_DISABLE); /* * Note that ar9300_init_chain_masks() is called from within * ar9300_process_ini() to ensure the swap bit is set before * the pdadc table is written. */ ecode = ar9300_process_ini(ah, chan, ichan, macmode); if (ecode != HAL_OK) { goto bad; } /* * Configuring WMAC PLL values for 25/40 MHz */ if(AR_SREV_WASP(ah) || AR_SREV_HONEYBEE(ah) || AR_SREV_SCORPION(ah) ) { if(clk_25mhz) { OS_REG_WRITE(ah, AR_RTC_DERIVED_RTC_CLK, (0x17c << 1)); // 32KHz sleep clk } else { OS_REG_WRITE(ah, AR_RTC_DERIVED_RTC_CLK, (0x261 << 1)); // 32KHz sleep clk } OS_DELAY(100); } ahp->ah_immunity_on = AH_FALSE; if (AR_SREV_JUPITER(ah) || AR_SREV_APHRODITE(ah)) { ahp->tx_iq_cal_enable = OS_REG_READ_FIELD(ah, AR_PHY_TX_IQCAL_CONTROL_0(ah), AR_PHY_TX_IQCAL_CONTROL_0_ENABLE_TXIQ_CAL) ? 1 : 0; } ahp->tx_cl_cal_enable = (OS_REG_READ(ah, AR_PHY_CL_CAL_CTL) & AR_PHY_CL_CAL_ENABLE) ? 1 : 0; /* For devices with full HW RIFS Rx support (Sowl/Howl/Merlin, etc), * restore register settings from prior to reset. 
*/ if ((AH_PRIVATE(ah)->ah_curchan != AH_NULL) && (ar9300_get_capability(ah, HAL_CAP_LDPCWAR, 0, AH_NULL) == HAL_OK)) { /* Re-program RIFS Rx policy after reset */ ar9300_set_rifs_delay(ah, ahp->ah_rifs_enabled); } #if ATH_SUPPORT_MCI if (AH_PRIVATE(ah)->ah_caps.halMciSupport) { ar9300_mci_reset(ah, AH_FALSE, IS_CHAN_2GHZ(ichan), save_full_sleep); } #endif /* Initialize Management Frame Protection */ ar9300_init_mfp(ah); ahp->ah_immunity_vals[0] = OS_REG_READ_FIELD(ah, AR_PHY_SFCORR_LOW, AR_PHY_SFCORR_LOW_M1_THRESH_LOW); ahp->ah_immunity_vals[1] = OS_REG_READ_FIELD(ah, AR_PHY_SFCORR_LOW, AR_PHY_SFCORR_LOW_M2_THRESH_LOW); ahp->ah_immunity_vals[2] = OS_REG_READ_FIELD(ah, AR_PHY_SFCORR, AR_PHY_SFCORR_M1_THRESH); ahp->ah_immunity_vals[3] = OS_REG_READ_FIELD(ah, AR_PHY_SFCORR, AR_PHY_SFCORR_M2_THRESH); ahp->ah_immunity_vals[4] = OS_REG_READ_FIELD(ah, AR_PHY_SFCORR, AR_PHY_SFCORR_M2COUNT_THR); ahp->ah_immunity_vals[5] = OS_REG_READ_FIELD(ah, AR_PHY_SFCORR_LOW, AR_PHY_SFCORR_LOW_M2COUNT_THR_LOW); /* Write delta slope for OFDM enabled modes (A, G, Turbo) */ if (IEEE80211_IS_CHAN_OFDM(chan) || IEEE80211_IS_CHAN_HT(chan)) { ar9300_set_delta_slope(ah, chan); } ar9300_spur_mitigate(ah, chan); if (!ar9300_eeprom_set_board_values(ah, chan)) { HALDEBUG(ah, HAL_DEBUG_EEPROM, "%s: error setting board options\n", __func__); FAIL(HAL_EIO); } #ifdef ATH_HAL_WAR_REG16284_APH128 /* temp work around, will be removed. */ if (AR_SREV_WASP(ah)) { OS_REG_WRITE(ah, 0x16284, 0x1553e000); } #endif OS_MARK(ah, AH_MARK_RESET_LINE, __LINE__); OS_REG_WRITE(ah, AR_STA_ID0, LE_READ_4(ahp->ah_macaddr)); OS_REG_WRITE(ah, AR_STA_ID1, LE_READ_2(ahp->ah_macaddr + 4) | mac_sta_id1 | AR_STA_ID1_RTS_USE_DEF | (ah->ah_config.ath_hal_6mb_ack ? AR_STA_ID1_ACKCTS_6MB : 0) | ahp->ah_sta_id1_defaults ); ar9300_set_operating_mode(ah, opmode); /* Set Venice BSSID mask according to current state */ OS_REG_WRITE(ah, AR_BSSMSKL, LE_READ_4(ahp->ah_bssid_mask)); OS_REG_WRITE(ah, AR_BSSMSKU, LE_READ_2(ahp->ah_bssid_mask + 4)); /* Restore previous antenna */ OS_REG_WRITE(ah, AR_DEF_ANTENNA, save_def_antenna); #ifdef ATH_FORCE_PPM /* Restore force ppm state */ tmp_reg = OS_REG_READ(ah, AR_PHY_TIMING2) & ~(AR_PHY_TIMING2_USE_FORCE | AR_PHY_TIMING2_FORCE_VAL); OS_REG_WRITE(ah, AR_PHY_TIMING2, tmp_reg | save_force_val); #endif /* then our BSSID and assocID */ OS_REG_WRITE(ah, AR_BSS_ID0, LE_READ_4(ahp->ah_bssid)); OS_REG_WRITE(ah, AR_BSS_ID1, LE_READ_2(ahp->ah_bssid + 4) | ((ahp->ah_assoc_id & 0x3fff) << AR_BSS_ID1_AID_S)); OS_REG_WRITE(ah, AR_ISR, ~0); /* cleared on write */ OS_REG_RMW_FIELD(ah, AR_RSSI_THR, AR_RSSI_THR_BM_THR, INIT_RSSI_THR); /* HW beacon processing */ /* * XXX what happens if I just leave filter_interval=0? * it stays disabled? */ OS_REG_RMW_FIELD(ah, AR_RSSI_THR, AR_RSSI_BCN_WEIGHT, INIT_RSSI_BEACON_WEIGHT); OS_REG_SET_BIT(ah, AR_HWBCNPROC1, AR_HWBCNPROC1_CRC_ENABLE | AR_HWBCNPROC1_EXCLUDE_TIM_ELM); if (ah->ah_config.ath_hal_beacon_filter_interval) { OS_REG_RMW_FIELD(ah, AR_HWBCNPROC2, AR_HWBCNPROC2_FILTER_INTERVAL, ah->ah_config.ath_hal_beacon_filter_interval); OS_REG_SET_BIT(ah, AR_HWBCNPROC2, AR_HWBCNPROC2_FILTER_INTERVAL_ENABLE); } /* * Set Channel now modifies bank 6 parameters for FOWL workaround * to force rf_pwd_icsyndiv bias current as function of synth * frequency.Thus must be called after ar9300_process_ini() to ensure * analog register cache is valid. 
*/ if (!ahp->ah_rf_hal.set_channel(ah, chan)) { FAIL(HAL_EIO); } OS_MARK(ah, AH_MARK_RESET_LINE, __LINE__); /* Set 1:1 QCU to DCU mapping for all queues */ for (i = 0; i < AR_NUM_DCU; i++) { OS_REG_WRITE(ah, AR_DQCUMASK(i), 1 << i); } ahp->ah_intr_txqs = 0; for (i = 0; i < AH_PRIVATE(ah)->ah_caps.halTotalQueues; i++) { ar9300_reset_tx_queue(ah, i); } ar9300_init_interrupt_masks(ah, opmode); /* Reset ier reference count to disabled */ // OS_ATOMIC_SET(&ahp->ah_ier_ref_count, 1); if (ath_hal_isrfkillenabled(ah)) { ar9300_enable_rf_kill(ah); } /* must be called AFTER ini is processed */ ar9300_ani_init_defaults(ah, macmode); ar9300_init_qos(ah); ar9300_init_user_settings(ah); AH_PRIVATE(ah)->ah_opmode = opmode; /* record operating mode */ OS_MARK(ah, AH_MARK_RESET_DONE, 0); /* * disable seq number generation in hw */ OS_REG_WRITE(ah, AR_STA_ID1, OS_REG_READ(ah, AR_STA_ID1) | AR_STA_ID1_PRESERVE_SEQNUM); ar9300_set_dma(ah); /* * program OBS bus to see MAC interrupts */ #if ATH_SUPPORT_MCI if (!AH_PRIVATE(ah)->ah_caps.halMciSupport) { OS_REG_WRITE(ah, AR_HOSTIF_REG(ah, AR_OBS), 8); } #else OS_REG_WRITE(ah, AR_HOSTIF_REG(ah, AR_OBS), 8); #endif /* enabling AR_GTTM_IGNORE_IDLE in GTTM register so that GTT timer will not increment if the channel idle indicates the air is busy or NAV is still counting down */ OS_REG_WRITE(ah, AR_GTTM, AR_GTTM_IGNORE_IDLE); /* * GTT debug mode setting */ /* OS_REG_WRITE(ah, 0x64, 0x00320000); OS_REG_WRITE(ah, 0x68, 7); OS_REG_WRITE(ah, 0x4080, 0xC); */ /* * Disable general interrupt mitigation by setting MIRT = 0x0 * Rx and tx interrupt mitigation are conditionally enabled below. */ OS_REG_WRITE(ah, AR_MIRT, 0); if (ahp->ah_intr_mitigation_rx) { /* * Enable Interrupt Mitigation for Rx. * If no build-specific limits for the rx interrupt mitigation * timer have been specified, use conservative defaults. */ #ifndef AH_RIMT_VAL_LAST #define AH_RIMT_LAST_MICROSEC 500 #endif #ifndef AH_RIMT_VAL_FIRST #define AH_RIMT_FIRST_MICROSEC 2000 #endif #ifndef HOST_OFFLOAD OS_REG_RMW_FIELD(ah, AR_RIMT, AR_RIMT_LAST, AH_RIMT_LAST_MICROSEC); OS_REG_RMW_FIELD(ah, AR_RIMT, AR_RIMT_FIRST, AH_RIMT_FIRST_MICROSEC); #else /* lower mitigation level to reduce latency for offload arch. */ OS_REG_RMW_FIELD(ah, AR_RIMT, AR_RIMT_LAST, (AH_RIMT_LAST_MICROSEC >> 2)); OS_REG_RMW_FIELD(ah, AR_RIMT, AR_RIMT_FIRST, (AH_RIMT_FIRST_MICROSEC >> 2)); #endif } if (ahp->ah_intr_mitigation_tx) { /* * Enable Interrupt Mitigation for Tx. * If no build-specific limits for the tx interrupt mitigation * timer have been specified, use the values preferred for * the carrier group's products. */ #ifndef AH_TIMT_LAST #define AH_TIMT_LAST_MICROSEC 300 #endif #ifndef AH_TIMT_FIRST #define AH_TIMT_FIRST_MICROSEC 750 #endif OS_REG_RMW_FIELD(ah, AR_TIMT, AR_TIMT_LAST, AH_TIMT_LAST_MICROSEC); OS_REG_RMW_FIELD(ah, AR_TIMT, AR_TIMT_FIRST, AH_TIMT_FIRST_MICROSEC); } rx_chainmask = ahp->ah_rx_chainmask; OS_REG_WRITE(ah, AR_PHY_RX_CHAINMASK, rx_chainmask); OS_REG_WRITE(ah, AR_PHY_CAL_CHAINMASK, rx_chainmask); ar9300_init_bb(ah, chan); /* BB Step 7: Calibration */ /* * Only kick off calibration not on offchan. * If coming back from offchan, restore prevous Cal results * since chip reset will clear existings. 
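     *
     * (XXX illustrative note: the "cal data already exists" case keeps
     * the saved coefficients and re-applies them near the end of reset
     * via ar9300_rx_iq_cal_restore(); only the fresh-cal path below
     * zeroes ah_rx_cal_corr[] and clears ah_rx_cal_complete.)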
     */
    if (!ahp->ah_skip_rx_iq_cal) {
        int i;
        /* clear existing RX cal data */
        for (i = 0; i < AR9300_MAX_CHAINS; i++)
            ahp->ah_rx_cal_corr[i] = 0;

        ahp->ah_rx_cal_complete = AH_FALSE;
        // ahp->ah_rx_cal_chan = chan->channel;
        // ahp->ah_rx_cal_chan_flag = ichan->channel_flags;
        ahp->ah_rx_cal_chan = 0;
        ahp->ah_rx_cal_chan_flag = 0; /* XXX FreeBSD */
    }

    ar9300_invalidate_saved_cals(ah, ichan);
    cal_ret = ar9300_init_cal(ah, chan, AH_FALSE, apply_last_iqcorr);

#if ATH_SUPPORT_MCI
    if (AH_PRIVATE(ah)->ah_caps.halMciSupport && ahp->ah_mci_ready) {
        if (IS_CHAN_2GHZ(ichan) && (ahp->ah_mci_bt_state == MCI_BT_SLEEP)) {
            if (ar9300_mci_check_int(ah, AR_MCI_INTERRUPT_RX_MSG_REMOTE_RESET) ||
                ar9300_mci_check_int(ah, AR_MCI_INTERRUPT_RX_MSG_REQ_WAKE))
            {
                /*
                 * BT is sleeping. Check if BT wakes up during WLAN
                 * calibration. If BT wakes up during WLAN calibration,
                 * we need to go through all message exchanges again
                 * and recal.
                 */
                HALDEBUG(ah, HAL_DEBUG_BT_COEX,
                    "(MCI) ### %s: BT wakes up during WLAN calibration.\n",
                    __func__);
                OS_REG_WRITE(ah, AR_MCI_INTERRUPT_RX_MSG_RAW,
                    AR_MCI_INTERRUPT_RX_MSG_REMOTE_RESET |
                    AR_MCI_INTERRUPT_RX_MSG_REQ_WAKE);
                HALDEBUG(ah, HAL_DEBUG_BT_COEX, "(MCI) send REMOTE_RESET\n");
                ar9300_mci_remote_reset(ah, AH_TRUE);
                ar9300_mci_send_sys_waking(ah, AH_TRUE);
                OS_DELAY(1);
                if (IS_CHAN_2GHZ(ichan)) {
                    ar9300_mci_send_lna_transfer(ah, AH_TRUE);
                }
                ahp->ah_mci_bt_state = MCI_BT_AWAKE;

                /* Redo calibration */
                HALDEBUG(ah, HAL_DEBUG_BT_COEX, "(MCI) %s: Re-calibrate.\n",
                    __func__);
                ar9300_invalidate_saved_cals(ah, ichan);
                cal_ret = ar9300_init_cal(ah, chan, AH_FALSE,
                    apply_last_iqcorr);
            }
        }
        ar9300_mci_enable_interrupt(ah);
    }
#endif

    if (!cal_ret) {
        HALDEBUG(ah, HAL_DEBUG_RESET, "%s: Init Cal Failed\n", __func__);
        FAIL(HAL_ESELFTEST);
    }

    ar9300_init_txbf(ah);
#if 0
    /*
     * WAR for owl 1.0 - restore chain mask for 2-chain cfgs after cal
     */
    rx_chainmask = ahp->ah_rx_chainmask;
    if ((rx_chainmask == 0x5) || (rx_chainmask == 0x3)) {
        OS_REG_WRITE(ah, AR_PHY_RX_CHAINMASK, rx_chainmask);
        OS_REG_WRITE(ah, AR_PHY_CAL_CHAINMASK, rx_chainmask);
    }
#endif

    /* Restore previous led state */
    OS_REG_WRITE(ah, AR_CFG_LED, save_led_state | AR_CFG_SCLK_32KHZ);

#if ATH_BT_COEX
    if (ahp->ah_bt_coex_config_type != HAL_BT_COEX_CFG_NONE) {
        ar9300_init_bt_coex(ah);

#if ATH_SUPPORT_MCI
        if (AH_PRIVATE(ah)->ah_caps.halMciSupport && ahp->ah_mci_ready) {
            /* Check BT state again to make sure it's not changed. */
            ar9300_mci_sync_bt_state(ah);
            ar9300_mci_2g5g_switch(ah, AH_TRUE);

            if ((ahp->ah_mci_bt_state == MCI_BT_AWAKE) &&
                (ahp->ah_mci_query_bt == AH_TRUE)) {
                ahp->ah_mci_need_flush_btinfo = AH_TRUE;
            }
        }
#endif
    }
#endif

    /* Start TSF2 for generic timer 8-15.
*/ ar9300_start_tsf2(ah); /* MIMO Power save setting */ if (ar9300_get_capability(ah, HAL_CAP_DYNAMIC_SMPS, 0, AH_NULL) == HAL_OK) { ar9300_set_sm_power_mode(ah, ahp->ah_sm_power_mode); } /* * For big endian systems turn on swapping for descriptors */ #if AH_BYTE_ORDER == AH_BIG_ENDIAN if (AR_SREV_HORNET(ah) || AR_SREV_WASP(ah) || AR_SREV_SCORPION(ah) || AR_SREV_HONEYBEE(ah)) { OS_REG_RMW(ah, AR_CFG, AR_CFG_SWTB | AR_CFG_SWRB, 0); } else { ar9300_init_cfg_reg(ah); } #endif if ( AR_SREV_OSPREY(ah) || AR_SREV_WASP(ah) || AR_SREV_SCORPION(ah) || AR_SREV_HONEYBEE(ah) ) { OS_REG_RMW(ah, AR_CFG_LED, AR_CFG_LED_ASSOC_CTL, AR_CFG_LED_ASSOC_CTL); } #if !(defined(ART_BUILD)) && defined(ATH_SUPPORT_LED) #define REG_WRITE(_reg, _val) *((volatile u_int32_t *)(_reg)) = (_val); #define REG_READ(_reg) *((volatile u_int32_t *)(_reg)) #define ATH_GPIO_OUT_FUNCTION3 0xB8040038 #define ATH_GPIO_OE 0xB8040000 if ( AR_SREV_WASP(ah)) { if (IS_CHAN_2GHZ((AH_PRIVATE(ah)->ah_curchan))) { REG_WRITE(ATH_GPIO_OUT_FUNCTION3, ( REG_READ(ATH_GPIO_OUT_FUNCTION3) & (~(0xff << 8))) | (0x33 << 8) ); REG_WRITE(ATH_GPIO_OE, ( REG_READ(ATH_GPIO_OE) & (~(0x1 << 13) ))); } else { /* Disable 2G WLAN LED. During ath_open, reset function is called even before channel is set. So 2GHz is taken as default and it also blinks. Hence to avoid both from blinking, disable 2G led while in 5G mode */ REG_WRITE(ATH_GPIO_OE, ( REG_READ(ATH_GPIO_OE) | (1 << 13) )); REG_WRITE(ATH_GPIO_OUT_FUNCTION3, ( REG_READ(ATH_GPIO_OUT_FUNCTION3) & (~(0xff))) | (0x33) ); REG_WRITE(ATH_GPIO_OE, ( REG_READ(ATH_GPIO_OE) & (~(0x1 << 12) ))); } } else if (AR_SREV_SCORPION(ah)) { if (IS_CHAN_2GHZ((AH_PRIVATE(ah)->ah_curchan))) { REG_WRITE(ATH_GPIO_OUT_FUNCTION3, ( REG_READ(ATH_GPIO_OUT_FUNCTION3) & (~(0xff << 8))) | (0x2F << 8) ); REG_WRITE(ATH_GPIO_OE, (( REG_READ(ATH_GPIO_OE) & (~(0x1 << 13) )) | (0x1 << 12))); } else if (IS_CHAN_5GHZ((AH_PRIVATE(ah)->ah_curchan))) { REG_WRITE(ATH_GPIO_OUT_FUNCTION3, ( REG_READ(ATH_GPIO_OUT_FUNCTION3) & (~(0xff))) | (0x2F) ); REG_WRITE(ATH_GPIO_OE, (( REG_READ(ATH_GPIO_OE) & (~(0x1 << 12) )) | (0x1 << 13))); } } else if (AR_SREV_HONEYBEE(ah)) { REG_WRITE(ATH_GPIO_OUT_FUNCTION3, ( REG_READ(ATH_GPIO_OUT_FUNCTION3) & (~(0xff))) | (0x32) ); REG_WRITE(ATH_GPIO_OE, (( REG_READ(ATH_GPIO_OE) & (~(0x1 << 12) )))); } #undef REG_READ #undef REG_WRITE #endif /* XXX FreeBSD What's this? -adrian */ #if 0 chan->channel_flags = ichan->channel_flags; chan->priv_flags = ichan->priv_flags; #endif #if FIX_NOISE_FLOOR /* XXX FreeBSD is ichan appropariate? It was curchan.. */ ar9300_get_nf_hist_base(ah, ichan, is_scan, nf_buf); ar9300_load_nf(ah, nf_buf); if (nf_hist_buff_reset == 1) { nf_hist_buff_reset = 0; #ifndef ATH_NF_PER_CHAN if (First_NFCal(ah, ichan, is_scan, chan)){ if (ahp->ah_skip_rx_iq_cal && !is_scan) { /* restore RX Cal result if existing */ ar9300_rx_iq_cal_restore(ah); ahp->ah_skip_rx_iq_cal = AH_FALSE; } } #endif /* ATH_NF_PER_CHAN */ } else{ ar9300_start_nf_cal(ah); } #endif #ifdef AH_SUPPORT_AR9300 /* BB Panic Watchdog */ if (ar9300_get_capability(ah, HAL_CAP_BB_PANIC_WATCHDOG, 0, AH_NULL) == HAL_OK) { ar9300_config_bb_panic_watchdog(ah); } #endif /* While receiving unsupported rate frame receive state machine * gets into a state 0xb and if phy_restart happens when rx * state machine is in 0xb state, BB would go hang, if we * see 0xb state after first bb panic, make sure that we * disable the phy_restart. * * There may be multiple panics, make sure that we always do * this if we see this panic at least once. 
This is required
     * because reset seems to be writing from the INI file.
     */
    if ((ar9300_get_capability(ah, HAL_CAP_PHYRESTART_CLR_WAR, 0, AH_NULL)
         == HAL_OK) &&
        (((MS((AH9300(ah)->ah_bb_panic_last_status),
            AR_PHY_BB_WD_RX_OFDM_SM)) == 0xb) ||
         AH9300(ah)->ah_phyrestart_disabled))
    {
        ar9300_disable_phy_restart(ah, 1);
    }

    ahp->ah_radar1 = MS(OS_REG_READ(ah, AR_PHY_RADAR_1),
        AR_PHY_RADAR_1_CF_BIN_THRESH);
    ahp->ah_dc_offset = MS(OS_REG_READ(ah, AR_PHY_TIMING2),
        AR_PHY_TIMING2_DC_OFFSET);
    ahp->ah_disable_cck = MS(OS_REG_READ(ah, AR_PHY_MODE),
        AR_PHY_MODE_DISABLE_CCK);

    if (AH9300(ah)->ah_enable_keysearch_always) {
        ar9300_enable_keysearch_always(ah, 1);
    }

#if ATH_LOW_POWER_ENABLE
#define REG_WRITE(_reg, _val)   *((volatile u_int32_t *)(_reg)) = (_val)
#define REG_READ(_reg)          *((volatile u_int32_t *)(_reg))
    if (AR_SREV_OSPREY(ah)) {
        REG_WRITE(0xb4000080, REG_READ(0xb4000080) | 3);
        OS_REG_WRITE(ah, AR_RTC_RESET, 1);
        OS_REG_SET_BIT(ah,
            AR_HOSTIF_REG(ah, AR_PCIE_PM_CTRL), AR_PCIE_PM_CTRL_ENA);
        OS_REG_SET_BIT(ah, AR_HOSTIF_REG(ah, AR_SPARE), 0xffffffff);
    }
#undef REG_READ
#undef REG_WRITE
#endif /* ATH_LOW_POWER_ENABLE */

    WAR_USB_DISABLE_PLL_LOCK_DETECT(ah);

    /* H/W Green TX */
    ar9300_control_signals_for_green_tx_mode(ah);

    /* Smart Antenna, only for 5GHz on Scorpion */
    if (IEEE80211_IS_CHAN_2GHZ((AH_PRIVATE(ah)->ah_curchan)) &&
        AR_SREV_SCORPION(ah)) {
        ahp->ah_smartantenna_enable = 0;
    }

    ar9300_set_smart_antenna(ah, ahp->ah_smartantenna_enable);

    if (AR_SREV_APHRODITE(ah) && ahp->ah_lna_div_use_bt_ant_enable)
        OS_REG_SET_BIT(ah, AR_BTCOEX_WL_LNADIV, AR_BTCOEX_WL_LNADIV_FORCE_ON);

    if (ahp->ah_skip_rx_iq_cal && !is_scan) {
        /* restore RX Cal result if it exists */
        ar9300_rx_iq_cal_restore(ah);
        ahp->ah_skip_rx_iq_cal = AH_FALSE;
    }

    return AH_TRUE;
bad:
    OS_MARK(ah, AH_MARK_RESET_DONE, ecode);
    *status = ecode;

    if (ahp->ah_skip_rx_iq_cal && !is_scan) {
        /* restore RX Cal result if it exists */
        ar9300_rx_iq_cal_restore(ah);
        ahp->ah_skip_rx_iq_cal = AH_FALSE;
    }

    return AH_FALSE;
#undef FAIL
}

void
ar9300_green_ap_ps_on_off(struct ath_hal *ah, u_int16_t on_off)
{
    /* Set/reset the ps flag */
    AH9300(ah)->green_ap_ps_on = !!on_off;
}

/*
 * This function returns 1 when it is possible to do
 * single-chain power save.
 */
u_int16_t
ar9300_is_single_ant_power_save_possible(struct ath_hal *ah)
{
    return AH_TRUE;
}

/* To avoid compilation warnings. Functions not used when EMULATION. */
/*
 * ar9300_find_mag_approx()
 *
 * Cheap alpha-max-plus-beta-min magnitude estimate:
 * |re + j*im| ~= 0.96875 * max(|re|,|im|) + 0.375 * min(|re|,|im|),
 * i.e. max - max/32 + min/8 + min/4, avoiding a sqrt.
 */
static int32_t
ar9300_find_mag_approx(struct ath_hal *ah, int32_t in_re, int32_t in_im)
{
    int32_t abs_i = abs(in_re);
    int32_t abs_q = abs(in_im);
    int32_t max_abs, min_abs;

    if (abs_i > abs_q) {
        max_abs = abs_i;
        min_abs = abs_q;
    } else {
        max_abs = abs_q;
        min_abs = abs_i;
    }

    return (max_abs - (max_abs / 32) + (min_abs / 8) + (min_abs / 4));
}

/*
 * ar9300_solve_iq_cal()
 * solve 4x4 linear equation used in loopback iq cal.
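 *
 * Sketch of the model being inverted (names follow the code; the
 * 2^15 factor is res_scale):
 *
 *   mag_a0_d0 = mag_rx + (cos_2phi_1 * mag_tx + sin_2phi_1 * phs_tx) / 2^15
 *   phs_a0_d0 = phs_rx - (sin_2phi_1 * mag_tx - cos_2phi_1 * phs_tx) / 2^15
 *
 * with the same pair measured again at the second analog phase
 * setting (_2). Subtracting the two pairs eliminates the RX unknowns
 * and leaves a 2x2 system in (mag_tx, phs_tx) whose determinant is
 * f2 = (f1*f1 + f3*f3) / 2^15, where f1 = cos_2phi_1 - cos_2phi_2 and
 * f3 = sin_2phi_1 - sin_2phi_2; mag_rx and phs_rx are then obtained
 * by back-substitution.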
*/ static HAL_BOOL ar9300_solve_iq_cal( struct ath_hal *ah, int32_t sin_2phi_1, int32_t cos_2phi_1, int32_t sin_2phi_2, int32_t cos_2phi_2, int32_t mag_a0_d0, int32_t phs_a0_d0, int32_t mag_a1_d0, int32_t phs_a1_d0, int32_t solved_eq[]) { int32_t f1 = cos_2phi_1 - cos_2phi_2; int32_t f3 = sin_2phi_1 - sin_2phi_2; int32_t f2; int32_t mag_tx, phs_tx, mag_rx, phs_rx; const int32_t result_shift = 1 << 15; f2 = (((int64_t)f1 * (int64_t)f1) / result_shift) + (((int64_t)f3 * (int64_t)f3) / result_shift); if (0 == f2) { HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: Divide by 0(%d).\n", __func__, __LINE__); return AH_FALSE; } /* magnitude mismatch, tx */ mag_tx = f1 * (mag_a0_d0 - mag_a1_d0) + f3 * (phs_a0_d0 - phs_a1_d0); /* phase mismatch, tx */ phs_tx = f3 * (-mag_a0_d0 + mag_a1_d0) + f1 * (phs_a0_d0 - phs_a1_d0); mag_tx = (mag_tx / f2); phs_tx = (phs_tx / f2); /* magnitude mismatch, rx */ mag_rx = mag_a0_d0 - (cos_2phi_1 * mag_tx + sin_2phi_1 * phs_tx) / result_shift; /* phase mismatch, rx */ phs_rx = phs_a0_d0 + (sin_2phi_1 * mag_tx - cos_2phi_1 * phs_tx) / result_shift; solved_eq[0] = mag_tx; solved_eq[1] = phs_tx; solved_eq[2] = mag_rx; solved_eq[3] = phs_rx; return AH_TRUE; } /* * ar9300_calc_iq_corr() */ static HAL_BOOL ar9300_calc_iq_corr(struct ath_hal *ah, int32_t chain_idx, const int32_t iq_res[], int32_t iqc_coeff[]) { int32_t i2_m_q2_a0_d0, i2_p_q2_a0_d0, iq_corr_a0_d0; int32_t i2_m_q2_a0_d1, i2_p_q2_a0_d1, iq_corr_a0_d1; int32_t i2_m_q2_a1_d0, i2_p_q2_a1_d0, iq_corr_a1_d0; int32_t i2_m_q2_a1_d1, i2_p_q2_a1_d1, iq_corr_a1_d1; int32_t mag_a0_d0, mag_a1_d0, mag_a0_d1, mag_a1_d1; int32_t phs_a0_d0, phs_a1_d0, phs_a0_d1, phs_a1_d1; int32_t sin_2phi_1, cos_2phi_1, sin_2phi_2, cos_2phi_2; int32_t mag_tx, phs_tx, mag_rx, phs_rx; int32_t solved_eq[4], mag_corr_tx, phs_corr_tx, mag_corr_rx, phs_corr_rx; int32_t q_q_coff, q_i_coff; const int32_t res_scale = 1 << 15; const int32_t delpt_shift = 1 << 8; int32_t mag1, mag2; i2_m_q2_a0_d0 = iq_res[0] & 0xfff; i2_p_q2_a0_d0 = (iq_res[0] >> 12) & 0xfff; iq_corr_a0_d0 = ((iq_res[0] >> 24) & 0xff) + ((iq_res[1] & 0xf) << 8); if (i2_m_q2_a0_d0 > 0x800) { i2_m_q2_a0_d0 = -((0xfff - i2_m_q2_a0_d0) + 1); } if (iq_corr_a0_d0 > 0x800) { iq_corr_a0_d0 = -((0xfff - iq_corr_a0_d0) + 1); } i2_m_q2_a0_d1 = (iq_res[1] >> 4) & 0xfff; i2_p_q2_a0_d1 = (iq_res[2] & 0xfff); iq_corr_a0_d1 = (iq_res[2] >> 12) & 0xfff; if (i2_m_q2_a0_d1 > 0x800) { i2_m_q2_a0_d1 = -((0xfff - i2_m_q2_a0_d1) + 1); } if (iq_corr_a0_d1 > 0x800) { iq_corr_a0_d1 = -((0xfff - iq_corr_a0_d1) + 1); } i2_m_q2_a1_d0 = ((iq_res[2] >> 24) & 0xff) + ((iq_res[3] & 0xf) << 8); i2_p_q2_a1_d0 = (iq_res[3] >> 4) & 0xfff; iq_corr_a1_d0 = iq_res[4] & 0xfff; if (i2_m_q2_a1_d0 > 0x800) { i2_m_q2_a1_d0 = -((0xfff - i2_m_q2_a1_d0) + 1); } if (iq_corr_a1_d0 > 0x800) { iq_corr_a1_d0 = -((0xfff - iq_corr_a1_d0) + 1); } i2_m_q2_a1_d1 = (iq_res[4] >> 12) & 0xfff; i2_p_q2_a1_d1 = ((iq_res[4] >> 24) & 0xff) + ((iq_res[5] & 0xf) << 8); iq_corr_a1_d1 = (iq_res[5] >> 4) & 0xfff; if (i2_m_q2_a1_d1 > 0x800) { i2_m_q2_a1_d1 = -((0xfff - i2_m_q2_a1_d1) + 1); } if (iq_corr_a1_d1 > 0x800) { iq_corr_a1_d1 = -((0xfff - iq_corr_a1_d1) + 1); } if ((i2_p_q2_a0_d0 == 0) || (i2_p_q2_a0_d1 == 0) || (i2_p_q2_a1_d0 == 0) || (i2_p_q2_a1_d1 == 0)) { HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: Divide by 0(%d):\na0_d0=%d\na0_d1=%d\na2_d0=%d\na1_d1=%d\n", __func__, __LINE__, i2_p_q2_a0_d0, i2_p_q2_a0_d1, i2_p_q2_a1_d0, i2_p_q2_a1_d1); return AH_FALSE; } if ((i2_p_q2_a0_d0 <= 1024) || (i2_p_q2_a0_d0 > 2047) || (i2_p_q2_a1_d0 < 0) || (i2_p_q2_a1_d1 < 0) || 
(i2_p_q2_a0_d0 <= i2_m_q2_a0_d0) || (i2_p_q2_a0_d0 <= iq_corr_a0_d0) || (i2_p_q2_a0_d1 <= i2_m_q2_a0_d1) || (i2_p_q2_a0_d1 <= iq_corr_a0_d1) || (i2_p_q2_a1_d0 <= i2_m_q2_a1_d0) || (i2_p_q2_a1_d0 <= iq_corr_a1_d0) || (i2_p_q2_a1_d1 <= i2_m_q2_a1_d1) || (i2_p_q2_a1_d1 <= iq_corr_a1_d1)) { return AH_FALSE; } mag_a0_d0 = (i2_m_q2_a0_d0 * res_scale) / i2_p_q2_a0_d0; phs_a0_d0 = (iq_corr_a0_d0 * res_scale) / i2_p_q2_a0_d0; mag_a0_d1 = (i2_m_q2_a0_d1 * res_scale) / i2_p_q2_a0_d1; phs_a0_d1 = (iq_corr_a0_d1 * res_scale) / i2_p_q2_a0_d1; mag_a1_d0 = (i2_m_q2_a1_d0 * res_scale) / i2_p_q2_a1_d0; phs_a1_d0 = (iq_corr_a1_d0 * res_scale) / i2_p_q2_a1_d0; mag_a1_d1 = (i2_m_q2_a1_d1 * res_scale) / i2_p_q2_a1_d1; phs_a1_d1 = (iq_corr_a1_d1 * res_scale) / i2_p_q2_a1_d1; /* without analog phase shift */ sin_2phi_1 = (((mag_a0_d0 - mag_a0_d1) * delpt_shift) / DELPT); /* without analog phase shift */ cos_2phi_1 = (((phs_a0_d1 - phs_a0_d0) * delpt_shift) / DELPT); /* with analog phase shift */ sin_2phi_2 = (((mag_a1_d0 - mag_a1_d1) * delpt_shift) / DELPT); /* with analog phase shift */ cos_2phi_2 = (((phs_a1_d1 - phs_a1_d0) * delpt_shift) / DELPT); /* force sin^2 + cos^2 = 1; */ /* find magnitude by approximation */ mag1 = ar9300_find_mag_approx(ah, cos_2phi_1, sin_2phi_1); mag2 = ar9300_find_mag_approx(ah, cos_2phi_2, sin_2phi_2); if ((mag1 == 0) || (mag2 == 0)) { HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: Divide by 0(%d): mag1=%d, mag2=%d\n", __func__, __LINE__, mag1, mag2); return AH_FALSE; } /* normalization sin and cos by mag */ sin_2phi_1 = (sin_2phi_1 * res_scale / mag1); cos_2phi_1 = (cos_2phi_1 * res_scale / mag1); sin_2phi_2 = (sin_2phi_2 * res_scale / mag2); cos_2phi_2 = (cos_2phi_2 * res_scale / mag2); /* calculate IQ mismatch */ if (AH_FALSE == ar9300_solve_iq_cal(ah, sin_2phi_1, cos_2phi_1, sin_2phi_2, cos_2phi_2, mag_a0_d0, phs_a0_d0, mag_a1_d0, phs_a1_d0, solved_eq)) { HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: Call to ar9300_solve_iq_cal failed.\n", __func__); return AH_FALSE; } mag_tx = solved_eq[0]; phs_tx = solved_eq[1]; mag_rx = solved_eq[2]; phs_rx = solved_eq[3]; HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: chain %d: mag mismatch=%d phase mismatch=%d\n", __func__, chain_idx, mag_tx / res_scale, phs_tx / res_scale); if (res_scale == mag_tx) { HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: Divide by 0(%d): mag_tx=%d, res_scale=%d\n", __func__, __LINE__, mag_tx, res_scale); return AH_FALSE; } /* calculate and quantize Tx IQ correction factor */ mag_corr_tx = (mag_tx * res_scale) / (res_scale - mag_tx); phs_corr_tx = -phs_tx; q_q_coff = (mag_corr_tx * 128 / res_scale); q_i_coff = (phs_corr_tx * 256 / res_scale); HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: tx chain %d: mag corr=%d phase corr=%d\n", __func__, chain_idx, q_q_coff, q_i_coff); if (q_i_coff < -63) { q_i_coff = -63; } if (q_i_coff > 63) { q_i_coff = 63; } if (q_q_coff < -63) { q_q_coff = -63; } if (q_q_coff > 63) { q_q_coff = 63; } iqc_coeff[0] = (q_q_coff * 128) + (0x7f & q_i_coff); HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: tx chain %d: iq corr coeff=%x\n", __func__, chain_idx, iqc_coeff[0]); if (-mag_rx == res_scale) { HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: Divide by 0(%d): mag_rx=%d, res_scale=%d\n", __func__, __LINE__, mag_rx, res_scale); return AH_FALSE; } /* calculate and quantize Rx IQ correction factors */ mag_corr_rx = (-mag_rx * res_scale) / (res_scale + mag_rx); phs_corr_rx = -phs_rx; q_q_coff = (mag_corr_rx * 128 / res_scale); q_i_coff = (phs_corr_rx * 256 / res_scale); HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: rx chain %d: mag corr=%d phase corr=%d\n", 
__func__, chain_idx, q_q_coff, q_i_coff); if (q_i_coff < -63) { q_i_coff = -63; } if (q_i_coff > 63) { q_i_coff = 63; } if (q_q_coff < -63) { q_q_coff = -63; } if (q_q_coff > 63) { q_q_coff = 63; } iqc_coeff[1] = (q_q_coff * 128) + (0x7f & q_i_coff); HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: rx chain %d: iq corr coeff=%x\n", __func__, chain_idx, iqc_coeff[1]); return AH_TRUE; } #define MAX_MAG_DELTA 11 //maximum magnitude mismatch delta across gains #define MAX_PHS_DELTA 10 //maximum phase mismatch delta across gains #define ABS(x) ((x) >= 0 ? (x) : (-(x))) u_int32_t tx_corr_coeff[MAX_MEASUREMENT][AR9300_MAX_CHAINS] = { { AR_PHY_TX_IQCAL_CORR_COEFF_01_B0, AR_PHY_TX_IQCAL_CORR_COEFF_01_B1, AR_PHY_TX_IQCAL_CORR_COEFF_01_B2}, { AR_PHY_TX_IQCAL_CORR_COEFF_01_B0, AR_PHY_TX_IQCAL_CORR_COEFF_01_B1, AR_PHY_TX_IQCAL_CORR_COEFF_01_B2}, { AR_PHY_TX_IQCAL_CORR_COEFF_23_B0, AR_PHY_TX_IQCAL_CORR_COEFF_23_B1, AR_PHY_TX_IQCAL_CORR_COEFF_23_B2}, { AR_PHY_TX_IQCAL_CORR_COEFF_23_B0, AR_PHY_TX_IQCAL_CORR_COEFF_23_B1, AR_PHY_TX_IQCAL_CORR_COEFF_23_B2}, { AR_PHY_TX_IQCAL_CORR_COEFF_45_B0, AR_PHY_TX_IQCAL_CORR_COEFF_45_B1, AR_PHY_TX_IQCAL_CORR_COEFF_45_B2}, { AR_PHY_TX_IQCAL_CORR_COEFF_45_B0, AR_PHY_TX_IQCAL_CORR_COEFF_45_B1, AR_PHY_TX_IQCAL_CORR_COEFF_45_B2}, { AR_PHY_TX_IQCAL_CORR_COEFF_67_B0, AR_PHY_TX_IQCAL_CORR_COEFF_67_B1, AR_PHY_TX_IQCAL_CORR_COEFF_67_B2}, { AR_PHY_TX_IQCAL_CORR_COEFF_67_B0, AR_PHY_TX_IQCAL_CORR_COEFF_67_B1, AR_PHY_TX_IQCAL_CORR_COEFF_67_B2}, }; static void ar9300_tx_iq_cal_outlier_detection(struct ath_hal *ah, HAL_CHANNEL_INTERNAL *ichan, u_int32_t num_chains, struct coeff_t *coeff, HAL_BOOL is_cal_reusable) { int nmeasurement, ch_idx, im; int32_t magnitude, phase; int32_t magnitude_max, phase_max; int32_t magnitude_min, phase_min; int32_t magnitude_max_idx, phase_max_idx; int32_t magnitude_min_idx, phase_min_idx; int32_t magnitude_avg, phase_avg; int32_t outlier_mag_idx = 0; int32_t outlier_phs_idx = 0; if (AR_SREV_POSEIDON(ah)) { HALASSERT(num_chains == 0x1); tx_corr_coeff[0][0] = AR_PHY_TX_IQCAL_CORR_COEFF_01_B0_POSEIDON; tx_corr_coeff[1][0] = AR_PHY_TX_IQCAL_CORR_COEFF_01_B0_POSEIDON; tx_corr_coeff[2][0] = AR_PHY_TX_IQCAL_CORR_COEFF_23_B0_POSEIDON; tx_corr_coeff[3][0] = AR_PHY_TX_IQCAL_CORR_COEFF_23_B0_POSEIDON; tx_corr_coeff[4][0] = AR_PHY_TX_IQCAL_CORR_COEFF_45_B0_POSEIDON; tx_corr_coeff[5][0] = AR_PHY_TX_IQCAL_CORR_COEFF_45_B0_POSEIDON; tx_corr_coeff[6][0] = AR_PHY_TX_IQCAL_CORR_COEFF_67_B0_POSEIDON; tx_corr_coeff[7][0] = AR_PHY_TX_IQCAL_CORR_COEFF_67_B0_POSEIDON; } for (ch_idx = 0; ch_idx < num_chains; ch_idx++) { nmeasurement = OS_REG_READ_FIELD(ah, AR_PHY_TX_IQCAL_STATUS_B0(ah), AR_PHY_CALIBRATED_GAINS_0); if (nmeasurement > MAX_MEASUREMENT) { nmeasurement = MAX_MEASUREMENT; } if (!AR_SREV_SCORPION(ah)) { /* * reset max/min variable to min/max values so that * we always start with 1st calibrated gain value */ magnitude_max = -64; phase_max = -64; magnitude_min = 63; phase_min = 63; magnitude_avg = 0; phase_avg = 0; magnitude_max_idx = 0; magnitude_min_idx = 0; phase_max_idx = 0; phase_min_idx = 0; /* detect outlier only if nmeasurement > 1 */ if (nmeasurement > 1) { /* printf("----------- start outlier detection -----------\n"); */ /* * find max/min and phase/mag mismatch across all calibrated gains */ for (im = 0; im < nmeasurement; im++) { magnitude = coeff->mag_coeff[ch_idx][im][0]; phase = coeff->phs_coeff[ch_idx][im][0]; magnitude_avg = magnitude_avg + magnitude; phase_avg = phase_avg + phase; if (magnitude > magnitude_max) { magnitude_max = magnitude; magnitude_max_idx = im; 
} if (magnitude < magnitude_min) { magnitude_min = magnitude; magnitude_min_idx = im; } if (phase > phase_max) { phase_max = phase; phase_max_idx = im; } if (phase < phase_min) { phase_min = phase; phase_min_idx = im; } } /* find average (exclude max abs value) */ for (im = 0; im < nmeasurement; im++) { magnitude = coeff->mag_coeff[ch_idx][im][0]; phase = coeff->phs_coeff[ch_idx][im][0]; if ((ABS(magnitude) < ABS(magnitude_max)) || (ABS(magnitude) < ABS(magnitude_min))) { magnitude_avg = magnitude_avg + magnitude; } if ((ABS(phase) < ABS(phase_max)) || (ABS(phase) < ABS(phase_min))) { phase_avg = phase_avg + phase; } } magnitude_avg = magnitude_avg / (nmeasurement - 1); phase_avg = phase_avg / (nmeasurement - 1); /* detect magnitude outlier */ if (ABS(magnitude_max - magnitude_min) > MAX_MAG_DELTA) { if (ABS(magnitude_max - magnitude_avg) > ABS(magnitude_min - magnitude_avg)) { /* max is outlier, force to avg */ outlier_mag_idx = magnitude_max_idx; } else { /* min is outlier, force to avg */ outlier_mag_idx = magnitude_min_idx; } coeff->mag_coeff[ch_idx][outlier_mag_idx][0] = magnitude_avg; coeff->phs_coeff[ch_idx][outlier_mag_idx][0] = phase_avg; HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "[ch%d][outlier mag gain%d]:: " "mag_avg = %d (/128), phase_avg = %d (/256)\n", ch_idx, outlier_mag_idx, magnitude_avg, phase_avg); } /* detect phase outlier */ if (ABS(phase_max - phase_min) > MAX_PHS_DELTA) { if (ABS(phase_max-phase_avg) > ABS(phase_min - phase_avg)) { /* max is outlier, force to avg */ outlier_phs_idx = phase_max_idx; } else{ /* min is outlier, force to avg */ outlier_phs_idx = phase_min_idx; } coeff->mag_coeff[ch_idx][outlier_phs_idx][0] = magnitude_avg; coeff->phs_coeff[ch_idx][outlier_phs_idx][0] = phase_avg; HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "[ch%d][outlier phs gain%d]:: " "mag_avg = %d (/128), phase_avg = %d (/256)\n", ch_idx, outlier_phs_idx, magnitude_avg, phase_avg); } } } /*printf("------------ after outlier detection -------------\n");*/ for (im = 0; im < nmeasurement; im++) { magnitude = coeff->mag_coeff[ch_idx][im][0]; phase = coeff->phs_coeff[ch_idx][im][0]; #if 0 printf("[ch%d][gain%d]:: mag = %d (/128), phase = %d (/256)\n", ch_idx, im, magnitude, phase); #endif coeff->iqc_coeff[0] = (phase & 0x7f) | ((magnitude & 0x7f) << 7); if ((im % 2) == 0) { OS_REG_RMW_FIELD(ah, tx_corr_coeff[im][ch_idx], AR_PHY_TX_IQCAL_CORR_COEFF_00_COEFF_TABLE, coeff->iqc_coeff[0]); } else { OS_REG_RMW_FIELD(ah, tx_corr_coeff[im][ch_idx], AR_PHY_TX_IQCAL_CORR_COEFF_01_COEFF_TABLE, coeff->iqc_coeff[0]); } #if ATH_SUPPORT_CAL_REUSE ichan->tx_corr_coeff[im][ch_idx] = coeff->iqc_coeff[0]; #endif } #if ATH_SUPPORT_CAL_REUSE ichan->num_measures[ch_idx] = nmeasurement; #endif } OS_REG_RMW_FIELD(ah, AR_PHY_TX_IQCAL_CONTROL_3, AR_PHY_TX_IQCAL_CONTROL_3_IQCORR_EN, 0x1); OS_REG_RMW_FIELD(ah, AR_PHY_RX_IQCAL_CORR_B0, AR_PHY_RX_IQCAL_CORR_B0_LOOPBACK_IQCORR_EN, 0x1); #if ATH_SUPPORT_CAL_REUSE if (is_cal_reusable) { ichan->one_time_txiqcal_done = AH_TRUE; HALDEBUG(ah, HAL_DEBUG_FCS_RTT, "(FCS) TXIQCAL saved - %d\n", ichan->channel); } #endif } #if ATH_SUPPORT_CAL_REUSE static void ar9300_tx_iq_cal_apply(struct ath_hal *ah, HAL_CHANNEL_INTERNAL *ichan) { struct ath_hal_9300 *ahp = AH9300(ah); int nmeasurement, ch_idx, im; u_int32_t tx_corr_coeff[MAX_MEASUREMENT][AR9300_MAX_CHAINS] = { { AR_PHY_TX_IQCAL_CORR_COEFF_01_B0, AR_PHY_TX_IQCAL_CORR_COEFF_01_B1, AR_PHY_TX_IQCAL_CORR_COEFF_01_B2}, { AR_PHY_TX_IQCAL_CORR_COEFF_01_B0, AR_PHY_TX_IQCAL_CORR_COEFF_01_B1, AR_PHY_TX_IQCAL_CORR_COEFF_01_B2}, { 
AR_PHY_TX_IQCAL_CORR_COEFF_23_B0, AR_PHY_TX_IQCAL_CORR_COEFF_23_B1, AR_PHY_TX_IQCAL_CORR_COEFF_23_B2}, { AR_PHY_TX_IQCAL_CORR_COEFF_23_B0, AR_PHY_TX_IQCAL_CORR_COEFF_23_B1, AR_PHY_TX_IQCAL_CORR_COEFF_23_B2}, { AR_PHY_TX_IQCAL_CORR_COEFF_45_B0, AR_PHY_TX_IQCAL_CORR_COEFF_45_B1, AR_PHY_TX_IQCAL_CORR_COEFF_45_B2}, { AR_PHY_TX_IQCAL_CORR_COEFF_45_B0, AR_PHY_TX_IQCAL_CORR_COEFF_45_B1, AR_PHY_TX_IQCAL_CORR_COEFF_45_B2}, { AR_PHY_TX_IQCAL_CORR_COEFF_67_B0, AR_PHY_TX_IQCAL_CORR_COEFF_67_B1, AR_PHY_TX_IQCAL_CORR_COEFF_67_B2}, { AR_PHY_TX_IQCAL_CORR_COEFF_67_B0, AR_PHY_TX_IQCAL_CORR_COEFF_67_B1, AR_PHY_TX_IQCAL_CORR_COEFF_67_B2}, }; if (AR_SREV_POSEIDON(ah)) { HALASSERT(ahp->ah_tx_cal_chainmask == 0x1); tx_corr_coeff[0][0] = AR_PHY_TX_IQCAL_CORR_COEFF_01_B0_POSEIDON; tx_corr_coeff[1][0] = AR_PHY_TX_IQCAL_CORR_COEFF_01_B0_POSEIDON; tx_corr_coeff[2][0] = AR_PHY_TX_IQCAL_CORR_COEFF_23_B0_POSEIDON; tx_corr_coeff[3][0] = AR_PHY_TX_IQCAL_CORR_COEFF_23_B0_POSEIDON; tx_corr_coeff[4][0] = AR_PHY_TX_IQCAL_CORR_COEFF_45_B0_POSEIDON; tx_corr_coeff[5][0] = AR_PHY_TX_IQCAL_CORR_COEFF_45_B0_POSEIDON; tx_corr_coeff[6][0] = AR_PHY_TX_IQCAL_CORR_COEFF_67_B0_POSEIDON; tx_corr_coeff[7][0] = AR_PHY_TX_IQCAL_CORR_COEFF_67_B0_POSEIDON; } for (ch_idx = 0; ch_idx < AR9300_MAX_CHAINS; ch_idx++) { if ((ahp->ah_tx_cal_chainmask & (1 << ch_idx)) == 0) { continue; } nmeasurement = ichan->num_measures[ch_idx]; for (im = 0; im < nmeasurement; im++) { if ((im % 2) == 0) { OS_REG_RMW_FIELD(ah, tx_corr_coeff[im][ch_idx], AR_PHY_TX_IQCAL_CORR_COEFF_00_COEFF_TABLE, ichan->tx_corr_coeff[im][ch_idx]); } else { OS_REG_RMW_FIELD(ah, tx_corr_coeff[im][ch_idx], AR_PHY_TX_IQCAL_CORR_COEFF_01_COEFF_TABLE, ichan->tx_corr_coeff[im][ch_idx]); } } } OS_REG_RMW_FIELD(ah, AR_PHY_TX_IQCAL_CONTROL_3, AR_PHY_TX_IQCAL_CONTROL_3_IQCORR_EN, 0x1); OS_REG_RMW_FIELD(ah, AR_PHY_RX_IQCAL_CORR_B0, AR_PHY_RX_IQCAL_CORR_B0_LOOPBACK_IQCORR_EN, 0x1); } #endif /* * ar9300_tx_iq_cal_hw_run is only needed for osprey/wasp/hornet * It is not needed for jupiter/poseidon. 
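 *
 * Hypothetical usage sketch (the real caller lives in the init-cal
 * path; argument values here are for illustration only):
 *
 *   if (ar9300_tx_iq_cal_hw_run(ah))
 *       ar9300_tx_iq_cal_post_proc(ah, ichan, 1, 1, AH_TRUE, AH_FALSE);
 *
 * The helper clears any forced TX gain first (a forced gain would
 * corrupt the per-gain measurements), then self-sets
 * AR_PHY_TX_IQCAL_START_DO_CAL and polls it back to zero.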
*/ HAL_BOOL ar9300_tx_iq_cal_hw_run(struct ath_hal *ah) { int is_tx_gain_forced; is_tx_gain_forced = OS_REG_READ_FIELD(ah, AR_PHY_TX_FORCED_GAIN, AR_PHY_TXGAIN_FORCE); if (is_tx_gain_forced) { /*printf("Tx gain can not be forced during tx I/Q cal!\n");*/ OS_REG_RMW_FIELD(ah, AR_PHY_TX_FORCED_GAIN, AR_PHY_TXGAIN_FORCE, 0); } /* enable tx IQ cal */ OS_REG_RMW_FIELD(ah, AR_PHY_TX_IQCAL_START(ah), AR_PHY_TX_IQCAL_START_DO_CAL, AR_PHY_TX_IQCAL_START_DO_CAL); if (!ath_hal_wait(ah, AR_PHY_TX_IQCAL_START(ah), AR_PHY_TX_IQCAL_START_DO_CAL, 0)) { HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: Tx IQ Cal is never completed.\n", __func__); return AH_FALSE; } return AH_TRUE; } static void ar9300_tx_iq_cal_post_proc(struct ath_hal *ah,HAL_CHANNEL_INTERNAL *ichan, int iqcal_idx, int max_iqcal,HAL_BOOL is_cal_reusable, HAL_BOOL apply_last_corr) { int nmeasurement=0, im, ix, iy, temp; struct ath_hal_9300 *ahp = AH9300(ah); u_int32_t txiqcal_status[AR9300_MAX_CHAINS] = { AR_PHY_TX_IQCAL_STATUS_B0(ah), AR_PHY_TX_IQCAL_STATUS_B1, AR_PHY_TX_IQCAL_STATUS_B2, }; const u_int32_t chan_info_tab[] = { AR_PHY_CHAN_INFO_TAB_0, AR_PHY_CHAN_INFO_TAB_1, AR_PHY_CHAN_INFO_TAB_2, }; int32_t iq_res[6]; int32_t ch_idx, j; u_int32_t num_chains = 0; static struct coeff_t coeff; txiqcal_status[0] = AR_PHY_TX_IQCAL_STATUS_B0(ah); for (ch_idx = 0; ch_idx < AR9300_MAX_CHAINS; ch_idx++) { if (ahp->ah_tx_chainmask & (1 << ch_idx)) { num_chains++; } } if (apply_last_corr) { if (coeff.last_cal == AH_TRUE) { int32_t magnitude, phase; int ch_idx, im; u_int32_t tx_corr_coeff[MAX_MEASUREMENT][AR9300_MAX_CHAINS] = { { AR_PHY_TX_IQCAL_CORR_COEFF_01_B0, AR_PHY_TX_IQCAL_CORR_COEFF_01_B1, AR_PHY_TX_IQCAL_CORR_COEFF_01_B2}, { AR_PHY_TX_IQCAL_CORR_COEFF_01_B0, AR_PHY_TX_IQCAL_CORR_COEFF_01_B1, AR_PHY_TX_IQCAL_CORR_COEFF_01_B2}, { AR_PHY_TX_IQCAL_CORR_COEFF_23_B0, AR_PHY_TX_IQCAL_CORR_COEFF_23_B1, AR_PHY_TX_IQCAL_CORR_COEFF_23_B2}, { AR_PHY_TX_IQCAL_CORR_COEFF_23_B0, AR_PHY_TX_IQCAL_CORR_COEFF_23_B1, AR_PHY_TX_IQCAL_CORR_COEFF_23_B2}, { AR_PHY_TX_IQCAL_CORR_COEFF_45_B0, AR_PHY_TX_IQCAL_CORR_COEFF_45_B1, AR_PHY_TX_IQCAL_CORR_COEFF_45_B2}, { AR_PHY_TX_IQCAL_CORR_COEFF_45_B0, AR_PHY_TX_IQCAL_CORR_COEFF_45_B1, AR_PHY_TX_IQCAL_CORR_COEFF_45_B2}, { AR_PHY_TX_IQCAL_CORR_COEFF_67_B0, AR_PHY_TX_IQCAL_CORR_COEFF_67_B1, AR_PHY_TX_IQCAL_CORR_COEFF_67_B2}, { AR_PHY_TX_IQCAL_CORR_COEFF_67_B0, AR_PHY_TX_IQCAL_CORR_COEFF_67_B1, AR_PHY_TX_IQCAL_CORR_COEFF_67_B2}, }; for (ch_idx = 0; ch_idx < num_chains; ch_idx++) { for (im = 0; im < coeff.last_nmeasurement; im++) { magnitude = coeff.mag_coeff[ch_idx][im][0]; phase = coeff.phs_coeff[ch_idx][im][0]; #if 0 printf("[ch%d][gain%d]:: mag = %d (/128), phase = %d (/256)\n", ch_idx, im, magnitude, phase); #endif coeff.iqc_coeff[0] = (phase & 0x7f) | ((magnitude & 0x7f) << 7); if ((im % 2) == 0) { OS_REG_RMW_FIELD(ah, tx_corr_coeff[im][ch_idx], AR_PHY_TX_IQCAL_CORR_COEFF_00_COEFF_TABLE, coeff.iqc_coeff[0]); } else { OS_REG_RMW_FIELD(ah, tx_corr_coeff[im][ch_idx], AR_PHY_TX_IQCAL_CORR_COEFF_01_COEFF_TABLE, coeff.iqc_coeff[0]); } } } OS_REG_RMW_FIELD(ah, AR_PHY_TX_IQCAL_CONTROL_3, AR_PHY_TX_IQCAL_CONTROL_3_IQCORR_EN, 0x1); } return; } for (ch_idx = 0; ch_idx < num_chains; ch_idx++) { nmeasurement = OS_REG_READ_FIELD(ah, AR_PHY_TX_IQCAL_STATUS_B0(ah), AR_PHY_CALIBRATED_GAINS_0); if (nmeasurement > MAX_MEASUREMENT) { nmeasurement = MAX_MEASUREMENT; } for (im = 0; im < nmeasurement; im++) { HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "%s: Doing Tx IQ Cal for chain %d.\n", __func__, ch_idx); if (OS_REG_READ(ah, txiqcal_status[ch_idx]) & 
AR_PHY_TX_IQCAL_STATUS_FAILED) {
                HALDEBUG(ah, HAL_DEBUG_CALIBRATE,
                    "%s: Tx IQ Cal failed for chain %d.\n", __func__, ch_idx);
                goto TX_IQ_CAL_FAILED_;
            }

            for (j = 0; j < 3; j++) {
                u_int32_t idx = 2 * j;
                /* 3 registers for each calibration result */
                u_int32_t offset = 4 * (3 * im + j);

                OS_REG_RMW_FIELD(ah, AR_PHY_CHAN_INFO_MEMORY,
                    AR_PHY_CHAN_INFO_TAB_S2_READ, 0);
                /* 32 bits */
                iq_res[idx] = OS_REG_READ(ah, chan_info_tab[ch_idx] + offset);
                OS_REG_RMW_FIELD(ah, AR_PHY_CHAN_INFO_MEMORY,
                    AR_PHY_CHAN_INFO_TAB_S2_READ, 1);
                /* 16 bits */
                iq_res[idx + 1] =
                    0xffff & OS_REG_READ(ah, chan_info_tab[ch_idx] + offset);

                HALDEBUG(ah, HAL_DEBUG_CALIBRATE,
                    "%s: IQ RES[%d]=0x%x IQ_RES[%d]=0x%x\n",
                    __func__, idx, iq_res[idx], idx + 1, iq_res[idx + 1]);
            }

            if (AH_FALSE == ar9300_calc_iq_corr(
                    ah, ch_idx, iq_res, coeff.iqc_coeff))
            {
                HALDEBUG(ah, HAL_DEBUG_CALIBRATE,
                    "%s: Failed in calculation of IQ correction.\n",
                    __func__);
                goto TX_IQ_CAL_FAILED_;
            }

            coeff.phs_coeff[ch_idx][im][iqcal_idx - 1] =
                coeff.iqc_coeff[0] & 0x7f;
            coeff.mag_coeff[ch_idx][im][iqcal_idx - 1] =
                (coeff.iqc_coeff[0] >> 7) & 0x7f;

            if (coeff.mag_coeff[ch_idx][im][iqcal_idx - 1] > 63) {
                coeff.mag_coeff[ch_idx][im][iqcal_idx - 1] -= 128;
            }
            if (coeff.phs_coeff[ch_idx][im][iqcal_idx - 1] > 63) {
                coeff.phs_coeff[ch_idx][im][iqcal_idx - 1] -= 128;
            }
#if 0
            ath_hal_printf(ah,
                "IQCAL::[ch%d][gain%d]:: mag = %d phase = %d \n",
                ch_idx, im, coeff.mag_coeff[ch_idx][im][iqcal_idx - 1],
                coeff.phs_coeff[ch_idx][im][iqcal_idx - 1]);
#endif
        }
    }

    //last iteration; calculate mag and phs
    if (iqcal_idx == max_iqcal) {
        if (max_iqcal > 1) {
            for (ch_idx = 0; ch_idx < num_chains; ch_idx++) {
                for (im = 0; im < nmeasurement; im++) {
                    //sort mag and phs
                    for (ix = 0; ix < max_iqcal - 1; ix++) {
                        for (iy = ix + 1; iy <= max_iqcal - 1; iy++) {
                            if (coeff.mag_coeff[ch_idx][im][iy] <
                                coeff.mag_coeff[ch_idx][im][ix])
                            {
                                //swap
                                temp = coeff.mag_coeff[ch_idx][im][ix];
                                coeff.mag_coeff[ch_idx][im][ix] =
                                    coeff.mag_coeff[ch_idx][im][iy];
                                coeff.mag_coeff[ch_idx][im][iy] = temp;
                            }
                            if (coeff.phs_coeff[ch_idx][im][iy] <
                                coeff.phs_coeff[ch_idx][im][ix])
                            {
                                //swap
                                temp = coeff.phs_coeff[ch_idx][im][ix];
                                coeff.phs_coeff[ch_idx][im][ix] =
                                    coeff.phs_coeff[ch_idx][im][iy];
                                coeff.phs_coeff[ch_idx][im][iy] = temp;
                            }
                        }
                    }
                    //select the median: the middle entry of the sorted array
                    coeff.mag_coeff[ch_idx][im][0] =
                        coeff.mag_coeff[ch_idx][im][max_iqcal / 2];
                    coeff.phs_coeff[ch_idx][im][0] =
                        coeff.phs_coeff[ch_idx][im][max_iqcal / 2];
                    HALDEBUG(ah, HAL_DEBUG_CALIBRATE,
                        "IQCAL: Median [ch%d][gain%d]:: mag = %d phase = %d\n",
                        ch_idx, im, coeff.mag_coeff[ch_idx][im][0],
                        coeff.phs_coeff[ch_idx][im][0]);
                }
            }
        }
        ar9300_tx_iq_cal_outlier_detection(ah, ichan, num_chains, &coeff,
            is_cal_reusable);
        coeff.last_nmeasurement = nmeasurement;
        coeff.last_cal = AH_TRUE;
    }
    return;

TX_IQ_CAL_FAILED_:
    /* no need to print this, it is AGC failure not chip stuck */
    HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "Tx IQ Cal failed\n");
    coeff.last_cal = AH_FALSE;
    return;
}

void
ar9300_disable_phy_restart(struct ath_hal *ah, int disable_phy_restart)
{
    u_int32_t val;

    val = OS_REG_READ(ah, AR_PHY_RESTART);
    if (disable_phy_restart) {
        val &= ~AR_PHY_RESTART_ENA;
        AH9300(ah)->ah_phyrestart_disabled = 1;
    } else {
        val |= AR_PHY_RESTART_ENA;
        AH9300(ah)->ah_phyrestart_disabled = 0;
    }

    OS_REG_WRITE(ah, AR_PHY_RESTART, val);
    val = OS_REG_READ(ah, AR_PHY_RESTART);
}

HAL_BOOL
ar9300_interference_is_present(struct ath_hal *ah)
{
    int i;
    struct ath_hal_private *ahpriv = AH_PRIVATE(ah);
    const struct ieee80211_channel *chan = ahpriv->ah_curchan;
    HAL_CHANNEL_INTERNAL *ichan = ath_hal_checkchannel(ah, chan);

    if (ichan == NULL) {
        ath_hal_printf(ah, "%s: called with ichan=NULL\n", __func__);
        return AH_FALSE;
    }

    /* This function is called after a stuck beacon, if EACS is enabled.
     * If CW interference is severe, then HW goes into a loop of continuous
     * stuck beacons and resets. On reset the NF cal history is cleared.
     * So the median value of the history cannot be used -
     * hence check if any value (Chain 0/Primary Channel)
     * is outside the bounds.
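     *
     * Concretely (numbers illustrative only): with nfp->nominal = -110
     * and nf_cw_int_delta = 30, any buffered chain-0 value above
     * -80dBm is treated as CW interference and the function reports
     * AH_TRUE.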
*/ HAL_NFCAL_HIST_FULL *h = AH_HOME_CHAN_NFCAL_HIST(ah, ichan); for (i = 0; i < HAL_NF_CAL_HIST_LEN_FULL; i++) { if (h->nf_cal_buffer[i][0] > AH9300(ah)->nfp->nominal + AH9300(ah)->nf_cw_int_delta) { return AH_TRUE; } } return AH_FALSE; } #if ATH_SUPPORT_CRDC void ar9300_crdc_rx_notify(struct ath_hal *ah, struct ath_rx_status *rxs) { struct ath_hal_private *ahpriv = AH_PRIVATE(ah); int rssi_index; if ((!AR_SREV_WASP(ah)) || (!ahpriv->ah_config.ath_hal_crdc_enable)) { return; } if (rxs->rs_isaggr && rxs->rs_moreaggr) { return; } if ((rxs->rs_rssi_ctl0 >= HAL_RSSI_BAD) || (rxs->rs_rssi_ctl1 >= HAL_RSSI_BAD)) { return; } rssi_index = ah->ah_crdc_rssi_ptr % HAL_MAX_CRDC_RSSI_SAMPLE; ah->ah_crdc_rssi_sample[0][rssi_index] = rxs->rs_rssi_ctl0; ah->ah_crdc_rssi_sample[1][rssi_index] = rxs->rs_rssi_ctl1; ah->ah_crdc_rssi_ptr++; } static int ar9300_crdc_avg_rssi(struct ath_hal *ah, int chain) { int crdc_rssi_sum = 0; int crdc_rssi_ptr = ah->ah_crdc_rssi_ptr, i; struct ath_hal_private *ahpriv = AH_PRIVATE(ah); int crdc_window = ahpriv->ah_config.ath_hal_crdc_window; if (crdc_window > HAL_MAX_CRDC_RSSI_SAMPLE) { crdc_window = HAL_MAX_CRDC_RSSI_SAMPLE; } for (i = 1; i <= crdc_window; i++) { crdc_rssi_sum += ah->ah_crdc_rssi_sample[chain] [(crdc_rssi_ptr - i) % HAL_MAX_CRDC_RSSI_SAMPLE]; } return crdc_rssi_sum / crdc_window; } static void ar9300_crdc_activate(struct ath_hal *ah, int rssi_diff, int enable) { int val, orig_val; struct ath_hal_private *ahpriv = AH_PRIVATE(ah); int crdc_numerator = ahpriv->ah_config.ath_hal_crdc_numerator; int crdc_denominator = ahpriv->ah_config.ath_hal_crdc_denominator; int c = (rssi_diff * crdc_numerator) / crdc_denominator; val = orig_val = OS_REG_READ(ah, AR_PHY_MULTICHAIN_CTRL); val &= 0xffffff00; if (enable) { val |= 0x1; val |= ((c << 1) & 0xff); } OS_REG_WRITE(ah, AR_PHY_MULTICHAIN_CTRL, val); HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "diff: %02d comp: %02d reg: %08x %08x\n", rssi_diff, c, orig_val, val); } void ar9300_chain_rssi_diff_compensation(struct ath_hal *ah) { struct ath_hal_private *ahpriv = AH_PRIVATE(ah); int crdc_window = ahpriv->ah_config.ath_hal_crdc_window; int crdc_rssi_ptr = ah->ah_crdc_rssi_ptr; int crdc_rssi_thresh = ahpriv->ah_config.ath_hal_crdc_rssithresh; int crdc_diff_thresh = ahpriv->ah_config.ath_hal_crdc_diffthresh; int avg_rssi[2], avg_rssi_diff; if ((!AR_SREV_WASP(ah)) || (!ahpriv->ah_config.ath_hal_crdc_enable)) { if (ah->ah_crdc_rssi_ptr) { ar9300_crdc_activate(ah, 0, 0); ah->ah_crdc_rssi_ptr = 0; } return; } if (crdc_window > HAL_MAX_CRDC_RSSI_SAMPLE) { crdc_window = HAL_MAX_CRDC_RSSI_SAMPLE; } if (crdc_rssi_ptr < crdc_window) { return; } avg_rssi[0] = ar9300_crdc_avg_rssi(ah, 0); avg_rssi[1] = ar9300_crdc_avg_rssi(ah, 1); avg_rssi_diff = avg_rssi[1] - avg_rssi[0]; HALDEBUG(ah, HAL_DEBUG_CALIBRATE, "crdc: avg: %02d %02d ", avg_rssi[0], avg_rssi[1]); if ((avg_rssi[0] < crdc_rssi_thresh) && (avg_rssi[1] < crdc_rssi_thresh)) { ar9300_crdc_activate(ah, 0, 0); } else { if (ABS(avg_rssi_diff) >= crdc_diff_thresh) { ar9300_crdc_activate(ah, avg_rssi_diff, 1); } else { ar9300_crdc_activate(ah, 0, 1); } } } #endif #if ATH_ANT_DIV_COMB HAL_BOOL ar9300_ant_ctrl_set_lna_div_use_bt_ant(struct ath_hal *ah, HAL_BOOL enable, const struct ieee80211_channel *chan) { u_int32_t value; u_int32_t regval; struct ath_hal_9300 *ahp = AH9300(ah); HAL_CHANNEL_INTERNAL *ichan; struct ath_hal_private *ahpriv = AH_PRIVATE(ah); HAL_CAPABILITIES *pcap = &ahpriv->ah_caps; HALDEBUG(ah, HAL_DEBUG_RESET | HAL_DEBUG_BT_COEX, "%s: called; enable=%d\n", __func__, enable); if 
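	/*
	 * Illustrative CRDC arithmetic (values assumed, not from this file):
	 * with a window of 8 samples per chain, avg_rssi[0] = 20 and
	 * avg_rssi[1] = 26 give avg_rssi_diff = 6; with numerator 3 and
	 * denominator 4 the compensation above is c = 6 * 3 / 4 = 4, and
	 * ar9300_crdc_activate() writes ((4 << 1) & 0xff) | 0x1 = 0x09 into
	 * the low byte of AR_PHY_MULTICHAIN_CTRL.
	 */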
(AR_SREV_POSEIDON(ah)) { // Make sure this scheme is only used for WB225(Astra) ahp->ah_lna_div_use_bt_ant_enable = enable; ichan = ar9300_check_chan(ah, chan); if ( ichan == AH_NULL ) { HALDEBUG(ah, HAL_DEBUG_CHANNEL, "%s: invalid channel %u/0x%x; no mapping\n", __func__, chan->ic_freq, chan->ic_flags); return AH_FALSE; } if ( enable == TRUE ) { pcap->halAntDivCombSupport = TRUE; } else { pcap->halAntDivCombSupport = pcap->halAntDivCombSupportOrg; } #define AR_SWITCH_TABLE_COM2_ALL (0xffffff) #define AR_SWITCH_TABLE_COM2_ALL_S (0) value = ar9300_ant_ctrl_common2_get(ah, IS_CHAN_2GHZ(ichan)); if ( enable == TRUE ) { value &= ~AR_SWITCH_TABLE_COM2_ALL; value |= ah->ah_config.ath_hal_ant_ctrl_comm2g_switch_enable; } HALDEBUG(ah, HAL_DEBUG_RESET, "%s: com2=0x%08x\n", __func__, value); OS_REG_RMW_FIELD(ah, AR_PHY_SWITCH_COM_2, AR_SWITCH_TABLE_COM2_ALL, value); value = ar9300_eeprom_get(ahp, EEP_ANTDIV_control); /* main_lnaconf, alt_lnaconf, main_tb, alt_tb */ regval = OS_REG_READ(ah, AR_PHY_MC_GAIN_CTRL); regval &= (~ANT_DIV_CONTROL_ALL); /* clear bit 25~30 */ regval |= (value & 0x3f) << ANT_DIV_CONTROL_ALL_S; /* enable_lnadiv */ regval &= (~MULTICHAIN_GAIN_CTRL__ENABLE_ANT_DIV_LNADIV__MASK); regval |= ((value >> 6) & 0x1) << MULTICHAIN_GAIN_CTRL__ENABLE_ANT_DIV_LNADIV__SHIFT; if ( enable == TRUE ) { regval |= ANT_DIV_ENABLE; } OS_REG_WRITE(ah, AR_PHY_MC_GAIN_CTRL, regval); /* enable fast_div */ regval = OS_REG_READ(ah, AR_PHY_CCK_DETECT); regval &= (~BBB_SIG_DETECT__ENABLE_ANT_FAST_DIV__MASK); regval |= ((value >> 7) & 0x1) << BBB_SIG_DETECT__ENABLE_ANT_FAST_DIV__SHIFT; if ( enable == TRUE ) { regval |= FAST_DIV_ENABLE; } OS_REG_WRITE(ah, AR_PHY_CCK_DETECT, regval); if ( AR_SREV_POSEIDON_11_OR_LATER(ah) ) { if (pcap->halAntDivCombSupport) { /* If support DivComb, set MAIN to LNA1 and ALT to LNA2 at the first beginning */ regval = OS_REG_READ(ah, AR_PHY_MC_GAIN_CTRL); /* clear bit 25~30 main_lnaconf, alt_lnaconf, main_tb, alt_tb */ regval &= (~(MULTICHAIN_GAIN_CTRL__ANT_DIV_MAIN_LNACONF__MASK | MULTICHAIN_GAIN_CTRL__ANT_DIV_ALT_LNACONF__MASK | MULTICHAIN_GAIN_CTRL__ANT_DIV_ALT_GAINTB__MASK | MULTICHAIN_GAIN_CTRL__ANT_DIV_MAIN_GAINTB__MASK)); regval |= (HAL_ANT_DIV_COMB_LNA1 << MULTICHAIN_GAIN_CTRL__ANT_DIV_MAIN_LNACONF__SHIFT); regval |= (HAL_ANT_DIV_COMB_LNA2 << MULTICHAIN_GAIN_CTRL__ANT_DIV_ALT_LNACONF__SHIFT); OS_REG_WRITE(ah, AR_PHY_MC_GAIN_CTRL, regval); } } return AH_TRUE; } else if (AR_SREV_APHRODITE(ah)) { ahp->ah_lna_div_use_bt_ant_enable = enable; if (enable) { OS_REG_SET_BIT(ah, AR_PHY_MC_GAIN_CTRL, ANT_DIV_ENABLE); OS_REG_SET_BIT(ah, AR_PHY_MC_GAIN_CTRL, (1 << MULTICHAIN_GAIN_CTRL__ENABLE_ANT_SW_RX_PROT__SHIFT)); OS_REG_SET_BIT(ah, AR_PHY_CCK_DETECT, AR_PHY_CCK_DETECT_BB_ENABLE_ANT_FAST_DIV); OS_REG_SET_BIT(ah, AR_PHY_RESTART, RESTART__ENABLE_ANT_FAST_DIV_M2FLAG__MASK); OS_REG_SET_BIT(ah, AR_BTCOEX_WL_LNADIV, AR_BTCOEX_WL_LNADIV_FORCE_ON); } else { OS_REG_CLR_BIT(ah, AR_PHY_MC_GAIN_CTRL, ANT_DIV_ENABLE); OS_REG_CLR_BIT(ah, AR_PHY_MC_GAIN_CTRL, (1 << MULTICHAIN_GAIN_CTRL__ENABLE_ANT_SW_RX_PROT__SHIFT)); OS_REG_CLR_BIT(ah, AR_PHY_CCK_DETECT, AR_PHY_CCK_DETECT_BB_ENABLE_ANT_FAST_DIV); OS_REG_CLR_BIT(ah, AR_PHY_RESTART, RESTART__ENABLE_ANT_FAST_DIV_M2FLAG__MASK); OS_REG_CLR_BIT(ah, AR_BTCOEX_WL_LNADIV, AR_BTCOEX_WL_LNADIV_FORCE_ON); regval = OS_REG_READ(ah, AR_PHY_MC_GAIN_CTRL); regval &= (~(MULTICHAIN_GAIN_CTRL__ANT_DIV_MAIN_LNACONF__MASK | MULTICHAIN_GAIN_CTRL__ANT_DIV_ALT_LNACONF__MASK | MULTICHAIN_GAIN_CTRL__ANT_DIV_ALT_GAINTB__MASK | 
MULTICHAIN_GAIN_CTRL__ANT_DIV_MAIN_GAINTB__MASK)); regval |= (HAL_ANT_DIV_COMB_LNA1 << MULTICHAIN_GAIN_CTRL__ANT_DIV_MAIN_LNACONF__SHIFT); regval |= (HAL_ANT_DIV_COMB_LNA2 << MULTICHAIN_GAIN_CTRL__ANT_DIV_ALT_LNACONF__SHIFT); OS_REG_WRITE(ah, AR_PHY_MC_GAIN_CTRL, regval); } return AH_TRUE; } return AH_TRUE; } #endif /* ATH_ANT_DIV_COMB */ Index: projects/ipsec/sys/dev/ath/ath_hal/ah.c =================================================================== --- projects/ipsec/sys/dev/ath/ath_hal/ah.c (revision 313186) +++ projects/ipsec/sys/dev/ath/ath_hal/ah.c (revision 313187) @@ -1,1500 +1,1515 @@ /* * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * Copyright (c) 2002-2008 Atheros Communications, Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * $FreeBSD$ */ #include "opt_ah.h" #include "ah.h" #include "ah_internal.h" #include "ah_devid.h" #include "ah_eeprom.h" /* for 5ghz fast clock flag */ #include "ar5416/ar5416reg.h" /* NB: includes ar5212reg.h */ #include "ar9003/ar9300_devid.h" /* linker set of registered chips */ OS_SET_DECLARE(ah_chips, struct ath_hal_chip); /* * Check the set of registered chips to see if any recognize * the device as one they can support. */ const char* ath_hal_probe(uint16_t vendorid, uint16_t devid) { struct ath_hal_chip * const *pchip; OS_SET_FOREACH(pchip, ah_chips) { const char *name = (*pchip)->probe(vendorid, devid); if (name != AH_NULL) return name; } return AH_NULL; } /* * Attach detects device chip revisions, initializes the hwLayer * function list, reads EEPROM information, * selects reset vectors, and performs a short self test. * Any failures will return an error that should cause a hardware * disable. 
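 *
 * A minimal usage sketch (assumed caller-side code, error handling
 * elided):
 *
 *	HAL_STATUS status;
 *	struct ath_hal *ah = ath_hal_attach(devid, sc, st, sh,
 *	    eepromdata, &ah_config, &status);
 *	if (ah == AH_NULL)
 *		... status holds the failure code ...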
*/ struct ath_hal* ath_hal_attach(uint16_t devid, HAL_SOFTC sc, HAL_BUS_TAG st, HAL_BUS_HANDLE sh, uint16_t *eepromdata, HAL_OPS_CONFIG *ah_config, HAL_STATUS *error) { struct ath_hal_chip * const *pchip; OS_SET_FOREACH(pchip, ah_chips) { struct ath_hal_chip *chip = *pchip; struct ath_hal *ah; /* XXX don't have vendorid, assume atheros one works */ if (chip->probe(ATHEROS_VENDOR_ID, devid) == AH_NULL) continue; ah = chip->attach(devid, sc, st, sh, eepromdata, ah_config, error); if (ah != AH_NULL) { /* copy back private state to public area */ ah->ah_devid = AH_PRIVATE(ah)->ah_devid; ah->ah_subvendorid = AH_PRIVATE(ah)->ah_subvendorid; ah->ah_macVersion = AH_PRIVATE(ah)->ah_macVersion; ah->ah_macRev = AH_PRIVATE(ah)->ah_macRev; ah->ah_phyRev = AH_PRIVATE(ah)->ah_phyRev; ah->ah_analog5GhzRev = AH_PRIVATE(ah)->ah_analog5GhzRev; ah->ah_analog2GhzRev = AH_PRIVATE(ah)->ah_analog2GhzRev; return ah; } } return AH_NULL; } const char * ath_hal_mac_name(struct ath_hal *ah) { switch (ah->ah_macVersion) { case AR_SREV_VERSION_CRETE: case AR_SREV_VERSION_MAUI_1: return "AR5210"; case AR_SREV_VERSION_MAUI_2: case AR_SREV_VERSION_OAHU: return "AR5211"; case AR_SREV_VERSION_VENICE: return "AR5212"; case AR_SREV_VERSION_GRIFFIN: return "AR2413"; case AR_SREV_VERSION_CONDOR: return "AR5424"; case AR_SREV_VERSION_EAGLE: return "AR5413"; case AR_SREV_VERSION_COBRA: return "AR2415"; case AR_SREV_2425: /* Swan */ return "AR2425"; case AR_SREV_2417: /* Nala */ return "AR2417"; case AR_XSREV_VERSION_OWL_PCI: return "AR5416"; case AR_XSREV_VERSION_OWL_PCIE: return "AR5418"; case AR_XSREV_VERSION_HOWL: return "AR9130"; case AR_XSREV_VERSION_SOWL: return "AR9160"; case AR_XSREV_VERSION_MERLIN: if (AH_PRIVATE(ah)->ah_ispcie) return "AR9280"; return "AR9220"; case AR_XSREV_VERSION_KITE: return "AR9285"; case AR_XSREV_VERSION_KIWI: if (AH_PRIVATE(ah)->ah_ispcie) return "AR9287"; return "AR9227"; case AR_SREV_VERSION_AR9380: if (ah->ah_macRev >= AR_SREV_REVISION_AR9580_10) return "AR9580"; return "AR9380"; case AR_SREV_VERSION_AR9460: return "AR9460"; case AR_SREV_VERSION_AR9330: return "AR9330"; case AR_SREV_VERSION_AR9340: return "AR9340"; case AR_SREV_VERSION_QCA9550: return "QCA9550"; case AR_SREV_VERSION_AR9485: return "AR9485"; case AR_SREV_VERSION_QCA9565: return "QCA9565"; case AR_SREV_VERSION_QCA9530: return "QCA9530"; } return "????"; } /* * Return the mask of available modes based on the hardware capabilities. */ u_int ath_hal_getwirelessmodes(struct ath_hal*ah) { return ath_hal_getWirelessModes(ah); } /* linker set of registered RF backends */ OS_SET_DECLARE(ah_rfs, struct ath_hal_rf); /* * Check the set of registered RF backends to see if * any recognize the device as one they can support. 
*/ struct ath_hal_rf * ath_hal_rfprobe(struct ath_hal *ah, HAL_STATUS *ecode) { struct ath_hal_rf * const *prf; OS_SET_FOREACH(prf, ah_rfs) { struct ath_hal_rf *rf = *prf; if (rf->probe(ah)) return rf; } *ecode = HAL_ENOTSUPP; return AH_NULL; } const char * ath_hal_rf_name(struct ath_hal *ah) { switch (ah->ah_analog5GhzRev & AR_RADIO_SREV_MAJOR) { case 0: /* 5210 */ return "5110"; /* NB: made up */ case AR_RAD5111_SREV_MAJOR: case AR_RAD5111_SREV_PROD: return "5111"; case AR_RAD2111_SREV_MAJOR: return "2111"; case AR_RAD5112_SREV_MAJOR: case AR_RAD5112_SREV_2_0: case AR_RAD5112_SREV_2_1: return "5112"; case AR_RAD2112_SREV_MAJOR: case AR_RAD2112_SREV_2_0: case AR_RAD2112_SREV_2_1: return "2112"; case AR_RAD2413_SREV_MAJOR: return "2413"; case AR_RAD5413_SREV_MAJOR: return "5413"; case AR_RAD2316_SREV_MAJOR: return "2316"; case AR_RAD2317_SREV_MAJOR: return "2317"; case AR_RAD5424_SREV_MAJOR: return "5424"; case AR_RAD5133_SREV_MAJOR: return "5133"; case AR_RAD2133_SREV_MAJOR: return "2133"; case AR_RAD5122_SREV_MAJOR: return "5122"; case AR_RAD2122_SREV_MAJOR: return "2122"; } return "????"; } /* * Poll the register looking for a specific value. */ HAL_BOOL ath_hal_wait(struct ath_hal *ah, u_int reg, uint32_t mask, uint32_t val) { #define AH_TIMEOUT 1000 return ath_hal_waitfor(ah, reg, mask, val, AH_TIMEOUT); #undef AH_TIMEOUT } HAL_BOOL ath_hal_waitfor(struct ath_hal *ah, u_int reg, uint32_t mask, uint32_t val, uint32_t timeout) { int i; for (i = 0; i < timeout; i++) { if ((OS_REG_READ(ah, reg) & mask) == val) return AH_TRUE; OS_DELAY(10); } HALDEBUG(ah, HAL_DEBUG_REGIO | HAL_DEBUG_PHYIO, "%s: timeout on reg 0x%x: 0x%08x & 0x%08x != 0x%08x\n", __func__, reg, OS_REG_READ(ah, reg), mask, val); return AH_FALSE; } /* * Reverse the bits starting at the low bit for a value of * bit_count in size */ uint32_t ath_hal_reverseBits(uint32_t val, uint32_t n) { uint32_t retval; int i; for (i = 0, retval = 0; i < n; i++) { retval = (retval << 1) | (val & 1); val >>= 1; } return retval; } /* 802.11n related timing definitions */ #define OFDM_PLCP_BITS 22 #define HT_L_STF 8 #define HT_L_LTF 8 #define HT_L_SIG 4 #define HT_SIG 8 #define HT_STF 4 #define HT_LTF(n) ((n) * 4) #define HT_RC_2_MCS(_rc) ((_rc) & 0x1f) #define HT_RC_2_STREAMS(_rc) ((((_rc) & 0x78) >> 3) + 1) #define IS_HT_RATE(_rc) ( (_rc) & IEEE80211_RATE_MCS) /* * Calculate the duration of a packet whether it is 11n or legacy. */ uint32_t ath_hal_pkt_txtime(struct ath_hal *ah, const HAL_RATE_TABLE *rates, uint32_t frameLen, uint16_t rateix, HAL_BOOL isht40, HAL_BOOL shortPreamble, HAL_BOOL includeSifs) { uint8_t rc; int numStreams; rc = rates->info[rateix].rateCode; /* Legacy rate? Return the old way */ if (! 
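	/*
	 * Illustrative decode of an HT rate code (example value assumed):
	 * rc = 0x8b has IEEE80211_RATE_MCS set, HT_RC_2_MCS(0x8b) = 0x0b
	 * (MCS 11), and HT_RC_2_STREAMS(0x8b) = ((0x08 >> 3) + 1) = 2
	 * spatial streams.
	 */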
IS_HT_RATE(rc)) return ath_hal_computetxtime(ah, rates, frameLen, rateix, shortPreamble, includeSifs); /* 11n frame - extract out the number of spatial streams */ numStreams = HT_RC_2_STREAMS(rc); KASSERT(numStreams > 0 && numStreams <= 4, ("number of spatial streams needs to be 1..3: MCS rate 0x%x!", rateix)); /* XXX TODO: Add SIFS */ return ath_computedur_ht(frameLen, rc, numStreams, isht40, shortPreamble); } static const uint16_t ht20_bps[32] = { 26, 52, 78, 104, 156, 208, 234, 260, 52, 104, 156, 208, 312, 416, 468, 520, 78, 156, 234, 312, 468, 624, 702, 780, 104, 208, 312, 416, 624, 832, 936, 1040 }; static const uint16_t ht40_bps[32] = { 54, 108, 162, 216, 324, 432, 486, 540, 108, 216, 324, 432, 648, 864, 972, 1080, 162, 324, 486, 648, 972, 1296, 1458, 1620, 216, 432, 648, 864, 1296, 1728, 1944, 2160 }; /* * Calculate the transmit duration of an 11n frame. */ uint32_t ath_computedur_ht(uint32_t frameLen, uint16_t rate, int streams, HAL_BOOL isht40, HAL_BOOL isShortGI) { uint32_t bitsPerSymbol, numBits, numSymbols, txTime; KASSERT(rate & IEEE80211_RATE_MCS, ("not mcs %d", rate)); KASSERT((rate &~ IEEE80211_RATE_MCS) < 31, ("bad mcs 0x%x", rate)); if (isht40) bitsPerSymbol = ht40_bps[HT_RC_2_MCS(rate)]; else bitsPerSymbol = ht20_bps[HT_RC_2_MCS(rate)]; numBits = OFDM_PLCP_BITS + (frameLen << 3); numSymbols = howmany(numBits, bitsPerSymbol); if (isShortGI) txTime = ((numSymbols * 18) + 4) / 5; /* 3.6us */ else txTime = numSymbols * 4; /* 4us */ return txTime + HT_L_STF + HT_L_LTF + HT_L_SIG + HT_SIG + HT_STF + HT_LTF(streams); } /* * Compute the time to transmit a frame of length frameLen bytes * using the specified rate, phy, and short preamble setting. */ uint16_t ath_hal_computetxtime(struct ath_hal *ah, const HAL_RATE_TABLE *rates, uint32_t frameLen, uint16_t rateix, HAL_BOOL shortPreamble, HAL_BOOL includeSifs) { uint32_t bitsPerSymbol, numBits, numSymbols, phyTime, txTime; uint32_t kbps; /* Warn if this function is called for 11n rates; it should not be! */ if (IS_HT_RATE(rates->info[rateix].rateCode)) ath_hal_printf(ah, "%s: MCS rate? (index %d; hwrate 0x%x)\n", __func__, rateix, rates->info[rateix].rateCode); kbps = rates->info[rateix].rateKbps; /* * index can be invalid during dynamic Turbo transitions. 
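 *
 * Worked OFDM example (illustrative numbers): a 1500 byte frame at
 * 54 Mb/s gives bitsPerSymbol = (54000 * 4) / 1000 = 216, numBits =
 * 22 + 1500 * 8 = 12022, numSymbols = howmany(12022, 216) = 56, so
 * txTime = 20 + 56 * 4 = 244us before any SIFS is added.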
* XXX */ if (kbps == 0) return 0; switch (rates->info[rateix].phy) { case IEEE80211_T_CCK: phyTime = CCK_PREAMBLE_BITS + CCK_PLCP_BITS; if (shortPreamble && rates->info[rateix].shortPreamble) phyTime >>= 1; numBits = frameLen << 3; txTime = phyTime + ((numBits * 1000)/kbps); if (includeSifs) txTime += CCK_SIFS_TIME; break; case IEEE80211_T_OFDM: bitsPerSymbol = (kbps * OFDM_SYMBOL_TIME) / 1000; HALASSERT(bitsPerSymbol != 0); numBits = OFDM_PLCP_BITS + (frameLen << 3); numSymbols = howmany(numBits, bitsPerSymbol); txTime = OFDM_PREAMBLE_TIME + (numSymbols * OFDM_SYMBOL_TIME); if (includeSifs) txTime += OFDM_SIFS_TIME; break; case IEEE80211_T_OFDM_HALF: bitsPerSymbol = (kbps * OFDM_HALF_SYMBOL_TIME) / 1000; HALASSERT(bitsPerSymbol != 0); numBits = OFDM_HALF_PLCP_BITS + (frameLen << 3); numSymbols = howmany(numBits, bitsPerSymbol); txTime = OFDM_HALF_PREAMBLE_TIME + (numSymbols * OFDM_HALF_SYMBOL_TIME); if (includeSifs) txTime += OFDM_HALF_SIFS_TIME; break; case IEEE80211_T_OFDM_QUARTER: bitsPerSymbol = (kbps * OFDM_QUARTER_SYMBOL_TIME) / 1000; HALASSERT(bitsPerSymbol != 0); numBits = OFDM_QUARTER_PLCP_BITS + (frameLen << 3); numSymbols = howmany(numBits, bitsPerSymbol); txTime = OFDM_QUARTER_PREAMBLE_TIME + (numSymbols * OFDM_QUARTER_SYMBOL_TIME); if (includeSifs) txTime += OFDM_QUARTER_SIFS_TIME; break; case IEEE80211_T_TURBO: bitsPerSymbol = (kbps * TURBO_SYMBOL_TIME) / 1000; HALASSERT(bitsPerSymbol != 0); numBits = TURBO_PLCP_BITS + (frameLen << 3); numSymbols = howmany(numBits, bitsPerSymbol); txTime = TURBO_PREAMBLE_TIME + (numSymbols * TURBO_SYMBOL_TIME); if (includeSifs) txTime += TURBO_SIFS_TIME; break; default: HALDEBUG(ah, HAL_DEBUG_PHYIO, "%s: unknown phy %u (rate ix %u)\n", __func__, rates->info[rateix].phy, rateix); txTime = 0; break; } return txTime; } int ath_hal_get_curmode(struct ath_hal *ah, const struct ieee80211_channel *chan) { /* * Pick a default mode at bootup. A channel change is inevitable. */ if (!chan) return HAL_MODE_11NG_HT20; if (IEEE80211_IS_CHAN_TURBO(chan)) return HAL_MODE_TURBO; /* check for NA_HT before plain A, since IS_CHAN_A includes NA_HT */ if (IEEE80211_IS_CHAN_5GHZ(chan) && IEEE80211_IS_CHAN_HT20(chan)) return HAL_MODE_11NA_HT20; if (IEEE80211_IS_CHAN_5GHZ(chan) && IEEE80211_IS_CHAN_HT40U(chan)) return HAL_MODE_11NA_HT40PLUS; if (IEEE80211_IS_CHAN_5GHZ(chan) && IEEE80211_IS_CHAN_HT40D(chan)) return HAL_MODE_11NA_HT40MINUS; if (IEEE80211_IS_CHAN_A(chan)) return HAL_MODE_11A; /* check for NG_HT before plain G, since IS_CHAN_G includes NG_HT */ if (IEEE80211_IS_CHAN_2GHZ(chan) && IEEE80211_IS_CHAN_HT20(chan)) return HAL_MODE_11NG_HT20; if (IEEE80211_IS_CHAN_2GHZ(chan) && IEEE80211_IS_CHAN_HT40U(chan)) return HAL_MODE_11NG_HT40PLUS; if (IEEE80211_IS_CHAN_2GHZ(chan) && IEEE80211_IS_CHAN_HT40D(chan)) return HAL_MODE_11NG_HT40MINUS; /* * XXX For FreeBSD, will this work correctly given the DYN * chan mode (OFDM+CCK dynamic) ? We have pure-G versions DYN-BG.. */ if (IEEE80211_IS_CHAN_G(chan)) return HAL_MODE_11G; if (IEEE80211_IS_CHAN_B(chan)) return HAL_MODE_11B; HALASSERT(0); return HAL_MODE_11NG_HT20; } typedef enum { WIRELESS_MODE_11a = 0, WIRELESS_MODE_TURBO = 1, WIRELESS_MODE_11b = 2, WIRELESS_MODE_11g = 3, WIRELESS_MODE_108g = 4, WIRELESS_MODE_MAX } WIRELESS_MODE; /* * XXX TODO: for some (?) chips, an 11b mode still runs at 11bg. * Maybe AR5211 has separate 11b and 11g only modes, so 11b is 22MHz * and 11g is 44MHz, but AR5416 and later run 11b in 11bg mode, right? 
*/ static WIRELESS_MODE ath_hal_chan2wmode(struct ath_hal *ah, const struct ieee80211_channel *chan) { if (IEEE80211_IS_CHAN_B(chan)) return WIRELESS_MODE_11b; if (IEEE80211_IS_CHAN_G(chan)) return WIRELESS_MODE_11g; if (IEEE80211_IS_CHAN_108G(chan)) return WIRELESS_MODE_108g; if (IEEE80211_IS_CHAN_TURBO(chan)) return WIRELESS_MODE_TURBO; return WIRELESS_MODE_11a; } /* * Convert between microseconds and core system clocks. */ /* 11a Turbo 11b 11g 108g */ static const uint8_t CLOCK_RATE[] = { 40, 80, 22, 44, 88 }; #define CLOCK_FAST_RATE_5GHZ_OFDM 44 u_int ath_hal_mac_clks(struct ath_hal *ah, u_int usecs) { const struct ieee80211_channel *c = AH_PRIVATE(ah)->ah_curchan; u_int clks; /* NB: ah_curchan may be null when called attach time */ /* XXX merlin and later specific workaround - 5ghz fast clock is 44 */ if (c != AH_NULL && IS_5GHZ_FAST_CLOCK_EN(ah, c)) { clks = usecs * CLOCK_FAST_RATE_5GHZ_OFDM; if (IEEE80211_IS_CHAN_HT40(c)) clks <<= 1; } else if (c != AH_NULL) { clks = usecs * CLOCK_RATE[ath_hal_chan2wmode(ah, c)]; if (IEEE80211_IS_CHAN_HT40(c)) clks <<= 1; } else clks = usecs * CLOCK_RATE[WIRELESS_MODE_11b]; /* Compensate for half/quarter rate */ if (c != AH_NULL && IEEE80211_IS_CHAN_HALF(c)) clks = clks / 2; else if (c != AH_NULL && IEEE80211_IS_CHAN_QUARTER(c)) clks = clks / 4; return clks; } u_int ath_hal_mac_usec(struct ath_hal *ah, u_int clks) { uint64_t psec; psec = ath_hal_mac_psec(ah, clks); return (psec / 1000000); } /* * XXX TODO: half, quarter rates. */ uint64_t ath_hal_mac_psec(struct ath_hal *ah, u_int clks) { const struct ieee80211_channel *c = AH_PRIVATE(ah)->ah_curchan; uint64_t psec; /* NB: ah_curchan may be null when called attach time */ /* XXX merlin and later specific workaround - 5ghz fast clock is 44 */ if (c != AH_NULL && IS_5GHZ_FAST_CLOCK_EN(ah, c)) { psec = (clks * 1000000ULL) / CLOCK_FAST_RATE_5GHZ_OFDM; if (IEEE80211_IS_CHAN_HT40(c)) psec >>= 1; } else if (c != AH_NULL) { psec = (clks * 1000000ULL) / CLOCK_RATE[ath_hal_chan2wmode(ah, c)]; if (IEEE80211_IS_CHAN_HT40(c)) psec >>= 1; } else psec = (clks * 1000000ULL) / CLOCK_RATE[WIRELESS_MODE_11b]; return psec; } /* * Setup a h/w rate table's reverse lookup table and * fill in ack durations. This routine is called for * each rate table returned through the ah_getRateTable * method. The reverse lookup tables are assumed to be * initialized to zero (or at least the first entry). * We use this as a key that indicates whether or not * we've previously setup the reverse lookup table. 
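 *
 * For example (illustrative codes), an entry with rateCode 0x1b and
 * shortPreamble 0x04 is entered both as rateCodeToIndex[0x1b] and
 * rateCodeToIndex[0x1f], so descriptors carrying either code resolve
 * to the same rate index.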
* * XXX not reentrant, but shouldn't matter */ void ath_hal_setupratetable(struct ath_hal *ah, HAL_RATE_TABLE *rt) { #define N(a) (sizeof(a)/sizeof(a[0])) int i; if (rt->rateCodeToIndex[0] != 0) /* already setup */ return; for (i = 0; i < N(rt->rateCodeToIndex); i++) rt->rateCodeToIndex[i] = (uint8_t) -1; for (i = 0; i < rt->rateCount; i++) { uint8_t code = rt->info[i].rateCode; uint8_t cix = rt->info[i].controlRate; HALASSERT(code < N(rt->rateCodeToIndex)); rt->rateCodeToIndex[code] = i; HALASSERT((code | rt->info[i].shortPreamble) < N(rt->rateCodeToIndex)); rt->rateCodeToIndex[code | rt->info[i].shortPreamble] = i; /* * XXX for 11g the control rate to use for 5.5 and 11 Mb/s * depends on whether they are marked as basic rates; * the static tables are setup with an 11b-compatible * 2Mb/s rate which will work but is suboptimal */ rt->info[i].lpAckDuration = ath_hal_computetxtime(ah, rt, WLAN_CTRL_FRAME_SIZE, cix, AH_FALSE, AH_TRUE); rt->info[i].spAckDuration = ath_hal_computetxtime(ah, rt, WLAN_CTRL_FRAME_SIZE, cix, AH_TRUE, AH_TRUE); } #undef N } HAL_STATUS ath_hal_getcapability(struct ath_hal *ah, HAL_CAPABILITY_TYPE type, uint32_t capability, uint32_t *result) { const HAL_CAPABILITIES *pCap = &AH_PRIVATE(ah)->ah_caps; switch (type) { case HAL_CAP_REG_DMN: /* regulatory domain */ *result = AH_PRIVATE(ah)->ah_currentRD; return HAL_OK; case HAL_CAP_DFS_DMN: /* DFS Domain */ *result = AH_PRIVATE(ah)->ah_dfsDomain; return HAL_OK; case HAL_CAP_CIPHER: /* cipher handled in hardware */ case HAL_CAP_TKIP_MIC: /* handle TKIP MIC in hardware */ return HAL_ENOTSUPP; case HAL_CAP_TKIP_SPLIT: /* hardware TKIP uses split keys */ return HAL_ENOTSUPP; case HAL_CAP_PHYCOUNTERS: /* hardware PHY error counters */ return pCap->halHwPhyCounterSupport ? HAL_OK : HAL_ENXIO; case HAL_CAP_WME_TKIPMIC: /* hardware can do TKIP MIC when WMM is turned on */ return HAL_ENOTSUPP; case HAL_CAP_DIVERSITY: /* hardware supports fast diversity */ return HAL_ENOTSUPP; case HAL_CAP_KEYCACHE_SIZE: /* hardware key cache size */ *result = pCap->halKeyCacheSize; return HAL_OK; case HAL_CAP_NUM_TXQUEUES: /* number of hardware tx queues */ *result = pCap->halTotalQueues; return HAL_OK; case HAL_CAP_VEOL: /* hardware supports virtual EOL */ return pCap->halVEOLSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_PSPOLL: /* hardware PS-Poll support works */ return pCap->halPSPollBroken ? HAL_ENOTSUPP : HAL_OK; case HAL_CAP_COMPRESSION: return pCap->halCompressSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_BURST: return pCap->halBurstSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_FASTFRAME: return pCap->halFastFramesSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_DIAG: /* hardware diagnostic support */ *result = AH_PRIVATE(ah)->ah_diagreg; return HAL_OK; case HAL_CAP_TXPOW: /* global tx power limit */ switch (capability) { case 0: /* facility is supported */ return HAL_OK; case 1: /* current limit */ *result = AH_PRIVATE(ah)->ah_powerLimit; return HAL_OK; case 2: /* current max tx power */ *result = AH_PRIVATE(ah)->ah_maxPowerLevel; return HAL_OK; case 3: /* scale factor */ *result = AH_PRIVATE(ah)->ah_tpScale; return HAL_OK; } return HAL_ENOTSUPP; case HAL_CAP_BSSIDMASK: /* hardware supports bssid mask */ return pCap->halBssIdMaskSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_MCAST_KEYSRCH: /* multicast frame keycache search */ return pCap->halMcastKeySrchSupport ? 
HAL_OK : HAL_ENOTSUPP; case HAL_CAP_TSF_ADJUST: /* hardware has beacon tsf adjust */ return HAL_ENOTSUPP; case HAL_CAP_RFSILENT: /* rfsilent support */ switch (capability) { case 0: /* facility is supported */ return pCap->halRfSilentSupport ? HAL_OK : HAL_ENOTSUPP; case 1: /* current setting */ return AH_PRIVATE(ah)->ah_rfkillEnabled ? HAL_OK : HAL_ENOTSUPP; case 2: /* rfsilent config */ *result = AH_PRIVATE(ah)->ah_rfsilent; return HAL_OK; } return HAL_ENOTSUPP; case HAL_CAP_11D: return HAL_OK; case HAL_CAP_HT: return pCap->halHTSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_GTXTO: return pCap->halGTTSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_FAST_CC: return pCap->halFastCCSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_TX_CHAINMASK: /* mask of TX chains supported */ *result = pCap->halTxChainMask; return HAL_OK; case HAL_CAP_RX_CHAINMASK: /* mask of RX chains supported */ *result = pCap->halRxChainMask; return HAL_OK; case HAL_CAP_NUM_GPIO_PINS: *result = pCap->halNumGpioPins; return HAL_OK; case HAL_CAP_CST: return pCap->halCSTSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_RTS_AGGR_LIMIT: *result = pCap->halRtsAggrLimit; return HAL_OK; case HAL_CAP_4ADDR_AGGR: return pCap->hal4AddrAggrSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_EXT_CHAN_DFS: return pCap->halExtChanDfsSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_RX_STBC: return pCap->halRxStbcSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_TX_STBC: return pCap->halTxStbcSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_COMBINED_RADAR_RSSI: return pCap->halUseCombinedRadarRssi ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_AUTO_SLEEP: return pCap->halAutoSleepSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_MBSSID_AGGR_SUPPORT: return pCap->halMbssidAggrSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_SPLIT_4KB_TRANS: /* hardware handles descriptors straddling 4k page boundary */ return pCap->hal4kbSplitTransSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_REG_FLAG: *result = AH_PRIVATE(ah)->ah_currentRDext; return HAL_OK; case HAL_CAP_ENHANCED_DMA_SUPPORT: return pCap->halEnhancedDmaSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_NUM_TXMAPS: *result = pCap->halNumTxMaps; return HAL_OK; case HAL_CAP_TXDESCLEN: *result = pCap->halTxDescLen; return HAL_OK; case HAL_CAP_TXSTATUSLEN: *result = pCap->halTxStatusLen; return HAL_OK; case HAL_CAP_RXSTATUSLEN: *result = pCap->halRxStatusLen; return HAL_OK; case HAL_CAP_RXFIFODEPTH: switch (capability) { case HAL_RX_QUEUE_HP: *result = pCap->halRxHpFifoDepth; return HAL_OK; case HAL_RX_QUEUE_LP: *result = pCap->halRxLpFifoDepth; return HAL_OK; default: return HAL_ENOTSUPP; } case HAL_CAP_RXBUFSIZE: case HAL_CAP_NUM_MR_RETRIES: *result = pCap->halNumMRRetries; return HAL_OK; case HAL_CAP_BT_COEX: return pCap->halBtCoexSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_SPECTRAL_SCAN: return pCap->halSpectralScanSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_HT20_SGI: return pCap->halHTSGI20Support ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_RXTSTAMP_PREC: /* rx desc tstamp precision (bits) */ *result = pCap->halRxTstampPrecision; return HAL_OK; case HAL_CAP_ANT_DIV_COMB: /* AR9285/AR9485 LNA diversity */ return pCap->halAntDivCombSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_ENHANCED_DFS_SUPPORT: return pCap->halEnhancedDfsSupport ? HAL_OK : HAL_ENOTSUPP; /* FreeBSD-specific entries for now */ case HAL_CAP_RXORN_FATAL: /* HAL_INT_RXORN treated as fatal */ return AH_PRIVATE(ah)->ah_rxornIsFatal ? 
HAL_OK : HAL_ENOTSUPP; case HAL_CAP_INTRMASK: /* mask of supported interrupts */ *result = pCap->halIntrMask; return HAL_OK; case HAL_CAP_BSSIDMATCH: /* hardware has disable bssid match */ return pCap->halBssidMatchSupport ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_STREAMS: /* number of 11n spatial streams */ switch (capability) { case 0: /* TX */ *result = pCap->halTxStreams; return HAL_OK; case 1: /* RX */ *result = pCap->halRxStreams; return HAL_OK; default: return HAL_ENOTSUPP; } case HAL_CAP_RXDESC_SELFLINK: /* hardware supports self-linked final RX descriptors correctly */ return pCap->halHasRxSelfLinkedTail ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_BB_READ_WAR: /* Baseband read WAR */ return pCap->halHasBBReadWar? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_SERIALISE_WAR: /* PCI register serialisation */ return pCap->halSerialiseRegWar ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_MFP: /* Management frame protection setting */ *result = pCap->halMfpSupport; return HAL_OK; case HAL_CAP_RX_LNA_MIXING: /* Hardware uses an RX LNA mixer to map 2 antennas to a 1 stream receiver */ return pCap->halRxUsingLnaMixing ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_DO_MYBEACON: /* Hardware supports filtering my-beacons */ return pCap->halRxDoMyBeacon ? HAL_OK : HAL_ENOTSUPP; case HAL_CAP_TXTSTAMP_PREC: /* tx desc tstamp precision (bits) */ *result = pCap->halTxTstampPrecision; return HAL_OK; default: return HAL_EINVAL; } } HAL_BOOL ath_hal_setcapability(struct ath_hal *ah, HAL_CAPABILITY_TYPE type, uint32_t capability, uint32_t setting, HAL_STATUS *status) { switch (type) { case HAL_CAP_TXPOW: switch (capability) { case 3: if (setting <= HAL_TP_SCALE_MIN) { AH_PRIVATE(ah)->ah_tpScale = setting; return AH_TRUE; } break; } break; case HAL_CAP_RFSILENT: /* rfsilent support */ /* * NB: allow even if halRfSilentSupport is false * in case the EEPROM is misprogrammed. */ switch (capability) { case 1: /* current setting */ AH_PRIVATE(ah)->ah_rfkillEnabled = (setting != 0); return AH_TRUE; case 2: /* rfsilent config */ /* XXX better done per-chip for validation? */ AH_PRIVATE(ah)->ah_rfsilent = setting; return AH_TRUE; } break; case HAL_CAP_REG_DMN: /* regulatory domain */ AH_PRIVATE(ah)->ah_currentRD = setting; return AH_TRUE; case HAL_CAP_RXORN_FATAL: /* HAL_INT_RXORN treated as fatal */ AH_PRIVATE(ah)->ah_rxornIsFatal = setting; return AH_TRUE; default: break; } if (status) *status = HAL_EINVAL; return AH_FALSE; } /* * Common support for getDiagState method. 
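 *
 * The register dump produced below is a sequence of records of the
 * form [start, end, value(start), ..., value(end)], all 32-bit words;
 * a single-register range such as {0x8048, 0x8048} therefore occupies
 * three words (illustrative address).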
*/ static u_int ath_hal_getregdump(struct ath_hal *ah, const HAL_REGRANGE *regs, void *dstbuf, int space) { uint32_t *dp = dstbuf; int i; for (i = 0; space >= 2*sizeof(uint32_t); i++) { uint32_t r = regs[i].start; uint32_t e = regs[i].end; *dp++ = r; *dp++ = e; space -= 2*sizeof(uint32_t); do { *dp++ = OS_REG_READ(ah, r); r += sizeof(uint32_t); space -= sizeof(uint32_t); } while (r <= e && space >= sizeof(uint32_t)); } return (char *) dp - (char *) dstbuf; } static void ath_hal_setregs(struct ath_hal *ah, const HAL_REGWRITE *regs, int space) { while (space >= sizeof(HAL_REGWRITE)) { OS_REG_WRITE(ah, regs->addr, regs->value); regs++, space -= sizeof(HAL_REGWRITE); } } HAL_BOOL ath_hal_getdiagstate(struct ath_hal *ah, int request, const void *args, uint32_t argsize, void **result, uint32_t *resultsize) { switch (request) { case HAL_DIAG_REVS: *result = &AH_PRIVATE(ah)->ah_devid; *resultsize = sizeof(HAL_REVS); return AH_TRUE; case HAL_DIAG_REGS: *resultsize = ath_hal_getregdump(ah, args, *result,*resultsize); return AH_TRUE; case HAL_DIAG_SETREGS: ath_hal_setregs(ah, args, argsize); *resultsize = 0; return AH_TRUE; case HAL_DIAG_FATALERR: *result = &AH_PRIVATE(ah)->ah_fatalState[0]; *resultsize = sizeof(AH_PRIVATE(ah)->ah_fatalState); return AH_TRUE; case HAL_DIAG_EEREAD: if (argsize != sizeof(uint16_t)) return AH_FALSE; if (!ath_hal_eepromRead(ah, *(const uint16_t *)args, *result)) return AH_FALSE; *resultsize = sizeof(uint16_t); return AH_TRUE; #ifdef AH_PRIVATE_DIAG case HAL_DIAG_SETKEY: { const HAL_DIAG_KEYVAL *dk; if (argsize != sizeof(HAL_DIAG_KEYVAL)) return AH_FALSE; dk = (const HAL_DIAG_KEYVAL *)args; return ah->ah_setKeyCacheEntry(ah, dk->dk_keyix, &dk->dk_keyval, dk->dk_mac, dk->dk_xor); } case HAL_DIAG_RESETKEY: if (argsize != sizeof(uint16_t)) return AH_FALSE; return ah->ah_resetKeyCacheEntry(ah, *(const uint16_t *)args); #ifdef AH_SUPPORT_WRITE_EEPROM case HAL_DIAG_EEWRITE: { const HAL_DIAG_EEVAL *ee; if (argsize != sizeof(HAL_DIAG_EEVAL)) return AH_FALSE; ee = (const HAL_DIAG_EEVAL *)args; return ath_hal_eepromWrite(ah, ee->ee_off, ee->ee_data); } #endif /* AH_SUPPORT_WRITE_EEPROM */ #endif /* AH_PRIVATE_DIAG */ case HAL_DIAG_11NCOMPAT: if (argsize == 0) { *resultsize = sizeof(uint32_t); *((uint32_t *)(*result)) = AH_PRIVATE(ah)->ah_11nCompat; } else if (argsize == sizeof(uint32_t)) { AH_PRIVATE(ah)->ah_11nCompat = *(const uint32_t *)args; } else return AH_FALSE; return AH_TRUE; case HAL_DIAG_CHANSURVEY: *result = &AH_PRIVATE(ah)->ah_chansurvey; *resultsize = sizeof(HAL_CHANNEL_SURVEY); return AH_TRUE; } return AH_FALSE; } /* * Set the properties of the tx queue with the parameters * from qInfo. 
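 *
 * CWmin/CWmax are rounded up to the (2^n - 1) form the hardware
 * expects; e.g. a requested cwmin of 1000 is programmed as 1023.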
*/ HAL_BOOL ath_hal_setTxQProps(struct ath_hal *ah, HAL_TX_QUEUE_INFO *qi, const HAL_TXQ_INFO *qInfo) { uint32_t cw; if (qi->tqi_type == HAL_TX_QUEUE_INACTIVE) { HALDEBUG(ah, HAL_DEBUG_TXQUEUE, "%s: inactive queue\n", __func__); return AH_FALSE; } /* XXX validate parameters */ qi->tqi_ver = qInfo->tqi_ver; qi->tqi_subtype = qInfo->tqi_subtype; qi->tqi_qflags = qInfo->tqi_qflags; qi->tqi_priority = qInfo->tqi_priority; if (qInfo->tqi_aifs != HAL_TXQ_USEDEFAULT) qi->tqi_aifs = AH_MIN(qInfo->tqi_aifs, 255); else qi->tqi_aifs = INIT_AIFS; if (qInfo->tqi_cwmin != HAL_TXQ_USEDEFAULT) { cw = AH_MIN(qInfo->tqi_cwmin, 1024); /* make sure that the CWmin is of the form (2^n - 1) */ qi->tqi_cwmin = 1; while (qi->tqi_cwmin < cw) qi->tqi_cwmin = (qi->tqi_cwmin << 1) | 1; } else qi->tqi_cwmin = qInfo->tqi_cwmin; if (qInfo->tqi_cwmax != HAL_TXQ_USEDEFAULT) { cw = AH_MIN(qInfo->tqi_cwmax, 1024); /* make sure that the CWmax is of the form (2^n - 1) */ qi->tqi_cwmax = 1; while (qi->tqi_cwmax < cw) qi->tqi_cwmax = (qi->tqi_cwmax << 1) | 1; } else qi->tqi_cwmax = INIT_CWMAX; /* Set retry limit values */ if (qInfo->tqi_shretry != 0) qi->tqi_shretry = AH_MIN(qInfo->tqi_shretry, 15); else qi->tqi_shretry = INIT_SH_RETRY; if (qInfo->tqi_lgretry != 0) qi->tqi_lgretry = AH_MIN(qInfo->tqi_lgretry, 15); else qi->tqi_lgretry = INIT_LG_RETRY; qi->tqi_cbrPeriod = qInfo->tqi_cbrPeriod; qi->tqi_cbrOverflowLimit = qInfo->tqi_cbrOverflowLimit; qi->tqi_burstTime = qInfo->tqi_burstTime; qi->tqi_readyTime = qInfo->tqi_readyTime; switch (qInfo->tqi_subtype) { case HAL_WME_UPSD: if (qi->tqi_type == HAL_TX_QUEUE_DATA) qi->tqi_intFlags = HAL_TXQ_USE_LOCKOUT_BKOFF_DIS; break; default: break; /* NB: silence compiler */ } return AH_TRUE; } HAL_BOOL ath_hal_getTxQProps(struct ath_hal *ah, HAL_TXQ_INFO *qInfo, const HAL_TX_QUEUE_INFO *qi) { if (qi->tqi_type == HAL_TX_QUEUE_INACTIVE) { HALDEBUG(ah, HAL_DEBUG_TXQUEUE, "%s: inactive queue\n", __func__); return AH_FALSE; } qInfo->tqi_qflags = qi->tqi_qflags; qInfo->tqi_ver = qi->tqi_ver; qInfo->tqi_subtype = qi->tqi_subtype; qInfo->tqi_qflags = qi->tqi_qflags; qInfo->tqi_priority = qi->tqi_priority; qInfo->tqi_aifs = qi->tqi_aifs; qInfo->tqi_cwmin = qi->tqi_cwmin; qInfo->tqi_cwmax = qi->tqi_cwmax; qInfo->tqi_shretry = qi->tqi_shretry; qInfo->tqi_lgretry = qi->tqi_lgretry; qInfo->tqi_cbrPeriod = qi->tqi_cbrPeriod; qInfo->tqi_cbrOverflowLimit = qi->tqi_cbrOverflowLimit; qInfo->tqi_burstTime = qi->tqi_burstTime; qInfo->tqi_readyTime = qi->tqi_readyTime; return AH_TRUE; } /* 11a Turbo 11b 11g 108g */ static const int16_t NOISE_FLOOR[] = { -96, -93, -98, -96, -93 }; /* * Read the current channel noise floor and return. * If nf cal hasn't finished, channel noise floor should be 0 * and we return a nominal value based on band and frequency. * * NB: This is a private routine used by per-chip code to * implement the ah_getChanNoise method. */ int16_t ath_hal_getChanNoise(struct ath_hal *ah, const struct ieee80211_channel *chan) { HAL_CHANNEL_INTERNAL *ichan; ichan = ath_hal_checkchannel(ah, chan); if (ichan == AH_NULL) { HALDEBUG(ah, HAL_DEBUG_NFCAL, "%s: invalid channel %u/0x%x; no mapping\n", __func__, chan->ic_freq, chan->ic_flags); return 0; } if (ichan->rawNoiseFloor == 0) { WIRELESS_MODE mode = ath_hal_chan2wmode(ah, chan); HALASSERT(mode < WIRELESS_MODE_MAX); return NOISE_FLOOR[mode] + ath_hal_getNfAdjust(ah, ichan); } else return ichan->rawNoiseFloor + ichan->noiseFloorAdjust; } /* * Fetch the current setup of ctl/ext noise floor values. 
* * If the CHANNEL_MIMO_NF_VALID flag isn't set, the array is simply * populated with values from NOISE_FLOOR[] + ath_hal_getNfAdjust(). * * The caller must supply ctl/ext NF arrays which are at least * AH_MAX_CHAINS entries long. */ int ath_hal_get_mimo_chan_noise(struct ath_hal *ah, const struct ieee80211_channel *chan, int16_t *nf_ctl, int16_t *nf_ext) { #ifdef AH_SUPPORT_AR5416 HAL_CHANNEL_INTERNAL *ichan; int i; ichan = ath_hal_checkchannel(ah, chan); if (ichan == AH_NULL) { HALDEBUG(ah, HAL_DEBUG_NFCAL, "%s: invalid channel %u/0x%x; no mapping\n", __func__, chan->ic_freq, chan->ic_flags); for (i = 0; i < AH_MAX_CHAINS; i++) { nf_ctl[i] = nf_ext[i] = 0; } return 0; } /* Return 0 if there's no valid MIMO values (yet) */ if (! (ichan->privFlags & CHANNEL_MIMO_NF_VALID)) { for (i = 0; i < AH_MAX_CHAINS; i++) { nf_ctl[i] = nf_ext[i] = 0; } return 0; } if (ichan->rawNoiseFloor == 0) { WIRELESS_MODE mode = ath_hal_chan2wmode(ah, chan); HALASSERT(mode < WIRELESS_MODE_MAX); /* * See the comment below - this could cause issues for * stations which have a very low RSSI, below the * 'normalised' NF values in NOISE_FLOOR[]. */ for (i = 0; i < AH_MAX_CHAINS; i++) { nf_ctl[i] = nf_ext[i] = NOISE_FLOOR[mode] + ath_hal_getNfAdjust(ah, ichan); } return 1; } else { /* * The value returned here from a MIMO radio is presumed to be * "good enough" as a NF calculation. As RSSI values are calculated * against this, an adjusted NF may be higher than the RSSI value * returned from a vary weak station, resulting in an obscenely * high signal strength calculation being returned. * * This should be re-evaluated at a later date, along with any * signal strength calculations which are made. Quite likely the * RSSI values will need to be adjusted to ensure the calculations * don't "wrap" when RSSI is less than the "adjusted" NF value. * ("Adjust" here is via ichan->noiseFloorAdjust.) */ for (i = 0; i < AH_MAX_CHAINS; i++) { nf_ctl[i] = ichan->noiseFloorCtl[i] + ath_hal_getNfAdjust(ah, ichan); nf_ext[i] = ichan->noiseFloorExt[i] + ath_hal_getNfAdjust(ah, ichan); } return 1; } #else return 0; #endif /* AH_SUPPORT_AR5416 */ } /* * Process all valid raw noise floors into the dBm noise floor values. * Though our device has no reference for a dBm noise floor, we perform * a relative minimization of NF's based on the lowest NF found across a * channel scan. */ void ath_hal_process_noisefloor(struct ath_hal *ah) { HAL_CHANNEL_INTERNAL *c; int16_t correct2, correct5; int16_t lowest2, lowest5; int i; /* * Find the lowest 2GHz and 5GHz noise floor values after adjusting * for statistically recorded NF/channel deviation. */ correct2 = lowest2 = 0; correct5 = lowest5 = 0; for (i = 0; i < AH_PRIVATE(ah)->ah_nchan; i++) { WIRELESS_MODE mode; int16_t nf; c = &AH_PRIVATE(ah)->ah_channels[i]; if (c->rawNoiseFloor >= 0) continue; /* XXX can't identify proper mode */ mode = IS_CHAN_5GHZ(c) ? 
		    WIRELESS_MODE_11a : WIRELESS_MODE_11g;
		nf = c->rawNoiseFloor + NOISE_FLOOR[mode] +
		    ath_hal_getNfAdjust(ah, c);
		if (IS_CHAN_5GHZ(c)) {
			if (nf < lowest5) {
				lowest5 = nf;
				correct5 = NOISE_FLOOR[mode] -
				    (c->rawNoiseFloor + ath_hal_getNfAdjust(ah, c));
			}
		} else {
			if (nf < lowest2) {
				lowest2 = nf;
				correct2 = NOISE_FLOOR[mode] -
				    (c->rawNoiseFloor + ath_hal_getNfAdjust(ah, c));
			}
		}
	}

	/* Correct the channels to reach the expected NF value */
	for (i = 0; i < AH_PRIVATE(ah)->ah_nchan; i++) {
		c = &AH_PRIVATE(ah)->ah_channels[i];
		if (c->rawNoiseFloor >= 0)
			continue;
		/* Apply correction factor */
		c->noiseFloorAdjust = ath_hal_getNfAdjust(ah, c) +
		    (IS_CHAN_5GHZ(c) ? correct5 : correct2);
		HALDEBUG(ah, HAL_DEBUG_NFCAL, "%u raw nf %d adjust %d\n",
		    c->channel, c->rawNoiseFloor, c->noiseFloorAdjust);
	}
}

/*
 * INI support routines.
 */
int
ath_hal_ini_write(struct ath_hal *ah, const HAL_INI_ARRAY *ia,
    int col, int regWr)
{
	int r;

	HALASSERT(col < ia->cols);
	for (r = 0; r < ia->rows; r++) {
		OS_REG_WRITE(ah, HAL_INI_VAL(ia, r, 0),
		    HAL_INI_VAL(ia, r, col));

		/* Analog shift register delay seems needed for Merlin - PR kern/154220 */
		if (HAL_INI_VAL(ia, r, 0) >= 0x7800 &&
		    HAL_INI_VAL(ia, r, 0) < 0x7900)
			OS_DELAY(100);

		DMA_YIELD(regWr);
	}
	return regWr;
}

void
ath_hal_ini_bank_setup(uint32_t data[], const HAL_INI_ARRAY *ia, int col)
{
	int r;

	HALASSERT(col < ia->cols);
	for (r = 0; r < ia->rows; r++)
		data[r] = HAL_INI_VAL(ia, r, col);
}

int
ath_hal_ini_bank_write(struct ath_hal *ah, const HAL_INI_ARRAY *ia,
    const uint32_t data[], int regWr)
{
	int r;

	for (r = 0; r < ia->rows; r++) {
		OS_REG_WRITE(ah, HAL_INI_VAL(ia, r, 0), data[r]);
		DMA_YIELD(regWr);
	}
	return regWr;
}

/*
 * These are EEPROM board related routines which should likely live in
 * a helper library of some sort.
 */

/**************************************************************
 * ath_ee_getLowerUpperIndex
 *
 * Return indices surrounding the value in sorted integer lists.
 * Requirement: the input list must be monotonically increasing
 *     and populated up to the list size.
 * Returns: match is set if an index in the array matches exactly,
 *     or the target is before or after the range of the array.
 */
HAL_BOOL
ath_ee_getLowerUpperIndex(uint8_t target, uint8_t *pList, uint16_t listSize,
    uint16_t *indexL, uint16_t *indexR)
{
	uint16_t i;

	/*
	 * Check first and last elements for beyond ordered array cases.
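	 *
	 * Example (illustrative list): for pList = {2, 4, 8}, target 4
	 * matches exactly (indexL = indexR = 1, returns AH_TRUE), while
	 * target 5 brackets to indexL = 1, indexR = 2 and returns AH_FALSE.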
*/ if (target <= pList[0]) { *indexL = *indexR = 0; return AH_TRUE; } if (target >= pList[listSize-1]) { *indexL = *indexR = (uint16_t)(listSize - 1); return AH_TRUE; } /* look for value being near or between 2 values in list */ for (i = 0; i < listSize - 1; i++) { /* * If value is close to the current value of the list * then target is not between values, it is one of the values */ if (pList[i] == target) { *indexL = *indexR = i; return AH_TRUE; } /* * Look for value being between current value and next value * if so return these 2 values */ if (target < pList[i + 1]) { *indexL = i; *indexR = (uint16_t)(i + 1); return AH_FALSE; } } HALASSERT(0); *indexL = *indexR = 0; return AH_FALSE; } /************************************************************** * ath_ee_FillVpdTable * * Fill the Vpdlist for indices Pmax-Pmin * Note: pwrMin, pwrMax and Vpdlist are all in dBm * 4 */ HAL_BOOL ath_ee_FillVpdTable(uint8_t pwrMin, uint8_t pwrMax, uint8_t *pPwrList, uint8_t *pVpdList, uint16_t numIntercepts, uint8_t *pRetVpdList) { uint16_t i, k; uint8_t currPwr = pwrMin; uint16_t idxL, idxR; HALASSERT(pwrMax > pwrMin); for (i = 0; i <= (pwrMax - pwrMin) / 2; i++) { ath_ee_getLowerUpperIndex(currPwr, pPwrList, numIntercepts, &(idxL), &(idxR)); if (idxR < 1) idxR = 1; /* extrapolate below */ if (idxL == numIntercepts - 1) idxL = (uint16_t)(numIntercepts - 2); /* extrapolate above */ if (pPwrList[idxL] == pPwrList[idxR]) k = pVpdList[idxL]; else k = (uint16_t)( ((currPwr - pPwrList[idxL]) * pVpdList[idxR] + (pPwrList[idxR] - currPwr) * pVpdList[idxL]) / (pPwrList[idxR] - pPwrList[idxL]) ); HALASSERT(k < 256); pRetVpdList[i] = (uint8_t)k; currPwr += 2; /* half dB steps */ } return AH_TRUE; } /************************************************************************** * ath_ee_interpolate * * Returns signed interpolated or the scaled up interpolated value */ int16_t ath_ee_interpolate(uint16_t target, uint16_t srcLeft, uint16_t srcRight, int16_t targetLeft, int16_t targetRight) { int16_t rv; if (srcRight == srcLeft) { rv = targetLeft; } else { rv = (int16_t)( ((target - srcLeft) * targetRight + (srcRight - target) * targetLeft) / (srcRight - srcLeft) ); } return rv; } /* * Adjust the TSF. */ void ath_hal_adjusttsf(struct ath_hal *ah, int32_t tsfdelta) { /* XXX handle wrap/overflow */ OS_REG_WRITE(ah, AR_TSF_L32, OS_REG_READ(ah, AR_TSF_L32) + tsfdelta); } /* * Enable or disable CCA. */ void ath_hal_setcca(struct ath_hal *ah, int ena) { /* * NB: fill me in; this is not provided by default because disabling * CCA in most locales violates regulatory. */ } /* * Get CCA setting. * * XXX TODO: turn this and the above function into methods * in case there are chipset differences in handling CCA. */ int ath_hal_getcca(struct ath_hal *ah) { u_int32_t diag; if (ath_hal_getcapability(ah, HAL_CAP_DIAG, 0, &diag) != HAL_OK) return 1; return ((diag & 0x500000) == 0); } /* + * Set the current state of self-generated ACK and RTS/CTS frames. + * + * For correct DFS operation, the device should not even /ACK/ frames + * that are sent to it during CAC or CSA. + */ +void +ath_hal_set_dfs_cac_tx_quiet(struct ath_hal *ah, HAL_BOOL ena) +{ + + if (ah->ah_setDfsCacTxQuiet == NULL) + return; + ah->ah_setDfsCacTxQuiet(ah, ena); +} + +/* * This routine is only needed when supporting EEPROM-in-RAM setups * (eg embedded SoCs and on-board PCI/PCIe devices.) */ /* NB: This is in 16 bit words; not bytes */ /* XXX This doesn't belong here! 
*/ #define ATH_DATA_EEPROM_SIZE 2048 HAL_BOOL ath_hal_EepromDataRead(struct ath_hal *ah, u_int off, uint16_t *data) { if (ah->ah_eepromdata == AH_NULL) { HALDEBUG(ah, HAL_DEBUG_ANY, "%s: no eeprom data!\n", __func__); return AH_FALSE; } if (off > ATH_DATA_EEPROM_SIZE) { HALDEBUG(ah, HAL_DEBUG_ANY, "%s: offset %x > %x\n", __func__, off, ATH_DATA_EEPROM_SIZE); return AH_FALSE; } (*data) = ah->ah_eepromdata[off]; return AH_TRUE; } /* * Do a 2GHz specific MHz->IEEE based on the hardware * frequency. * * This is the unmapped frequency which is programmed into the hardware. */ int ath_hal_mhz2ieee_2ghz(struct ath_hal *ah, int freq) { if (freq == 2484) return 14; if (freq < 2484) return ((int) freq - 2407) / 5; else return 15 + ((freq - 2512) / 20); } /* * Clear the current survey data. * * This should be done during a channel change. */ void ath_hal_survey_clear(struct ath_hal *ah) { OS_MEMZERO(&AH_PRIVATE(ah)->ah_chansurvey, sizeof(AH_PRIVATE(ah)->ah_chansurvey)); } /* * Add a sample to the channel survey. */ void ath_hal_survey_add_sample(struct ath_hal *ah, HAL_SURVEY_SAMPLE *hs) { HAL_CHANNEL_SURVEY *cs; cs = &AH_PRIVATE(ah)->ah_chansurvey; OS_MEMCPY(&cs->samples[cs->cur_sample], hs, sizeof(*hs)); cs->samples[cs->cur_sample].seq_num = cs->cur_seq; cs->cur_sample = (cs->cur_sample + 1) % CHANNEL_SURVEY_SAMPLE_COUNT; cs->cur_seq++; } Index: projects/ipsec/sys/dev/ath/ath_hal/ah.h =================================================================== --- projects/ipsec/sys/dev/ath/ath_hal/ah.h (revision 313186) +++ projects/ipsec/sys/dev/ath/ath_hal/ah.h (revision 313187) @@ -1,1685 +1,1691 @@ /* * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * Copyright (c) 2002-2008 Atheros Communications, Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * $FreeBSD$ */ #ifndef _ATH_AH_H_ #define _ATH_AH_H_ /* * Atheros Hardware Access Layer * * Clients of the HAL call ath_hal_attach to obtain a reference to an ath_hal * structure for use with the device. Hardware-related operations that * follow must call back into the HAL through interface, supplying the * reference as the first parameter. */ #include "ah_osdep.h" /* * The maximum number of TX/RX chains supported. * This is intended to be used by various statistics gathering operations * (NF, RSSI, EVM). */ #define AH_MAX_CHAINS 3 #define AH_MIMO_MAX_EVM_PILOTS 6 /* * __ahdecl is analogous to _cdecl; it defines the calling * convention used within the HAL. For most systems this * can just default to be empty and the compiler will (should) * use _cdecl. For systems where _cdecl is not compatible this * must be defined. See linux/ah_osdep.h for an example. */ #ifndef __ahdecl #define __ahdecl #endif /* * Status codes that may be returned by the HAL. Note that * interfaces that return a status code set it only when an * error occurs--i.e. you cannot check it for success. 
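 *
 * For example, ath_hal_attach() signals failure by returning AH_NULL;
 * only then is the HAL_STATUS it wrote through its error parameter
 * meaningful.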
*/ typedef enum { HAL_OK = 0, /* No error */ HAL_ENXIO = 1, /* No hardware present */ HAL_ENOMEM = 2, /* Memory allocation failed */ HAL_EIO = 3, /* Hardware didn't respond as expected */ HAL_EEMAGIC = 4, /* EEPROM magic number invalid */ HAL_EEVERSION = 5, /* EEPROM version invalid */ HAL_EELOCKED = 6, /* EEPROM unreadable */ HAL_EEBADSUM = 7, /* EEPROM checksum invalid */ HAL_EEREAD = 8, /* EEPROM read problem */ HAL_EEBADMAC = 9, /* EEPROM mac address invalid */ HAL_EESIZE = 10, /* EEPROM size not supported */ HAL_EEWRITE = 11, /* Attempt to change write-locked EEPROM */ HAL_EINVAL = 12, /* Invalid parameter to function */ HAL_ENOTSUPP = 13, /* Hardware revision not supported */ HAL_ESELFTEST = 14, /* Hardware self-test failed */ HAL_EINPROGRESS = 15, /* Operation incomplete */ HAL_EEBADREG = 16, /* EEPROM invalid regulatory contents */ HAL_EEBADCC = 17, /* EEPROM invalid country code */ HAL_INV_PMODE = 18, /* Couldn't bring out of sleep state */ } HAL_STATUS; typedef enum { AH_FALSE = 0, /* NB: lots of code assumes false is zero */ AH_TRUE = 1, } HAL_BOOL; typedef enum { HAL_CAP_REG_DMN = 0, /* current regulatory domain */ HAL_CAP_CIPHER = 1, /* hardware supports cipher */ HAL_CAP_TKIP_MIC = 2, /* handle TKIP MIC in hardware */ HAL_CAP_TKIP_SPLIT = 3, /* hardware TKIP uses split keys */ HAL_CAP_PHYCOUNTERS = 4, /* hardware PHY error counters */ HAL_CAP_DIVERSITY = 5, /* hardware supports fast diversity */ HAL_CAP_KEYCACHE_SIZE = 6, /* number of entries in key cache */ HAL_CAP_NUM_TXQUEUES = 7, /* number of hardware xmit queues */ HAL_CAP_VEOL = 9, /* hardware supports virtual EOL */ HAL_CAP_PSPOLL = 10, /* hardware has working PS-Poll support */ HAL_CAP_DIAG = 11, /* hardware diagnostic support */ HAL_CAP_COMPRESSION = 12, /* hardware supports compression */ HAL_CAP_BURST = 13, /* hardware supports packet bursting */ HAL_CAP_FASTFRAME = 14, /* hardware supoprts fast frames */ HAL_CAP_TXPOW = 15, /* global tx power limit */ HAL_CAP_TPC = 16, /* per-packet tx power control */ HAL_CAP_PHYDIAG = 17, /* hardware phy error diagnostic */ HAL_CAP_BSSIDMASK = 18, /* hardware supports bssid mask */ HAL_CAP_MCAST_KEYSRCH = 19, /* hardware has multicast key search */ HAL_CAP_TSF_ADJUST = 20, /* hardware has beacon tsf adjust */ /* 21 was HAL_CAP_XR */ HAL_CAP_WME_TKIPMIC = 22, /* hardware can support TKIP MIC when WMM is turned on */ /* 23 was HAL_CAP_CHAN_HALFRATE */ /* 24 was HAL_CAP_CHAN_QUARTERRATE */ HAL_CAP_RFSILENT = 25, /* hardware has rfsilent support */ HAL_CAP_TPC_ACK = 26, /* ack txpower with per-packet tpc */ HAL_CAP_TPC_CTS = 27, /* cts txpower with per-packet tpc */ HAL_CAP_11D = 28, /* 11d beacon support for changing cc */ HAL_CAP_PCIE_PS = 29, HAL_CAP_HT = 30, /* hardware can support HT */ HAL_CAP_GTXTO = 31, /* hardware supports global tx timeout */ HAL_CAP_FAST_CC = 32, /* hardware supports fast channel change */ HAL_CAP_TX_CHAINMASK = 33, /* mask of TX chains supported */ HAL_CAP_RX_CHAINMASK = 34, /* mask of RX chains supported */ HAL_CAP_NUM_GPIO_PINS = 36, /* number of GPIO pins */ HAL_CAP_CST = 38, /* hardware supports carrier sense timeout */ HAL_CAP_RIFS_RX = 39, HAL_CAP_RIFS_TX = 40, HAL_CAP_FORCE_PPM = 41, HAL_CAP_RTS_AGGR_LIMIT = 42, /* aggregation limit with RTS */ HAL_CAP_4ADDR_AGGR = 43, /* hardware is capable of 4addr aggregation */ HAL_CAP_DFS_DMN = 44, /* current DFS domain */ HAL_CAP_EXT_CHAN_DFS = 45, /* DFS support for extension channel */ HAL_CAP_COMBINED_RADAR_RSSI = 46, /* Is combined RSSI for radar accurate */ HAL_CAP_AUTO_SLEEP = 48, /* hardware can go to 
network sleep automatically after waking up to receive TIM */ HAL_CAP_MBSSID_AGGR_SUPPORT = 49, /* Support for mBSSID Aggregation */ HAL_CAP_SPLIT_4KB_TRANS = 50, /* hardware supports descriptors straddling a 4k page boundary */ HAL_CAP_REG_FLAG = 51, /* Regulatory domain flags */ HAL_CAP_BB_RIFS_HANG = 52, HAL_CAP_RIFS_RX_ENABLED = 53, HAL_CAP_BB_DFS_HANG = 54, HAL_CAP_RX_STBC = 58, HAL_CAP_TX_STBC = 59, HAL_CAP_BT_COEX = 60, /* hardware is capable of bluetooth coexistence */ HAL_CAP_DYNAMIC_SMPS = 61, /* Dynamic MIMO Power Save hardware support */ HAL_CAP_DS = 67, /* 2 stream */ HAL_CAP_BB_RX_CLEAR_STUCK_HANG = 68, HAL_CAP_MAC_HANG = 69, /* can MAC hang */ HAL_CAP_MFP = 70, /* Management Frame Protection in hardware */ HAL_CAP_TS = 72, /* 3 stream */ HAL_CAP_ENHANCED_DMA_SUPPORT = 75, /* DMA FIFO support */ HAL_CAP_NUM_TXMAPS = 76, /* Number of buffers in a transmit descriptor */ HAL_CAP_TXDESCLEN = 77, /* Length of transmit descriptor */ HAL_CAP_TXSTATUSLEN = 78, /* Length of transmit status descriptor */ HAL_CAP_RXSTATUSLEN = 79, /* Length of transmit status descriptor */ HAL_CAP_RXFIFODEPTH = 80, /* Receive hardware FIFO depth */ HAL_CAP_RXBUFSIZE = 81, /* Receive Buffer Length */ HAL_CAP_NUM_MR_RETRIES = 82, /* limit on multirate retries */ HAL_CAP_OL_PWRCTRL = 84, /* Open loop TX power control */ HAL_CAP_SPECTRAL_SCAN = 90, /* Hardware supports spectral scan */ HAL_CAP_BB_PANIC_WATCHDOG = 92, HAL_CAP_HT20_SGI = 96, /* hardware supports HT20 short GI */ HAL_CAP_LDPC = 99, HAL_CAP_RXTSTAMP_PREC = 100, /* rx desc tstamp precision (bits) */ HAL_CAP_ANT_DIV_COMB = 105, /* Enable antenna diversity/combining */ HAL_CAP_PHYRESTART_CLR_WAR = 106, /* in some cases, clear phy restart to fix bb hang */ HAL_CAP_ENTERPRISE_MODE = 107, /* Enterprise mode features */ HAL_CAP_LDPCWAR = 108, HAL_CAP_CHANNEL_SWITCH_TIME_USEC = 109, /* Channel change time, usec */ HAL_CAP_ENABLE_APM = 110, /* APM enabled */ HAL_CAP_PCIE_LCR_EXTSYNC_EN = 111, HAL_CAP_PCIE_LCR_OFFSET = 112, HAL_CAP_ENHANCED_DFS_SUPPORT = 117, /* hardware supports enhanced DFS */ HAL_CAP_MCI = 118, HAL_CAP_SMARTANTENNA = 119, HAL_CAP_TRAFFIC_FAST_RECOVER = 120, HAL_CAP_TX_DIVERSITY = 121, HAL_CAP_CRDC = 122, /* The following are private to the FreeBSD HAL (224 onward) */ HAL_CAP_INTMIT = 229, /* interference mitigation */ HAL_CAP_RXORN_FATAL = 230, /* HAL_INT_RXORN treated as fatal */ HAL_CAP_BB_HANG = 235, /* can baseband hang */ HAL_CAP_INTRMASK = 237, /* bitmask of supported interrupts */ HAL_CAP_BSSIDMATCH = 238, /* hardware has disable bssid match */ HAL_CAP_STREAMS = 239, /* how many 802.11n spatial streams are available */ HAL_CAP_RXDESC_SELFLINK = 242, /* support a self-linked tail RX descriptor */ HAL_CAP_BB_READ_WAR = 244, /* baseband read WAR */ HAL_CAP_SERIALISE_WAR = 245, /* serialise register access on PCI */ HAL_CAP_ENFORCE_TXOP = 246, /* Enforce TXOP if supported */ HAL_CAP_RX_LNA_MIXING = 247, /* RX hardware uses LNA mixing */ HAL_CAP_DO_MYBEACON = 248, /* Supports HAL_RX_FILTER_MYBEACON */ HAL_CAP_TOA_LOCATIONING = 249, /* time of flight / arrival locationing */ HAL_CAP_TXTSTAMP_PREC = 250, /* tx desc tstamp precision (bits) */ } HAL_CAPABILITY_TYPE; /* * "States" for setting the LED. These correspond to * the possible 802.11 operational states and there may * be a many-to-one mapping between these states and the * actual hardware state for the LED's (i.e. the hardware * may have fewer states). 
 */
typedef enum {
	HAL_LED_INIT	= 0,
	HAL_LED_SCAN	= 1,
	HAL_LED_AUTH	= 2,
	HAL_LED_ASSOC	= 3,
	HAL_LED_RUN	= 4
} HAL_LED_STATE;

/*
 * Transmit queue types/numbers.  These are used to tag
 * each transmit queue in the hardware and to identify a set
 * of transmit queues for operations such as start/stop dma.
 */
typedef enum {
	HAL_TX_QUEUE_INACTIVE	= 0,	/* queue is inactive/unused */
	HAL_TX_QUEUE_DATA	= 1,	/* data xmit q's */
	HAL_TX_QUEUE_BEACON	= 2,	/* beacon xmit q */
	HAL_TX_QUEUE_CAB	= 3,	/* "crap after beacon" xmit q */
	HAL_TX_QUEUE_UAPSD	= 4,	/* u-apsd power save xmit q */
	HAL_TX_QUEUE_PSPOLL	= 5,	/* power save poll xmit q */
	HAL_TX_QUEUE_CFEND	= 6,
	HAL_TX_QUEUE_PAPRD	= 7,
} HAL_TX_QUEUE;

#define	HAL_NUM_TX_QUEUES	10	/* max possible # of queues */

/*
 * Receive queue types.  These are used to tag
 * each receive queue in the hardware and to identify a set
 * of receive queues for operations such as start/stop dma.
 */
typedef enum {
	HAL_RX_QUEUE_HP	= 0,	/* high priority recv queue */
	HAL_RX_QUEUE_LP	= 1,	/* low priority recv queue */
} HAL_RX_QUEUE;

#define	HAL_NUM_RX_QUEUES	2	/* max possible # of queues */

#define	HAL_TXFIFO_DEPTH	8	/* transmit fifo depth */

/*
 * Transmit queue subtype.  These map directly to
 * WME Access Categories (except for UPSD).  Refer
 * to Table 5 of the WME spec.
 */
typedef enum {
	HAL_WME_AC_BK	= 0,	/* background access category */
	HAL_WME_AC_BE	= 1,	/* best effort access category */
	HAL_WME_AC_VI	= 2,	/* video access category */
	HAL_WME_AC_VO	= 3,	/* voice access category */
	HAL_WME_UPSD	= 4,	/* uplink power save */
} HAL_TX_QUEUE_SUBTYPE;

/*
 * Transmit queue flags that control various
 * operational parameters.
 */
typedef enum {
	/*
	 * Per queue interrupt enables.  When set the associated
	 * interrupt may be delivered for packets sent through
	 * the queue.  Without these enabled no interrupts will
	 * be delivered for transmits through the queue.
	 */
	HAL_TXQ_TXOKINT_ENABLE	= 0x0001,	/* enable TXOK interrupt */
	HAL_TXQ_TXERRINT_ENABLE	= 0x0001,	/* enable TXERR interrupt */
	HAL_TXQ_TXDESCINT_ENABLE = 0x0002,	/* enable TXDESC interrupt */
	HAL_TXQ_TXEOLINT_ENABLE	= 0x0004,	/* enable TXEOL interrupt */
	HAL_TXQ_TXURNINT_ENABLE	= 0x0008,	/* enable TXURN interrupt */
	/*
	 * Enable hardware compression for packets sent through
	 * the queue.  The compression buffer must be setup and
	 * packets must have a key entry marked in the tx descriptor.
	 */
	HAL_TXQ_COMPRESSION_ENABLE	= 0x0010,	/* enable h/w compression */
	/*
	 * Disable queue when veol is hit or ready time expires.
	 * By default the queue is disabled only on reaching the
	 * physical end of queue (i.e. a null link ptr in the
	 * descriptor chain).
	 */
	HAL_TXQ_RDYTIME_EXP_POLICY_ENABLE	= 0x0020,
	/*
	 * Schedule frames on delivery of a DBA (DMA Beacon Alert)
	 * event.  Frames will be transmitted only when this timer
	 * fires, e.g to transmit a beacon in ap or adhoc modes.
	 */
	HAL_TXQ_DBA_GATED	= 0x0040,	/* schedule based on DBA */
	/*
	 * Each transmit queue has a counter that is incremented
	 * each time the queue is enabled and decremented when
	 * the list of frames to transmit is traversed (or when
	 * the ready time for the queue expires).  This counter
	 * must be non-zero for frames to be scheduled for
	 * transmission.  The following controls disable bumping
	 * this counter under certain conditions.  Typically this
	 * is used to gate frames based on the contents of another
	 * queue (e.g. CAB traffic may only follow a beacon frame).
	 * These are meaningful only when frames are scheduled
	 * with a non-ASAP policy (e.g. DBA-gated).
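	 *
	 * As an illustrative sketch of how these combine (an assumption
	 * about typical use, not a requirement of this header): a CAB
	 * queue is usually DBA-gated and disabled once the beacon queue
	 * drains,
	 *
	 *	HAL_TXQ_INFO qi;
	 *
	 *	memset(&qi, 0, sizeof(qi));
	 *	qi.tqi_qflags = HAL_TXQ_DBA_GATED | HAL_TXQ_CBR_DIS_BEMPTY;
	 *	(void) ah->ah_setupTxQueue(ah, HAL_TX_QUEUE_CAB, &qi);
	 *
	 * so CAB frames only follow a transmitted beacon.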
	 */
	HAL_TXQ_CBR_DIS_QEMPTY	= 0x0080,	/* disable on this q empty */
	HAL_TXQ_CBR_DIS_BEMPTY	= 0x0100,	/* disable on beacon q empty */
	/*
	 * Fragment burst backoff policy.  Normally no backoff
	 * is done after a successful transmission; the next fragment
	 * is sent at SIFS.  If this flag is set backoff is done
	 * after each fragment, regardless of whether it was ack'd or
	 * not; after the backoff count reaches zero a normal channel
	 * access procedure is done before the next transmit (i.e.
	 * wait AIFS instead of SIFS).
	 */
	HAL_TXQ_FRAG_BURST_BACKOFF_ENABLE = 0x00800000,
	/*
	 * Disable post-tx backoff following each frame.
	 */
	HAL_TXQ_BACKOFF_DISABLE	= 0x00010000,	/* disable post backoff */
	/*
	 * DCU arbiter lockout control.  This controls how
	 * lower priority tx queues are handled with respect
	 * to a specific queue when multiple queues have frames
	 * to send.  No lockout means lower priority queues arbitrate
	 * concurrently with this queue.  Intra-frame lockout
	 * means lower priority queues are locked out until the
	 * current frame transmits (e.g. including backoffs and bursting).
	 * Global lockout means nothing lower can arbitrate so
	 * long as there is traffic activity on this queue (frames,
	 * backoff, etc).
	 */
	HAL_TXQ_ARB_LOCKOUT_INTRA	= 0x00020000,	/* intra-frame lockout */
	HAL_TXQ_ARB_LOCKOUT_GLOBAL	= 0x00040000,	/* full lockout */
	HAL_TXQ_IGNORE_VIRTCOL	= 0x00080000,	/* ignore virt collisions */
	HAL_TXQ_SEQNUM_INC_DIS	= 0x00100000,	/* disable seqnum increment */
} HAL_TX_QUEUE_FLAGS;

typedef struct {
	uint32_t	tqi_ver;		/* hal TXQ version */
	HAL_TX_QUEUE_SUBTYPE tqi_subtype;	/* subtype if applicable */
	HAL_TX_QUEUE_FLAGS tqi_qflags;		/* flags (see above) */
	uint32_t	tqi_priority;		/* (not used) */
	uint32_t	tqi_aifs;		/* aifs */
	uint32_t	tqi_cwmin;		/* cwMin */
	uint32_t	tqi_cwmax;		/* cwMax */
	uint16_t	tqi_shretry;		/* rts retry limit */
	uint16_t	tqi_lgretry;		/* long retry limit (not used) */
	uint32_t	tqi_cbrPeriod;		/* CBR period (us) */
	uint32_t	tqi_cbrOverflowLimit;	/* threshold for CBROVF int */
	uint32_t	tqi_burstTime;		/* max burst duration (us) */
	uint32_t	tqi_readyTime;		/* frame schedule time (us) */
	uint32_t	tqi_compBuf;		/* comp buffer phys addr */
} HAL_TXQ_INFO;

#define	HAL_TQI_NONVAL	0xffff

/* token to use for aifs, cwmin, cwmax */
#define	HAL_TXQ_USEDEFAULT	((uint32_t) -1)

/* compression definitions */
#define	HAL_COMP_BUF_MAX_SIZE	9216	/* 9K */
#define	HAL_COMP_BUF_ALIGN_SIZE	512

/*
 * Transmit packet types.  This belongs in ah_desc.h, but
 * is here so we can give a proper type to various parameters
 * (and not require everyone include the file).
 *
 * NB: These values are intentionally assigned for
 *     direct use when setting up h/w descriptors.
 */
typedef enum {
	HAL_PKT_TYPE_NORMAL	= 0,
	HAL_PKT_TYPE_ATIM	= 1,
	HAL_PKT_TYPE_PSPOLL	= 2,
	HAL_PKT_TYPE_BEACON	= 3,
	HAL_PKT_TYPE_PROBE_RESP	= 4,
	HAL_PKT_TYPE_CHIRP	= 5,
	HAL_PKT_TYPE_GRP_POLL	= 6,
	HAL_PKT_TYPE_AMPDU	= 7,
} HAL_PKT_TYPE;

/* Rx Filter Frame Types */
typedef enum {
	/*
	 * These bits correspond to AR_RX_FILTER for all chips.
	 * Not all bits are supported by all chips.
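	 *
	 * A usage sketch (illustrative only): drivers normally
	 * read-modify-write the filter through the methods declared
	 * later in this header,
	 *
	 *	uint32_t rfilt = ah->ah_getRxFilter(ah);
	 *
	 *	rfilt |= HAL_RX_FILTER_UCAST | HAL_RX_FILTER_BCAST |
	 *	    HAL_RX_FILTER_BEACON;
	 *	ah->ah_setRxFilter(ah, rfilt);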
*/ HAL_RX_FILTER_UCAST = 0x00000001, /* Allow unicast frames */ HAL_RX_FILTER_MCAST = 0x00000002, /* Allow multicast frames */ HAL_RX_FILTER_BCAST = 0x00000004, /* Allow broadcast frames */ HAL_RX_FILTER_CONTROL = 0x00000008, /* Allow control frames */ HAL_RX_FILTER_BEACON = 0x00000010, /* Allow beacon frames */ HAL_RX_FILTER_PROM = 0x00000020, /* Promiscuous mode */ HAL_RX_FILTER_PROBEREQ = 0x00000080, /* Allow probe request frames */ HAL_RX_FILTER_PHYERR = 0x00000100, /* Allow phy errors */ HAL_RX_FILTER_MYBEACON = 0x00000200, /* Filter beacons other than mine */ HAL_RX_FILTER_COMPBAR = 0x00000400, /* Allow compressed BAR */ HAL_RX_FILTER_COMP_BA = 0x00000800, /* Allow compressed blockack */ HAL_RX_FILTER_PHYRADAR = 0x00002000, /* Allow phy radar errors */ HAL_RX_FILTER_PSPOLL = 0x00004000, /* Allow PS-POLL frames */ HAL_RX_FILTER_MCAST_BCAST_ALL = 0x00008000, /* Allow all mcast/bcast frames */ /* * Magic RX filter flags that aren't targeting hardware bits * but instead the HAL sets individual bits - eg PHYERR will result * in OFDM/CCK timing error frames being received. */ HAL_RX_FILTER_BSSID = 0x40000000, /* Disable BSSID match */ } HAL_RX_FILTER; typedef enum { HAL_PM_AWAKE = 0, HAL_PM_FULL_SLEEP = 1, HAL_PM_NETWORK_SLEEP = 2, HAL_PM_UNDEFINED = 3 } HAL_POWER_MODE; /* * Enterprise mode flags */ #define AH_ENT_DUAL_BAND_DISABLE 0x00000001 #define AH_ENT_CHAIN2_DISABLE 0x00000002 #define AH_ENT_5MHZ_DISABLE 0x00000004 #define AH_ENT_10MHZ_DISABLE 0x00000008 #define AH_ENT_49GHZ_DISABLE 0x00000010 #define AH_ENT_LOOPBACK_DISABLE 0x00000020 #define AH_ENT_TPC_PERF_DISABLE 0x00000040 #define AH_ENT_MIN_PKT_SIZE_DISABLE 0x00000080 #define AH_ENT_SPECTRAL_PRECISION 0x00000300 #define AH_ENT_SPECTRAL_PRECISION_S 8 #define AH_ENT_RTSCTS_DELIM_WAR 0x00010000 #define AH_FIRST_DESC_NDELIMS 60 /* * NOTE WELL: * These are mapped to take advantage of the common locations for many of * the bits on all of the currently supported MAC chips. This is to make * the ISR as efficient as possible, while still abstracting HW differences. * When new hardware breaks this commonality this enumerated type, as well * as the HAL functions using it, must be modified. All values are directly * mapped unless commented otherwise. 
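 *
 * Illustrative usage (a sketch; which bits a driver actually wants is
 * its own business): a mask built from these values is handed to the
 * ah_setInterrupts method declared below, with HAL_INT_GLOBAL acting
 * as the master enable (IER),
 *
 *	ah->ah_setInterrupts(ah, HAL_INT_RX | HAL_INT_TX |
 *	    HAL_INT_MIB | HAL_INT_GLOBAL);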
*/ typedef enum { HAL_INT_RX = 0x00000001, /* Non-common mapping */ HAL_INT_RXDESC = 0x00000002, /* Legacy mapping */ HAL_INT_RXERR = 0x00000004, HAL_INT_RXHP = 0x00000001, /* EDMA */ HAL_INT_RXLP = 0x00000002, /* EDMA */ HAL_INT_RXNOFRM = 0x00000008, HAL_INT_RXEOL = 0x00000010, HAL_INT_RXORN = 0x00000020, HAL_INT_TX = 0x00000040, /* Non-common mapping */ HAL_INT_TXDESC = 0x00000080, HAL_INT_TIM_TIMER= 0x00000100, HAL_INT_MCI = 0x00000200, HAL_INT_BBPANIC = 0x00000400, HAL_INT_TXURN = 0x00000800, HAL_INT_MIB = 0x00001000, HAL_INT_RXPHY = 0x00004000, HAL_INT_RXKCM = 0x00008000, HAL_INT_SWBA = 0x00010000, HAL_INT_BRSSI = 0x00020000, HAL_INT_BMISS = 0x00040000, HAL_INT_BNR = 0x00100000, HAL_INT_TIM = 0x00200000, /* Non-common mapping */ HAL_INT_DTIM = 0x00400000, /* Non-common mapping */ HAL_INT_DTIMSYNC= 0x00800000, /* Non-common mapping */ HAL_INT_GPIO = 0x01000000, HAL_INT_CABEND = 0x02000000, /* Non-common mapping */ HAL_INT_TSFOOR = 0x04000000, /* Non-common mapping */ HAL_INT_TBTT = 0x08000000, /* Non-common mapping */ /* Atheros ref driver has a generic timer interrupt now..*/ HAL_INT_GENTIMER = 0x08000000, /* Non-common mapping */ HAL_INT_CST = 0x10000000, /* Non-common mapping */ HAL_INT_GTT = 0x20000000, /* Non-common mapping */ HAL_INT_FATAL = 0x40000000, /* Non-common mapping */ #define HAL_INT_GLOBAL 0x80000000 /* Set/clear IER */ HAL_INT_BMISC = HAL_INT_TIM | HAL_INT_DTIM | HAL_INT_DTIMSYNC | HAL_INT_CABEND | HAL_INT_TBTT, /* Interrupt bits that map directly to ISR/IMR bits */ HAL_INT_COMMON = HAL_INT_RXNOFRM | HAL_INT_RXDESC | HAL_INT_RXEOL | HAL_INT_RXORN | HAL_INT_TXDESC | HAL_INT_TXURN | HAL_INT_MIB | HAL_INT_RXPHY | HAL_INT_RXKCM | HAL_INT_SWBA | HAL_INT_BMISS | HAL_INT_BRSSI | HAL_INT_BNR | HAL_INT_GPIO, } HAL_INT; /* * MSI vector assignments */ typedef enum { HAL_MSIVEC_MISC = 0, HAL_MSIVEC_TX = 1, HAL_MSIVEC_RXLP = 2, HAL_MSIVEC_RXHP = 3, } HAL_MSIVEC; typedef enum { HAL_INT_LINE = 0, HAL_INT_MSI = 1, } HAL_INT_TYPE; /* For interrupt mitigation registers */ typedef enum { HAL_INT_RX_FIRSTPKT=0, HAL_INT_RX_LASTPKT, HAL_INT_TX_FIRSTPKT, HAL_INT_TX_LASTPKT, HAL_INT_THRESHOLD } HAL_INT_MITIGATION; /* XXX this is duplicate information! 
*/ typedef struct { u_int32_t cyclecnt_diff; /* delta cycle count */ u_int32_t rxclr_cnt; /* rx clear count */ u_int32_t extrxclr_cnt; /* ext chan rx clear count */ u_int32_t txframecnt_diff; /* delta tx frame count */ u_int32_t rxframecnt_diff; /* delta rx frame count */ u_int32_t listen_time; /* listen time in msec - time for which ch is free */ u_int32_t ofdmphyerr_cnt; /* OFDM err count since last reset */ u_int32_t cckphyerr_cnt; /* CCK err count since last reset */ u_int32_t ofdmphyerrcnt_diff; /* delta OFDM Phy Error Count */ HAL_BOOL valid; /* if the stats are valid*/ } HAL_ANISTATS; typedef struct { u_int8_t txctl_offset; u_int8_t txctl_numwords; u_int8_t txstatus_offset; u_int8_t txstatus_numwords; u_int8_t rxctl_offset; u_int8_t rxctl_numwords; u_int8_t rxstatus_offset; u_int8_t rxstatus_numwords; u_int8_t macRevision; } HAL_DESC_INFO; typedef enum { HAL_GPIO_OUTPUT_MUX_AS_OUTPUT = 0, HAL_GPIO_OUTPUT_MUX_PCIE_ATTENTION_LED = 1, HAL_GPIO_OUTPUT_MUX_PCIE_POWER_LED = 2, HAL_GPIO_OUTPUT_MUX_MAC_NETWORK_LED = 3, HAL_GPIO_OUTPUT_MUX_MAC_POWER_LED = 4, HAL_GPIO_OUTPUT_MUX_AS_WLAN_ACTIVE = 5, HAL_GPIO_OUTPUT_MUX_AS_TX_FRAME = 6, HAL_GPIO_OUTPUT_MUX_AS_MCI_WLAN_DATA, HAL_GPIO_OUTPUT_MUX_AS_MCI_WLAN_CLK, HAL_GPIO_OUTPUT_MUX_AS_MCI_BT_DATA, HAL_GPIO_OUTPUT_MUX_AS_MCI_BT_CLK, HAL_GPIO_OUTPUT_MUX_AS_WL_IN_TX, HAL_GPIO_OUTPUT_MUX_AS_WL_IN_RX, HAL_GPIO_OUTPUT_MUX_AS_BT_IN_TX, HAL_GPIO_OUTPUT_MUX_AS_BT_IN_RX, HAL_GPIO_OUTPUT_MUX_AS_RUCKUS_STROBE, HAL_GPIO_OUTPUT_MUX_AS_RUCKUS_DATA, HAL_GPIO_OUTPUT_MUX_AS_SMARTANT_CTRL0, HAL_GPIO_OUTPUT_MUX_AS_SMARTANT_CTRL1, HAL_GPIO_OUTPUT_MUX_AS_SMARTANT_CTRL2, HAL_GPIO_OUTPUT_MUX_NUM_ENTRIES } HAL_GPIO_MUX_TYPE; typedef enum { HAL_GPIO_INTR_LOW = 0, HAL_GPIO_INTR_HIGH = 1, HAL_GPIO_INTR_DISABLE = 2 } HAL_GPIO_INTR_TYPE; typedef struct halCounters { u_int32_t tx_frame_count; u_int32_t rx_frame_count; u_int32_t rx_clear_count; u_int32_t cycle_count; u_int8_t is_rx_active; // true (1) or false (0) u_int8_t is_tx_active; // true (1) or false (0) } HAL_COUNTERS; typedef enum { HAL_RFGAIN_INACTIVE = 0, HAL_RFGAIN_READ_REQUESTED = 1, HAL_RFGAIN_NEED_CHANGE = 2 } HAL_RFGAIN; typedef uint16_t HAL_CTRY_CODE; /* country code */ typedef uint16_t HAL_REG_DOMAIN; /* regulatory domain code */ #define HAL_ANTENNA_MIN_MODE 0 #define HAL_ANTENNA_FIXED_A 1 #define HAL_ANTENNA_FIXED_B 2 #define HAL_ANTENNA_MAX_MODE 3 typedef struct { uint32_t ackrcv_bad; uint32_t rts_bad; uint32_t rts_good; uint32_t fcs_bad; uint32_t beacons; } HAL_MIB_STATS; /* * These bits represent what's in ah_currentRDext. 
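 * The enum values below are bit positions rather than masks, so a
 * test of the RD extension word would look like
 *
 *	if (rdext & (1 << REG_EXT_FCC_CH_144))
 *		allow_ch144 = AH_TRUE;
 *
 * This shift-based usage is an assumption inferred from the numbering
 * (0, 1, 2, ...), not something spelled out here.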
*/ typedef enum { REG_EXT_FCC_MIDBAND = 0, REG_EXT_JAPAN_MIDBAND = 1, REG_EXT_FCC_DFS_HT40 = 2, REG_EXT_JAPAN_NONDFS_HT40 = 3, REG_EXT_JAPAN_DFS_HT40 = 4, REG_EXT_FCC_CH_144 = 5, } REG_EXT_BITMAP; enum { HAL_MODE_11A = 0x001, /* 11a channels */ HAL_MODE_TURBO = 0x002, /* 11a turbo-only channels */ HAL_MODE_11B = 0x004, /* 11b channels */ HAL_MODE_PUREG = 0x008, /* 11g channels (OFDM only) */ #ifdef notdef HAL_MODE_11G = 0x010, /* 11g channels (OFDM/CCK) */ #else HAL_MODE_11G = 0x008, /* XXX historical */ #endif HAL_MODE_108G = 0x020, /* 11g+Turbo channels */ HAL_MODE_108A = 0x040, /* 11a+Turbo channels */ HAL_MODE_11A_HALF_RATE = 0x200, /* 11a half width channels */ HAL_MODE_11A_QUARTER_RATE = 0x400, /* 11a quarter width channels */ HAL_MODE_11G_HALF_RATE = 0x800, /* 11g half width channels */ HAL_MODE_11G_QUARTER_RATE = 0x1000, /* 11g quarter width channels */ HAL_MODE_11NG_HT20 = 0x008000, HAL_MODE_11NA_HT20 = 0x010000, HAL_MODE_11NG_HT40PLUS = 0x020000, HAL_MODE_11NG_HT40MINUS = 0x040000, HAL_MODE_11NA_HT40PLUS = 0x080000, HAL_MODE_11NA_HT40MINUS = 0x100000, HAL_MODE_ALL = 0xffffff }; typedef struct { int rateCount; /* NB: for proper padding */ uint8_t rateCodeToIndex[256]; /* back mapping */ struct { uint8_t valid; /* valid for rate control use */ uint8_t phy; /* CCK/OFDM/XR */ uint32_t rateKbps; /* transfer rate in kbs */ uint8_t rateCode; /* rate for h/w descriptors */ uint8_t shortPreamble; /* mask for enabling short * preamble in CCK rate code */ uint8_t dot11Rate; /* value for supported rates * info element of MLME */ uint8_t controlRate; /* index of next lower basic * rate; used for dur. calcs */ uint16_t lpAckDuration; /* long preamble ACK duration */ uint16_t spAckDuration; /* short preamble ACK duration*/ } info[64]; } HAL_RATE_TABLE; typedef struct { u_int rs_count; /* number of valid entries */ uint8_t rs_rates[64]; /* rates */ } HAL_RATE_SET; /* * 802.11n specific structures and enums */ typedef enum { HAL_CHAINTYPE_TX = 1, /* Tx chain type */ HAL_CHAINTYPE_RX = 2, /* RX chain type */ } HAL_CHAIN_TYPE; typedef struct { u_int Tries; u_int Rate; /* hardware rate code */ u_int RateIndex; /* rate series table index */ u_int PktDuration; u_int ChSel; u_int RateFlags; #define HAL_RATESERIES_RTS_CTS 0x0001 /* use rts/cts w/this series */ #define HAL_RATESERIES_2040 0x0002 /* use ext channel for series */ #define HAL_RATESERIES_HALFGI 0x0004 /* use half-gi for series */ #define HAL_RATESERIES_STBC 0x0008 /* use STBC for series */ u_int tx_power_cap; /* in 1/2 dBm units XXX TODO */ } HAL_11N_RATE_SERIES; typedef enum { HAL_HT_MACMODE_20 = 0, /* 20 MHz operation */ HAL_HT_MACMODE_2040 = 1, /* 20/40 MHz operation */ } HAL_HT_MACMODE; typedef enum { HAL_HT_PHYMODE_20 = 0, /* 20 MHz operation */ HAL_HT_PHYMODE_2040 = 1, /* 20/40 MHz operation */ } HAL_HT_PHYMODE; typedef enum { HAL_HT_EXTPROTSPACING_20 = 0, /* 20 MHz spacing */ HAL_HT_EXTPROTSPACING_25 = 1, /* 25 MHz spacing */ } HAL_HT_EXTPROTSPACING; typedef enum { HAL_RX_CLEAR_CTL_LOW = 0x1, /* force control channel to appear busy */ HAL_RX_CLEAR_EXT_LOW = 0x2, /* force extension channel to appear busy */ } HAL_HT_RXCLEAR; typedef enum { HAL_FREQ_BAND_5GHZ = 0, HAL_FREQ_BAND_2GHZ = 1, } HAL_FREQ_BAND; /* * Antenna switch control. By default antenna selection * enables multiple (2) antenna use. To force use of the * A or B antenna only specify a fixed setting. Fixing * the antenna will also disable any diversity support. 
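 *
 * For example (a sketch using the ah_setAntennaSwitch method declared
 * later in this header):
 *
 *	ah->ah_setAntennaSwitch(ah, HAL_ANT_FIXED_A);
 *
 * pins the radio to antenna A and, per the above, disables diversity.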
 */
typedef enum {
	HAL_ANT_VARIABLE = 0,		/* variable by programming */
	HAL_ANT_FIXED_A	 = 1,		/* fixed antenna A */
	HAL_ANT_FIXED_B	 = 2,		/* fixed antenna B */
} HAL_ANT_SETTING;

typedef enum {
	HAL_M_STA	= 1,		/* infrastructure station */
	HAL_M_IBSS	= 0,		/* IBSS (adhoc) station */
	HAL_M_HOSTAP	= 6,		/* Software Access Point */
	HAL_M_MONITOR	= 8		/* Monitor mode */
} HAL_OPMODE;

typedef enum {
	HAL_RESET_NORMAL	= 0,	/* Do normal reset */
	HAL_RESET_BBPANIC	= 1,	/* Reset because of BB panic */
	HAL_RESET_FORCE_COLD	= 2,	/* Force full reset */
} HAL_RESET_TYPE;

typedef struct {
	uint8_t		kv_type;	/* one of HAL_CIPHER */
	uint8_t		kv_apsd;	/* Mask for APSD enabled ACs */
	uint16_t	kv_len;		/* length in bits */
	uint8_t		kv_val[16];	/* enough for 128-bit keys */
	uint8_t		kv_mic[8];	/* TKIP MIC key */
	uint8_t		kv_txmic[8];	/* TKIP TX MIC key (optional) */
} HAL_KEYVAL;

/*
 * This is the TX descriptor field which marks the key padding requirement.
 * The naming is unfortunately unclear.
 */
#define	AH_KEYTYPE_MASK	0x0F
typedef enum {
	HAL_KEY_TYPE_CLEAR,
	HAL_KEY_TYPE_WEP,
	HAL_KEY_TYPE_AES,
	HAL_KEY_TYPE_TKIP,
} HAL_KEY_TYPE;

typedef enum {
	HAL_CIPHER_WEP		= 0,
	HAL_CIPHER_AES_OCB	= 1,
	HAL_CIPHER_AES_CCM	= 2,
	HAL_CIPHER_CKIP		= 3,
	HAL_CIPHER_TKIP		= 4,
	HAL_CIPHER_CLR		= 5,	/* no encryption */
	HAL_CIPHER_MIC		= 127	/* TKIP-MIC, not a cipher */
} HAL_CIPHER;

enum {
	HAL_SLOT_TIME_6	 = 6,		/* NB: for turbo mode */
	HAL_SLOT_TIME_9	 = 9,
	HAL_SLOT_TIME_20 = 20,
};

/*
 * Per-station beacon timer state.  Note that the specified
 * beacon interval (given in TU's) can also include flags
 * to force a TSF reset and to enable the beacon xmit logic.
 * If bs_cfpmaxduration is non-zero the hardware is setup to
 * coexist with a PCF-capable AP.
 */
typedef struct {
	uint32_t	bs_nexttbtt;		/* next beacon in TU */
	uint32_t	bs_nextdtim;		/* next DTIM in TU */
	uint32_t	bs_intval;		/* beacon interval+flags */
	/*
	 * HAL_BEACON_PERIOD, HAL_BEACON_ENA and HAL_BEACON_RESET_TSF
	 * are all 1:1 correspondences with the pre-11n chip AR_BEACON
	 * register.
	 */
#define	HAL_BEACON_PERIOD	0x0000ffff	/* beacon interval period */
#define	HAL_BEACON_PERIOD_TU8	0x0007ffff	/* beacon interval, tu/8 */
#define	HAL_BEACON_ENA		0x00800000	/* beacon xmit enable */
#define	HAL_BEACON_RESET_TSF	0x01000000	/* clear TSF */
#define	HAL_TSFOOR_THRESHOLD	0x00004240	/* TSF OOR thresh (16k uS) */
	uint32_t	bs_dtimperiod;
	uint16_t	bs_cfpperiod;		/* CFP period in TU */
	uint16_t	bs_cfpmaxduration;	/* max CFP duration in TU */
	uint32_t	bs_cfpnext;		/* next CFP in TU */
	uint16_t	bs_timoffset;		/* byte offset to TIM bitmap */
	uint16_t	bs_bmissthreshold;	/* beacon miss threshold */
	uint32_t	bs_sleepduration;	/* max sleep duration */
	uint32_t	bs_tsfoor_threshold;	/* TSF out of range threshold */
} HAL_BEACON_STATE;

/*
 * Like HAL_BEACON_STATE but for non-station mode setup.
 * NB: see above flag definitions for bt_intval.
 */
typedef struct {
	uint32_t	bt_intval;		/* beacon interval+flags */
	uint32_t	bt_nexttbtt;		/* next beacon in TU */
	uint32_t	bt_nextatim;		/* next ATIM in TU */
	uint32_t	bt_nextdba;		/* next DBA in 1/8th TU */
	uint32_t	bt_nextswba;		/* next SWBA in 1/8th TU */
	uint32_t	bt_flags;		/* timer enables */
#define HAL_BEACON_TBTT_EN	0x00000001
#define HAL_BEACON_DBA_EN	0x00000002
#define HAL_BEACON_SWBA_EN	0x00000004
} HAL_BEACON_TIMERS;

/*
 * Per-node statistics maintained by the driver for use in
 * optimizing signal quality and other operational aspects.
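 *
 * A low-pass filter sketch (an assumption about typical driver usage,
 * not something this header defines): with HAL_RSSI_EP_MULTIPLIER
 * (below) keeping the running average scaled so integer math suffices,
 * and LPF_LEN a hypothetical smoothing window,
 *
 *	avg = (avg * (LPF_LEN - 1) + rssi * HAL_RSSI_EP_MULTIPLIER) /
 *	    LPF_LEN;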
 */
typedef struct {
	uint32_t	ns_avgbrssi;	/* average beacon rssi */
	uint32_t	ns_avgrssi;	/* average data rssi */
	uint32_t	ns_avgtxrssi;	/* average tx rssi */
} HAL_NODE_STATS;

#define	HAL_RSSI_EP_MULTIPLIER	(1<<7)	/* pow2 to optimize out * and / */

/*
 * This is the ANI state and MIB stats.
 *
 * It's used by the HAL modules to keep state /and/ by the debug ioctl
 * to fetch ANI information.
 */
typedef struct {
	uint32_t	ast_ani_niup;	/* ANI increased noise immunity */
	uint32_t	ast_ani_nidown;	/* ANI decreased noise immunity */
	uint32_t	ast_ani_spurup;	/* ANI increased spur immunity */
	uint32_t	ast_ani_spurdown;/* ANI decreased spur immunity */
	uint32_t	ast_ani_ofdmon;	/* ANI OFDM weak signal detect on */
	uint32_t	ast_ani_ofdmoff;/* ANI OFDM weak signal detect off */
	uint32_t	ast_ani_cckhigh;/* ANI CCK weak signal threshold high */
	uint32_t	ast_ani_ccklow;	/* ANI CCK weak signal threshold low */
	uint32_t	ast_ani_stepup;	/* ANI increased first step level */
	uint32_t	ast_ani_stepdown;/* ANI decreased first step level */
	uint32_t	ast_ani_ofdmerrs;/* ANI cumulative ofdm phy err count */
	uint32_t	ast_ani_cckerrs;/* ANI cumulative cck phy err count */
	uint32_t	ast_ani_reset;	/* ANI parameters zero'd for non-STA */
	uint32_t	ast_ani_lzero;	/* ANI listen time forced to zero */
	uint32_t	ast_ani_lneg;	/* ANI listen time calculated < 0 */
	HAL_MIB_STATS	ast_mibstats;	/* MIB counter stats */
	HAL_NODE_STATS	ast_nodestats;	/* Latest rssi stats from driver */
} HAL_ANI_STATS;

typedef struct {
	uint8_t		noiseImmunityLevel;
	uint8_t		spurImmunityLevel;
	uint8_t		firstepLevel;
	uint8_t		ofdmWeakSigDetectOff;
	uint8_t		cckWeakSigThreshold;
	uint32_t	listenTime;

	/* NB: intentionally ordered so data exported to user space is first */
	uint32_t	txFrameCount;	/* Last txFrameCount */
	uint32_t	rxFrameCount;	/* Last rx Frame count */
	uint32_t	cycleCount;	/* Last cycleCount
					   (to detect wrap-around) */
	uint32_t	ofdmPhyErrCount;/* OFDM err count since last reset */
	uint32_t	cckPhyErrCount;	/* CCK err count since last reset */
} HAL_ANI_STATE;

struct ath_desc;
struct ath_tx_status;
struct ath_rx_status;
struct ieee80211_channel;

/*
 * This is a channel survey sample entry.
 *
 * The AR5212 ANI routines fill these samples.  The ANI code then uses it
 * when calculating listen time; it is also exported via a diagnostic
 * API.
 */
typedef struct {
	uint32_t seq_num;
	uint32_t tx_busy;
	uint32_t rx_busy;
	uint32_t chan_busy;
	uint32_t ext_chan_busy;
	uint32_t cycle_count;
	/* XXX TODO */
	uint32_t ofdm_phyerr_count;
	uint32_t cck_phyerr_count;
} HAL_SURVEY_SAMPLE;

/*
 * This provides 3.2 seconds of sample space given an
 * ANI time of 1/10th of a second.  This may not be enough!
 */
#define	CHANNEL_SURVEY_SAMPLE_COUNT	32

typedef struct {
	HAL_SURVEY_SAMPLE samples[CHANNEL_SURVEY_SAMPLE_COUNT];
	uint32_t cur_sample;	/* current sample in sequence */
	uint32_t cur_seq;	/* current sequence number */
} HAL_CHANNEL_SURVEY;

/*
 * ANI commands.
 *
 * These are used both internally and externally via the diagnostic
 * API.
 *
 * Note that these are NOT the ANI commands used via the INTMIT
 * capability - that has a different mapping for some reason.
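 *
 * As a sketch (usage assumed from the declarations below, not stated
 * here): enabling interference mitigation goes through the capability
 * interface with the HAL_CAP_INTMIT_* command values,
 *
 *	ah->ah_setCapability(ah, HAL_CAP_INTMIT,
 *	    HAL_CAP_INTMIT_ENABLE, 1, AH_NULL);
 *
 * while the diagnostic path uses the HAL_ANI_* numbers defined next.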
 */
typedef enum {
	HAL_ANI_PRESENT = 0,			/* is ANI support present */
	HAL_ANI_NOISE_IMMUNITY_LEVEL = 1,	/* set level */
	HAL_ANI_OFDM_WEAK_SIGNAL_DETECTION = 2,	/* enable/disable */
	HAL_ANI_CCK_WEAK_SIGNAL_THR = 3,	/* enable/disable */
	HAL_ANI_FIRSTEP_LEVEL = 4,		/* set level */
	HAL_ANI_SPUR_IMMUNITY_LEVEL = 5,	/* set level */
	HAL_ANI_MODE = 6,	/* 0 => manual, 1 => auto (XXX do not change) */
	HAL_ANI_PHYERR_RESET = 7,		/* reset phy error stats */
	HAL_ANI_MRC_CCK = 8,
} HAL_ANI_CMD;

#define	HAL_ANI_ALL	0xffffffff

/*
 * This is the layout of the ANI INTMIT capability.
 *
 * Notice that the command values differ from HAL_ANI_CMD.
 */
typedef enum {
	HAL_CAP_INTMIT_PRESENT = 0,
	HAL_CAP_INTMIT_ENABLE = 1,
	HAL_CAP_INTMIT_NOISE_IMMUNITY_LEVEL = 2,
	HAL_CAP_INTMIT_OFDM_WEAK_SIGNAL_LEVEL = 3,
	HAL_CAP_INTMIT_CCK_WEAK_SIGNAL_THR = 4,
	HAL_CAP_INTMIT_FIRSTEP_LEVEL = 5,
	HAL_CAP_INTMIT_SPUR_IMMUNITY_LEVEL = 6
} HAL_CAP_INTMIT_CMD;

typedef struct {
	int32_t		pe_firpwr;	/* FIR pwr out threshold */
	int32_t		pe_rrssi;	/* Radar rssi thresh */
	int32_t		pe_height;	/* Pulse height thresh */
	int32_t		pe_prssi;	/* Pulse rssi thresh */
	int32_t		pe_inband;	/* Inband thresh */

	/* The following params are only for AR5413 and later */
	u_int32_t	pe_relpwr;	/* Relative power threshold in 0.5dB steps */
	u_int32_t	pe_relstep;	/* Pulse Relative step threshold in 0.5dB steps */
	u_int32_t	pe_maxlen;	/* Max length of radar signal in 0.8us units */
	int32_t		pe_usefir128;	/* Use the average in-band power measured over 128 cycles */
	int32_t		pe_blockradar;
			/*
			 * Enable to block radar check if pkt detect is done
			 * via OFDM weak signal detect or pkt is detected
			 * immediately after tx to rx transition
			 */
	int32_t		pe_enmaxrssi;
			/*
			 * Enable to use the max rssi instead of the last rssi
			 * during fine gain changes for radar detection
			 */
	int32_t		pe_extchannel;	/* Enable DFS on ext channel */
	int32_t		pe_enabled;	/* Whether radar detection is enabled */
	int32_t		pe_enrelpwr;
	int32_t		pe_en_relstep_check;
} HAL_PHYERR_PARAM;

#define	HAL_PHYERR_PARAM_NOVAL	65535

typedef struct {
	u_int16_t	ss_fft_period;	/* Skip interval for FFT reports */
	u_int16_t	ss_period;	/* Spectral scan period */
	u_int16_t	ss_count;	/* # of reports to return from ss_active */
	u_int16_t	ss_short_report;/* Set to report only 1 set of FFT results */
	u_int8_t	radar_bin_thresh_sel;	/* strong signal radar FFT threshold configuration */
	u_int16_t	ss_spectral_pri;	/* are we doing a noise power cal? */
	int8_t		ss_nf_cal[AH_MAX_CHAINS*2];	/* nf calibrated values for ctl+ext from eeprom */
	int8_t		ss_nf_pwr[AH_MAX_CHAINS*2];	/* nf pwr values for ctl+ext from eeprom */
	int32_t		ss_nf_temp_data;	/* temperature data taken during nf scan */
	int		ss_enabled;
	int		ss_active;
} HAL_SPECTRAL_PARAM;

#define	HAL_SPECTRAL_PARAM_NOVAL	0xFFFF
#define	HAL_SPECTRAL_PARAM_ENABLE	0x8000	/* Enable/Disable if applicable */

/*
 * DFS operating mode flags.
 */
typedef enum {
	HAL_DFS_UNINIT_DOMAIN	= 0,	/* Uninitialized dfs domain */
	HAL_DFS_FCC_DOMAIN	= 1,	/* FCC3 dfs domain */
	HAL_DFS_ETSI_DOMAIN	= 2,	/* ETSI dfs domain */
	HAL_DFS_MKK4_DOMAIN	= 3,	/* Japan dfs domain */
} HAL_DFS_DOMAIN;

/*
 * MFP decryption options for initializing the MAC.
 */
typedef enum {
	HAL_MFP_QOSDATA = 0,	/* Decrypt MFP frames like QoS data frames.
				   All chips before Merlin. */
	HAL_MFP_PASSTHRU,	/* Don't decrypt MFP frames at all.
				   Passthrough */
	HAL_MFP_HW_CRYPTO	/* hardware decryption enabled.
				   Merlin can do it.
				 */
} HAL_MFP_OPT_T;

/* LNA config supported */
typedef enum {
	HAL_ANT_DIV_COMB_LNA1_MINUS_LNA2	= 0,
	HAL_ANT_DIV_COMB_LNA2			= 1,
	HAL_ANT_DIV_COMB_LNA1			= 2,
	HAL_ANT_DIV_COMB_LNA1_PLUS_LNA2		= 3,
} HAL_ANT_DIV_COMB_LNA_CONF;

typedef struct {
	u_int8_t	main_lna_conf;
	u_int8_t	alt_lna_conf;
	u_int8_t	fast_div_bias;
	u_int8_t	main_gaintb;
	u_int8_t	alt_gaintb;
	u_int8_t	antdiv_configgroup;
	int8_t		lna1_lna2_delta;
} HAL_ANT_COMB_CONFIG;

#define	DEFAULT_ANTDIV_CONFIG_GROUP	0x00
#define	HAL_ANTDIV_CONFIG_GROUP_1	0x01
#define	HAL_ANTDIV_CONFIG_GROUP_2	0x02
#define	HAL_ANTDIV_CONFIG_GROUP_3	0x03

/*
 * Flag for setting QUIET period
 */
typedef enum {
	HAL_QUIET_DISABLE		= 0x0,
	HAL_QUIET_ENABLE		= 0x1,
	HAL_QUIET_ADD_CURRENT_TSF	= 0x2,	/* add current TSF to next_start offset */
	HAL_QUIET_ADD_SWBA_RESP_TIME	= 0x4,	/* add beacon response time to next_start offset */
} HAL_QUIET_FLAG;

#define	HAL_DFS_EVENT_PRICH	0x0000001
#define	HAL_DFS_EVENT_EXTCH	0x0000002
#define	HAL_DFS_EVENT_EXTEARLY	0x0000004
#define	HAL_DFS_EVENT_ISDC	0x0000008

struct hal_dfs_event {
	uint64_t	re_full_ts;	/* 64-bit full timestamp from interrupt time */
	uint32_t	re_ts;		/* Original 15 bit recv timestamp */
	uint8_t		re_rssi;	/* rssi of radar event */
	uint8_t		re_dur;		/* duration of radar pulse */
	uint32_t	re_flags;	/* Flags (see above) */
};
typedef struct hal_dfs_event HAL_DFS_EVENT;

/*
 * Generic Timer domain
 */
typedef enum {
	HAL_GEN_TIMER_TSF = 0,
	HAL_GEN_TIMER_TSF2,
	HAL_GEN_TIMER_TSF_ANY
} HAL_GEN_TIMER_DOMAIN;

/*
 * BT Co-existence definitions
 */
#include "ath_hal/ah_btcoex.h"

struct hal_bb_panic_info {
	u_int32_t	status;
	u_int32_t	tsf;
	u_int32_t	phy_panic_wd_ctl1;
	u_int32_t	phy_panic_wd_ctl2;
	u_int32_t	phy_gen_ctrl;
	u_int32_t	rxc_pcnt;
	u_int32_t	rxf_pcnt;
	u_int32_t	txf_pcnt;
	u_int32_t	cycles;
	u_int32_t	wd;
	u_int32_t	det;
	u_int32_t	rdar;
	u_int32_t	r_odfm;
	u_int32_t	r_cck;
	u_int32_t	t_odfm;
	u_int32_t	t_cck;
	u_int32_t	agc;
	u_int32_t	src;
};

/* Serialize Register Access Mode */
typedef enum {
	SER_REG_MODE_OFF	= 0,
	SER_REG_MODE_ON		= 1,
	SER_REG_MODE_AUTO	= 2,
} SER_REG_MODE;

typedef struct {
	int	ah_debug;		/* only used if AH_DEBUG is defined */
	int	ah_ar5416_biasadj;	/* enable AR2133 radio specific bias fiddling */

	/* NB: these are deprecated; they exist for now for compatibility */
	int	ah_dma_beacon_response_time;	/* in TU's */
	int	ah_sw_beacon_response_time;	/* in TU's */
	int	ah_additional_swba_backoff;	/* in TU's */
	int	ah_force_full_reset;	/* force full chip reset rather
					   than warm reset */
	int	ah_serialise_reg_war;	/* force serialisation of register IO */

	/* XXX these don't belong here, they're just for the ar9300 HAL port effort */
	int	ath_hal_desc_tpc;	/* Per-packet TPC */
	int	ath_hal_sta_update_tx_pwr_enable;	/* GreenTX */
	int	ath_hal_sta_update_tx_pwr_enable_S1;	/* GreenTX */
	int	ath_hal_sta_update_tx_pwr_enable_S2;	/* GreenTX */
	int	ath_hal_sta_update_tx_pwr_enable_S3;	/* GreenTX */

	/* I'm not sure what the default values for these should be */
	int	ath_hal_pll_pwr_save;
	int	ath_hal_pcie_power_save_enable;
	int	ath_hal_intr_mitigation_rx;
	int	ath_hal_intr_mitigation_tx;
	int	ath_hal_pcie_clock_req;
#define	AR_PCIE_PLL_PWRSAVE_CONTROL	(1<<0)
#define	AR_PCIE_PLL_PWRSAVE_ON_D3	(1<<1)
#define	AR_PCIE_PLL_PWRSAVE_ON_D0	(1<<2)
	int	ath_hal_pcie_waen;
	int	ath_hal_pcie_ser_des_write;

	/* these are important for correct AR9300 behaviour */
	int	ath_hal_ht_enable;	/* needs to be enabled for AR9300 HT */
	int	ath_hal_diversity_control;
	int	ath_hal_antenna_switch_swap;
	int	ath_hal_ext_lna_ctl_gpio;
	int	ath_hal_spur_mode;
	int	ath_hal_6mb_ack;	/* should set this to
1 for 11a/11na? */ int ath_hal_enable_msi; /* enable MSI interrupts (needed?) */ int ath_hal_beacon_filter_interval; /* ok to be 0 for now? */ /* For now, set this to 0 - net80211 needs to know about hardware MFP support */ int ath_hal_mfp_support; int ath_hal_enable_ani; /* should set this.. */ int ath_hal_cwm_ignore_ext_cca; int ath_hal_show_bb_panic; int ath_hal_ant_ctrl_comm2g_switch_enable; int ath_hal_ext_atten_margin_cfg; int ath_hal_min_gainidx; int ath_hal_war70c; uint32_t ath_hal_mci_config; } HAL_OPS_CONFIG; /* * Hardware Access Layer (HAL) API. * * Clients of the HAL call ath_hal_attach to obtain a reference to an * ath_hal structure for use with the device. Hardware-related operations * that follow must call back into the HAL through interface, supplying * the reference as the first parameter. Note that before using the * reference returned by ath_hal_attach the caller should verify the * ABI version number. */ struct ath_hal { uint32_t ah_magic; /* consistency check magic number */ uint16_t ah_devid; /* PCI device ID */ uint16_t ah_subvendorid; /* PCI subvendor ID */ HAL_SOFTC ah_sc; /* back pointer to driver/os state */ HAL_BUS_TAG ah_st; /* params for register r+w */ HAL_BUS_HANDLE ah_sh; HAL_CTRY_CODE ah_countryCode; uint32_t ah_macVersion; /* MAC version id */ uint16_t ah_macRev; /* MAC revision */ uint16_t ah_phyRev; /* PHY revision */ /* NB: when only one radio is present the rev is in 5Ghz */ uint16_t ah_analog5GhzRev;/* 5GHz radio revision */ uint16_t ah_analog2GhzRev;/* 2GHz radio revision */ uint16_t *ah_eepromdata; /* eeprom buffer, if needed */ uint32_t ah_intrstate[8]; /* last int state */ uint32_t ah_syncstate; /* last sync intr state */ /* Current powerstate from HAL calls */ HAL_POWER_MODE ah_powerMode; HAL_OPS_CONFIG ah_config; const HAL_RATE_TABLE *__ahdecl(*ah_getRateTable)(struct ath_hal *, u_int mode); void __ahdecl(*ah_detach)(struct ath_hal*); /* Reset functions */ HAL_BOOL __ahdecl(*ah_reset)(struct ath_hal *, HAL_OPMODE, struct ieee80211_channel *, HAL_BOOL bChannelChange, HAL_RESET_TYPE resetType, HAL_STATUS *status); HAL_BOOL __ahdecl(*ah_phyDisable)(struct ath_hal *); HAL_BOOL __ahdecl(*ah_disable)(struct ath_hal *); void __ahdecl(*ah_configPCIE)(struct ath_hal *, HAL_BOOL restore, HAL_BOOL power_off); void __ahdecl(*ah_disablePCIE)(struct ath_hal *); void __ahdecl(*ah_setPCUConfig)(struct ath_hal *); HAL_BOOL __ahdecl(*ah_perCalibration)(struct ath_hal*, struct ieee80211_channel *, HAL_BOOL *); HAL_BOOL __ahdecl(*ah_perCalibrationN)(struct ath_hal *, struct ieee80211_channel *, u_int chainMask, HAL_BOOL longCal, HAL_BOOL *isCalDone); HAL_BOOL __ahdecl(*ah_resetCalValid)(struct ath_hal *, const struct ieee80211_channel *); HAL_BOOL __ahdecl(*ah_setTxPower)(struct ath_hal *, const struct ieee80211_channel *, uint16_t *); HAL_BOOL __ahdecl(*ah_setTxPowerLimit)(struct ath_hal *, uint32_t); HAL_BOOL __ahdecl(*ah_setBoardValues)(struct ath_hal *, const struct ieee80211_channel *); /* Transmit functions */ HAL_BOOL __ahdecl(*ah_updateTxTrigLevel)(struct ath_hal*, HAL_BOOL incTrigLevel); int __ahdecl(*ah_setupTxQueue)(struct ath_hal *, HAL_TX_QUEUE, const HAL_TXQ_INFO *qInfo); HAL_BOOL __ahdecl(*ah_setTxQueueProps)(struct ath_hal *, int q, const HAL_TXQ_INFO *qInfo); HAL_BOOL __ahdecl(*ah_getTxQueueProps)(struct ath_hal *, int q, HAL_TXQ_INFO *qInfo); HAL_BOOL __ahdecl(*ah_releaseTxQueue)(struct ath_hal *ah, u_int q); HAL_BOOL __ahdecl(*ah_resetTxQueue)(struct ath_hal *ah, u_int q); uint32_t __ahdecl(*ah_getTxDP)(struct ath_hal*, u_int); HAL_BOOL 
__ahdecl(*ah_setTxDP)(struct ath_hal*, u_int, uint32_t txdp); uint32_t __ahdecl(*ah_numTxPending)(struct ath_hal *, u_int q); HAL_BOOL __ahdecl(*ah_startTxDma)(struct ath_hal*, u_int); HAL_BOOL __ahdecl(*ah_stopTxDma)(struct ath_hal*, u_int); HAL_BOOL __ahdecl(*ah_setupTxDesc)(struct ath_hal *, struct ath_desc *, u_int pktLen, u_int hdrLen, HAL_PKT_TYPE type, u_int txPower, u_int txRate0, u_int txTries0, u_int keyIx, u_int antMode, u_int flags, u_int rtsctsRate, u_int rtsctsDuration, u_int compicvLen, u_int compivLen, u_int comp); HAL_BOOL __ahdecl(*ah_setupXTxDesc)(struct ath_hal *, struct ath_desc*, u_int txRate1, u_int txTries1, u_int txRate2, u_int txTries2, u_int txRate3, u_int txTries3); HAL_BOOL __ahdecl(*ah_fillTxDesc)(struct ath_hal *, struct ath_desc *, HAL_DMA_ADDR *bufAddrList, uint32_t *segLenList, u_int descId, u_int qcuId, HAL_BOOL firstSeg, HAL_BOOL lastSeg, const struct ath_desc *); HAL_STATUS __ahdecl(*ah_procTxDesc)(struct ath_hal *, struct ath_desc *, struct ath_tx_status *); void __ahdecl(*ah_getTxIntrQueue)(struct ath_hal *, uint32_t *); void __ahdecl(*ah_reqTxIntrDesc)(struct ath_hal *, struct ath_desc*); HAL_BOOL __ahdecl(*ah_getTxCompletionRates)(struct ath_hal *, const struct ath_desc *ds, int *rates, int *tries); void __ahdecl(*ah_setTxDescLink)(struct ath_hal *ah, void *ds, uint32_t link); void __ahdecl(*ah_getTxDescLink)(struct ath_hal *ah, void *ds, uint32_t *link); void __ahdecl(*ah_getTxDescLinkPtr)(struct ath_hal *ah, void *ds, uint32_t **linkptr); void __ahdecl(*ah_setupTxStatusRing)(struct ath_hal *, void *ts_start, uint32_t ts_paddr_start, uint16_t size); void __ahdecl(*ah_getTxRawTxDesc)(struct ath_hal *, u_int32_t *); /* Receive Functions */ uint32_t __ahdecl(*ah_getRxDP)(struct ath_hal*, HAL_RX_QUEUE); void __ahdecl(*ah_setRxDP)(struct ath_hal*, uint32_t rxdp, HAL_RX_QUEUE); void __ahdecl(*ah_enableReceive)(struct ath_hal*); HAL_BOOL __ahdecl(*ah_stopDmaReceive)(struct ath_hal*); void __ahdecl(*ah_startPcuReceive)(struct ath_hal*); void __ahdecl(*ah_stopPcuReceive)(struct ath_hal*); void __ahdecl(*ah_setMulticastFilter)(struct ath_hal*, uint32_t filter0, uint32_t filter1); HAL_BOOL __ahdecl(*ah_setMulticastFilterIndex)(struct ath_hal*, uint32_t index); HAL_BOOL __ahdecl(*ah_clrMulticastFilterIndex)(struct ath_hal*, uint32_t index); uint32_t __ahdecl(*ah_getRxFilter)(struct ath_hal*); void __ahdecl(*ah_setRxFilter)(struct ath_hal*, uint32_t); HAL_BOOL __ahdecl(*ah_setupRxDesc)(struct ath_hal *, struct ath_desc *, uint32_t size, u_int flags); HAL_STATUS __ahdecl(*ah_procRxDesc)(struct ath_hal *, struct ath_desc *, uint32_t phyAddr, struct ath_desc *next, uint64_t tsf, struct ath_rx_status *); void __ahdecl(*ah_rxMonitor)(struct ath_hal *, const HAL_NODE_STATS *, const struct ieee80211_channel *); void __ahdecl(*ah_aniPoll)(struct ath_hal *, const struct ieee80211_channel *); void __ahdecl(*ah_procMibEvent)(struct ath_hal *, const HAL_NODE_STATS *); /* Misc Functions */ HAL_STATUS __ahdecl(*ah_getCapability)(struct ath_hal *, HAL_CAPABILITY_TYPE, uint32_t capability, uint32_t *result); HAL_BOOL __ahdecl(*ah_setCapability)(struct ath_hal *, HAL_CAPABILITY_TYPE, uint32_t capability, uint32_t setting, HAL_STATUS *); HAL_BOOL __ahdecl(*ah_getDiagState)(struct ath_hal *, int request, const void *args, uint32_t argsize, void **result, uint32_t *resultsize); void __ahdecl(*ah_getMacAddress)(struct ath_hal *, uint8_t *); HAL_BOOL __ahdecl(*ah_setMacAddress)(struct ath_hal *, const uint8_t*); void __ahdecl(*ah_getBssIdMask)(struct ath_hal *, uint8_t *); HAL_BOOL 
__ahdecl(*ah_setBssIdMask)(struct ath_hal *, const uint8_t*); HAL_BOOL __ahdecl(*ah_setRegulatoryDomain)(struct ath_hal*, uint16_t, HAL_STATUS *); void __ahdecl(*ah_setLedState)(struct ath_hal*, HAL_LED_STATE); void __ahdecl(*ah_writeAssocid)(struct ath_hal*, const uint8_t *bssid, uint16_t assocId); HAL_BOOL __ahdecl(*ah_gpioCfgOutput)(struct ath_hal *, uint32_t gpio, HAL_GPIO_MUX_TYPE); HAL_BOOL __ahdecl(*ah_gpioCfgInput)(struct ath_hal *, uint32_t gpio); uint32_t __ahdecl(*ah_gpioGet)(struct ath_hal *, uint32_t gpio); HAL_BOOL __ahdecl(*ah_gpioSet)(struct ath_hal *, uint32_t gpio, uint32_t val); void __ahdecl(*ah_gpioSetIntr)(struct ath_hal*, u_int, uint32_t); uint32_t __ahdecl(*ah_getTsf32)(struct ath_hal*); uint64_t __ahdecl(*ah_getTsf64)(struct ath_hal*); void __ahdecl(*ah_setTsf64)(struct ath_hal *, uint64_t); void __ahdecl(*ah_resetTsf)(struct ath_hal*); HAL_BOOL __ahdecl(*ah_detectCardPresent)(struct ath_hal*); void __ahdecl(*ah_updateMibCounters)(struct ath_hal*, HAL_MIB_STATS*); HAL_RFGAIN __ahdecl(*ah_getRfGain)(struct ath_hal*); u_int __ahdecl(*ah_getDefAntenna)(struct ath_hal*); void __ahdecl(*ah_setDefAntenna)(struct ath_hal*, u_int); HAL_ANT_SETTING __ahdecl(*ah_getAntennaSwitch)(struct ath_hal*); HAL_BOOL __ahdecl(*ah_setAntennaSwitch)(struct ath_hal*, HAL_ANT_SETTING); HAL_BOOL __ahdecl(*ah_setSifsTime)(struct ath_hal*, u_int); u_int __ahdecl(*ah_getSifsTime)(struct ath_hal*); HAL_BOOL __ahdecl(*ah_setSlotTime)(struct ath_hal*, u_int); u_int __ahdecl(*ah_getSlotTime)(struct ath_hal*); HAL_BOOL __ahdecl(*ah_setAckTimeout)(struct ath_hal*, u_int); u_int __ahdecl(*ah_getAckTimeout)(struct ath_hal*); HAL_BOOL __ahdecl(*ah_setAckCTSRate)(struct ath_hal*, u_int); u_int __ahdecl(*ah_getAckCTSRate)(struct ath_hal*); HAL_BOOL __ahdecl(*ah_setCTSTimeout)(struct ath_hal*, u_int); u_int __ahdecl(*ah_getCTSTimeout)(struct ath_hal*); HAL_BOOL __ahdecl(*ah_setDecompMask)(struct ath_hal*, uint16_t, int); void __ahdecl(*ah_setCoverageClass)(struct ath_hal*, uint8_t, int); HAL_STATUS __ahdecl(*ah_setQuiet)(struct ath_hal *ah, uint32_t period, uint32_t duration, uint32_t nextStart, HAL_QUIET_FLAG flag); void __ahdecl(*ah_setChainMasks)(struct ath_hal *, uint32_t, uint32_t); /* DFS functions */ void __ahdecl(*ah_enableDfs)(struct ath_hal *ah, HAL_PHYERR_PARAM *pe); void __ahdecl(*ah_getDfsThresh)(struct ath_hal *ah, HAL_PHYERR_PARAM *pe); HAL_BOOL __ahdecl(*ah_getDfsDefaultThresh)(struct ath_hal *ah, HAL_PHYERR_PARAM *pe); HAL_BOOL __ahdecl(*ah_procRadarEvent)(struct ath_hal *ah, struct ath_rx_status *rxs, uint64_t fulltsf, const char *buf, HAL_DFS_EVENT *event); HAL_BOOL __ahdecl(*ah_isFastClockEnabled)(struct ath_hal *ah); + void __ahdecl(*ah_setDfsCacTxQuiet)(struct ath_hal *, HAL_BOOL); /* Spectral Scan functions */ void __ahdecl(*ah_spectralConfigure)(struct ath_hal *ah, HAL_SPECTRAL_PARAM *sp); void __ahdecl(*ah_spectralGetConfig)(struct ath_hal *ah, HAL_SPECTRAL_PARAM *sp); void __ahdecl(*ah_spectralStart)(struct ath_hal *); void __ahdecl(*ah_spectralStop)(struct ath_hal *); HAL_BOOL __ahdecl(*ah_spectralIsEnabled)(struct ath_hal *); HAL_BOOL __ahdecl(*ah_spectralIsActive)(struct ath_hal *); /* XXX getNfPri() and getNfExt() */ /* Key Cache Functions */ uint32_t __ahdecl(*ah_getKeyCacheSize)(struct ath_hal*); HAL_BOOL __ahdecl(*ah_resetKeyCacheEntry)(struct ath_hal*, uint16_t); HAL_BOOL __ahdecl(*ah_isKeyCacheEntryValid)(struct ath_hal *, uint16_t); HAL_BOOL __ahdecl(*ah_setKeyCacheEntry)(struct ath_hal*, uint16_t, const HAL_KEYVAL *, const uint8_t *, int); HAL_BOOL 
__ahdecl(*ah_setKeyCacheEntryMac)(struct ath_hal*, uint16_t, const uint8_t *); /* Power Management Functions */ HAL_BOOL __ahdecl(*ah_setPowerMode)(struct ath_hal*, HAL_POWER_MODE mode, int setChip); HAL_POWER_MODE __ahdecl(*ah_getPowerMode)(struct ath_hal*); int16_t __ahdecl(*ah_getChanNoise)(struct ath_hal *, const struct ieee80211_channel *); /* Beacon Management Functions */ void __ahdecl(*ah_setBeaconTimers)(struct ath_hal*, const HAL_BEACON_TIMERS *); /* NB: deprecated, use ah_setBeaconTimers instead */ void __ahdecl(*ah_beaconInit)(struct ath_hal *, uint32_t nexttbtt, uint32_t intval); void __ahdecl(*ah_setStationBeaconTimers)(struct ath_hal*, const HAL_BEACON_STATE *); void __ahdecl(*ah_resetStationBeaconTimers)(struct ath_hal*); uint64_t __ahdecl(*ah_getNextTBTT)(struct ath_hal *); /* 802.11n Functions */ HAL_BOOL __ahdecl(*ah_chainTxDesc)(struct ath_hal *, struct ath_desc *, HAL_DMA_ADDR *bufAddrList, uint32_t *segLenList, u_int, u_int, HAL_PKT_TYPE, u_int, HAL_CIPHER, uint8_t, HAL_BOOL, HAL_BOOL, HAL_BOOL); HAL_BOOL __ahdecl(*ah_setupFirstTxDesc)(struct ath_hal *, struct ath_desc *, u_int, u_int, u_int, u_int, u_int, u_int, u_int, u_int); HAL_BOOL __ahdecl(*ah_setupLastTxDesc)(struct ath_hal *, struct ath_desc *, const struct ath_desc *); void __ahdecl(*ah_set11nRateScenario)(struct ath_hal *, struct ath_desc *, u_int, u_int, HAL_11N_RATE_SERIES [], u_int, u_int); /* * The next 4 (set11ntxdesc -> set11naggrlast) are specific * to the EDMA HAL. Descriptors are chained together by * using filltxdesc (not ChainTxDesc) and then setting the * aggregate flags appropriately using first/middle/last. */ void __ahdecl(*ah_set11nTxDesc)(struct ath_hal *, void *, u_int, HAL_PKT_TYPE, u_int, u_int, u_int); void __ahdecl(*ah_set11nAggrFirst)(struct ath_hal *, struct ath_desc *, u_int, u_int); void __ahdecl(*ah_set11nAggrMiddle)(struct ath_hal *, struct ath_desc *, u_int); void __ahdecl(*ah_set11nAggrLast)(struct ath_hal *, struct ath_desc *); void __ahdecl(*ah_clr11nAggr)(struct ath_hal *, struct ath_desc *); void __ahdecl(*ah_set11nBurstDuration)(struct ath_hal *, struct ath_desc *, u_int); void __ahdecl(*ah_set11nVirtMoreFrag)(struct ath_hal *, struct ath_desc *, u_int); HAL_BOOL __ahdecl(*ah_getMibCycleCounts) (struct ath_hal *, HAL_SURVEY_SAMPLE *); uint32_t __ahdecl(*ah_get11nExtBusy)(struct ath_hal *); void __ahdecl(*ah_set11nMac2040)(struct ath_hal *, HAL_HT_MACMODE); HAL_HT_RXCLEAR __ahdecl(*ah_get11nRxClear)(struct ath_hal *ah); void __ahdecl(*ah_set11nRxClear)(struct ath_hal *, HAL_HT_RXCLEAR); /* Interrupt functions */ HAL_BOOL __ahdecl(*ah_isInterruptPending)(struct ath_hal*); HAL_BOOL __ahdecl(*ah_getPendingInterrupts)(struct ath_hal*, HAL_INT*); HAL_INT __ahdecl(*ah_getInterrupts)(struct ath_hal*); HAL_INT __ahdecl(*ah_setInterrupts)(struct ath_hal*, HAL_INT); /* Bluetooth Coexistence functions */ void __ahdecl(*ah_btCoexSetInfo)(struct ath_hal *, HAL_BT_COEX_INFO *); void __ahdecl(*ah_btCoexSetConfig)(struct ath_hal *, HAL_BT_COEX_CONFIG *); void __ahdecl(*ah_btCoexSetQcuThresh)(struct ath_hal *, int); void __ahdecl(*ah_btCoexSetWeights)(struct ath_hal *, uint32_t); void __ahdecl(*ah_btCoexSetBmissThresh)(struct ath_hal *, uint32_t); void __ahdecl(*ah_btCoexSetParameter)(struct ath_hal *, uint32_t, uint32_t); void __ahdecl(*ah_btCoexDisable)(struct ath_hal *); int __ahdecl(*ah_btCoexEnable)(struct ath_hal *); /* Bluetooth MCI methods */ void __ahdecl(*ah_btMciSetup)(struct ath_hal *, uint32_t, void *, uint16_t, uint32_t); HAL_BOOL __ahdecl(*ah_btMciSendMessage)(struct ath_hal 
*, uint8_t, uint32_t, uint32_t *, uint8_t, HAL_BOOL, HAL_BOOL); uint32_t __ahdecl(*ah_btMciGetInterrupt)(struct ath_hal *, uint32_t *, uint32_t *); uint32_t __ahdecl(*ah_btMciState)(struct ath_hal *, uint32_t, uint32_t *); void __ahdecl(*ah_btMciDetach)(struct ath_hal *); /* LNA diversity configuration */ void __ahdecl(*ah_divLnaConfGet)(struct ath_hal *, HAL_ANT_COMB_CONFIG *); void __ahdecl(*ah_divLnaConfSet)(struct ath_hal *, HAL_ANT_COMB_CONFIG *); }; /* * Check the PCI vendor ID and device ID against Atheros' values * and return a printable description for any Atheros hardware. * AH_NULL is returned if the ID's do not describe Atheros hardware. */ extern const char *__ahdecl ath_hal_probe(uint16_t vendorid, uint16_t devid); /* * Attach the HAL for use with the specified device. The device is * defined by the PCI device ID. The caller provides an opaque pointer * to an upper-layer data structure (HAL_SOFTC) that is stored in the * HAL state block for later use. Hardware register accesses are done * using the specified bus tag and handle. On successful return a * reference to a state block is returned that must be supplied in all * subsequent HAL calls. Storage associated with this reference is * dynamically allocated and must be freed by calling the ah_detach * method when the client is done. If the attach operation fails a * null (AH_NULL) reference will be returned and a status code will * be returned if the status parameter is non-zero. */ extern struct ath_hal * __ahdecl ath_hal_attach(uint16_t devid, HAL_SOFTC, HAL_BUS_TAG, HAL_BUS_HANDLE, uint16_t *eepromdata, HAL_OPS_CONFIG *ah_config, HAL_STATUS* status); extern const char *ath_hal_mac_name(struct ath_hal *); extern const char *ath_hal_rf_name(struct ath_hal *); /* * Regulatory interfaces. Drivers should use ath_hal_init_channels to * request a set of channels for a particular country code and/or * regulatory domain. If CTRY_DEFAULT and SKU_NONE are specified then * this list is constructed according to the contents of the EEPROM. * ath_hal_getchannels acts similarly but does not alter the operating * state; this can be used to collect information for a particular * regulatory configuration. Finally ath_hal_set_channels installs a * channel list constructed outside the driver. The HAL will adopt the * channel list and setup internal state according to the specified * regulatory configuration (e.g. conformance test limits). * * For all interfaces the channel list is returned in the supplied array. * maxchans defines the maximum size of this array. nchans contains the * actual number of channels returned. If a problem occurred then a * status code != HAL_OK is returned. */ struct ieee80211_channel; /* * Return a list of channels according to the specified regulatory. */ extern HAL_STATUS __ahdecl ath_hal_getchannels(struct ath_hal *, struct ieee80211_channel *chans, u_int maxchans, int *nchans, u_int modeSelect, HAL_CTRY_CODE cc, HAL_REG_DOMAIN regDmn, HAL_BOOL enableExtendedChannels); /* * Return a list of channels and install it as the current operating * regulatory list. */ extern HAL_STATUS __ahdecl ath_hal_init_channels(struct ath_hal *, struct ieee80211_channel *chans, u_int maxchans, int *nchans, u_int modeSelect, HAL_CTRY_CODE cc, HAL_REG_DOMAIN rd, HAL_BOOL enableExtendedChannels); /* * Install the list of channels as the current operating regulatory * and setup related state according to the country code and sku. 
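 *
 * A hedged usage sketch (error handling elided; CTRY_DEFAULT and
 * SKU_NONE are the tokens mentioned above, defined in other headers):
 *
 *	HAL_STATUS status;
 *
 *	status = ath_hal_set_channels(ah, chans, nchans,
 *	    CTRY_DEFAULT, SKU_NONE);
 *
 * after which the HAL adopts the supplied list as its operating set.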
 */
extern	HAL_STATUS __ahdecl ath_hal_set_channels(struct ath_hal *,
    struct ieee80211_channel *chans, int nchans,
    HAL_CTRY_CODE cc, HAL_REG_DOMAIN regDmn);

/*
 * Fetch the ctl/ext noise floor values reported by a MIMO
 * radio.  Returns 1 for valid results, 0 for invalid channel.
 */
extern	int __ahdecl ath_hal_get_mimo_chan_noise(struct ath_hal *ah,
    const struct ieee80211_channel *chan, int16_t *nf_ctl,
    int16_t *nf_ext);

/*
 * Calibrate noise floor data following a channel scan or similar.
 * This must be called prior to retrieving noise floor data.
 */
extern	void __ahdecl ath_hal_process_noisefloor(struct ath_hal *ah);

/*
 * Return bit mask of wireless modes supported by the hardware.
 */
extern	u_int __ahdecl ath_hal_getwirelessmodes(struct ath_hal*);

/*
 * Get the HAL wireless mode for the given channel.
 */
extern	int ath_hal_get_curmode(struct ath_hal *ah,
    const struct ieee80211_channel *chan);

/*
 * Calculate the packet TX time for a legacy or 11n frame
 */
extern	uint32_t __ahdecl ath_hal_pkt_txtime(struct ath_hal *ah,
    const HAL_RATE_TABLE *rates, uint32_t frameLen, uint16_t rateix,
    HAL_BOOL isht40, HAL_BOOL shortPreamble, HAL_BOOL includeSifs);

/*
 * Calculate the duration of an 11n frame.
 */
extern	uint32_t __ahdecl ath_computedur_ht(uint32_t frameLen, uint16_t rate,
    int streams, HAL_BOOL isht40, HAL_BOOL isShortGI);

/*
 * Calculate the transmit duration of a legacy frame.
 */
extern	uint16_t __ahdecl ath_hal_computetxtime(struct ath_hal *,
    const HAL_RATE_TABLE *rates, uint32_t frameLen, uint16_t rateix,
    HAL_BOOL shortPreamble, HAL_BOOL includeSifs);

/*
 * Adjust the TSF.
 */
extern	void __ahdecl ath_hal_adjusttsf(struct ath_hal *ah, int32_t tsfdelta);

/*
 * Enable or disable CCA.
 */
void	__ahdecl ath_hal_setcca(struct ath_hal *ah, int ena);

/*
 * Get CCA setting.
 */
int	__ahdecl ath_hal_getcca(struct ath_hal *ah);
+
+/*
+ * Enable/disable self-generated frames (ACK, CTS) for CAC.
+ */
+void	__ahdecl ath_hal_set_dfs_cac_tx_quiet(struct ath_hal *ah, HAL_BOOL ena);

/*
 * Read EEPROM data from ah_eepromdata
 */
HAL_BOOL __ahdecl ath_hal_EepromDataRead(struct ath_hal *ah,
    u_int off, uint16_t *data);

/*
 * For now, simply pass through MFP frames.
 */
static inline u_int32_t
ath_hal_get_mfp_qos(struct ath_hal *ah)
{
	//return AH_PRIVATE(ah)->ah_mfp_qos;
	return HAL_MFP_QOSDATA;
}

/*
 * Convert between microseconds and core system clocks.
 */
extern	u_int ath_hal_mac_clks(struct ath_hal *ah, u_int usecs);
extern	u_int ath_hal_mac_usec(struct ath_hal *ah, u_int clks);
extern	uint64_t ath_hal_mac_psec(struct ath_hal *ah, u_int clks);

#endif /* _ATH_AH_H_ */
Index: projects/ipsec/sys/dev/pci/pci_pci.c
===================================================================
--- projects/ipsec/sys/dev/pci/pci_pci.c	(revision 313186)
+++ projects/ipsec/sys/dev/pci/pci_pci.c	(revision 313187)
@@ -1,2837 +1,2831 @@
/*-
 * Copyright (c) 1994,1995 Stefan Esser, Wolfgang StanglMeier
 * Copyright (c) 2000 Michael Smith
 * Copyright (c) 2000 BSDi
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3.
The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * PCI:PCI bridge support. */ #include "opt_pci.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pcib_if.h" static int pcib_probe(device_t dev); static int pcib_suspend(device_t dev); static int pcib_resume(device_t dev); static int pcib_power_for_sleep(device_t pcib, device_t dev, int *pstate); static int pcib_ari_get_id(device_t pcib, device_t dev, enum pci_id_type type, uintptr_t *id); static uint32_t pcib_read_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, int width); static void pcib_write_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, uint32_t val, int width); static int pcib_ari_maxslots(device_t dev); static int pcib_ari_maxfuncs(device_t dev); static int pcib_try_enable_ari(device_t pcib, device_t dev); static int pcib_ari_enabled(device_t pcib); static void pcib_ari_decode_rid(device_t pcib, uint16_t rid, int *bus, int *slot, int *func); #ifdef PCI_HP static void pcib_pcie_ab_timeout(void *arg); static void pcib_pcie_cc_timeout(void *arg); static void pcib_pcie_dll_timeout(void *arg); #endif static device_method_t pcib_methods[] = { /* Device interface */ DEVMETHOD(device_probe, pcib_probe), DEVMETHOD(device_attach, pcib_attach), DEVMETHOD(device_detach, pcib_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, pcib_suspend), DEVMETHOD(device_resume, pcib_resume), /* Bus interface */ DEVMETHOD(bus_child_present, pcib_child_present), DEVMETHOD(bus_read_ivar, pcib_read_ivar), DEVMETHOD(bus_write_ivar, pcib_write_ivar), DEVMETHOD(bus_alloc_resource, pcib_alloc_resource), #ifdef NEW_PCIB DEVMETHOD(bus_adjust_resource, pcib_adjust_resource), DEVMETHOD(bus_release_resource, pcib_release_resource), #else DEVMETHOD(bus_adjust_resource, bus_generic_adjust_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), #endif DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), /* pcib interface */ DEVMETHOD(pcib_maxslots, pcib_ari_maxslots), DEVMETHOD(pcib_maxfuncs, pcib_ari_maxfuncs), DEVMETHOD(pcib_read_config, pcib_read_config), DEVMETHOD(pcib_write_config, pcib_write_config), DEVMETHOD(pcib_route_interrupt, pcib_route_interrupt), DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), DEVMETHOD(pcib_release_msi, pcib_release_msi), DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), DEVMETHOD(pcib_release_msix, pcib_release_msix), DEVMETHOD(pcib_map_msi, 
pcib_map_msi), DEVMETHOD(pcib_power_for_sleep, pcib_power_for_sleep), DEVMETHOD(pcib_get_id, pcib_ari_get_id), DEVMETHOD(pcib_try_enable_ari, pcib_try_enable_ari), DEVMETHOD(pcib_ari_enabled, pcib_ari_enabled), DEVMETHOD(pcib_decode_rid, pcib_ari_decode_rid), DEVMETHOD_END }; static devclass_t pcib_devclass; DEFINE_CLASS_0(pcib, pcib_driver, pcib_methods, sizeof(struct pcib_softc)); DRIVER_MODULE(pcib, pci, pcib_driver, pcib_devclass, NULL, NULL); #if defined(NEW_PCIB) || defined(PCI_HP) SYSCTL_DECL(_hw_pci); #endif #ifdef NEW_PCIB static int pci_clear_pcib; SYSCTL_INT(_hw_pci, OID_AUTO, clear_pcib, CTLFLAG_RDTUN, &pci_clear_pcib, 0, "Clear firmware-assigned resources for PCI-PCI bridge I/O windows."); /* * Is a resource from a child device sub-allocated from one of our * resource managers? */ static int pcib_is_resource_managed(struct pcib_softc *sc, int type, struct resource *r) { switch (type) { #ifdef PCI_RES_BUS case PCI_RES_BUS: return (rman_is_region_manager(r, &sc->bus.rman)); #endif case SYS_RES_IOPORT: return (rman_is_region_manager(r, &sc->io.rman)); case SYS_RES_MEMORY: /* Prefetchable resources may live in either memory rman. */ if (rman_get_flags(r) & RF_PREFETCHABLE && rman_is_region_manager(r, &sc->pmem.rman)) return (1); return (rman_is_region_manager(r, &sc->mem.rman)); } return (0); } static int pcib_is_window_open(struct pcib_window *pw) { return (pw->valid && pw->base < pw->limit); } /* * XXX: If RF_ACTIVE did not also imply allocating a bus space tag and * handle for the resource, we could pass RF_ACTIVE up to the PCI bus * when allocating the resource windows and rely on the PCI bus driver * to do this for us. */ static void pcib_activate_window(struct pcib_softc *sc, int type) { PCI_ENABLE_IO(device_get_parent(sc->dev), sc->dev, type); } static void pcib_write_windows(struct pcib_softc *sc, int mask) { device_t dev; uint32_t val; dev = sc->dev; if (sc->io.valid && mask & WIN_IO) { val = pci_read_config(dev, PCIR_IOBASEL_1, 1); if ((val & PCIM_BRIO_MASK) == PCIM_BRIO_32) { pci_write_config(dev, PCIR_IOBASEH_1, sc->io.base >> 16, 2); pci_write_config(dev, PCIR_IOLIMITH_1, sc->io.limit >> 16, 2); } pci_write_config(dev, PCIR_IOBASEL_1, sc->io.base >> 8, 1); pci_write_config(dev, PCIR_IOLIMITL_1, sc->io.limit >> 8, 1); } if (mask & WIN_MEM) { pci_write_config(dev, PCIR_MEMBASE_1, sc->mem.base >> 16, 2); pci_write_config(dev, PCIR_MEMLIMIT_1, sc->mem.limit >> 16, 2); } if (sc->pmem.valid && mask & WIN_PMEM) { val = pci_read_config(dev, PCIR_PMBASEL_1, 2); if ((val & PCIM_BRPM_MASK) == PCIM_BRPM_64) { pci_write_config(dev, PCIR_PMBASEH_1, sc->pmem.base >> 32, 4); pci_write_config(dev, PCIR_PMLIMITH_1, sc->pmem.limit >> 32, 4); } pci_write_config(dev, PCIR_PMBASEL_1, sc->pmem.base >> 16, 2); pci_write_config(dev, PCIR_PMLIMITL_1, sc->pmem.limit >> 16, 2); } } /* * This is used to reject I/O port allocations that conflict with an * ISA alias range. */ static int pcib_is_isa_range(struct pcib_softc *sc, rman_res_t start, rman_res_t end, rman_res_t count) { rman_res_t next_alias; if (!(sc->bridgectl & PCIB_BCR_ISA_ENABLE)) return (0); /* Only check fixed ranges for overlap. */ if (start + count - 1 != end) return (0); /* ISA aliases are only in the lower 64KB of I/O space. */ if (start >= 65536) return (0); /* Check for overlap with 0x000 - 0x0ff as a special case. */ if (start < 0x100) goto alias; /* * If the start address is an alias, the range is an alias. * Otherwise, compute the start of the next alias range and * check if it is before the end of the candidate range. 
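	 *
	 * Worked example (illustrative): for start 0x3e0, (0x3e0 & 0x300)
	 * is 0x300, so the range is an alias outright; for start 0x400,
	 * next_alias becomes 0x500, so a fixed range 0x400-0x4ff is clear
	 * of aliases while 0x400-0x500 is not.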
*/ if ((start & 0x300) != 0) goto alias; next_alias = (start & ~0x3fful) | 0x100; if (next_alias <= end) goto alias; return (0); alias: if (bootverbose) device_printf(sc->dev, "I/O range %#jx-%#jx overlaps with an ISA alias\n", start, end); return (1); } static void pcib_add_window_resources(struct pcib_window *w, struct resource **res, int count) { struct resource **newarray; int error, i; newarray = malloc(sizeof(struct resource *) * (w->count + count), M_DEVBUF, M_WAITOK); if (w->res != NULL) bcopy(w->res, newarray, sizeof(struct resource *) * w->count); bcopy(res, newarray + w->count, sizeof(struct resource *) * count); free(w->res, M_DEVBUF); w->res = newarray; w->count += count; for (i = 0; i < count; i++) { error = rman_manage_region(&w->rman, rman_get_start(res[i]), rman_get_end(res[i])); if (error) panic("Failed to add resource to rman"); } } typedef void (nonisa_callback)(rman_res_t start, rman_res_t end, void *arg); static void pcib_walk_nonisa_ranges(rman_res_t start, rman_res_t end, nonisa_callback *cb, void *arg) { rman_res_t next_end; /* * If start is within an ISA alias range, move up to the start * of the next non-alias range. As a special case, addresses * in the range 0x000 - 0x0ff should also be skipped since * those are used for various system I/O devices in ISA * systems. */ if (start <= 65535) { if (start < 0x100 || (start & 0x300) != 0) { start &= ~0x3ff; start += 0x400; } } /* ISA aliases are only in the lower 64KB of I/O space. */ while (start <= MIN(end, 65535)) { next_end = MIN(start | 0xff, end); cb(start, next_end, arg); start += 0x400; } if (start <= end) cb(start, end, arg); } static void count_ranges(rman_res_t start, rman_res_t end, void *arg) { int *countp; countp = arg; (*countp)++; } struct alloc_state { struct resource **res; struct pcib_softc *sc; int count, error; }; static void alloc_ranges(rman_res_t start, rman_res_t end, void *arg) { struct alloc_state *as; struct pcib_window *w; int rid; as = arg; if (as->error != 0) return; w = &as->sc->io; rid = w->reg; if (bootverbose) device_printf(as->sc->dev, "allocating non-ISA range %#jx-%#jx\n", start, end); as->res[as->count] = bus_alloc_resource(as->sc->dev, SYS_RES_IOPORT, &rid, start, end, end - start + 1, 0); if (as->res[as->count] == NULL) as->error = ENXIO; else as->count++; } static int pcib_alloc_nonisa_ranges(struct pcib_softc *sc, rman_res_t start, rman_res_t end) { struct alloc_state as; int i, new_count; /* First, see how many ranges we need. */ new_count = 0; pcib_walk_nonisa_ranges(start, end, count_ranges, &new_count); /* Second, allocate the ranges. */ as.res = malloc(sizeof(struct resource *) * new_count, M_DEVBUF, M_WAITOK); as.sc = sc; as.count = 0; as.error = 0; pcib_walk_nonisa_ranges(start, end, alloc_ranges, &as); if (as.error != 0) { for (i = 0; i < as.count; i++) bus_release_resource(sc->dev, SYS_RES_IOPORT, sc->io.reg, as.res[i]); free(as.res, M_DEVBUF); return (as.error); } KASSERT(as.count == new_count, ("%s: count mismatch", __func__)); /* Third, add the ranges to the window. 
*/ pcib_add_window_resources(&sc->io, as.res, as.count); free(as.res, M_DEVBUF); return (0); } static void pcib_alloc_window(struct pcib_softc *sc, struct pcib_window *w, int type, int flags, pci_addr_t max_address) { struct resource *res; char buf[64]; int error, rid; if (max_address != (rman_res_t)max_address) max_address = ~0; w->rman.rm_start = 0; w->rman.rm_end = max_address; w->rman.rm_type = RMAN_ARRAY; snprintf(buf, sizeof(buf), "%s %s window", device_get_nameunit(sc->dev), w->name); w->rman.rm_descr = strdup(buf, M_DEVBUF); error = rman_init(&w->rman); if (error) panic("Failed to initialize %s %s rman", device_get_nameunit(sc->dev), w->name); if (!pcib_is_window_open(w)) return; if (w->base > max_address || w->limit > max_address) { device_printf(sc->dev, "initial %s window has too many bits, ignoring\n", w->name); return; } if (type == SYS_RES_IOPORT && sc->bridgectl & PCIB_BCR_ISA_ENABLE) (void)pcib_alloc_nonisa_ranges(sc, w->base, w->limit); else { rid = w->reg; res = bus_alloc_resource(sc->dev, type, &rid, w->base, w->limit, w->limit - w->base + 1, flags); if (res != NULL) pcib_add_window_resources(w, &res, 1); } if (w->res == NULL) { device_printf(sc->dev, "failed to allocate initial %s window: %#jx-%#jx\n", w->name, (uintmax_t)w->base, (uintmax_t)w->limit); w->base = max_address; w->limit = 0; pcib_write_windows(sc, w->mask); return; } pcib_activate_window(sc, type); } /* * Initialize I/O windows. */ static void pcib_probe_windows(struct pcib_softc *sc) { pci_addr_t max; device_t dev; uint32_t val; dev = sc->dev; if (pci_clear_pcib) { pcib_bridge_init(dev); } /* Determine if the I/O port window is implemented. */ val = pci_read_config(dev, PCIR_IOBASEL_1, 1); if (val == 0) { /* * If 'val' is zero, then only 16-bits of I/O space * are supported. */ pci_write_config(dev, PCIR_IOBASEL_1, 0xff, 1); if (pci_read_config(dev, PCIR_IOBASEL_1, 1) != 0) { sc->io.valid = 1; pci_write_config(dev, PCIR_IOBASEL_1, 0, 1); } } else sc->io.valid = 1; /* Read the existing I/O port window. */ if (sc->io.valid) { sc->io.reg = PCIR_IOBASEL_1; sc->io.step = 12; sc->io.mask = WIN_IO; sc->io.name = "I/O port"; if ((val & PCIM_BRIO_MASK) == PCIM_BRIO_32) { sc->io.base = PCI_PPBIOBASE( pci_read_config(dev, PCIR_IOBASEH_1, 2), val); sc->io.limit = PCI_PPBIOLIMIT( pci_read_config(dev, PCIR_IOLIMITH_1, 2), pci_read_config(dev, PCIR_IOLIMITL_1, 1)); max = 0xffffffff; } else { sc->io.base = PCI_PPBIOBASE(0, val); sc->io.limit = PCI_PPBIOLIMIT(0, pci_read_config(dev, PCIR_IOLIMITL_1, 1)); max = 0xffff; } pcib_alloc_window(sc, &sc->io, SYS_RES_IOPORT, 0, max); } /* Read the existing memory window. */ sc->mem.valid = 1; sc->mem.reg = PCIR_MEMBASE_1; sc->mem.step = 20; sc->mem.mask = WIN_MEM; sc->mem.name = "memory"; sc->mem.base = PCI_PPBMEMBASE(0, pci_read_config(dev, PCIR_MEMBASE_1, 2)); sc->mem.limit = PCI_PPBMEMLIMIT(0, pci_read_config(dev, PCIR_MEMLIMIT_1, 2)); pcib_alloc_window(sc, &sc->mem, SYS_RES_MEMORY, 0, 0xffffffff); /* Determine if the prefetchable memory window is implemented. */ val = pci_read_config(dev, PCIR_PMBASEL_1, 2); if (val == 0) { /* * If 'val' is zero, then only 32-bits of memory space * are supported. */ pci_write_config(dev, PCIR_PMBASEL_1, 0xffff, 2); if (pci_read_config(dev, PCIR_PMBASEL_1, 2) != 0) { sc->pmem.valid = 1; pci_write_config(dev, PCIR_PMBASEL_1, 0, 2); } } else sc->pmem.valid = 1; /* Read the existing prefetchable memory window. 
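	 *
	 * Editorial note: the low four bits of PMBASEL_1 read back as a
	 * capability field rather than address bits; PCIM_BRPM_64 there
	 * means the bridge implements 64-bit prefetchable addressing, so
	 * the upper 32 bits of the base and limit live in PMBASEH_1 and
	 * PMLIMITH_1, as handled below.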
*/ if (sc->pmem.valid) { sc->pmem.reg = PCIR_PMBASEL_1; sc->pmem.step = 20; sc->pmem.mask = WIN_PMEM; sc->pmem.name = "prefetch"; if ((val & PCIM_BRPM_MASK) == PCIM_BRPM_64) { sc->pmem.base = PCI_PPBMEMBASE( pci_read_config(dev, PCIR_PMBASEH_1, 4), val); sc->pmem.limit = PCI_PPBMEMLIMIT( pci_read_config(dev, PCIR_PMLIMITH_1, 4), pci_read_config(dev, PCIR_PMLIMITL_1, 2)); max = 0xffffffffffffffff; } else { sc->pmem.base = PCI_PPBMEMBASE(0, val); sc->pmem.limit = PCI_PPBMEMLIMIT(0, pci_read_config(dev, PCIR_PMLIMITL_1, 2)); max = 0xffffffff; } pcib_alloc_window(sc, &sc->pmem, SYS_RES_MEMORY, RF_PREFETCHABLE, max); } } static void pcib_release_window(struct pcib_softc *sc, struct pcib_window *w, int type) { device_t dev; int error, i; if (!w->valid) return; dev = sc->dev; error = rman_fini(&w->rman); if (error) { device_printf(dev, "failed to release %s rman\n", w->name); return; } free(__DECONST(char *, w->rman.rm_descr), M_DEVBUF); for (i = 0; i < w->count; i++) { error = bus_free_resource(dev, type, w->res[i]); if (error) device_printf(dev, "failed to release %s resource: %d\n", w->name, error); } free(w->res, M_DEVBUF); } static void pcib_free_windows(struct pcib_softc *sc) { pcib_release_window(sc, &sc->pmem, SYS_RES_MEMORY); pcib_release_window(sc, &sc->mem, SYS_RES_MEMORY); pcib_release_window(sc, &sc->io, SYS_RES_IOPORT); } #ifdef PCI_RES_BUS /* * Allocate a suitable secondary bus for this bridge if needed and * initialize the resource manager for the secondary bus range. Note * that the minimum count is a desired value and this may allocate a * smaller range. */ void pcib_setup_secbus(device_t dev, struct pcib_secbus *bus, int min_count) { char buf[64]; int error, rid, sec_reg; switch (pci_read_config(dev, PCIR_HDRTYPE, 1) & PCIM_HDRTYPE) { case PCIM_HDRTYPE_BRIDGE: sec_reg = PCIR_SECBUS_1; bus->sub_reg = PCIR_SUBBUS_1; break; case PCIM_HDRTYPE_CARDBUS: sec_reg = PCIR_SECBUS_2; bus->sub_reg = PCIR_SUBBUS_2; break; default: panic("not a PCI bridge"); } bus->sec = pci_read_config(dev, sec_reg, 1); bus->sub = pci_read_config(dev, bus->sub_reg, 1); bus->dev = dev; bus->rman.rm_start = 0; bus->rman.rm_end = PCI_BUSMAX; bus->rman.rm_type = RMAN_ARRAY; snprintf(buf, sizeof(buf), "%s bus numbers", device_get_nameunit(dev)); bus->rman.rm_descr = strdup(buf, M_DEVBUF); error = rman_init(&bus->rman); if (error) panic("Failed to initialize %s bus number rman", device_get_nameunit(dev)); /* * Allocate a bus range. This will return an existing bus range * if one exists, or a new bus range if one does not. */ rid = 0; bus->res = bus_alloc_resource_anywhere(dev, PCI_RES_BUS, &rid, min_count, 0); if (bus->res == NULL) { /* * Fall back to just allocating a range of a single bus * number. */ bus->res = bus_alloc_resource_anywhere(dev, PCI_RES_BUS, &rid, 1, 0); } else if (rman_get_size(bus->res) < min_count) /* * Attempt to grow the existing range to satisfy the * minimum desired count. */ (void)bus_adjust_resource(dev, PCI_RES_BUS, bus->res, rman_get_start(bus->res), rman_get_start(bus->res) + min_count - 1); /* * Add the initial resource to the rman. 
*/ if (bus->res != NULL) { error = rman_manage_region(&bus->rman, rman_get_start(bus->res), rman_get_end(bus->res)); if (error) panic("Failed to add resource to rman"); bus->sec = rman_get_start(bus->res); bus->sub = rman_get_end(bus->res); } } void pcib_free_secbus(device_t dev, struct pcib_secbus *bus) { int error; error = rman_fini(&bus->rman); if (error) { device_printf(dev, "failed to release bus number rman\n"); return; } free(__DECONST(char *, bus->rman.rm_descr), M_DEVBUF); error = bus_free_resource(dev, PCI_RES_BUS, bus->res); if (error) device_printf(dev, "failed to release bus numbers resource: %d\n", error); } static struct resource * pcib_suballoc_bus(struct pcib_secbus *bus, device_t child, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct resource *res; res = rman_reserve_resource(&bus->rman, start, end, count, flags, child); if (res == NULL) return (NULL); if (bootverbose) device_printf(bus->dev, "allocated bus range (%ju-%ju) for rid %d of %s\n", rman_get_start(res), rman_get_end(res), *rid, pcib_child_name(child)); rman_set_rid(res, *rid); return (res); } /* * Attempt to grow the secondary bus range. This is much simpler than * for I/O windows as the range can only be grown by increasing * subbus. */ static int pcib_grow_subbus(struct pcib_secbus *bus, rman_res_t new_end) { rman_res_t old_end; int error; old_end = rman_get_end(bus->res); KASSERT(new_end > old_end, ("attempt to shrink subbus")); error = bus_adjust_resource(bus->dev, PCI_RES_BUS, bus->res, rman_get_start(bus->res), new_end); if (error) return (error); if (bootverbose) device_printf(bus->dev, "grew bus range to %ju-%ju\n", rman_get_start(bus->res), rman_get_end(bus->res)); error = rman_manage_region(&bus->rman, old_end + 1, rman_get_end(bus->res)); if (error) panic("Failed to add resource to rman"); bus->sub = rman_get_end(bus->res); pci_write_config(bus->dev, bus->sub_reg, bus->sub, 1); return (0); } struct resource * pcib_alloc_subbus(struct pcib_secbus *bus, device_t child, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct resource *res; rman_res_t start_free, end_free, new_end; /* * First, see if the request can be satisified by the existing * bus range. */ res = pcib_suballoc_bus(bus, child, rid, start, end, count, flags); if (res != NULL) return (res); /* * Figure out a range to grow the bus range. First, find the * first bus number after the last allocated bus in the rman and * enforce that as a minimum starting point for the range. */ if (rman_last_free_region(&bus->rman, &start_free, &end_free) != 0 || end_free != bus->sub) start_free = bus->sub + 1; if (start_free < start) start_free = start; new_end = start_free + count - 1; /* * See if this new range would satisfy the request if it * succeeds. */ if (new_end > end) return (NULL); /* Finally, attempt to grow the existing resource. */ if (bootverbose) { device_printf(bus->dev, "attempting to grow bus range for %ju buses\n", count); printf("\tback candidate range: %ju-%ju\n", start_free, new_end); } if (pcib_grow_subbus(bus, new_end) == 0) return (pcib_suballoc_bus(bus, child, rid, start, end, count, flags)); return (NULL); } #endif #else /* * Is the prefetch window open (eg, can we allocate memory in it?) */ static int pcib_is_prefetch_open(struct pcib_softc *sc) { return (sc->pmembase > 0 && sc->pmembase < sc->pmemlimit); } /* * Is the nonprefetch window open (eg, can we allocate memory in it?) 
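 */

/*
 * Editorial sketch (compiled out, not from the original source): the
 * pcib_is_*_open() checks in this block rely on the PCI-to-PCI bridge
 * convention that firmware closes a window by programming base > limit.
 * The hypothetical helper below applies the same test to a single
 * decoded address.
 */
#if 0
static int
window_decodes_addr(pci_addr_t base, pci_addr_t limit, pci_addr_t addr)
{

	/* A closed window (base == 0 or base >= limit) decodes nothing. */
	return (base > 0 && base < limit && addr >= base && addr <= limit);
}
#endif

/*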
*/ static int pcib_is_nonprefetch_open(struct pcib_softc *sc) { return (sc->membase > 0 && sc->membase < sc->memlimit); } /* * Is the io window open (eg, can we allocate ports in it?) */ static int pcib_is_io_open(struct pcib_softc *sc) { return (sc->iobase > 0 && sc->iobase < sc->iolimit); } /* * Get current I/O decode. */ static void pcib_get_io_decode(struct pcib_softc *sc) { device_t dev; uint32_t iolow; dev = sc->dev; iolow = pci_read_config(dev, PCIR_IOBASEL_1, 1); if ((iolow & PCIM_BRIO_MASK) == PCIM_BRIO_32) sc->iobase = PCI_PPBIOBASE( pci_read_config(dev, PCIR_IOBASEH_1, 2), iolow); else sc->iobase = PCI_PPBIOBASE(0, iolow); iolow = pci_read_config(dev, PCIR_IOLIMITL_1, 1); if ((iolow & PCIM_BRIO_MASK) == PCIM_BRIO_32) sc->iolimit = PCI_PPBIOLIMIT( pci_read_config(dev, PCIR_IOLIMITH_1, 2), iolow); else sc->iolimit = PCI_PPBIOLIMIT(0, iolow); } /* * Get current memory decode. */ static void pcib_get_mem_decode(struct pcib_softc *sc) { device_t dev; pci_addr_t pmemlow; dev = sc->dev; sc->membase = PCI_PPBMEMBASE(0, pci_read_config(dev, PCIR_MEMBASE_1, 2)); sc->memlimit = PCI_PPBMEMLIMIT(0, pci_read_config(dev, PCIR_MEMLIMIT_1, 2)); pmemlow = pci_read_config(dev, PCIR_PMBASEL_1, 2); if ((pmemlow & PCIM_BRPM_MASK) == PCIM_BRPM_64) sc->pmembase = PCI_PPBMEMBASE( pci_read_config(dev, PCIR_PMBASEH_1, 4), pmemlow); else sc->pmembase = PCI_PPBMEMBASE(0, pmemlow); pmemlow = pci_read_config(dev, PCIR_PMLIMITL_1, 2); if ((pmemlow & PCIM_BRPM_MASK) == PCIM_BRPM_64) sc->pmemlimit = PCI_PPBMEMLIMIT( pci_read_config(dev, PCIR_PMLIMITH_1, 4), pmemlow); else sc->pmemlimit = PCI_PPBMEMLIMIT(0, pmemlow); } /* * Restore previous I/O decode. */ static void pcib_set_io_decode(struct pcib_softc *sc) { device_t dev; uint32_t iohi; dev = sc->dev; iohi = sc->iobase >> 16; if (iohi > 0) pci_write_config(dev, PCIR_IOBASEH_1, iohi, 2); pci_write_config(dev, PCIR_IOBASEL_1, sc->iobase >> 8, 1); iohi = sc->iolimit >> 16; if (iohi > 0) pci_write_config(dev, PCIR_IOLIMITH_1, iohi, 2); pci_write_config(dev, PCIR_IOLIMITL_1, sc->iolimit >> 8, 1); } /* * Restore previous memory decode. */ static void pcib_set_mem_decode(struct pcib_softc *sc) { device_t dev; pci_addr_t pmemhi; dev = sc->dev; pci_write_config(dev, PCIR_MEMBASE_1, sc->membase >> 16, 2); pci_write_config(dev, PCIR_MEMLIMIT_1, sc->memlimit >> 16, 2); pmemhi = sc->pmembase >> 32; if (pmemhi > 0) pci_write_config(dev, PCIR_PMBASEH_1, pmemhi, 4); pci_write_config(dev, PCIR_PMBASEL_1, sc->pmembase >> 16, 2); pmemhi = sc->pmemlimit >> 32; if (pmemhi > 0) pci_write_config(dev, PCIR_PMLIMITH_1, pmemhi, 4); pci_write_config(dev, PCIR_PMLIMITL_1, sc->pmemlimit >> 16, 2); } #endif #ifdef PCI_HP /* * PCI-express HotPlug support. 
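 *
 * Editorial overview: slot commands in the code below are issued as
 * read-modify-write updates of PCIER_SLOT_CTL, new = (ctl & ~mask) | val,
 * so only the field named by 'mask' changes.  For example, updating the
 * Power Indicator alone uses mask = PCIEM_SLOT_CTL_PIC with
 * val = PCIEM_SLOT_CTL_PI_ON, leaving every other slot control bit
 * intact.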
*/ static int pci_enable_pcie_hp = 1; SYSCTL_INT(_hw_pci, OID_AUTO, enable_pcie_hp, CTLFLAG_RDTUN, &pci_enable_pcie_hp, 0, "Enable support for native PCI-express HotPlug."); static void pcib_probe_hotplug(struct pcib_softc *sc) { device_t dev; + uint32_t link_cap; uint16_t link_sta, slot_sta; if (!pci_enable_pcie_hp) return; dev = sc->dev; if (pci_find_cap(dev, PCIY_EXPRESS, NULL) != 0) return; if (!(pcie_read_config(dev, PCIER_FLAGS, 2) & PCIEM_FLAGS_SLOT)) return; - sc->pcie_link_cap = pcie_read_config(dev, PCIER_LINK_CAP, 4); sc->pcie_slot_cap = pcie_read_config(dev, PCIER_SLOT_CAP, 4); if ((sc->pcie_slot_cap & PCIEM_SLOT_CAP_HPC) == 0) return; - if ((sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) == 0) + link_cap = pcie_read_config(dev, PCIER_LINK_CAP, 4); + if ((link_cap & PCIEM_LINK_CAP_DL_ACTIVE) == 0) return; /* * Some devices report that they have an MRL when they actually * do not. Since they always report that the MRL is open, child * devices would be ignored. Try to detect these devices and * ignore their claim of HotPlug support. * * If there is an open MRL but the Data Link Layer is active, * the MRL is not real. */ - if ((sc->pcie_slot_cap & PCIEM_SLOT_CAP_MRLSP) != 0 && - (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) != 0) { + if ((sc->pcie_slot_cap & PCIEM_SLOT_CAP_MRLSP) != 0) { link_sta = pcie_read_config(dev, PCIER_LINK_STA, 2); slot_sta = pcie_read_config(dev, PCIER_SLOT_STA, 2); if ((slot_sta & PCIEM_SLOT_STA_MRLSS) != 0 && (link_sta & PCIEM_LINK_STA_DL_ACTIVE) != 0) { return; } } sc->flags |= PCIB_HOTPLUG; } /* * Send a HotPlug command to the slot control register. If this slot * uses command completion interrupts and a previous command is still * in progress, then the command is dropped. Once the previous * command completes or times out, pcib_pcie_hotplug_update() will be * invoked to post a new command based on the slot's state at that * time. */ static void pcib_pcie_hotplug_command(struct pcib_softc *sc, uint16_t val, uint16_t mask) { device_t dev; uint16_t ctl, new; dev = sc->dev; if (sc->flags & PCIB_HOTPLUG_CMD_PENDING) return; ctl = pcie_read_config(dev, PCIER_SLOT_CTL, 2); new = (ctl & ~mask) | val; if (new == ctl) return; if (bootverbose) device_printf(dev, "HotPlug command: %04x -> %04x\n", ctl, new); pcie_write_config(dev, PCIER_SLOT_CTL, new, 2); if (!(sc->pcie_slot_cap & PCIEM_SLOT_CAP_NCCS) && (ctl & new) & PCIEM_SLOT_CTL_CCIE) { sc->flags |= PCIB_HOTPLUG_CMD_PENDING; if (!cold) callout_reset(&sc->pcie_cc_timer, hz, pcib_pcie_cc_timeout, sc); } } static void pcib_pcie_hotplug_command_completed(struct pcib_softc *sc) { device_t dev; dev = sc->dev; if (bootverbose) device_printf(dev, "Command Completed\n"); if (!(sc->flags & PCIB_HOTPLUG_CMD_PENDING)) return; callout_stop(&sc->pcie_cc_timer); sc->flags &= ~PCIB_HOTPLUG_CMD_PENDING; wakeup(sc); } /* * Returns true if a card is fully inserted from the user's * perspective. It may not yet be ready for access, but the driver * can now start enabling access if necessary. */ static bool pcib_hotplug_inserted(struct pcib_softc *sc) { /* Pretend the card isn't present if a detach is forced. */ if (sc->flags & PCIB_DETACHING) return (false); /* Card must be present in the slot. */ if ((sc->pcie_slot_sta & PCIEM_SLOT_STA_PDS) == 0) return (false); /* A power fault implicitly turns off power to the slot. */ if (sc->pcie_slot_sta & PCIEM_SLOT_STA_PFD) return (false); /* If the MRL is disengaged, the slot is powered off. 
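	 *
	 * (PCIEM_SLOT_STA_MRLSS set in the slot status register means the
	 * retention latch is open, so the test below treats the card as
	 * not inserted.)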
*/ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_MRLSP && (sc->pcie_slot_sta & PCIEM_SLOT_STA_MRLSS) != 0) return (false); return (true); } /* * Returns -1 if the card is fully inserted, powered, and ready for * access. Otherwise, returns 0. */ static int pcib_hotplug_present(struct pcib_softc *sc) { /* Card must be inserted. */ if (!pcib_hotplug_inserted(sc)) return (0); /* * Require the Electromechanical Interlock to be engaged if * present. */ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_EIP && (sc->pcie_slot_sta & PCIEM_SLOT_STA_EIS) == 0) return (0); /* Require the Data Link Layer to be active. */ - if (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) { - if (!(sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE)) - return (0); - } + if (!(sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE)) + return (0); return (-1); } static void pcib_pcie_hotplug_update(struct pcib_softc *sc, uint16_t val, uint16_t mask, bool schedule_task) { bool card_inserted, ei_engaged; /* Clear DETACHING if Presence Detect has cleared. */ if ((sc->pcie_slot_sta & (PCIEM_SLOT_STA_PDC | PCIEM_SLOT_STA_PDS)) == PCIEM_SLOT_STA_PDC) sc->flags &= ~PCIB_DETACHING; card_inserted = pcib_hotplug_inserted(sc); /* Turn the power indicator on if a card is inserted. */ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_PIP) { mask |= PCIEM_SLOT_CTL_PIC; if (card_inserted) val |= PCIEM_SLOT_CTL_PI_ON; else if (sc->flags & PCIB_DETACH_PENDING) val |= PCIEM_SLOT_CTL_PI_BLINK; else val |= PCIEM_SLOT_CTL_PI_OFF; } /* Turn the power on via the Power Controller if a card is inserted. */ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_PCP) { mask |= PCIEM_SLOT_CTL_PCC; if (card_inserted) val |= PCIEM_SLOT_CTL_PC_ON; else val |= PCIEM_SLOT_CTL_PC_OFF; } /* * If a card is inserted, enable the Electromechanical * Interlock. If a card is not inserted (or we are in the * process of detaching), disable the Electromechanical * Interlock. */ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_EIP) { mask |= PCIEM_SLOT_CTL_EIC; ei_engaged = (sc->pcie_slot_sta & PCIEM_SLOT_STA_EIS) != 0; if (card_inserted != ei_engaged) val |= PCIEM_SLOT_CTL_EIC; } /* * Start a timer to see if the Data Link Layer times out. * Note that we only start the timer if Presence Detect or MRL Sensor * changed on this interrupt. Stop any scheduled timer if * the Data Link Layer is active. */ - if (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) { - if (card_inserted && - !(sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE) && - sc->pcie_slot_sta & - (PCIEM_SLOT_STA_MRLSC | PCIEM_SLOT_STA_PDC)) { - if (cold) - device_printf(sc->dev, - "Data Link Layer inactive\n"); - else - callout_reset(&sc->pcie_dll_timer, hz, - pcib_pcie_dll_timeout, sc); - } else if (sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE) - callout_stop(&sc->pcie_dll_timer); - } + if (card_inserted && + !(sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE) && + sc->pcie_slot_sta & + (PCIEM_SLOT_STA_MRLSC | PCIEM_SLOT_STA_PDC)) { + if (cold) + device_printf(sc->dev, + "Data Link Layer inactive\n"); + else + callout_reset(&sc->pcie_dll_timer, hz, + pcib_pcie_dll_timeout, sc); + } else if (sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE) + callout_stop(&sc->pcie_dll_timer); pcib_pcie_hotplug_command(sc, val, mask); /* * During attach the child "pci" device is added synchronously; * otherwise, the task is scheduled to manage the child * device. 
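	 *
	 * Editorial example: if a card has just become present while
	 * sc->child is still NULL, the test below is true and the task
	 * attaches a new "pci" child; after a removal the same task
	 * deletes the stale child.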
*/ if (schedule_task && (pcib_hotplug_present(sc) != 0) != (sc->child != NULL)) taskqueue_enqueue(taskqueue_thread, &sc->pcie_hp_task); } static void pcib_pcie_intr(void *arg) { struct pcib_softc *sc; device_t dev; sc = arg; dev = sc->dev; sc->pcie_slot_sta = pcie_read_config(dev, PCIER_SLOT_STA, 2); /* Clear the events just reported. */ pcie_write_config(dev, PCIER_SLOT_STA, sc->pcie_slot_sta, 2); if (bootverbose) device_printf(dev, "HotPlug interrupt: %#x\n", sc->pcie_slot_sta); if (sc->pcie_slot_sta & PCIEM_SLOT_STA_ABP) { if (sc->flags & PCIB_DETACH_PENDING) { device_printf(dev, "Attention Button Pressed: Detach Cancelled\n"); sc->flags &= ~PCIB_DETACH_PENDING; callout_stop(&sc->pcie_ab_timer); } else { device_printf(dev, "Attention Button Pressed: Detaching in 5 seconds\n"); sc->flags |= PCIB_DETACH_PENDING; callout_reset(&sc->pcie_ab_timer, 5 * hz, pcib_pcie_ab_timeout, sc); } } if (sc->pcie_slot_sta & PCIEM_SLOT_STA_PFD) device_printf(dev, "Power Fault Detected\n"); if (sc->pcie_slot_sta & PCIEM_SLOT_STA_MRLSC) device_printf(dev, "MRL Sensor Changed to %s\n", sc->pcie_slot_sta & PCIEM_SLOT_STA_MRLSS ? "open" : "closed"); if (bootverbose && sc->pcie_slot_sta & PCIEM_SLOT_STA_PDC) device_printf(dev, "Presence Detect Changed to %s\n", sc->pcie_slot_sta & PCIEM_SLOT_STA_PDS ? "card present" : "empty"); if (sc->pcie_slot_sta & PCIEM_SLOT_STA_CC) pcib_pcie_hotplug_command_completed(sc); if (sc->pcie_slot_sta & PCIEM_SLOT_STA_DLLSC) { sc->pcie_link_sta = pcie_read_config(dev, PCIER_LINK_STA, 2); if (bootverbose) device_printf(dev, "Data Link Layer State Changed to %s\n", sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE ? "active" : "inactive"); } pcib_pcie_hotplug_update(sc, 0, 0, true); } static void pcib_pcie_hotplug_task(void *context, int pending) { struct pcib_softc *sc; device_t dev; sc = context; mtx_lock(&Giant); dev = sc->dev; if (pcib_hotplug_present(sc) != 0) { if (sc->child == NULL) { sc->child = device_add_child(dev, "pci", -1); bus_generic_attach(dev); } } else { if (sc->child != NULL) { if (device_delete_child(dev, sc->child) == 0) sc->child = NULL; } } mtx_unlock(&Giant); } static void pcib_pcie_ab_timeout(void *arg) { struct pcib_softc *sc; device_t dev; sc = arg; dev = sc->dev; mtx_assert(&Giant, MA_OWNED); if (sc->flags & PCIB_DETACH_PENDING) { sc->flags |= PCIB_DETACHING; sc->flags &= ~PCIB_DETACH_PENDING; pcib_pcie_hotplug_update(sc, 0, 0, true); } } static void pcib_pcie_cc_timeout(void *arg) { struct pcib_softc *sc; device_t dev; uint16_t sta; sc = arg; dev = sc->dev; mtx_assert(&Giant, MA_OWNED); sta = pcie_read_config(dev, PCIER_SLOT_STA, 2); if (!(sta & PCIEM_SLOT_STA_CC)) { device_printf(dev, "HotPlug Command Timed Out - forcing detach\n"); sc->flags &= ~(PCIB_HOTPLUG_CMD_PENDING | PCIB_DETACH_PENDING); sc->flags |= PCIB_DETACHING; pcib_pcie_hotplug_update(sc, 0, 0, true); } else { device_printf(dev, "Missed HotPlug interrupt waiting for Command Completion\n"); pcib_pcie_intr(sc); } } static void pcib_pcie_dll_timeout(void *arg) { struct pcib_softc *sc; device_t dev; uint16_t sta; sc = arg; dev = sc->dev; mtx_assert(&Giant, MA_OWNED); sta = pcie_read_config(dev, PCIER_LINK_STA, 2); if (!(sta & PCIEM_LINK_STA_DL_ACTIVE)) { device_printf(dev, "Timed out waiting for Data Link Layer Active\n"); sc->flags |= PCIB_DETACHING; pcib_pcie_hotplug_update(sc, 0, 0, true); } else if (sta != sc->pcie_link_sta) { device_printf(dev, "Missed HotPlug interrupt waiting for DLL Active\n"); pcib_pcie_intr(sc); } } static int pcib_alloc_pcie_irq(struct pcib_softc *sc) { device_t dev; int 
count, error, rid; rid = -1; dev = sc->dev; /* * For simplicity, only use MSI-X if there is a single message. * To support a device with multiple messages we would have to * use remap intr if the MSI number is not 0. */ count = pci_msix_count(dev); if (count == 1) { error = pci_alloc_msix(dev, &count); if (error == 0) rid = 1; } if (rid < 0 && pci_msi_count(dev) > 0) { count = 1; error = pci_alloc_msi(dev, &count); if (error == 0) rid = 1; } if (rid < 0) rid = 0; sc->pcie_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->pcie_irq == NULL) { device_printf(dev, "Failed to allocate interrupt for PCI-e events\n"); if (rid > 0) pci_release_msi(dev); return (ENXIO); } error = bus_setup_intr(dev, sc->pcie_irq, INTR_TYPE_MISC, NULL, pcib_pcie_intr, sc, &sc->pcie_ihand); if (error) { device_printf(dev, "Failed to setup PCI-e interrupt handler\n"); bus_release_resource(dev, SYS_RES_IRQ, rid, sc->pcie_irq); if (rid > 0) pci_release_msi(dev); return (error); } return (0); } static int pcib_release_pcie_irq(struct pcib_softc *sc) { device_t dev; int error; dev = sc->dev; error = bus_teardown_intr(dev, sc->pcie_irq, sc->pcie_ihand); if (error) return (error); error = bus_free_resource(dev, SYS_RES_IRQ, sc->pcie_irq); if (error) return (error); return (pci_release_msi(dev)); } static void pcib_setup_hotplug(struct pcib_softc *sc) { device_t dev; uint16_t mask, val; dev = sc->dev; callout_init(&sc->pcie_ab_timer, 0); callout_init(&sc->pcie_cc_timer, 0); callout_init(&sc->pcie_dll_timer, 0); TASK_INIT(&sc->pcie_hp_task, 0, pcib_pcie_hotplug_task, sc); /* Allocate IRQ. */ if (pcib_alloc_pcie_irq(sc) != 0) return; sc->pcie_link_sta = pcie_read_config(dev, PCIER_LINK_STA, 2); sc->pcie_slot_sta = pcie_read_config(dev, PCIER_SLOT_STA, 2); /* Clear any events previously pending. */ pcie_write_config(dev, PCIER_SLOT_STA, sc->pcie_slot_sta, 2); /* Enable HotPlug events. */ mask = PCIEM_SLOT_CTL_DLLSCE | PCIEM_SLOT_CTL_HPIE | PCIEM_SLOT_CTL_CCIE | PCIEM_SLOT_CTL_PDCE | PCIEM_SLOT_CTL_MRLSCE | PCIEM_SLOT_CTL_PFDE | PCIEM_SLOT_CTL_ABPE; - val = PCIEM_SLOT_CTL_PDCE | PCIEM_SLOT_CTL_HPIE; + val = PCIEM_SLOT_CTL_DLLSCE | PCIEM_SLOT_CTL_HPIE | PCIEM_SLOT_CTL_PDCE; if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_APB) val |= PCIEM_SLOT_CTL_ABPE; if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_PCP) val |= PCIEM_SLOT_CTL_PFDE; if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_MRLSP) val |= PCIEM_SLOT_CTL_MRLSCE; if (!(sc->pcie_slot_cap & PCIEM_SLOT_CAP_NCCS)) val |= PCIEM_SLOT_CTL_CCIE; - if (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) - val |= PCIEM_SLOT_CTL_DLLSCE; /* Turn the attention indicator off. */ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_AIP) { mask |= PCIEM_SLOT_CTL_AIC; val |= PCIEM_SLOT_CTL_AI_OFF; } pcib_pcie_hotplug_update(sc, val, mask, false); } static int pcib_detach_hotplug(struct pcib_softc *sc) { uint16_t mask, val; int error; /* Disable the card in the slot and force it to detach. */ if (sc->flags & PCIB_DETACH_PENDING) { sc->flags &= ~PCIB_DETACH_PENDING; callout_stop(&sc->pcie_ab_timer); } sc->flags |= PCIB_DETACHING; if (sc->flags & PCIB_HOTPLUG_CMD_PENDING) { callout_stop(&sc->pcie_cc_timer); tsleep(sc, 0, "hpcmd", hz); sc->flags &= ~PCIB_HOTPLUG_CMD_PENDING; } /* Disable HotPlug events. */ mask = PCIEM_SLOT_CTL_DLLSCE | PCIEM_SLOT_CTL_HPIE | PCIEM_SLOT_CTL_CCIE | PCIEM_SLOT_CTL_PDCE | PCIEM_SLOT_CTL_MRLSCE | PCIEM_SLOT_CTL_PFDE | PCIEM_SLOT_CTL_ABPE; val = 0; /* Turn the attention indicator off. 
*/ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_AIP) { mask |= PCIEM_SLOT_CTL_AIC; val |= PCIEM_SLOT_CTL_AI_OFF; } pcib_pcie_hotplug_update(sc, val, mask, false); error = pcib_release_pcie_irq(sc); if (error) return (error); taskqueue_drain(taskqueue_thread, &sc->pcie_hp_task); callout_drain(&sc->pcie_ab_timer); callout_drain(&sc->pcie_cc_timer); callout_drain(&sc->pcie_dll_timer); return (0); } #endif /* * Get current bridge configuration. */ static void pcib_cfg_save(struct pcib_softc *sc) { #ifndef NEW_PCIB device_t dev; uint16_t command; dev = sc->dev; command = pci_read_config(dev, PCIR_COMMAND, 2); if (command & PCIM_CMD_PORTEN) pcib_get_io_decode(sc); if (command & PCIM_CMD_MEMEN) pcib_get_mem_decode(sc); #endif } /* * Restore previous bridge configuration. */ static void pcib_cfg_restore(struct pcib_softc *sc) { device_t dev; #ifndef NEW_PCIB uint16_t command; #endif dev = sc->dev; #ifdef NEW_PCIB pcib_write_windows(sc, WIN_IO | WIN_MEM | WIN_PMEM); #else command = pci_read_config(dev, PCIR_COMMAND, 2); if (command & PCIM_CMD_PORTEN) pcib_set_io_decode(sc); if (command & PCIM_CMD_MEMEN) pcib_set_mem_decode(sc); #endif } /* * Generic device interface */ static int pcib_probe(device_t dev) { if ((pci_get_class(dev) == PCIC_BRIDGE) && (pci_get_subclass(dev) == PCIS_BRIDGE_PCI)) { device_set_desc(dev, "PCI-PCI bridge"); return(-10000); } return(ENXIO); } void pcib_attach_common(device_t dev) { struct pcib_softc *sc; struct sysctl_ctx_list *sctx; struct sysctl_oid *soid; int comma; sc = device_get_softc(dev); sc->dev = dev; /* * Get current bridge configuration. */ sc->domain = pci_get_domain(dev); #if !(defined(NEW_PCIB) && defined(PCI_RES_BUS)) sc->bus.sec = pci_read_config(dev, PCIR_SECBUS_1, 1); sc->bus.sub = pci_read_config(dev, PCIR_SUBBUS_1, 1); #endif sc->bridgectl = pci_read_config(dev, PCIR_BRIDGECTL_1, 2); pcib_cfg_save(sc); /* * The primary bus register should always be the bus of the * parent. */ sc->pribus = pci_get_bus(dev); pci_write_config(dev, PCIR_PRIBUS_1, sc->pribus, 1); /* * Setup sysctl reporting nodes */ sctx = device_get_sysctl_ctx(dev); soid = device_get_sysctl_tree(dev); SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "domain", CTLFLAG_RD, &sc->domain, 0, "Domain number"); SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "pribus", CTLFLAG_RD, &sc->pribus, 0, "Primary bus number"); SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "secbus", CTLFLAG_RD, &sc->bus.sec, 0, "Secondary bus number"); SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "subbus", CTLFLAG_RD, &sc->bus.sub, 0, "Subordinate bus number"); /* * Quirk handling. */ switch (pci_get_devid(dev)) { #if !(defined(NEW_PCIB) && defined(PCI_RES_BUS)) case 0x12258086: /* Intel 82454KX/GX (Orion) */ { uint8_t supbus; supbus = pci_read_config(dev, 0x41, 1); if (supbus != 0xff) { sc->bus.sec = supbus + 1; sc->bus.sub = supbus + 1; } break; } #endif /* * The i82380FB mobile docking controller is a PCI-PCI bridge, * and it is a subtractive bridge. However, the ProgIf is wrong * so the normal setting of PCIB_SUBTRACTIVE bit doesn't * happen. There are also Toshiba and Cavium ThunderX bridges * that behave this way. */ case 0xa002177d: /* Cavium ThunderX */ case 0x124b8086: /* Intel 82380FB Mobile */ case 0x060513d7: /* Toshiba ???? */ sc->flags |= PCIB_SUBTRACTIVE; break; #if !(defined(NEW_PCIB) && defined(PCI_RES_BUS)) /* Compaq R3000 BIOS sets wrong subordinate bus number. 
*/ case 0x00dd10de: { char *cp; if ((cp = kern_getenv("smbios.planar.maker")) == NULL) break; if (strncmp(cp, "Compal", 6) != 0) { freeenv(cp); break; } freeenv(cp); if ((cp = kern_getenv("smbios.planar.product")) == NULL) break; if (strncmp(cp, "08A0", 4) != 0) { freeenv(cp); break; } freeenv(cp); if (sc->bus.sub < 0xa) { pci_write_config(dev, PCIR_SUBBUS_1, 0xa, 1); sc->bus.sub = pci_read_config(dev, PCIR_SUBBUS_1, 1); } break; } #endif } if (pci_msi_device_blacklisted(dev)) sc->flags |= PCIB_DISABLE_MSI; if (pci_msix_device_blacklisted(dev)) sc->flags |= PCIB_DISABLE_MSIX; /* * Intel 815, 845 and other chipsets say they are PCI-PCI bridges, * but have a ProgIF of 0x80. The 82801 family (AA, AB, BAM/CAM, * BA/CA/DB and E) PCI bridges are HUB-PCI bridges, in Intelese. * This means they act as if they were subtractively decoding * bridges and pass all transactions. Mark them and real ProgIf 1 * parts as subtractive. */ if ((pci_get_devid(dev) & 0xff00ffff) == 0x24008086 || pci_read_config(dev, PCIR_PROGIF, 1) == PCIP_BRIDGE_PCI_SUBTRACTIVE) sc->flags |= PCIB_SUBTRACTIVE; #ifdef PCI_HP pcib_probe_hotplug(sc); #endif #ifdef NEW_PCIB #ifdef PCI_RES_BUS pcib_setup_secbus(dev, &sc->bus, 1); #endif pcib_probe_windows(sc); #endif #ifdef PCI_HP if (sc->flags & PCIB_HOTPLUG) pcib_setup_hotplug(sc); #endif if (bootverbose) { device_printf(dev, " domain %d\n", sc->domain); device_printf(dev, " secondary bus %d\n", sc->bus.sec); device_printf(dev, " subordinate bus %d\n", sc->bus.sub); #ifdef NEW_PCIB if (pcib_is_window_open(&sc->io)) device_printf(dev, " I/O decode 0x%jx-0x%jx\n", (uintmax_t)sc->io.base, (uintmax_t)sc->io.limit); if (pcib_is_window_open(&sc->mem)) device_printf(dev, " memory decode 0x%jx-0x%jx\n", (uintmax_t)sc->mem.base, (uintmax_t)sc->mem.limit); if (pcib_is_window_open(&sc->pmem)) device_printf(dev, " prefetched decode 0x%jx-0x%jx\n", (uintmax_t)sc->pmem.base, (uintmax_t)sc->pmem.limit); #else if (pcib_is_io_open(sc)) device_printf(dev, " I/O decode 0x%x-0x%x\n", sc->iobase, sc->iolimit); if (pcib_is_nonprefetch_open(sc)) device_printf(dev, " memory decode 0x%jx-0x%jx\n", (uintmax_t)sc->membase, (uintmax_t)sc->memlimit); if (pcib_is_prefetch_open(sc)) device_printf(dev, " prefetched decode 0x%jx-0x%jx\n", (uintmax_t)sc->pmembase, (uintmax_t)sc->pmemlimit); #endif if (sc->bridgectl & (PCIB_BCR_ISA_ENABLE | PCIB_BCR_VGA_ENABLE) || sc->flags & PCIB_SUBTRACTIVE) { device_printf(dev, " special decode "); comma = 0; if (sc->bridgectl & PCIB_BCR_ISA_ENABLE) { printf("ISA"); comma = 1; } if (sc->bridgectl & PCIB_BCR_VGA_ENABLE) { printf("%sVGA", comma ? ", " : ""); comma = 1; } if (sc->flags & PCIB_SUBTRACTIVE) printf("%ssubtractive", comma ? ", " : ""); printf("\n"); } } /* * Always enable busmastering on bridges so that transactions * initiated on the secondary bus are passed through to the * primary bus. */ pci_enable_busmaster(dev); } #ifdef PCI_HP static int pcib_present(struct pcib_softc *sc) { if (sc->flags & PCIB_HOTPLUG) return (pcib_hotplug_present(sc) != 0); return (1); } #endif int pcib_attach_child(device_t dev) { struct pcib_softc *sc; sc = device_get_softc(dev); if (sc->bus.sec == 0) { /* no secondary bus; we should have fixed this */ return(0); } #ifdef PCI_HP if (!pcib_present(sc)) { /* An empty HotPlug slot, so don't add a PCI bus yet. 
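	 * pcib_pcie_hotplug_task() will add the "pci" child later, once
	 * the slot reports a powered, link-active card.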
*/ return (0); } #endif sc->child = device_add_child(dev, "pci", -1); return (bus_generic_attach(dev)); } int pcib_attach(device_t dev) { pcib_attach_common(dev); return (pcib_attach_child(dev)); } int pcib_detach(device_t dev) { #if defined(PCI_HP) || defined(NEW_PCIB) struct pcib_softc *sc; #endif int error; #if defined(PCI_HP) || defined(NEW_PCIB) sc = device_get_softc(dev); #endif error = bus_generic_detach(dev); if (error) return (error); #ifdef PCI_HP if (sc->flags & PCIB_HOTPLUG) { error = pcib_detach_hotplug(sc); if (error) return (error); } #endif error = device_delete_children(dev); if (error) return (error); #ifdef NEW_PCIB pcib_free_windows(sc); #ifdef PCI_RES_BUS pcib_free_secbus(dev, &sc->bus); #endif #endif return (0); } int pcib_suspend(device_t dev) { pcib_cfg_save(device_get_softc(dev)); return (bus_generic_suspend(dev)); } int pcib_resume(device_t dev) { pcib_cfg_restore(device_get_softc(dev)); return (bus_generic_resume(dev)); } void pcib_bridge_init(device_t dev) { pci_write_config(dev, PCIR_IOBASEL_1, 0xff, 1); pci_write_config(dev, PCIR_IOBASEH_1, 0xffff, 2); pci_write_config(dev, PCIR_IOLIMITL_1, 0, 1); pci_write_config(dev, PCIR_IOLIMITH_1, 0, 2); pci_write_config(dev, PCIR_MEMBASE_1, 0xffff, 2); pci_write_config(dev, PCIR_MEMLIMIT_1, 0, 2); pci_write_config(dev, PCIR_PMBASEL_1, 0xffff, 2); pci_write_config(dev, PCIR_PMBASEH_1, 0xffffffff, 4); pci_write_config(dev, PCIR_PMLIMITL_1, 0, 2); pci_write_config(dev, PCIR_PMLIMITH_1, 0, 4); } int pcib_child_present(device_t dev, device_t child) { #ifdef PCI_HP struct pcib_softc *sc = device_get_softc(dev); int retval; retval = bus_child_present(dev); if (retval != 0 && sc->flags & PCIB_HOTPLUG) retval = pcib_hotplug_present(sc); return (retval); #else return (bus_child_present(dev)); #endif } int pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { struct pcib_softc *sc = device_get_softc(dev); switch (which) { case PCIB_IVAR_DOMAIN: *result = sc->domain; return(0); case PCIB_IVAR_BUS: *result = sc->bus.sec; return(0); } return(ENOENT); } int pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t value) { switch (which) { case PCIB_IVAR_DOMAIN: return(EINVAL); case PCIB_IVAR_BUS: return(EINVAL); } return(ENOENT); } #ifdef NEW_PCIB /* * Attempt to allocate a resource from the existing resources assigned * to a window. */ static struct resource * pcib_suballoc_resource(struct pcib_softc *sc, struct pcib_window *w, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct resource *res; if (!pcib_is_window_open(w)) return (NULL); res = rman_reserve_resource(&w->rman, start, end, count, flags & ~RF_ACTIVE, child); if (res == NULL) return (NULL); if (bootverbose) device_printf(sc->dev, "allocated %s range (%#jx-%#jx) for rid %x of %s\n", w->name, rman_get_start(res), rman_get_end(res), *rid, pcib_child_name(child)); rman_set_rid(res, *rid); /* * If the resource should be active, pass that request up the * tree. This assumes the parent drivers can handle * activating sub-allocated resources. */ if (flags & RF_ACTIVE) { if (bus_activate_resource(child, type, *rid, res) != 0) { rman_release_resource(res); return (NULL); } } return (res); } /* Allocate a fresh resource range for an unconfigured window. 
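 *
 * Editorial example of the alignment math (sample values assumed): a
 * memory window has w->step = 20, so wmask = 0xfffff and the window
 * moves in 1MB units; a request for 0x2000 bytes at
 * 0x80080000-0x80081fff is widened to start = 0x80000000,
 * end = 0x800fffff, count = 0x100000 before being passed to the parent.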
*/ static int pcib_alloc_new_window(struct pcib_softc *sc, struct pcib_window *w, int type, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct resource *res; rman_res_t base, limit, wmask; int rid; /* * If this is an I/O window on a bridge with ISA enable set * and the start address is below 64k, then try to allocate an * initial window of 0x1000 bytes long starting at address * 0xf000 and walking down. Note that if the original request * was larger than the non-aliased range size of 0x100 our * caller would have raised the start address up to 64k * already. */ if (type == SYS_RES_IOPORT && sc->bridgectl & PCIB_BCR_ISA_ENABLE && start < 65536) { for (base = 0xf000; (long)base >= 0; base -= 0x1000) { limit = base + 0xfff; /* * Skip ranges that wouldn't work for the * original request. Note that the actual * window that overlaps are the non-alias * ranges within [base, limit], so this isn't * quite a simple comparison. */ if (start + count > limit - 0x400) continue; if (base == 0) { /* * The first open region for the window at * 0 is 0x400-0x4ff. */ if (end - count + 1 < 0x400) continue; } else { if (end - count + 1 < base) continue; } if (pcib_alloc_nonisa_ranges(sc, base, limit) == 0) { w->base = base; w->limit = limit; return (0); } } return (ENOSPC); } wmask = ((rman_res_t)1 << w->step) - 1; if (RF_ALIGNMENT(flags) < w->step) { flags &= ~RF_ALIGNMENT_MASK; flags |= RF_ALIGNMENT_LOG2(w->step); } start &= ~wmask; end |= wmask; count = roundup2(count, (rman_res_t)1 << w->step); rid = w->reg; res = bus_alloc_resource(sc->dev, type, &rid, start, end, count, flags & ~RF_ACTIVE); if (res == NULL) return (ENOSPC); pcib_add_window_resources(w, &res, 1); pcib_activate_window(sc, type); w->base = rman_get_start(res); w->limit = rman_get_end(res); return (0); } /* Try to expand an existing window to the requested base and limit. */ static int pcib_expand_window(struct pcib_softc *sc, struct pcib_window *w, int type, rman_res_t base, rman_res_t limit) { struct resource *res; int error, i, force_64k_base; KASSERT(base <= w->base && limit >= w->limit, ("attempting to shrink window")); /* * XXX: pcib_grow_window() doesn't try to do this anyway and * the error handling for all the edge cases would be tedious. */ KASSERT(limit == w->limit || base == w->base, ("attempting to grow both ends of a window")); /* * Yet more special handling for requests to expand an I/O * window behind an ISA-enabled bridge. Since I/O windows * have to grow in 0x1000 increments and the end of the 0xffff * range is an alias, growing a window below 64k will always * result in allocating new resources and never adjusting an * existing resource. */ if (type == SYS_RES_IOPORT && sc->bridgectl & PCIB_BCR_ISA_ENABLE && (limit <= 65535 || (base <= 65535 && base != w->base))) { KASSERT(limit == w->limit || limit <= 65535, ("attempting to grow both ends across 64k ISA alias")); if (base != w->base) error = pcib_alloc_nonisa_ranges(sc, base, w->base - 1); else error = pcib_alloc_nonisa_ranges(sc, w->limit + 1, limit); if (error == 0) { w->base = base; w->limit = limit; } return (error); } /* * Find the existing resource to adjust. Usually there is only one, * but for an ISA-enabled bridge we might be growing the I/O window * above 64k and need to find the existing resource that maps all * of the area above 64k. 
*/ for (i = 0; i < w->count; i++) { if (rman_get_end(w->res[i]) == w->limit) break; } KASSERT(i != w->count, ("did not find existing resource")); res = w->res[i]; /* * Usually the resource we found should match the window's * existing range. The one exception is the ISA-enabled case * mentioned above in which case the resource should start at * 64k. */ if (type == SYS_RES_IOPORT && sc->bridgectl & PCIB_BCR_ISA_ENABLE && w->base <= 65535) { KASSERT(rman_get_start(res) == 65536, ("existing resource mismatch")); force_64k_base = 1; } else { KASSERT(w->base == rman_get_start(res), ("existing resource mismatch")); force_64k_base = 0; } error = bus_adjust_resource(sc->dev, type, res, force_64k_base ? rman_get_start(res) : base, limit); if (error) return (error); /* Add the newly allocated region to the resource manager. */ if (w->base != base) { error = rman_manage_region(&w->rman, base, w->base - 1); w->base = base; } else { error = rman_manage_region(&w->rman, w->limit + 1, limit); w->limit = limit; } if (error) { if (bootverbose) device_printf(sc->dev, "failed to expand %s resource manager\n", w->name); (void)bus_adjust_resource(sc->dev, type, res, force_64k_base ? rman_get_start(res) : w->base, w->limit); } return (error); } /* * Attempt to grow a window to make room for a given resource request. */ static int pcib_grow_window(struct pcib_softc *sc, struct pcib_window *w, int type, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { rman_res_t align, start_free, end_free, front, back, wmask; int error; /* * Clamp the desired resource range to the maximum address * this window supports. Reject impossible requests. * * For I/O port requests behind a bridge with the ISA enable * bit set, force large allocations to start above 64k. */ if (!w->valid) return (EINVAL); if (sc->bridgectl & PCIB_BCR_ISA_ENABLE && count > 0x100 && start < 65536) start = 65536; if (end > w->rman.rm_end) end = w->rman.rm_end; if (start + count - 1 > end || start + count < start) return (EINVAL); wmask = ((rman_res_t)1 << w->step) - 1; /* * If there is no resource at all, just try to allocate enough * aligned space for this resource. */ if (w->res == NULL) { error = pcib_alloc_new_window(sc, w, type, start, end, count, flags); if (error) { if (bootverbose) device_printf(sc->dev, "failed to allocate initial %s window (%#jx-%#jx,%#jx)\n", w->name, start, end, count); return (error); } if (bootverbose) device_printf(sc->dev, "allocated initial %s window of %#jx-%#jx\n", w->name, (uintmax_t)w->base, (uintmax_t)w->limit); goto updatewin; } /* * See if growing the window would help. Compute the minimum * amount of address space needed on both the front and back * ends of the existing window to satisfy the allocation. * * For each end, build a candidate region adjusting for the * required alignment, etc. If there is a free region at the * edge of the window, grow from the inner edge of the free * region. Otherwise grow from the window boundary. * * Growing an I/O window below 64k for a bridge with the ISA * enable bit doesn't require any special magic as the step * size of an I/O window (1k) always includes multiple * non-alias ranges when it is grown in either direction. * * XXX: Special case: if w->res is completely empty and the * request size is larger than w->res, we should find the * optimal aligned buffer containing w->res and allocate that. 
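	 *
	 * Editorial example (values and 0x800 alignment assumed): with an
	 * existing I/O window of 0x5000-0x5fff and a request for 0x800
	 * bytes anywhere in 0x4000-0x6fff, the front candidate grows the
	 * window to 0x4000-0x5fff and the back candidate to
	 * 0x5000-0x6fff.  Both cost 0x1000, so the front is attempted
	 * first.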
*/ if (bootverbose) device_printf(sc->dev, "attempting to grow %s window for (%#jx-%#jx,%#jx)\n", w->name, start, end, count); align = (rman_res_t)1 << RF_ALIGNMENT(flags); if (start < w->base) { if (rman_first_free_region(&w->rman, &start_free, &end_free) != 0 || start_free != w->base) end_free = w->base; if (end_free > end) end_free = end + 1; /* Move end_free down until it is properly aligned. */ end_free &= ~(align - 1); end_free--; front = end_free - (count - 1); /* * The resource would now be allocated at (front, * end_free). Ensure that fits in the (start, end) * bounds. end_free is checked above. If 'front' is * ok, ensure it is properly aligned for this window. * Also check for underflow. */ if (front >= start && front <= end_free) { if (bootverbose) printf("\tfront candidate range: %#jx-%#jx\n", front, end_free); front &= ~wmask; front = w->base - front; } else front = 0; } else front = 0; if (end > w->limit) { if (rman_last_free_region(&w->rman, &start_free, &end_free) != 0 || end_free != w->limit) start_free = w->limit + 1; if (start_free < start) start_free = start; /* Move start_free up until it is properly aligned. */ start_free = roundup2(start_free, align); back = start_free + count - 1; /* * The resource would now be allocated at (start_free, * back). Ensure that fits in the (start, end) * bounds. start_free is checked above. If 'back' is * ok, ensure it is properly aligned for this window. * Also check for overflow. */ if (back <= end && start_free <= back) { if (bootverbose) printf("\tback candidate range: %#jx-%#jx\n", start_free, back); back |= wmask; back -= w->limit; } else back = 0; } else back = 0; /* * Try to allocate the smallest needed region first. * If that fails, fall back to the other region. */ error = ENOSPC; while (front != 0 || back != 0) { if (front != 0 && (front <= back || back == 0)) { error = pcib_expand_window(sc, w, type, w->base - front, w->limit); if (error == 0) break; front = 0; } else { error = pcib_expand_window(sc, w, type, w->base, w->limit + back); if (error == 0) break; back = 0; } } if (error) return (error); if (bootverbose) device_printf(sc->dev, "grew %s window to %#jx-%#jx\n", w->name, (uintmax_t)w->base, (uintmax_t)w->limit); updatewin: /* Write the new window. */ KASSERT((w->base & wmask) == 0, ("start address is not aligned")); KASSERT((w->limit & wmask) == wmask, ("end address is not aligned")); pcib_write_windows(sc, w->mask); return (0); } /* * We have to trap resource allocation requests and ensure that the bridge * is set up to, or capable of handling them. */ struct resource * pcib_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct pcib_softc *sc; struct resource *r; sc = device_get_softc(dev); /* * VGA resources are decoded iff the VGA enable bit is set in * the bridge control register. VGA resources do not fall into * the resource windows and are passed up to the parent. 
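	 * (Conventionally, the legacy VGA ranges are the I/O ports
	 * 0x3b0-0x3bb and 0x3c0-0x3df and the memory range
	 * 0xa0000-0xbffff.)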
*/ if ((type == SYS_RES_IOPORT && pci_is_vga_ioport_range(start, end)) || (type == SYS_RES_MEMORY && pci_is_vga_memory_range(start, end))) { if (sc->bridgectl & PCIB_BCR_VGA_ENABLE) return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); else return (NULL); } switch (type) { #ifdef PCI_RES_BUS case PCI_RES_BUS: return (pcib_alloc_subbus(&sc->bus, child, rid, start, end, count, flags)); #endif case SYS_RES_IOPORT: if (pcib_is_isa_range(sc, start, end, count)) return (NULL); r = pcib_suballoc_resource(sc, &sc->io, child, type, rid, start, end, count, flags); if (r != NULL || (sc->flags & PCIB_SUBTRACTIVE) != 0) break; if (pcib_grow_window(sc, &sc->io, type, start, end, count, flags) == 0) r = pcib_suballoc_resource(sc, &sc->io, child, type, rid, start, end, count, flags); break; case SYS_RES_MEMORY: /* * For prefetchable resources, prefer the prefetchable * memory window, but fall back to the regular memory * window if that fails. Try both windows before * attempting to grow a window in case the firmware * has used a range in the regular memory window to * map a prefetchable BAR. */ if (flags & RF_PREFETCHABLE) { r = pcib_suballoc_resource(sc, &sc->pmem, child, type, rid, start, end, count, flags); if (r != NULL) break; } r = pcib_suballoc_resource(sc, &sc->mem, child, type, rid, start, end, count, flags); if (r != NULL || (sc->flags & PCIB_SUBTRACTIVE) != 0) break; if (flags & RF_PREFETCHABLE) { if (pcib_grow_window(sc, &sc->pmem, type, start, end, count, flags) == 0) { r = pcib_suballoc_resource(sc, &sc->pmem, child, type, rid, start, end, count, flags); if (r != NULL) break; } } if (pcib_grow_window(sc, &sc->mem, type, start, end, count, flags & ~RF_PREFETCHABLE) == 0) r = pcib_suballoc_resource(sc, &sc->mem, child, type, rid, start, end, count, flags); break; default: return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); } /* * If attempts to suballocate from the window fail but this is a * subtractive bridge, pass the request up the tree. */ if (sc->flags & PCIB_SUBTRACTIVE && r == NULL) return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); return (r); } int pcib_adjust_resource(device_t bus, device_t child, int type, struct resource *r, rman_res_t start, rman_res_t end) { struct pcib_softc *sc; sc = device_get_softc(bus); if (pcib_is_resource_managed(sc, type, r)) return (rman_adjust_resource(r, start, end)); return (bus_generic_adjust_resource(bus, child, type, r, start, end)); } int pcib_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { struct pcib_softc *sc; int error; sc = device_get_softc(dev); if (pcib_is_resource_managed(sc, type, r)) { if (rman_get_flags(r) & RF_ACTIVE) { error = bus_deactivate_resource(child, type, rid, r); if (error) return (error); } return (rman_release_resource(r)); } return (bus_generic_release_resource(dev, child, type, rid, r)); } #else /* * We have to trap resource allocation requests and ensure that the bridge * is set up to, or capable of handling them. */ struct resource * pcib_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct pcib_softc *sc = device_get_softc(dev); const char *name, *suffix; int ok; /* * Fail the allocation for this range if it's not supported. 
*/ name = device_get_nameunit(child); if (name == NULL) { name = ""; suffix = ""; } else suffix = " "; switch (type) { case SYS_RES_IOPORT: ok = 0; if (!pcib_is_io_open(sc)) break; ok = (start >= sc->iobase && end <= sc->iolimit); /* * Make sure we allow access to VGA I/O addresses when the * bridge has the "VGA Enable" bit set. */ if (!ok && pci_is_vga_ioport_range(start, end)) ok = (sc->bridgectl & PCIB_BCR_VGA_ENABLE) ? 1 : 0; if ((sc->flags & PCIB_SUBTRACTIVE) == 0) { if (!ok) { if (start < sc->iobase) start = sc->iobase; if (end > sc->iolimit) end = sc->iolimit; if (start < end) ok = 1; } } else { ok = 1; #if 0 /* * If we overlap with the subtractive range, then * pick the upper range to use. */ if (start < sc->iolimit && end > sc->iobase) start = sc->iolimit + 1; #endif } if (end < start) { device_printf(dev, "ioport: end (%jx) < start (%jx)\n", end, start); start = 0; end = 0; ok = 0; } if (!ok) { device_printf(dev, "%s%srequested unsupported I/O " "range 0x%jx-0x%jx (decoding 0x%x-0x%x)\n", name, suffix, start, end, sc->iobase, sc->iolimit); return (NULL); } if (bootverbose) device_printf(dev, "%s%srequested I/O range 0x%jx-0x%jx: in range\n", name, suffix, start, end); break; case SYS_RES_MEMORY: ok = 0; if (pcib_is_nonprefetch_open(sc)) ok = ok || (start >= sc->membase && end <= sc->memlimit); if (pcib_is_prefetch_open(sc)) ok = ok || (start >= sc->pmembase && end <= sc->pmemlimit); /* * Make sure we allow access to VGA memory addresses when the * bridge has the "VGA Enable" bit set. */ if (!ok && pci_is_vga_memory_range(start, end)) ok = (sc->bridgectl & PCIB_BCR_VGA_ENABLE) ? 1 : 0; if ((sc->flags & PCIB_SUBTRACTIVE) == 0) { if (!ok) { ok = 1; if (flags & RF_PREFETCHABLE) { if (pcib_is_prefetch_open(sc)) { if (start < sc->pmembase) start = sc->pmembase; if (end > sc->pmemlimit) end = sc->pmemlimit; } else { ok = 0; } } else { /* non-prefetchable */ if (pcib_is_nonprefetch_open(sc)) { if (start < sc->membase) start = sc->membase; if (end > sc->memlimit) end = sc->memlimit; } else { ok = 0; } } } } else if (!ok) { ok = 1; /* subtractive bridge: always ok */ #if 0 if (pcib_is_nonprefetch_open(sc)) { if (start < sc->memlimit && end > sc->membase) start = sc->memlimit + 1; } if (pcib_is_prefetch_open(sc)) { if (start < sc->pmemlimit && end > sc->pmembase) start = sc->pmemlimit + 1; } #endif } if (end < start) { device_printf(dev, "memory: end (%jx) < start (%jx)\n", end, start); start = 0; end = 0; ok = 0; } if (!ok && bootverbose) device_printf(dev, "%s%srequested unsupported memory range %#jx-%#jx " "(decoding %#jx-%#jx, %#jx-%#jx)\n", name, suffix, start, end, (uintmax_t)sc->membase, (uintmax_t)sc->memlimit, (uintmax_t)sc->pmembase, (uintmax_t)sc->pmemlimit); if (!ok) return (NULL); if (bootverbose) device_printf(dev,"%s%srequested memory range " "0x%jx-0x%jx: good\n", name, suffix, start, end); break; default: break; } /* * Bridge is OK decoding this resource, so pass it up. */ return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); } #endif /* * If ARI is enabled on this downstream port, translate the function number * to the non-ARI slot/function. The downstream port will convert it back in * hardware. If ARI is not enabled slot and func are not modified. 
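 *
 * Editorial example (macro behavior inferred from usage below): ARI
 * replaces the 5-bit slot / 3-bit function split with a single 8-bit
 * function number, so PCIE_ARI_SLOT() and PCIE_ARI_FUNC() recover the
 * traditional pair as func >> 3 and func & 7; ARI function 26 (0x1a)
 * maps to slot 3, function 2.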
*/ static __inline void pcib_xlate_ari(device_t pcib, int bus, int *slot, int *func) { struct pcib_softc *sc; int ari_func; sc = device_get_softc(pcib); ari_func = *func; if (sc->flags & PCIB_ENABLE_ARI) { KASSERT(*slot == 0, ("Non-zero slot number with ARI enabled!")); *slot = PCIE_ARI_SLOT(ari_func); *func = PCIE_ARI_FUNC(ari_func); } } static void pcib_enable_ari(struct pcib_softc *sc, uint32_t pcie_pos) { uint32_t ctl2; ctl2 = pci_read_config(sc->dev, pcie_pos + PCIER_DEVICE_CTL2, 4); ctl2 |= PCIEM_CTL2_ARI; pci_write_config(sc->dev, pcie_pos + PCIER_DEVICE_CTL2, ctl2, 4); sc->flags |= PCIB_ENABLE_ARI; } /* * PCIB interface. */ int pcib_maxslots(device_t dev) { return (PCI_SLOTMAX); } static int pcib_ari_maxslots(device_t dev) { struct pcib_softc *sc; sc = device_get_softc(dev); if (sc->flags & PCIB_ENABLE_ARI) return (PCIE_ARI_SLOTMAX); else return (PCI_SLOTMAX); } static int pcib_ari_maxfuncs(device_t dev) { struct pcib_softc *sc; sc = device_get_softc(dev); if (sc->flags & PCIB_ENABLE_ARI) return (PCIE_ARI_FUNCMAX); else return (PCI_FUNCMAX); } static void pcib_ari_decode_rid(device_t pcib, uint16_t rid, int *bus, int *slot, int *func) { struct pcib_softc *sc; sc = device_get_softc(pcib); *bus = PCI_RID2BUS(rid); if (sc->flags & PCIB_ENABLE_ARI) { *slot = PCIE_ARI_RID2SLOT(rid); *func = PCIE_ARI_RID2FUNC(rid); } else { *slot = PCI_RID2SLOT(rid); *func = PCI_RID2FUNC(rid); } } /* * Since we are a child of a PCI bus, its parent must support the pcib interface. */ static uint32_t pcib_read_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, int width) { #ifdef PCI_HP struct pcib_softc *sc; sc = device_get_softc(dev); if (!pcib_present(sc)) { switch (width) { case 2: return (0xffff); case 1: return (0xff); default: return (0xffffffff); } } #endif pcib_xlate_ari(dev, b, &s, &f); return(PCIB_READ_CONFIG(device_get_parent(device_get_parent(dev)), b, s, f, reg, width)); } static void pcib_write_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, uint32_t val, int width) { #ifdef PCI_HP struct pcib_softc *sc; sc = device_get_softc(dev); if (!pcib_present(sc)) return; #endif pcib_xlate_ari(dev, b, &s, &f); PCIB_WRITE_CONFIG(device_get_parent(device_get_parent(dev)), b, s, f, reg, val, width); } /* * Route an interrupt across a PCI bridge. */ int pcib_route_interrupt(device_t pcib, device_t dev, int pin) { device_t bus; int parent_intpin; int intnum; /* * * The PCI standard defines a swizzle of the child-side device/intpin to * the parent-side intpin as follows. * * device = device on child bus * child_intpin = intpin on child bus slot (0-3) * parent_intpin = intpin on parent bus slot (0-3) * * parent_intpin = (device + child_intpin) % 4 */ parent_intpin = (pci_get_slot(dev) + (pin - 1)) % 4; /* * Our parent is a PCI bus. Its parent must export the pcib interface * which includes the ability to route interrupts. */ bus = device_get_parent(pcib); intnum = PCIB_ROUTE_INTERRUPT(device_get_parent(bus), pcib, parent_intpin + 1); if (PCI_INTERRUPT_VALID(intnum) && bootverbose) { device_printf(pcib, "slot %d INT%c is routed to irq %d\n", pci_get_slot(dev), 'A' + pin - 1, intnum); } return(intnum); } /* Pass request to alloc MSI/MSI-X messages up to the parent bridge. 
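 */

/*
 * Editorial sketch (compiled out, not from the original source): the
 * standard swizzle applied by pcib_route_interrupt() above, written as
 * a pure function.  A device in slot 2 asserting INTB (pin 2) routes to
 * the parent's INTD: ((2 + (2 - 1)) % 4) + 1 == 4.
 */
#if 0
static int
pci_swizzle_intpin_example(int slot, int pin)
{

	/* pin is 1-4 for INTA-INTD; the result uses the same encoding. */
	return (((slot + (pin - 1)) % 4) + 1);
}
#endif

/*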
*/ int pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs) { struct pcib_softc *sc = device_get_softc(pcib); device_t bus; if (sc->flags & PCIB_DISABLE_MSI) return (ENXIO); bus = device_get_parent(pcib); return (PCIB_ALLOC_MSI(device_get_parent(bus), dev, count, maxcount, irqs)); } /* Pass request to release MSI/MSI-X messages up to the parent bridge. */ int pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs) { device_t bus; bus = device_get_parent(pcib); return (PCIB_RELEASE_MSI(device_get_parent(bus), dev, count, irqs)); } /* Pass request to alloc an MSI-X message up to the parent bridge. */ int pcib_alloc_msix(device_t pcib, device_t dev, int *irq) { struct pcib_softc *sc = device_get_softc(pcib); device_t bus; if (sc->flags & PCIB_DISABLE_MSIX) return (ENXIO); bus = device_get_parent(pcib); return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, irq)); } /* Pass request to release an MSI-X message up to the parent bridge. */ int pcib_release_msix(device_t pcib, device_t dev, int irq) { device_t bus; bus = device_get_parent(pcib); return (PCIB_RELEASE_MSIX(device_get_parent(bus), dev, irq)); } /* Pass request to map MSI/MSI-X message up to parent bridge. */ int pcib_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr, uint32_t *data) { device_t bus; int error; bus = device_get_parent(pcib); error = PCIB_MAP_MSI(device_get_parent(bus), dev, irq, addr, data); if (error) return (error); pci_ht_map_msi(pcib, *addr); return (0); } /* Pass request for device power state up to parent bridge. */ int pcib_power_for_sleep(device_t pcib, device_t dev, int *pstate) { device_t bus; bus = device_get_parent(pcib); return (PCIB_POWER_FOR_SLEEP(bus, dev, pstate)); } static int pcib_ari_enabled(device_t pcib) { struct pcib_softc *sc; sc = device_get_softc(pcib); return ((sc->flags & PCIB_ENABLE_ARI) != 0); } static int pcib_ari_get_id(device_t pcib, device_t dev, enum pci_id_type type, uintptr_t *id) { struct pcib_softc *sc; device_t bus_dev; uint8_t bus, slot, func; if (type != PCI_ID_RID) { bus_dev = device_get_parent(pcib); return (PCIB_GET_ID(device_get_parent(bus_dev), dev, type, id)); } sc = device_get_softc(pcib); if (sc->flags & PCIB_ENABLE_ARI) { bus = pci_get_bus(dev); func = pci_get_function(dev); *id = (PCI_ARI_RID(bus, func)); } else { bus = pci_get_bus(dev); slot = pci_get_slot(dev); func = pci_get_function(dev); *id = (PCI_RID(bus, slot, func)); } return (0); } /* * Check that the downstream port (pcib) and the endpoint device (dev) both * support ARI. If so, enable it and return 0, otherwise return an error. */ static int pcib_try_enable_ari(device_t pcib, device_t dev) { struct pcib_softc *sc; int error; uint32_t cap2; int ari_cap_off; uint32_t ari_ver; uint32_t pcie_pos; sc = device_get_softc(pcib); /* * ARI is controlled in a register in the PCIe capability structure. * If the downstream port does not have the PCIe capability structure * then it does not support ARI. */ error = pci_find_cap(pcib, PCIY_EXPRESS, &pcie_pos); if (error != 0) return (ENODEV); /* Check that the PCIe port advertises ARI support. */ cap2 = pci_read_config(pcib, pcie_pos + PCIER_DEVICE_CAP2, 4); if (!(cap2 & PCIEM_CAP2_ARI)) return (ENODEV); /* * Check that the endpoint device advertises ARI support via the ARI * extended capability structure. */ error = pci_find_extcap(dev, PCIZ_ARI, &ari_cap_off); if (error != 0) return (ENODEV); /* * Finally, check that the endpoint device supports the same version * of ARI that we do. 
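 *
 * A sketch of the header decode (assuming the standard PCIe extended
 * capability header layout: capability ID in bits 15:0, version in
 * bits 19:16, next-capability offset in bits 31:20):
 *
 *	hdr = pci_read_config(dev, ari_cap_off, 4);
 *	ver = (hdr >> 16) & 0xf;	-- what PCI_EXTCAP_VER() extracts
 *
 * which is why the single 32-bit read below suffices for the version
 * comparison.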
*/ ari_ver = pci_read_config(dev, ari_cap_off, 4); if (PCI_EXTCAP_VER(ari_ver) != PCIB_SUPPORTED_ARI_VER) { if (bootverbose) device_printf(pcib, "Unsupported version of ARI (%d) detected\n", PCI_EXTCAP_VER(ari_ver)); return (ENXIO); } pcib_enable_ari(sc, pcie_pos); return (0); } Index: projects/ipsec/sys/dev/pci/pcib_private.h =================================================================== --- projects/ipsec/sys/dev/pci/pcib_private.h (revision 313186) +++ projects/ipsec/sys/dev/pci/pcib_private.h (revision 313187) @@ -1,198 +1,197 @@ /*- * Copyright (c) 1994,1995 Stefan Esser, Wolfgang StanglMeier * Copyright (c) 2000 Michael Smith * Copyright (c) 2000 BSDi * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __PCIB_PRIVATE_H__ #define __PCIB_PRIVATE_H__ #include #include #ifdef NEW_PCIB /* * Data structure and routines that Host to PCI bridge drivers can use * to restrict allocations for child devices to ranges decoded by the * bridge. */ struct pcib_host_resources { device_t hr_pcib; struct resource_list hr_rl; }; int pcib_host_res_init(device_t pcib, struct pcib_host_resources *hr); int pcib_host_res_free(device_t pcib, struct pcib_host_resources *hr); int pcib_host_res_decodes(struct pcib_host_resources *hr, int type, rman_res_t start, rman_res_t end, u_int flags); struct resource *pcib_host_res_alloc(struct pcib_host_resources *hr, device_t dev, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags); int pcib_host_res_adjust(struct pcib_host_resources *hr, device_t dev, int type, struct resource *r, rman_res_t start, rman_res_t end); #endif /* * Export portions of generic PCI:PCI bridge support so that it can be * used by subclasses. 
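 *
 * An illustrative subclass declaration (editor's sketch; "acme_pcib" is
 * a hypothetical driver name, not part of the tree): kobj single
 * inheritance lets a subclass override selected methods only:
 *
 *	static device_method_t acme_pcib_methods[] = {
 *		DEVMETHOD(device_attach,	acme_pcib_attach),
 *		DEVMETHOD_END
 *	};
 *	DEFINE_CLASS_1(pcib, acme_pcib_driver, acme_pcib_methods,
 *	    sizeof(struct pcib_softc), pcib_driver);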
*/ DECLARE_CLASS(pcib_driver); #ifdef NEW_PCIB #define WIN_IO 0x1 #define WIN_MEM 0x2 #define WIN_PMEM 0x4 struct pcib_window { pci_addr_t base; /* base address */ pci_addr_t limit; /* topmost address */ struct rman rman; struct resource **res; int count; /* size of 'res' array */ int reg; /* resource id from parent */ int valid; int mask; /* WIN_* bitmask of this window */ int step; /* log_2 of window granularity */ const char *name; }; #endif struct pcib_secbus { u_int sec; u_int sub; #if defined(NEW_PCIB) && defined(PCI_RES_BUS) device_t dev; struct rman rman; struct resource *res; const char *name; int sub_reg; #endif }; /* * Bridge-specific data. */ struct pcib_softc { device_t dev; device_t child; uint32_t flags; /* flags */ #define PCIB_SUBTRACTIVE 0x1 #define PCIB_DISABLE_MSI 0x2 #define PCIB_DISABLE_MSIX 0x4 #define PCIB_ENABLE_ARI 0x8 #define PCIB_HOTPLUG 0x10 #define PCIB_HOTPLUG_CMD_PENDING 0x20 #define PCIB_DETACH_PENDING 0x40 #define PCIB_DETACHING 0x80 u_int domain; /* domain number */ u_int pribus; /* primary bus number */ struct pcib_secbus bus; /* secondary bus numbers */ #ifdef NEW_PCIB struct pcib_window io; /* I/O port window */ struct pcib_window mem; /* memory window */ struct pcib_window pmem; /* prefetchable memory window */ #else pci_addr_t pmembase; /* base address of prefetchable memory */ pci_addr_t pmemlimit; /* topmost address of prefetchable memory */ pci_addr_t membase; /* base address of memory window */ pci_addr_t memlimit; /* topmost address of memory window */ uint32_t iobase; /* base address of port window */ uint32_t iolimit; /* topmost address of port window */ #endif uint16_t bridgectl; /* bridge control register */ uint16_t pcie_link_sta; uint16_t pcie_slot_sta; - uint32_t pcie_link_cap; uint32_t pcie_slot_cap; struct resource *pcie_irq; void *pcie_ihand; struct task pcie_hp_task; struct callout pcie_ab_timer; struct callout pcie_cc_timer; struct callout pcie_dll_timer; }; #define PCIB_SUPPORTED_ARI_VER 1 typedef uint32_t pci_read_config_fn(int b, int s, int f, int reg, int width); int host_pcib_get_busno(pci_read_config_fn read_config, int bus, int slot, int func, uint8_t *busnum); #if defined(NEW_PCIB) && defined(PCI_RES_BUS) struct resource *pci_domain_alloc_bus(int domain, device_t dev, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags); int pci_domain_adjust_bus(int domain, device_t dev, struct resource *r, rman_res_t start, rman_res_t end); int pci_domain_release_bus(int domain, device_t dev, int rid, struct resource *r); struct resource *pcib_alloc_subbus(struct pcib_secbus *bus, device_t child, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags); void pcib_free_secbus(device_t dev, struct pcib_secbus *bus); void pcib_setup_secbus(device_t dev, struct pcib_secbus *bus, int min_count); #endif int pcib_attach(device_t dev); int pcib_attach_child(device_t dev); void pcib_attach_common(device_t dev); void pcib_bridge_init(device_t dev); #ifdef NEW_PCIB const char *pcib_child_name(device_t child); #endif int pcib_child_present(device_t dev, device_t child); int pcib_detach(device_t dev); int pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *result); int pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t value); struct resource *pcib_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags); #ifdef NEW_PCIB int pcib_adjust_resource(device_t bus, device_t child, int type, struct resource *r, 
rman_res_t start, rman_res_t end); int pcib_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r); #endif int pcib_maxslots(device_t dev); int pcib_maxfuncs(device_t dev); int pcib_route_interrupt(device_t pcib, device_t dev, int pin); int pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs); int pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs); int pcib_alloc_msix(device_t pcib, device_t dev, int *irq); int pcib_release_msix(device_t pcib, device_t dev, int irq); int pcib_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr, uint32_t *data); int pcib_get_id(device_t pcib, device_t dev, enum pci_id_type type, uintptr_t *id); void pcib_decode_rid(device_t pcib, uint16_t rid, int *bus, int *slot, int *func); #endif Index: projects/ipsec/sys/geom/part/g_part_gpt.c =================================================================== --- projects/ipsec/sys/geom/part/g_part_gpt.c (revision 313186) +++ projects/ipsec/sys/geom/part/g_part_gpt.c (revision 313187) @@ -1,1391 +1,1392 @@ /*- * Copyright (c) 2002, 2005-2007, 2011 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "g_part_if.h" FEATURE(geom_part_gpt, "GEOM partitioning class for GPT partitions support"); CTASSERT(offsetof(struct gpt_hdr, padding) == 92); CTASSERT(sizeof(struct gpt_ent) == 128); #define EQUUID(a,b) (memcmp(a, b, sizeof(struct uuid)) == 0) #define MBRSIZE 512 enum gpt_elt { GPT_ELT_PRIHDR, GPT_ELT_PRITBL, GPT_ELT_SECHDR, GPT_ELT_SECTBL, GPT_ELT_COUNT }; enum gpt_state { GPT_STATE_UNKNOWN, /* Not determined. */ GPT_STATE_MISSING, /* No signature found. */ GPT_STATE_CORRUPT, /* Checksum mismatch. */ GPT_STATE_INVALID, /* Nonconformant/invalid. */ GPT_STATE_OK /* Perfectly fine. 
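 *
 * gpt_read_hdr() below promotes an element through these states as its
 * checks pass, so the final value records how far validation got.  A
 * sketch of the progression for a healthy header:
 *
 *	MISSING   until the EFI PART signature matches,
 *	CORRUPT   until the self-CRC verifies,
 *	INVALID   until the revision, LBAs and table geometry are sane,
 *	OK        once every check has passed.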
*/ }; struct g_part_gpt_table { struct g_part_table base; u_char mbr[MBRSIZE]; struct gpt_hdr *hdr; quad_t lba[GPT_ELT_COUNT]; enum gpt_state state[GPT_ELT_COUNT]; int bootcamp; }; struct g_part_gpt_entry { struct g_part_entry base; struct gpt_ent ent; }; static void g_gpt_printf_utf16(struct sbuf *, uint16_t *, size_t); static void g_gpt_utf8_to_utf16(const uint8_t *, uint16_t *, size_t); static void g_gpt_set_defaults(struct g_part_table *, struct g_provider *); static int g_part_gpt_add(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static int g_part_gpt_bootcode(struct g_part_table *, struct g_part_parms *); static int g_part_gpt_create(struct g_part_table *, struct g_part_parms *); static int g_part_gpt_destroy(struct g_part_table *, struct g_part_parms *); static void g_part_gpt_dumpconf(struct g_part_table *, struct g_part_entry *, struct sbuf *, const char *); static int g_part_gpt_dumpto(struct g_part_table *, struct g_part_entry *); static int g_part_gpt_modify(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static const char *g_part_gpt_name(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_gpt_probe(struct g_part_table *, struct g_consumer *); static int g_part_gpt_read(struct g_part_table *, struct g_consumer *); static int g_part_gpt_setunset(struct g_part_table *table, struct g_part_entry *baseentry, const char *attrib, unsigned int set); static const char *g_part_gpt_type(struct g_part_table *, struct g_part_entry *, char *, size_t); static int g_part_gpt_write(struct g_part_table *, struct g_consumer *); static int g_part_gpt_resize(struct g_part_table *, struct g_part_entry *, struct g_part_parms *); static int g_part_gpt_recover(struct g_part_table *); static kobj_method_t g_part_gpt_methods[] = { KOBJMETHOD(g_part_add, g_part_gpt_add), KOBJMETHOD(g_part_bootcode, g_part_gpt_bootcode), KOBJMETHOD(g_part_create, g_part_gpt_create), KOBJMETHOD(g_part_destroy, g_part_gpt_destroy), KOBJMETHOD(g_part_dumpconf, g_part_gpt_dumpconf), KOBJMETHOD(g_part_dumpto, g_part_gpt_dumpto), KOBJMETHOD(g_part_modify, g_part_gpt_modify), KOBJMETHOD(g_part_resize, g_part_gpt_resize), KOBJMETHOD(g_part_name, g_part_gpt_name), KOBJMETHOD(g_part_probe, g_part_gpt_probe), KOBJMETHOD(g_part_read, g_part_gpt_read), KOBJMETHOD(g_part_recover, g_part_gpt_recover), KOBJMETHOD(g_part_setunset, g_part_gpt_setunset), KOBJMETHOD(g_part_type, g_part_gpt_type), KOBJMETHOD(g_part_write, g_part_gpt_write), { 0, 0 } }; static struct g_part_scheme g_part_gpt_scheme = { "GPT", g_part_gpt_methods, sizeof(struct g_part_gpt_table), .gps_entrysz = sizeof(struct g_part_gpt_entry), .gps_minent = 128, .gps_maxent = 4096, .gps_bootcodesz = MBRSIZE, }; G_PART_SCHEME_DECLARE(g_part_gpt); static struct uuid gpt_uuid_apple_boot = GPT_ENT_TYPE_APPLE_BOOT; static struct uuid gpt_uuid_apple_core_storage = GPT_ENT_TYPE_APPLE_CORE_STORAGE; static struct uuid gpt_uuid_apple_hfs = GPT_ENT_TYPE_APPLE_HFS; static struct uuid gpt_uuid_apple_label = GPT_ENT_TYPE_APPLE_LABEL; static struct uuid gpt_uuid_apple_raid = GPT_ENT_TYPE_APPLE_RAID; static struct uuid gpt_uuid_apple_raid_offline = GPT_ENT_TYPE_APPLE_RAID_OFFLINE; static struct uuid gpt_uuid_apple_tv_recovery = GPT_ENT_TYPE_APPLE_TV_RECOVERY; static struct uuid gpt_uuid_apple_ufs = GPT_ENT_TYPE_APPLE_UFS; static struct uuid gpt_uuid_bios_boot = GPT_ENT_TYPE_BIOS_BOOT; static struct uuid gpt_uuid_chromeos_firmware = GPT_ENT_TYPE_CHROMEOS_FIRMWARE; static struct uuid gpt_uuid_chromeos_kernel = 
GPT_ENT_TYPE_CHROMEOS_KERNEL; static struct uuid gpt_uuid_chromeos_reserved = GPT_ENT_TYPE_CHROMEOS_RESERVED; static struct uuid gpt_uuid_chromeos_root = GPT_ENT_TYPE_CHROMEOS_ROOT; static struct uuid gpt_uuid_dfbsd_ccd = GPT_ENT_TYPE_DRAGONFLY_CCD; static struct uuid gpt_uuid_dfbsd_hammer = GPT_ENT_TYPE_DRAGONFLY_HAMMER; static struct uuid gpt_uuid_dfbsd_hammer2 = GPT_ENT_TYPE_DRAGONFLY_HAMMER2; static struct uuid gpt_uuid_dfbsd_label32 = GPT_ENT_TYPE_DRAGONFLY_LABEL32; static struct uuid gpt_uuid_dfbsd_label64 = GPT_ENT_TYPE_DRAGONFLY_LABEL64; static struct uuid gpt_uuid_dfbsd_legacy = GPT_ENT_TYPE_DRAGONFLY_LEGACY; static struct uuid gpt_uuid_dfbsd_swap = GPT_ENT_TYPE_DRAGONFLY_SWAP; static struct uuid gpt_uuid_dfbsd_ufs1 = GPT_ENT_TYPE_DRAGONFLY_UFS1; static struct uuid gpt_uuid_dfbsd_vinum = GPT_ENT_TYPE_DRAGONFLY_VINUM; static struct uuid gpt_uuid_efi = GPT_ENT_TYPE_EFI; static struct uuid gpt_uuid_freebsd = GPT_ENT_TYPE_FREEBSD; static struct uuid gpt_uuid_freebsd_boot = GPT_ENT_TYPE_FREEBSD_BOOT; static struct uuid gpt_uuid_freebsd_nandfs = GPT_ENT_TYPE_FREEBSD_NANDFS; static struct uuid gpt_uuid_freebsd_swap = GPT_ENT_TYPE_FREEBSD_SWAP; static struct uuid gpt_uuid_freebsd_ufs = GPT_ENT_TYPE_FREEBSD_UFS; static struct uuid gpt_uuid_freebsd_vinum = GPT_ENT_TYPE_FREEBSD_VINUM; static struct uuid gpt_uuid_freebsd_zfs = GPT_ENT_TYPE_FREEBSD_ZFS; static struct uuid gpt_uuid_linux_data = GPT_ENT_TYPE_LINUX_DATA; static struct uuid gpt_uuid_linux_lvm = GPT_ENT_TYPE_LINUX_LVM; static struct uuid gpt_uuid_linux_raid = GPT_ENT_TYPE_LINUX_RAID; static struct uuid gpt_uuid_linux_swap = GPT_ENT_TYPE_LINUX_SWAP; static struct uuid gpt_uuid_mbr = GPT_ENT_TYPE_MBR; static struct uuid gpt_uuid_ms_basic_data = GPT_ENT_TYPE_MS_BASIC_DATA; static struct uuid gpt_uuid_ms_ldm_data = GPT_ENT_TYPE_MS_LDM_DATA; static struct uuid gpt_uuid_ms_ldm_metadata = GPT_ENT_TYPE_MS_LDM_METADATA; static struct uuid gpt_uuid_ms_recovery = GPT_ENT_TYPE_MS_RECOVERY; static struct uuid gpt_uuid_ms_reserved = GPT_ENT_TYPE_MS_RESERVED; static struct uuid gpt_uuid_ms_spaces = GPT_ENT_TYPE_MS_SPACES; static struct uuid gpt_uuid_netbsd_ccd = GPT_ENT_TYPE_NETBSD_CCD; static struct uuid gpt_uuid_netbsd_cgd = GPT_ENT_TYPE_NETBSD_CGD; static struct uuid gpt_uuid_netbsd_ffs = GPT_ENT_TYPE_NETBSD_FFS; static struct uuid gpt_uuid_netbsd_lfs = GPT_ENT_TYPE_NETBSD_LFS; static struct uuid gpt_uuid_netbsd_raid = GPT_ENT_TYPE_NETBSD_RAID; static struct uuid gpt_uuid_netbsd_swap = GPT_ENT_TYPE_NETBSD_SWAP; static struct uuid gpt_uuid_openbsd_data = GPT_ENT_TYPE_OPENBSD_DATA; static struct uuid gpt_uuid_prep_boot = GPT_ENT_TYPE_PREP_BOOT; static struct uuid gpt_uuid_unused = GPT_ENT_TYPE_UNUSED; static struct uuid gpt_uuid_vmfs = GPT_ENT_TYPE_VMFS; static struct uuid gpt_uuid_vmkdiag = GPT_ENT_TYPE_VMKDIAG; static struct uuid gpt_uuid_vmreserved = GPT_ENT_TYPE_VMRESERVED; static struct uuid gpt_uuid_vmvsanhdr = GPT_ENT_TYPE_VMVSANHDR; static struct g_part_uuid_alias { struct uuid *uuid; int alias; int mbrtype; } gpt_uuid_alias_match[] = { { &gpt_uuid_apple_boot, G_PART_ALIAS_APPLE_BOOT, 0xab }, { &gpt_uuid_apple_core_storage, G_PART_ALIAS_APPLE_CORE_STORAGE, 0 }, { &gpt_uuid_apple_hfs, G_PART_ALIAS_APPLE_HFS, 0xaf }, { &gpt_uuid_apple_label, G_PART_ALIAS_APPLE_LABEL, 0 }, { &gpt_uuid_apple_raid, G_PART_ALIAS_APPLE_RAID, 0 }, { &gpt_uuid_apple_raid_offline, G_PART_ALIAS_APPLE_RAID_OFFLINE, 0 }, { &gpt_uuid_apple_tv_recovery, G_PART_ALIAS_APPLE_TV_RECOVERY, 0 }, { &gpt_uuid_apple_ufs, G_PART_ALIAS_APPLE_UFS, 0 }, { &gpt_uuid_bios_boot, 
G_PART_ALIAS_BIOS_BOOT, 0 }, { &gpt_uuid_chromeos_firmware, G_PART_ALIAS_CHROMEOS_FIRMWARE, 0 }, { &gpt_uuid_chromeos_kernel, G_PART_ALIAS_CHROMEOS_KERNEL, 0 }, { &gpt_uuid_chromeos_reserved, G_PART_ALIAS_CHROMEOS_RESERVED, 0 }, { &gpt_uuid_chromeos_root, G_PART_ALIAS_CHROMEOS_ROOT, 0 }, { &gpt_uuid_dfbsd_ccd, G_PART_ALIAS_DFBSD_CCD, 0 }, { &gpt_uuid_dfbsd_hammer, G_PART_ALIAS_DFBSD_HAMMER, 0 }, { &gpt_uuid_dfbsd_hammer2, G_PART_ALIAS_DFBSD_HAMMER2, 0 }, { &gpt_uuid_dfbsd_label32, G_PART_ALIAS_DFBSD, 0xa5 }, { &gpt_uuid_dfbsd_label64, G_PART_ALIAS_DFBSD64, 0xa5 }, { &gpt_uuid_dfbsd_legacy, G_PART_ALIAS_DFBSD_LEGACY, 0 }, { &gpt_uuid_dfbsd_swap, G_PART_ALIAS_DFBSD_SWAP, 0 }, { &gpt_uuid_dfbsd_ufs1, G_PART_ALIAS_DFBSD_UFS, 0 }, { &gpt_uuid_dfbsd_vinum, G_PART_ALIAS_DFBSD_VINUM, 0 }, { &gpt_uuid_efi, G_PART_ALIAS_EFI, 0xee }, { &gpt_uuid_freebsd, G_PART_ALIAS_FREEBSD, 0xa5 }, { &gpt_uuid_freebsd_boot, G_PART_ALIAS_FREEBSD_BOOT, 0 }, { &gpt_uuid_freebsd_nandfs, G_PART_ALIAS_FREEBSD_NANDFS, 0 }, { &gpt_uuid_freebsd_swap, G_PART_ALIAS_FREEBSD_SWAP, 0 }, { &gpt_uuid_freebsd_ufs, G_PART_ALIAS_FREEBSD_UFS, 0 }, { &gpt_uuid_freebsd_vinum, G_PART_ALIAS_FREEBSD_VINUM, 0 }, { &gpt_uuid_freebsd_zfs, G_PART_ALIAS_FREEBSD_ZFS, 0 }, { &gpt_uuid_linux_data, G_PART_ALIAS_LINUX_DATA, 0x0b }, { &gpt_uuid_linux_lvm, G_PART_ALIAS_LINUX_LVM, 0 }, { &gpt_uuid_linux_raid, G_PART_ALIAS_LINUX_RAID, 0 }, { &gpt_uuid_linux_swap, G_PART_ALIAS_LINUX_SWAP, 0 }, { &gpt_uuid_mbr, G_PART_ALIAS_MBR, 0 }, { &gpt_uuid_ms_basic_data, G_PART_ALIAS_MS_BASIC_DATA, 0x0b }, { &gpt_uuid_ms_ldm_data, G_PART_ALIAS_MS_LDM_DATA, 0 }, { &gpt_uuid_ms_ldm_metadata, G_PART_ALIAS_MS_LDM_METADATA, 0 }, { &gpt_uuid_ms_recovery, G_PART_ALIAS_MS_RECOVERY, 0 }, { &gpt_uuid_ms_reserved, G_PART_ALIAS_MS_RESERVED, 0 }, { &gpt_uuid_ms_spaces, G_PART_ALIAS_MS_SPACES, 0 }, { &gpt_uuid_netbsd_ccd, G_PART_ALIAS_NETBSD_CCD, 0 }, { &gpt_uuid_netbsd_cgd, G_PART_ALIAS_NETBSD_CGD, 0 }, { &gpt_uuid_netbsd_ffs, G_PART_ALIAS_NETBSD_FFS, 0 }, { &gpt_uuid_netbsd_lfs, G_PART_ALIAS_NETBSD_LFS, 0 }, { &gpt_uuid_netbsd_raid, G_PART_ALIAS_NETBSD_RAID, 0 }, { &gpt_uuid_netbsd_swap, G_PART_ALIAS_NETBSD_SWAP, 0 }, { &gpt_uuid_openbsd_data, G_PART_ALIAS_OPENBSD_DATA, 0 }, { &gpt_uuid_prep_boot, G_PART_ALIAS_PREP_BOOT, 0x41 }, { &gpt_uuid_vmfs, G_PART_ALIAS_VMFS, 0 }, { &gpt_uuid_vmkdiag, G_PART_ALIAS_VMKDIAG, 0 }, { &gpt_uuid_vmreserved, G_PART_ALIAS_VMRESERVED, 0 }, { &gpt_uuid_vmvsanhdr, G_PART_ALIAS_VMVSANHDR, 0 }, { NULL, 0, 0 } }; static int gpt_write_mbr_entry(u_char *mbr, int idx, int typ, quad_t start, quad_t end) { if (typ == 0 || start > UINT32_MAX || end > UINT32_MAX) return (EINVAL); mbr += DOSPARTOFF + idx * DOSPARTSIZE; mbr[0] = 0; if (start == 1) { /* * Treat the PMBR partition specially to maximize * interoperability with BIOSes. 
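 *
 * A brief decoding of the bytes set below (editor's sketch; standard
 * MBR CHS packing, with the head in byte 1, the sector in bits 5:0 of
 * byte 2, and the cylinder in bits 7:6 of byte 2 plus byte 3):
 *
 *	mbr[1] = 0x00, mbr[2] = 0x02, mbr[3] = 0x00  ->  CHS 0/0/2
 *
 * i.e. the sector right after the MBR itself (LBA 0 is CHS 0/0/1),
 * which is what some BIOSes insist on seeing for the protective 0xEE
 * entry; other entries get the conventional 0xff/0xff/0xff
 * "beyond CHS reach" markers.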
*/ mbr[1] = mbr[3] = 0; mbr[2] = 2; } else mbr[1] = mbr[2] = mbr[3] = 0xff; mbr[4] = typ; mbr[5] = mbr[6] = mbr[7] = 0xff; le32enc(mbr + 8, (uint32_t)start); le32enc(mbr + 12, (uint32_t)(end - start + 1)); return (0); } static int gpt_map_type(struct uuid *t) { struct g_part_uuid_alias *uap; for (uap = &gpt_uuid_alias_match[0]; uap->uuid; uap++) { if (EQUUID(t, uap->uuid)) return (uap->mbrtype); } return (0); } static void gpt_create_pmbr(struct g_part_gpt_table *table, struct g_provider *pp) { bzero(table->mbr + DOSPARTOFF, DOSPARTSIZE * NDOSPART); gpt_write_mbr_entry(table->mbr, 0, 0xee, 1, MIN(pp->mediasize / pp->sectorsize - 1, UINT32_MAX)); le16enc(table->mbr + DOSMAGICOFFSET, DOSMAGIC); } /* * Under Boot Camp the PMBR partition (type 0xEE) doesn't cover the * whole disk anymore. Rather, it covers the GPT table and the EFI * system partition only. This way the HFS+ partition and any FAT * partitions can be added to the MBR without creating an overlap. */ static int gpt_is_bootcamp(struct g_part_gpt_table *table, const char *provname) { uint8_t *p; p = table->mbr + DOSPARTOFF; if (p[4] != 0xee || le32dec(p + 8) != 1) return (0); p += DOSPARTSIZE; if (p[4] != 0xaf) return (0); printf("GEOM: %s: enabling Boot Camp\n", provname); return (1); } static void gpt_update_bootcamp(struct g_part_table *basetable, struct g_provider *pp) { struct g_part_entry *baseentry; struct g_part_gpt_entry *entry; struct g_part_gpt_table *table; int bootable, error, index, slices, typ; table = (struct g_part_gpt_table *)basetable; bootable = -1; for (index = 0; index < NDOSPART; index++) { if (table->mbr[DOSPARTOFF + DOSPARTSIZE * index]) bootable = index; } bzero(table->mbr + DOSPARTOFF, DOSPARTSIZE * NDOSPART); slices = 0; LIST_FOREACH(baseentry, &basetable->gpt_entry, gpe_entry) { if (baseentry->gpe_deleted) continue; index = baseentry->gpe_index - 1; if (index >= NDOSPART) continue; entry = (struct g_part_gpt_entry *)baseentry; switch (index) { case 0: /* This must be the EFI system partition. */ if (!EQUUID(&entry->ent.ent_type, &gpt_uuid_efi)) goto disable; error = gpt_write_mbr_entry(table->mbr, index, 0xee, 1ull, entry->ent.ent_lba_end); break; case 1: /* This must be the HFS+ partition. */ if (!EQUUID(&entry->ent.ent_type, &gpt_uuid_apple_hfs)) goto disable; error = gpt_write_mbr_entry(table->mbr, index, 0xaf, entry->ent.ent_lba_start, entry->ent.ent_lba_end); break; default: typ = gpt_map_type(&entry->ent.ent_type); error = gpt_write_mbr_entry(table->mbr, index, typ, entry->ent.ent_lba_start, entry->ent.ent_lba_end); break; } if (error) continue; if (index == bootable) table->mbr[DOSPARTOFF + DOSPARTSIZE * index] = 0x80; slices |= 1 << index; } if ((slices & 3) == 3) return; disable: table->bootcamp = 0; gpt_create_pmbr(table, pp); } static struct gpt_hdr * gpt_read_hdr(struct g_part_gpt_table *table, struct g_consumer *cp, enum gpt_elt elt) { struct gpt_hdr *buf, *hdr; struct g_provider *pp; quad_t lba, last; int error; uint32_t crc, sz; pp = cp->provider; last = (pp->mediasize / pp->sectorsize) - 1; table->state[elt] = GPT_STATE_MISSING; /* * If the primary header is valid look for secondary * header in AlternateLBA, otherwise in the last medium's LBA. 
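 *
 * A concrete trace of that rule (editor's sketch): with last = 1000 and
 * a good primary header whose hdr_lba_alt is 1000, the earlier primary
 * pass already saved table->lba[GPT_ELT_SECHDR] = 1000; with a bad
 * primary we fall back to table->lba[GPT_ELT_SECHDR] = last, which is
 * the same sector on an undamaged disk.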
*/ if (elt == GPT_ELT_SECHDR) { if (table->state[GPT_ELT_PRIHDR] != GPT_STATE_OK) table->lba[elt] = last; } else table->lba[elt] = 1; buf = g_read_data(cp, table->lba[elt] * pp->sectorsize, pp->sectorsize, &error); if (buf == NULL) return (NULL); hdr = NULL; if (memcmp(buf->hdr_sig, GPT_HDR_SIG, sizeof(buf->hdr_sig)) != 0) goto fail; table->state[elt] = GPT_STATE_CORRUPT; sz = le32toh(buf->hdr_size); if (sz < 92 || sz > pp->sectorsize) goto fail; hdr = g_malloc(sz, M_WAITOK | M_ZERO); bcopy(buf, hdr, sz); hdr->hdr_size = sz; crc = le32toh(buf->hdr_crc_self); buf->hdr_crc_self = 0; if (crc32(buf, sz) != crc) goto fail; hdr->hdr_crc_self = crc; table->state[elt] = GPT_STATE_INVALID; hdr->hdr_revision = le32toh(buf->hdr_revision); if (hdr->hdr_revision < GPT_HDR_REVISION) goto fail; hdr->hdr_lba_self = le64toh(buf->hdr_lba_self); if (hdr->hdr_lba_self != table->lba[elt]) goto fail; hdr->hdr_lba_alt = le64toh(buf->hdr_lba_alt); if (hdr->hdr_lba_alt == hdr->hdr_lba_self || hdr->hdr_lba_alt > last) goto fail; /* Check the managed area. */ hdr->hdr_lba_start = le64toh(buf->hdr_lba_start); if (hdr->hdr_lba_start < 2 || hdr->hdr_lba_start >= last) goto fail; hdr->hdr_lba_end = le64toh(buf->hdr_lba_end); if (hdr->hdr_lba_end < hdr->hdr_lba_start || hdr->hdr_lba_end >= last) goto fail; /* Check the table location and size of the table. */ hdr->hdr_entries = le32toh(buf->hdr_entries); hdr->hdr_entsz = le32toh(buf->hdr_entsz); if (hdr->hdr_entries == 0 || hdr->hdr_entsz < 128 || (hdr->hdr_entsz & 7) != 0) goto fail; hdr->hdr_lba_table = le64toh(buf->hdr_lba_table); if (hdr->hdr_lba_table < 2 || hdr->hdr_lba_table >= last) goto fail; if (hdr->hdr_lba_table >= hdr->hdr_lba_start && hdr->hdr_lba_table <= hdr->hdr_lba_end) goto fail; lba = hdr->hdr_lba_table + howmany(hdr->hdr_entries * hdr->hdr_entsz, pp->sectorsize) - 1; if (lba >= last) goto fail; if (lba >= hdr->hdr_lba_start && lba <= hdr->hdr_lba_end) goto fail; table->state[elt] = GPT_STATE_OK; le_uuid_dec(&buf->hdr_uuid, &hdr->hdr_uuid); hdr->hdr_crc_table = le32toh(buf->hdr_crc_table); /* save LBA for secondary header */ if (elt == GPT_ELT_PRIHDR) table->lba[GPT_ELT_SECHDR] = hdr->hdr_lba_alt; g_free(buf); return (hdr); fail: if (hdr != NULL) g_free(hdr); g_free(buf); return (NULL); } static struct gpt_ent * gpt_read_tbl(struct g_part_gpt_table *table, struct g_consumer *cp, enum gpt_elt elt, struct gpt_hdr *hdr) { struct g_provider *pp; struct gpt_ent *ent, *tbl; char *buf, *p; unsigned int idx, sectors, tblsz, size; int error; if (hdr == NULL) return (NULL); pp = cp->provider; table->lba[elt] = hdr->hdr_lba_table; table->state[elt] = GPT_STATE_MISSING; tblsz = hdr->hdr_entries * hdr->hdr_entsz; sectors = howmany(tblsz, pp->sectorsize); buf = g_malloc(sectors * pp->sectorsize, M_WAITOK | M_ZERO); for (idx = 0; idx < sectors; idx += MAXPHYS / pp->sectorsize) { size = (sectors - idx > MAXPHYS / pp->sectorsize) ? 
MAXPHYS: (sectors - idx) * pp->sectorsize; p = g_read_data(cp, (table->lba[elt] + idx) * pp->sectorsize, size, &error); if (p == NULL) { g_free(buf); return (NULL); } bcopy(p, buf + idx * pp->sectorsize, size); g_free(p); } table->state[elt] = GPT_STATE_CORRUPT; if (crc32(buf, tblsz) != hdr->hdr_crc_table) { g_free(buf); return (NULL); } table->state[elt] = GPT_STATE_OK; tbl = g_malloc(hdr->hdr_entries * sizeof(struct gpt_ent), M_WAITOK | M_ZERO); for (idx = 0, ent = tbl, p = buf; idx < hdr->hdr_entries; idx++, ent++, p += hdr->hdr_entsz) { le_uuid_dec(p, &ent->ent_type); le_uuid_dec(p + 16, &ent->ent_uuid); ent->ent_lba_start = le64dec(p + 32); ent->ent_lba_end = le64dec(p + 40); ent->ent_attr = le64dec(p + 48); /* Keep UTF-16 in little-endian. */ bcopy(p + 56, ent->ent_name, sizeof(ent->ent_name)); } g_free(buf); return (tbl); } static int gpt_matched_hdrs(struct gpt_hdr *pri, struct gpt_hdr *sec) { if (pri == NULL || sec == NULL) return (0); if (!EQUUID(&pri->hdr_uuid, &sec->hdr_uuid)) return (0); return ((pri->hdr_revision == sec->hdr_revision && pri->hdr_size == sec->hdr_size && pri->hdr_lba_start == sec->hdr_lba_start && pri->hdr_lba_end == sec->hdr_lba_end && pri->hdr_entries == sec->hdr_entries && pri->hdr_entsz == sec->hdr_entsz && pri->hdr_crc_table == sec->hdr_crc_table) ? 1 : 0); } static int gpt_parse_type(const char *type, struct uuid *uuid) { struct uuid tmp; const char *alias; int error; struct g_part_uuid_alias *uap; if (type[0] == '!') { error = parse_uuid(type + 1, &tmp); if (error) return (error); if (EQUUID(&tmp, &gpt_uuid_unused)) return (EINVAL); *uuid = tmp; return (0); } for (uap = &gpt_uuid_alias_match[0]; uap->uuid; uap++) { alias = g_part_alias_name(uap->alias); if (!strcasecmp(type, alias)) { *uuid = *uap->uuid; return (0); } } return (EINVAL); } static int g_part_gpt_add(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_gpt_entry *entry; int error; entry = (struct g_part_gpt_entry *)baseentry; error = gpt_parse_type(gpp->gpp_type, &entry->ent.ent_type); if (error) return (error); kern_uuidgen(&entry->ent.ent_uuid, 1); entry->ent.ent_lba_start = baseentry->gpe_start; entry->ent.ent_lba_end = baseentry->gpe_end; if (baseentry->gpe_deleted) { entry->ent.ent_attr = 0; bzero(entry->ent.ent_name, sizeof(entry->ent.ent_name)); } if (gpp->gpp_parms & G_PART_PARM_LABEL) g_gpt_utf8_to_utf16(gpp->gpp_label, entry->ent.ent_name, sizeof(entry->ent.ent_name) / sizeof(entry->ent.ent_name[0])); return (0); } static int g_part_gpt_bootcode(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_part_gpt_table *table; size_t codesz; codesz = DOSPARTOFF; table = (struct g_part_gpt_table *)basetable; bzero(table->mbr, codesz); codesz = MIN(codesz, gpp->gpp_codesize); if (codesz > 0) bcopy(gpp->gpp_codeptr, table->mbr, codesz); return (0); } static int g_part_gpt_create(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_provider *pp; struct g_part_gpt_table *table; size_t tblsz; /* We don't nest, which means that our depth should be 0. 
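 *
 * A worked minimum-size check (a sketch using this scheme's defaults):
 * 128 entries of 128 bytes each on 512-byte sectors give
 * tblsz = howmany(128 * 128, 512) = 32, so the ENOSPC test below
 * demands at least 3 + 2*32 + 128 = 195 sectors: a PMBR, two headers,
 * two copies of the table, and one sector per possible partition.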
*/ if (basetable->gpt_depth != 0) return (ENXIO); table = (struct g_part_gpt_table *)basetable; pp = gpp->gpp_provider; tblsz = howmany(basetable->gpt_entries * sizeof(struct gpt_ent), pp->sectorsize); if (pp->sectorsize < MBRSIZE || pp->mediasize < (3 + 2 * tblsz + basetable->gpt_entries) * pp->sectorsize) return (ENOSPC); gpt_create_pmbr(table, pp); /* Allocate space for the header */ table->hdr = g_malloc(sizeof(struct gpt_hdr), M_WAITOK | M_ZERO); bcopy(GPT_HDR_SIG, table->hdr->hdr_sig, sizeof(table->hdr->hdr_sig)); table->hdr->hdr_revision = GPT_HDR_REVISION; table->hdr->hdr_size = offsetof(struct gpt_hdr, padding); kern_uuidgen(&table->hdr->hdr_uuid, 1); table->hdr->hdr_entries = basetable->gpt_entries; table->hdr->hdr_entsz = sizeof(struct gpt_ent); g_gpt_set_defaults(basetable, pp); return (0); } static int g_part_gpt_destroy(struct g_part_table *basetable, struct g_part_parms *gpp) { struct g_part_gpt_table *table; struct g_provider *pp; table = (struct g_part_gpt_table *)basetable; pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; g_free(table->hdr); table->hdr = NULL; /* - * Wipe the first 2 sectors to clear the partitioning. Wipe the last - * sector only if it has valid secondary header. + * Wipe the first 2 sectors and last one to clear the partitioning. + * Wipe sectors only if they have valid metadata. */ - basetable->gpt_smhead |= 3; + if (table->state[GPT_ELT_PRIHDR] == GPT_STATE_OK) + basetable->gpt_smhead |= 3; if (table->state[GPT_ELT_SECHDR] == GPT_STATE_OK && table->lba[GPT_ELT_SECHDR] == pp->mediasize / pp->sectorsize - 1) basetable->gpt_smtail |= 1; return (0); } static void g_part_gpt_dumpconf(struct g_part_table *table, struct g_part_entry *baseentry, struct sbuf *sb, const char *indent) { struct g_part_gpt_entry *entry; entry = (struct g_part_gpt_entry *)baseentry; if (indent == NULL) { /* conftxt: libdisk compatibility */ sbuf_printf(sb, " xs GPT xt "); sbuf_printf_uuid(sb, &entry->ent.ent_type); } else if (entry != NULL) { /* confxml: partition entry information */ sbuf_printf(sb, "%s<label>", indent); g_gpt_printf_utf16(sb, entry->ent.ent_name, sizeof(entry->ent.ent_name) >> 1); sbuf_printf(sb, "</label>\n"); if (entry->ent.ent_attr & GPT_ENT_ATTR_BOOTME) sbuf_printf(sb, "%s<attrib>bootme</attrib>\n", indent); if (entry->ent.ent_attr & GPT_ENT_ATTR_BOOTONCE) { sbuf_printf(sb, "%s<attrib>bootonce</attrib>\n", indent); } if (entry->ent.ent_attr & GPT_ENT_ATTR_BOOTFAILED) { sbuf_printf(sb, "%s<attrib>bootfailed</attrib>\n", indent); } sbuf_printf(sb, "%s<rawtype>", indent); sbuf_printf_uuid(sb, &entry->ent.ent_type); sbuf_printf(sb, "</rawtype>\n"); sbuf_printf(sb, "%s<rawuuid>", indent); sbuf_printf_uuid(sb, &entry->ent.ent_uuid); sbuf_printf(sb, "</rawuuid>\n"); } else { /* confxml: scheme information */ } } static int g_part_gpt_dumpto(struct g_part_table *table, struct g_part_entry *baseentry) { struct g_part_gpt_entry *entry; entry = (struct g_part_gpt_entry *)baseentry; return ((EQUUID(&entry->ent.ent_type, &gpt_uuid_freebsd_swap) || EQUUID(&entry->ent.ent_type, &gpt_uuid_linux_swap) || EQUUID(&entry->ent.ent_type, &gpt_uuid_dfbsd_swap)) ?
1 : 0); } static int g_part_gpt_modify(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_gpt_entry *entry; int error; entry = (struct g_part_gpt_entry *)baseentry; if (gpp->gpp_parms & G_PART_PARM_TYPE) { error = gpt_parse_type(gpp->gpp_type, &entry->ent.ent_type); if (error) return (error); } if (gpp->gpp_parms & G_PART_PARM_LABEL) g_gpt_utf8_to_utf16(gpp->gpp_label, entry->ent.ent_name, sizeof(entry->ent.ent_name) / sizeof(entry->ent.ent_name[0])); return (0); } static int g_part_gpt_resize(struct g_part_table *basetable, struct g_part_entry *baseentry, struct g_part_parms *gpp) { struct g_part_gpt_entry *entry; if (baseentry == NULL) return (g_part_gpt_recover(basetable)); entry = (struct g_part_gpt_entry *)baseentry; baseentry->gpe_end = baseentry->gpe_start + gpp->gpp_size - 1; entry->ent.ent_lba_end = baseentry->gpe_end; return (0); } static const char * g_part_gpt_name(struct g_part_table *table, struct g_part_entry *baseentry, char *buf, size_t bufsz) { struct g_part_gpt_entry *entry; char c; entry = (struct g_part_gpt_entry *)baseentry; c = (EQUUID(&entry->ent.ent_type, &gpt_uuid_freebsd)) ? 's' : 'p'; snprintf(buf, bufsz, "%c%d", c, baseentry->gpe_index); return (buf); } static int g_part_gpt_probe(struct g_part_table *table, struct g_consumer *cp) { struct g_provider *pp; u_char *buf; int error, index, pri, res; /* We don't nest, which means that our depth should be 0. */ if (table->gpt_depth != 0) return (ENXIO); pp = cp->provider; /* * Sanity-check the provider. Since the first sector on the provider * must be a PMBR and a PMBR is 512 bytes large, the sector size * must be at least 512 bytes. Also, since the theoretical minimum * number of sectors needed by GPT is 6, any medium that has less * than 6 sectors is never going to be able to hold a GPT. The * number 6 comes from: * 1 sector for the PMBR * 2 sectors for the GPT headers (each 1 sector) * 2 sectors for the GPT tables (each 1 sector) * 1 sector for an actual partition * It's better to catch this pathological case early than behaving * pathologically later on... */ if (pp->sectorsize < MBRSIZE || pp->mediasize < 6 * pp->sectorsize) return (ENOSPC); /* * Check that there's a MBR or a PMBR. If it's a PMBR, we return * as the highest priority on a match, otherwise we assume some * GPT-unaware tool has destroyed the GPT by recreating a MBR and * we really want the MBR scheme to take precedence. */ buf = g_read_data(cp, 0L, pp->sectorsize, &error); if (buf == NULL) return (error); res = le16dec(buf + DOSMAGICOFFSET); pri = G_PART_PROBE_PRI_LOW; if (res == DOSMAGIC) { for (index = 0; index < NDOSPART; index++) { if (buf[DOSPARTOFF + DOSPARTSIZE * index + 4] == 0xee) pri = G_PART_PROBE_PRI_HIGH; } g_free(buf); /* Check that there's a primary header. */ buf = g_read_data(cp, pp->sectorsize, pp->sectorsize, &error); if (buf == NULL) return (error); res = memcmp(buf, GPT_HDR_SIG, 8); g_free(buf); if (res == 0) return (pri); } else g_free(buf); /* No primary? Check that there's a secondary. */ buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, &error); if (buf == NULL) return (error); res = memcmp(buf, GPT_HDR_SIG, 8); g_free(buf); return ((res == 0) ? 
pri : ENXIO); } static int g_part_gpt_read(struct g_part_table *basetable, struct g_consumer *cp) { struct gpt_hdr *prihdr, *sechdr; struct gpt_ent *tbl, *pritbl, *sectbl; struct g_provider *pp; struct g_part_gpt_table *table; struct g_part_gpt_entry *entry; u_char *buf; uint64_t last; int error, index; table = (struct g_part_gpt_table *)basetable; pp = cp->provider; last = (pp->mediasize / pp->sectorsize) - 1; /* Read the PMBR */ buf = g_read_data(cp, 0, pp->sectorsize, &error); if (buf == NULL) return (error); bcopy(buf, table->mbr, MBRSIZE); g_free(buf); /* Read the primary header and table. */ prihdr = gpt_read_hdr(table, cp, GPT_ELT_PRIHDR); if (table->state[GPT_ELT_PRIHDR] == GPT_STATE_OK) { pritbl = gpt_read_tbl(table, cp, GPT_ELT_PRITBL, prihdr); } else { table->state[GPT_ELT_PRITBL] = GPT_STATE_MISSING; pritbl = NULL; } /* Read the secondary header and table. */ sechdr = gpt_read_hdr(table, cp, GPT_ELT_SECHDR); if (table->state[GPT_ELT_SECHDR] == GPT_STATE_OK) { sectbl = gpt_read_tbl(table, cp, GPT_ELT_SECTBL, sechdr); } else { table->state[GPT_ELT_SECTBL] = GPT_STATE_MISSING; sectbl = NULL; } /* Fail if we haven't got any good tables at all. */ if (table->state[GPT_ELT_PRITBL] != GPT_STATE_OK && table->state[GPT_ELT_SECTBL] != GPT_STATE_OK) { printf("GEOM: %s: corrupt or invalid GPT detected.\n", pp->name); printf("GEOM: %s: GPT rejected -- may not be recoverable.\n", pp->name); return (EINVAL); } /* * If both headers are good but they disagree with each other, * then invalidate one. We prefer to keep the primary header, * unless the primary table is corrupt. */ if (table->state[GPT_ELT_PRIHDR] == GPT_STATE_OK && table->state[GPT_ELT_SECHDR] == GPT_STATE_OK && !gpt_matched_hdrs(prihdr, sechdr)) { if (table->state[GPT_ELT_PRITBL] == GPT_STATE_OK) { table->state[GPT_ELT_SECHDR] = GPT_STATE_INVALID; table->state[GPT_ELT_SECTBL] = GPT_STATE_MISSING; g_free(sechdr); sechdr = NULL; } else { table->state[GPT_ELT_PRIHDR] = GPT_STATE_INVALID; table->state[GPT_ELT_PRITBL] = GPT_STATE_MISSING; g_free(prihdr); prihdr = NULL; } } if (table->state[GPT_ELT_PRITBL] != GPT_STATE_OK) { printf("GEOM: %s: the primary GPT table is corrupt or " "invalid.\n", pp->name); printf("GEOM: %s: using the secondary instead -- recovery " "strongly advised.\n", pp->name); table->hdr = sechdr; basetable->gpt_corrupt = 1; if (prihdr != NULL) g_free(prihdr); tbl = sectbl; if (pritbl != NULL) g_free(pritbl); } else { if (table->state[GPT_ELT_SECTBL] != GPT_STATE_OK) { printf("GEOM: %s: the secondary GPT table is corrupt " "or invalid.\n", pp->name); printf("GEOM: %s: using the primary only -- recovery " "suggested.\n", pp->name); basetable->gpt_corrupt = 1; } else if (table->lba[GPT_ELT_SECHDR] != last) { printf( "GEOM: %s: the secondary GPT header is not in " "the last LBA.\n", pp->name); basetable->gpt_corrupt = 1; } table->hdr = prihdr; if (sechdr != NULL) g_free(sechdr); tbl = pritbl; if (sectbl != NULL) g_free(sectbl); } basetable->gpt_first = table->hdr->hdr_lba_start; basetable->gpt_last = table->hdr->hdr_lba_end; basetable->gpt_entries = (table->hdr->hdr_lba_start - 2) * pp->sectorsize / table->hdr->hdr_entsz; for (index = table->hdr->hdr_entries - 1; index >= 0; index--) { if (EQUUID(&tbl[index].ent_type, &gpt_uuid_unused)) continue; entry = (struct g_part_gpt_entry *)g_part_new_entry( basetable, index + 1, tbl[index].ent_lba_start, tbl[index].ent_lba_end); entry->ent = tbl[index]; } g_free(tbl); /* * Under Mac OS X, the MBR mirrors the first 4 GPT partitions * if (and only if) any FAT32 or FAT16 partitions 
have been * created. This happens irrespective of whether Boot Camp is * used/enabled, though it's generally understood to be done * to support legacy Windows under Boot Camp. We refer to this * mirroring simply as Boot Camp. We try to detect Boot Camp * so that we can update the MBR if and when GPT changes have * been made. Note that we do not enable Boot Camp if not * previously enabled because we can't assume that we're on a * Mac alongside Mac OS X. */ table->bootcamp = gpt_is_bootcamp(table, pp->name); return (0); } static int g_part_gpt_recover(struct g_part_table *basetable) { struct g_part_gpt_table *table; struct g_provider *pp; table = (struct g_part_gpt_table *)basetable; pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; gpt_create_pmbr(table, pp); g_gpt_set_defaults(basetable, pp); basetable->gpt_corrupt = 0; return (0); } static int g_part_gpt_setunset(struct g_part_table *basetable, struct g_part_entry *baseentry, const char *attrib, unsigned int set) { struct g_part_gpt_entry *entry; struct g_part_gpt_table *table; struct g_provider *pp; uint8_t *p; uint64_t attr; int i; table = (struct g_part_gpt_table *)basetable; entry = (struct g_part_gpt_entry *)baseentry; if (strcasecmp(attrib, "active") == 0) { if (table->bootcamp) { /* The active flag must be set on a valid entry. */ if (entry == NULL) return (ENXIO); if (baseentry->gpe_index > NDOSPART) return (EINVAL); for (i = 0; i < NDOSPART; i++) { p = &table->mbr[DOSPARTOFF + i * DOSPARTSIZE]; p[0] = (i == baseentry->gpe_index - 1) ? ((set) ? 0x80 : 0) : 0; } } else { /* The PMBR is marked as active without an entry. */ if (entry != NULL) return (ENXIO); for (i = 0; i < NDOSPART; i++) { p = &table->mbr[DOSPARTOFF + i * DOSPARTSIZE]; p[0] = (p[4] == 0xee) ? ((set) ? 0x80 : 0) : 0; } } return (0); } else if (strcasecmp(attrib, "lenovofix") == 0) { /* * Write the 0xee GPT entry to slot #1 (2nd slot) in the pMBR. * This workaround allows Lenovo X220, T420, T520, etc to boot * from GPT Partitions in BIOS mode. */ if (entry != NULL) return (ENXIO); pp = LIST_FIRST(&basetable->gpt_gp->consumer)->provider; bzero(table->mbr + DOSPARTOFF, DOSPARTSIZE * NDOSPART); gpt_write_mbr_entry(table->mbr, ((set) ? 1 : 0), 0xee, 1, MIN(pp->mediasize / pp->sectorsize - 1, UINT32_MAX)); return (0); } if (entry == NULL) return (ENODEV); attr = 0; if (strcasecmp(attrib, "bootme") == 0) { attr |= GPT_ENT_ATTR_BOOTME; } else if (strcasecmp(attrib, "bootonce") == 0) { attr |= GPT_ENT_ATTR_BOOTONCE; if (set) attr |= GPT_ENT_ATTR_BOOTME; } else if (strcasecmp(attrib, "bootfailed") == 0) { /* * It should only be possible to unset BOOTFAILED, but it might * be useful for test purposes to also be able to set it. 
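 *
 * A sketch of the resulting bit arithmetic ("set bootonce" as the
 * example); the loader-side protocol mentioned afterwards is the
 * gptboot(8) convention as commonly documented, not something this
 * function enforces:
 *
 *	attr = GPT_ENT_ATTR_BOOTONCE | GPT_ENT_ATTR_BOOTME;
 *	entry->ent.ent_attr |= attr;
 *
 * The loader later clears BOOTME after a single attempt and sets
 * BOOTFAILED if that attempt did not succeed, which "unset bootfailed"
 * here removes again.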
*/ attr |= GPT_ENT_ATTR_BOOTFAILED; } if (attr == 0) return (EINVAL); if (set) attr = entry->ent.ent_attr | attr; else attr = entry->ent.ent_attr & ~attr; if (attr != entry->ent.ent_attr) { entry->ent.ent_attr = attr; if (!baseentry->gpe_created) baseentry->gpe_modified = 1; } return (0); } static const char * g_part_gpt_type(struct g_part_table *basetable, struct g_part_entry *baseentry, char *buf, size_t bufsz) { struct g_part_gpt_entry *entry; struct uuid *type; struct g_part_uuid_alias *uap; entry = (struct g_part_gpt_entry *)baseentry; type = &entry->ent.ent_type; for (uap = &gpt_uuid_alias_match[0]; uap->uuid; uap++) if (EQUUID(type, uap->uuid)) return (g_part_alias_name(uap->alias)); buf[0] = '!'; snprintf_uuid(buf + 1, bufsz - 1, type); return (buf); } static int g_part_gpt_write(struct g_part_table *basetable, struct g_consumer *cp) { unsigned char *buf, *bp; struct g_provider *pp; struct g_part_entry *baseentry; struct g_part_gpt_entry *entry; struct g_part_gpt_table *table; size_t tblsz; uint32_t crc; int error, index; pp = cp->provider; table = (struct g_part_gpt_table *)basetable; tblsz = howmany(table->hdr->hdr_entries * table->hdr->hdr_entsz, pp->sectorsize); /* Reconstruct the MBR from the GPT if under Boot Camp. */ if (table->bootcamp) gpt_update_bootcamp(basetable, pp); /* Write the PMBR */ buf = g_malloc(pp->sectorsize, M_WAITOK | M_ZERO); bcopy(table->mbr, buf, MBRSIZE); error = g_write_data(cp, 0, buf, pp->sectorsize); g_free(buf); if (error) return (error); /* Allocate space for the header and entries. */ buf = g_malloc((tblsz + 1) * pp->sectorsize, M_WAITOK | M_ZERO); memcpy(buf, table->hdr->hdr_sig, sizeof(table->hdr->hdr_sig)); le32enc(buf + 8, table->hdr->hdr_revision); le32enc(buf + 12, table->hdr->hdr_size); le64enc(buf + 40, table->hdr->hdr_lba_start); le64enc(buf + 48, table->hdr->hdr_lba_end); le_uuid_enc(buf + 56, &table->hdr->hdr_uuid); le32enc(buf + 80, table->hdr->hdr_entries); le32enc(buf + 84, table->hdr->hdr_entsz); LIST_FOREACH(baseentry, &basetable->gpt_entry, gpe_entry) { if (baseentry->gpe_deleted) continue; entry = (struct g_part_gpt_entry *)baseentry; index = baseentry->gpe_index - 1; bp = buf + pp->sectorsize + table->hdr->hdr_entsz * index; le_uuid_enc(bp, &entry->ent.ent_type); le_uuid_enc(bp + 16, &entry->ent.ent_uuid); le64enc(bp + 32, entry->ent.ent_lba_start); le64enc(bp + 40, entry->ent.ent_lba_end); le64enc(bp + 48, entry->ent.ent_attr); memcpy(bp + 56, entry->ent.ent_name, sizeof(entry->ent.ent_name)); } crc = crc32(buf + pp->sectorsize, table->hdr->hdr_entries * table->hdr->hdr_entsz); le32enc(buf + 88, crc); /* Write primary meta-data. */ le32enc(buf + 16, 0); /* hdr_crc_self. */ le64enc(buf + 24, table->lba[GPT_ELT_PRIHDR]); /* hdr_lba_self. */ le64enc(buf + 32, table->lba[GPT_ELT_SECHDR]); /* hdr_lba_alt. */ le64enc(buf + 72, table->lba[GPT_ELT_PRITBL]); /* hdr_lba_table. */ crc = crc32(buf, table->hdr->hdr_size); le32enc(buf + 16, crc); for (index = 0; index < tblsz; index += MAXPHYS / pp->sectorsize) { error = g_write_data(cp, (table->lba[GPT_ELT_PRITBL] + index) * pp->sectorsize, buf + (index + 1) * pp->sectorsize, (tblsz - index > MAXPHYS / pp->sectorsize) ? MAXPHYS: (tblsz - index) * pp->sectorsize); if (error) goto out; } error = g_write_data(cp, table->lba[GPT_ELT_PRIHDR] * pp->sectorsize, buf, pp->sectorsize); if (error) goto out; /* Write secondary meta-data. */ le32enc(buf + 16, 0); /* hdr_crc_self. */ le64enc(buf + 24, table->lba[GPT_ELT_SECHDR]); /* hdr_lba_self. 
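 *
 * A sketch of the mirroring done here: the same in-memory buffer is
 * reused for both header writes; only three LBA fields (and hence the
 * self-CRC) differ between the two copies:
 *
 *	field          primary write           secondary write
 *	hdr_lba_self   lba[GPT_ELT_PRIHDR]     lba[GPT_ELT_SECHDR]
 *	hdr_lba_alt    lba[GPT_ELT_SECHDR]     lba[GPT_ELT_PRIHDR]
 *	hdr_lba_table  lba[GPT_ELT_PRITBL]     lba[GPT_ELT_SECTBL]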
*/ le64enc(buf + 32, table->lba[GPT_ELT_PRIHDR]); /* hdr_lba_alt. */ le64enc(buf + 72, table->lba[GPT_ELT_SECTBL]); /* hdr_lba_table. */ crc = crc32(buf, table->hdr->hdr_size); le32enc(buf + 16, crc); for (index = 0; index < tblsz; index += MAXPHYS / pp->sectorsize) { error = g_write_data(cp, (table->lba[GPT_ELT_SECTBL] + index) * pp->sectorsize, buf + (index + 1) * pp->sectorsize, (tblsz - index > MAXPHYS / pp->sectorsize) ? MAXPHYS: (tblsz - index) * pp->sectorsize); if (error) goto out; } error = g_write_data(cp, table->lba[GPT_ELT_SECHDR] * pp->sectorsize, buf, pp->sectorsize); out: g_free(buf); return (error); } static void g_gpt_set_defaults(struct g_part_table *basetable, struct g_provider *pp) { struct g_part_entry *baseentry; struct g_part_gpt_entry *entry; struct g_part_gpt_table *table; quad_t start, end, min, max; quad_t lba, last; size_t spb, tblsz; table = (struct g_part_gpt_table *)basetable; last = pp->mediasize / pp->sectorsize - 1; tblsz = howmany(basetable->gpt_entries * sizeof(struct gpt_ent), pp->sectorsize); table->lba[GPT_ELT_PRIHDR] = 1; table->lba[GPT_ELT_PRITBL] = 2; table->lba[GPT_ELT_SECHDR] = last; table->lba[GPT_ELT_SECTBL] = last - tblsz; table->state[GPT_ELT_PRIHDR] = GPT_STATE_OK; table->state[GPT_ELT_PRITBL] = GPT_STATE_OK; table->state[GPT_ELT_SECHDR] = GPT_STATE_OK; table->state[GPT_ELT_SECTBL] = GPT_STATE_OK; max = start = 2 + tblsz; min = end = last - tblsz - 1; LIST_FOREACH(baseentry, &basetable->gpt_entry, gpe_entry) { if (baseentry->gpe_deleted) continue; entry = (struct g_part_gpt_entry *)baseentry; if (entry->ent.ent_lba_start < min) min = entry->ent.ent_lba_start; if (entry->ent.ent_lba_end > max) max = entry->ent.ent_lba_end; } spb = 4096 / pp->sectorsize; if (spb > 1) { lba = start + ((start % spb) ? spb - start % spb : 0); if (lba <= min) start = lba; lba = end - (end + 1) % spb; if (max <= lba) end = lba; } table->hdr->hdr_lba_start = start; table->hdr->hdr_lba_end = end; basetable->gpt_first = start; basetable->gpt_last = end; } static void g_gpt_printf_utf16(struct sbuf *sb, uint16_t *str, size_t len) { u_int bo; uint32_t ch; uint16_t c; bo = LITTLE_ENDIAN; /* GPT is little-endian */ while (len > 0 && *str != 0) { ch = (bo == BIG_ENDIAN) ? be16toh(*str) : le16toh(*str); str++, len--; if ((ch & 0xf800) == 0xd800) { if (len > 0) { c = (bo == BIG_ENDIAN) ? be16toh(*str) : le16toh(*str); str++, len--; } else c = 0xfffd; if ((ch & 0x400) == 0 && (c & 0xfc00) == 0xdc00) { ch = ((ch & 0x3ff) << 10) + (c & 0x3ff); ch += 0x10000; } else ch = 0xfffd; } else if (ch == 0xfffe) { /* BOM (U+FEFF) swapped. */ bo = (bo == BIG_ENDIAN) ? LITTLE_ENDIAN : BIG_ENDIAN; continue; } else if (ch == 0xfeff) /* BOM (U+FEFF) unswapped. */ continue; /* Write the Unicode character in UTF-8 */ if (ch < 0x80) g_conf_printf_escaped(sb, "%c", ch); else if (ch < 0x800) g_conf_printf_escaped(sb, "%c%c", 0xc0 | (ch >> 6), 0x80 | (ch & 0x3f)); else if (ch < 0x10000) g_conf_printf_escaped(sb, "%c%c%c", 0xe0 | (ch >> 12), 0x80 | ((ch >> 6) & 0x3f), 0x80 | (ch & 0x3f)); else if (ch < 0x200000) g_conf_printf_escaped(sb, "%c%c%c%c", 0xf0 | (ch >> 18), 0x80 | ((ch >> 12) & 0x3f), 0x80 | ((ch >> 6) & 0x3f), 0x80 | (ch & 0x3f)); } } static void g_gpt_utf8_to_utf16(const uint8_t *s8, uint16_t *s16, size_t s16len) { size_t s16idx, s8idx; uint32_t utfchar; unsigned int c, utfbytes; s8idx = s16idx = 0; utfchar = 0; utfbytes = 0; bzero(s16, s16len << 1); while (s8[s8idx] != 0 && s16idx < s16len) { c = s8[s8idx++]; if ((c & 0xc0) != 0x80) { /* Initial characters. 
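 *
 * Lead-byte patterns recognized below (standard UTF-8, listed for
 * reference):
 *
 *	0xxxxxxx  1-byte sequence (ASCII), utfbytes = 0
 *	110xxxxx  start of 2-byte sequence, utfbytes = 1
 *	1110xxxx  start of 3-byte sequence, utfbytes = 2
 *	11110xxx  start of 4-byte sequence, utfbytes = 3
 *
 * Continuation bytes (10xxxxxx) take the other branch; a new lead byte
 * arriving while utfbytes != 0 means the previous sequence was cut
 * short, which is reported as U+FFFD.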
*/ if (utfbytes != 0) { /* Incomplete encoding of previous char. */ s16[s16idx++] = htole16(0xfffd); } if ((c & 0xf8) == 0xf0) { utfchar = c & 0x07; utfbytes = 3; } else if ((c & 0xf0) == 0xe0) { utfchar = c & 0x0f; utfbytes = 2; } else if ((c & 0xe0) == 0xc0) { utfchar = c & 0x1f; utfbytes = 1; } else { utfchar = c & 0x7f; utfbytes = 0; } } else { /* Followup characters. */ if (utfbytes > 0) { utfchar = (utfchar << 6) + (c & 0x3f); utfbytes--; } else if (utfbytes == 0) utfbytes = ~0; } /* * Write the complete Unicode character as UTF-16 when we * have all the UTF-8 charactars collected. */ if (utfbytes == 0) { /* * If we need to write 2 UTF-16 characters, but * we only have room for 1, then we truncate the * string by writing a 0 instead. */ if (utfchar >= 0x10000 && s16idx < s16len - 1) { s16[s16idx++] = htole16(0xd800 | ((utfchar >> 10) - 0x40)); s16[s16idx++] = htole16(0xdc00 | (utfchar & 0x3ff)); } else s16[s16idx++] = (utfchar >= 0x10000) ? 0 : htole16(utfchar); } } /* * If our input string was truncated, append an invalid encoding * character to the output string. */ if (utfbytes != 0 && s16idx < s16len) s16[s16idx++] = htole16(0xfffd); } Index: projects/ipsec/sys/i386/conf/DEFAULTS =================================================================== --- projects/ipsec/sys/i386/conf/DEFAULTS (revision 313186) +++ projects/ipsec/sys/i386/conf/DEFAULTS (revision 313187) @@ -1,31 +1,28 @@ # # DEFAULTS -- Default kernel configuration file for FreeBSD/i386 # # $FreeBSD$ machine i386 # Bus support. device isa options ISAPNP -# Floating point support. -device npx - # Pseudo devices. device mem # Memory and kernel memory devices device io # I/O device # UART chips on this platform device uart_ns8250 # Default partitioning schemes options GEOM_PART_BSD options GEOM_PART_EBR options GEOM_PART_EBR_COMPAT options GEOM_PART_MBR # enable support for native hardware device atpic options NEW_PCIB Index: projects/ipsec/sys/i386/conf/NOTES =================================================================== --- projects/ipsec/sys/i386/conf/NOTES (revision 313186) +++ projects/ipsec/sys/i386/conf/NOTES (revision 313187) @@ -1,1068 +1,1056 @@ # # NOTES -- Lines that can be cut/pasted into kernel and hints configs. # # This file contains machine dependent kernel configuration notes. For # machine independent notes, look in /sys/conf/NOTES. # # $FreeBSD$ # # # We want LINT to cover profiling as well. profile 2 # # Enable the kernel DTrace hooks which are required to load the DTrace # kernel modules. # options KDTRACE_HOOKS # DTrace core # NOTE: introduces CDDL-licensed components into the kernel #device dtrace # DTrace modules #device dtrace_profile #device dtrace_sdt #device dtrace_fbt #device dtrace_systrace #device dtrace_prototype #device dtnfscl #device dtmalloc # Alternatively include all the DTrace modules #device dtraceall ##################################################################### # SMP OPTIONS: # # The apic device enables the use of the I/O APIC for interrupt delivery. # The apic device can be used in both UP and SMP kernels, but is required # for SMP kernels. Thus, the apic device is not strictly an SMP option, # but it is a prerequisite for SMP. # # Notes: # # HTT CPUs should only be used if they are enabled in the BIOS. For # the ACPI case, ACPI only correctly tells us about any HTT CPUs if # they are enabled. However, most HTT systems do not list HTT CPUs # in the MP Table if they are enabled, thus we guess at the HTT CPUs # for the MP Table case. 
However, we shouldn't try to guess and use # these CPUs if HTT is disabled. Thus, HTT guessing is only enabled # for the MP Table if the user explicitly asks for it via the # MPTABLE_FORCE_HTT option. Do NOT use this option if you have HTT # disabled in your BIOS. # # IPI_PREEMPTION instructs the kernel to preempt threads running on other # CPUS if needed. Relies on the PREEMPTION option # Mandatory: device apic # I/O apic # Optional: options MPTABLE_FORCE_HTT # Enable HTT CPUs with the MP Table options IPI_PREEMPTION # # Watchdog routines. # options MP_WATCHDOG # Debugging options. # options COUNT_XINVLTLB_HITS # Counters for TLB events options COUNT_IPIS # Per-CPU IPI interrupt counters ##################################################################### # CPU OPTIONS # # You must specify at least one CPU (the one you intend to run on); # deleting the specification for CPUs you don't need to use may make # parts of the system run faster. # cpu I486_CPU cpu I586_CPU # aka Pentium(tm) cpu I686_CPU # aka Pentium Pro(tm) # # Options for CPU features. # # CPU_ATHLON_SSE_HACK tries to enable SSE instructions when the BIOS has # forgotten to enable them. # # CPU_BLUELIGHTNING_3X enables triple-clock mode on IBM Blue Lightning # CPU if CPU supports it. The default is double-clock mode on # BlueLightning CPU box. # # CPU_BLUELIGHTNING_FPU_OP_CACHE enables FPU operand cache on IBM # BlueLightning CPU. It works only with Cyrix FPU, and this option # should not be used with Intel FPU. # # CPU_BTB_EN enables branch target buffer on Cyrix 5x86 (NOTE 1). # # CPU_CYRIX_NO_LOCK enables weak locking for the entire address space # of Cyrix 6x86 and 6x86MX CPUs by setting the NO_LOCK bit of CCR1. # Otherwise, the NO_LOCK bit of CCR1 is cleared. (NOTE 3) # # CPU_DIRECT_MAPPED_CACHE sets L1 cache of Cyrix 486DLC CPU in direct # mapped mode. Default is 2-way set associative mode. # # CPU_DISABLE_5X86_LSSER disables load store serialize (i.e., enables # reorder). This option should not be used if you use memory mapped # I/O device(s). # -# CPU_DISABLE_CMPXCHG disables the CMPXCHG instruction on > i386 IA32 -# machines. VmWare 3.x seems to emulate this instruction poorly, causing -# the guest OS to run very slowly. This problem appears to be fixed in -# VmWare 4.x, at least in version 4.5.2, so that enabling this option with -# VmWare 4.x will result in locking operations to be 20-30 times slower. -# Enabling this with an SMP kernel will cause the kernel to be unusable. -# -# CPU_DISABLE_SSE explicitly prevents I686_CPU from turning on SSE. -# # CPU_ELAN enables support for AMDs ElanSC520 CPU. # CPU_ELAN_PPS enables precision timestamp code. # CPU_ELAN_XTAL sets the clock crystal frequency in Hz. # # CPU_ENABLE_LONGRUN enables support for Transmeta Crusoe LongRun # technology which allows to restrict power consumption of the CPU by # using group of hw.crusoe.* sysctls. # # CPU_FASTER_5X86_FPU enables faster FPU exception handler. # # CPU_GEODE is for the SC1100 Geode embedded processor. This option # is necessary because the i8254 timecounter is toast. # # CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products # for i386 machines. # # CPU_IORT defines I/O clock delay time (NOTE 1). Default values of # I/O clock delay time on Cyrix 5x86 and 6x86 are 0 and 7,respectively # (no clock delay). # # CPU_L2_LATENCY specifies the L2 cache latency value. This option is used # only when CPU_PPRO2CELERON is defined and Mendocino Celeron is detected. # The default value is 5. 
# # CPU_LOOP_EN prevents flushing the prefetch buffer if the destination # of a jump is already present in the prefetch buffer on Cyrix 5x86 (NOTE # 1). # # CPU_PPRO2CELERON enables L2 cache of Mendocino Celeron CPUs. This option # is useful when you use a Socket 8 to Socket 370 converter, because most Pentium # Pro BIOSs do not enable L2 cache of Mendocino Celeron CPUs. # # CPU_RSTK_EN enables return stack on Cyrix 5x86 (NOTE 1). # # CPU_SOEKRIS enables support for www.soekris.com hardware. # # CPU_SUSP_HLT enables suspend on HALT. If this option is set, the CPU # enters suspend mode following execution of the HALT instruction. # # CPU_UPGRADE_HW_CACHE eliminates unneeded cache flush instruction(s). # # CPU_WT_ALLOC enables write allocation on Cyrix 6x86/6x86MX and AMD # K5/K6/K6-2 CPUs. # # CYRIX_CACHE_WORKS enables CPU cache on Cyrix 486 CPUs with cache # flush at hold state. # # CYRIX_CACHE_REALLY_WORKS enables (1) CPU cache on Cyrix 486 CPUs # without cache flush at hold state, and (2) write-back CPU cache on # Cyrix 6x86 whose revision < 2.7 (NOTE 2). # # NO_F00F_HACK disables the hack that prevents Pentiums (and ONLY # Pentiums) from locking up when a LOCK CMPXCHG8B instruction is # executed. This option is only needed if I586_CPU is also defined, # and should be included for any non-Pentium CPU that defines it. # # NO_MEMORY_HOLE is an optimisation for systems with AMD K6 processors # which indicates that the 15-16MB range is *definitely* not being # occupied by an ISA memory hole. # # NOTE 1: The options CPU_BTB_EN, CPU_IORT, # CPU_LOOP_EN and CPU_RSTK_EN should not be used because of CPU bugs. # These options may crash your system. # # NOTE 2: If CYRIX_CACHE_REALLY_WORKS is not set, CPU cache is enabled # in write-through mode when revision < 2.7. If revision of Cyrix # 6x86 >= 2.7, CPU cache is always enabled in write-back mode. # # NOTE 3: This option may cause failures for software that requires # locked cycles in order to operate correctly. # options CPU_ATHLON_SSE_HACK options CPU_BLUELIGHTNING_3X options CPU_BLUELIGHTNING_FPU_OP_CACHE options CPU_BTB_EN options CPU_DIRECT_MAPPED_CACHE options CPU_DISABLE_5X86_LSSER -options CPU_DISABLE_CMPXCHG -#options CPU_DISABLE_SSE options CPU_ELAN options CPU_ELAN_PPS options CPU_ELAN_XTAL=32768000 options CPU_ENABLE_LONGRUN options CPU_FASTER_5X86_FPU options CPU_GEODE options CPU_I486_ON_386 options CPU_IORT options CPU_L2_LATENCY=5 options CPU_LOOP_EN options CPU_PPRO2CELERON options CPU_RSTK_EN options CPU_SOEKRIS options CPU_SUSP_HLT options CPU_UPGRADE_HW_CACHE options CPU_WT_ALLOC options CYRIX_CACHE_WORKS options CYRIX_CACHE_REALLY_WORKS #options NO_F00F_HACK # Debug options options NPX_DEBUG # enable npx debugging # # PERFMON causes the driver for Pentium/Pentium Pro performance counters # to be compiled. See perfmon(4) for more information. # options PERFMON # # XBOX causes the kernel to be bootable on the Microsoft XBox console system. # The resulting kernel will auto-detect whether it is being booted on an XBox, # so kernels compiled with this option will also work on an ordinary PC. # This option requires I686_CPU. # # xboxfb includes support for the XBox frame buffer device. It is fully USB- # keyboard aware, and will only be used if an xbox is detected. This option # (obviously) requires XBOX support in your kernel. # # NOTE: xboxfb currently conflicts with syscons(4); if you have an XBOX and # include both in your kernel, you will not get any video output. Ordinary # PCs do not suffer from this.
# options XBOX device xboxfb ##################################################################### # NETWORKING OPTIONS # # DEVICE_POLLING adds support for mixed interrupt-polling handling # of network device drivers, which has significant benefits in terms # of robustness to overloads and responsiveness, as well as permitting # accurate scheduling of the CPU time between kernel network processing # and other activities. The drawback is a moderate (up to 1/HZ seconds) # potential increase in response times. # It is strongly recommended to use HZ=1000 or 2000 with DEVICE_POLLING # to achieve smoother behaviour. # Additionally, you can enable/disable polling at runtime with the help of # the ifconfig(8) utility, and select the CPU fraction reserved for # userland with the sysctl variable kern.polling.user_frac # (default 50, range 0..100). # # Not all device drivers support this mode of operation at the time of # this writing. See polling(4) for more details. options DEVICE_POLLING # BPF_JITTER adds support for the BPF just-in-time compiler. options BPF_JITTER # OpenFabrics Enterprise Distribution (Infiniband). options OFED options OFED_DEBUG_INIT # Sockets Direct Protocol options SDP options SDP_DEBUG # IP over Infiniband options IPOIB options IPOIB_DEBUG options IPOIB_CM ##################################################################### # CLOCK OPTIONS # Provide read/write access to the memory in the clock chip. device nvram # Access to rtc cmos via /dev/nvram ##################################################################### # MISCELLANEOUS DEVICES AND OPTIONS device speaker #Play IBM BASIC-style noises out your speaker hint.speaker.0.at="isa" hint.speaker.0.port="0x61" device gzip #Exec gzipped a.out's. REQUIRES COMPAT_AOUT! device apm_saver # Requires APM ##################################################################### # HARDWARE BUS CONFIGURATION # # ISA bus # -device isa # Required by npx(4) +device isa # # Options for `isa': # # AUTO_EOI_1 enables the `automatic EOI' feature for the master 8259A # interrupt controller. This saves about 0.7-1.25 usec for each interrupt. # This option breaks suspend/resume on some portables. # # AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A # interrupt controller. This saves about 0.7-1.25 usec for each interrupt. # Automatic EOI is documented not to work for the slave with the # original i8259A, but it works for some clones and some integrated # versions. # # MAXMEM specifies the amount of RAM on the machine; if this is not # specified, FreeBSD will first read the amount of memory from the CMOS # RAM, so the amount of memory will initially be limited to 64MB or 16MB # depending on the BIOS. If the BIOS reports 64MB, a memory probe will # then attempt to detect the installed amount of RAM. If this probe # fails to detect >64MB RAM you will have to use the MAXMEM option. # The amount is in kilobytes, so for a machine with 128MB of RAM, it would # be 131072 (128 * 1024). # # BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to # reset the CPU for reboot. This is needed on some systems with broken # keyboard controllers. options AUTO_EOI_1 #options AUTO_EOI_2 options MAXMEM=(128*1024) #options BROKEN_KEYBOARD_RESET # # EISA bus # # The EISA bus device is `eisa'. It provides auto-detection and # configuration support for all devices on the EISA bus.
device eisa # By default, only 10 EISA slots are probed, since the slot numbers # above clash with the configuration address space of the PCI subsystem, # and the EISA probe is not very smart about this. This is sufficient # for most machines, but in particular the HP NetServer LC series comes # with an onboard AIC7770 dual-channel SCSI controller on EISA slot #11, # thus you need to bump this figure to 12 for them. options EISA_SLOTS=12 # # MCA bus: # # The MCA bus device is `mca'. It provides auto-detection and # configuration support for all devices on the MCA bus. # No hints are required for MCA. device mca # # AGP GART support device agp # AGP debugging. options AGP_DEBUG ##################################################################### # HARDWARE DEVICE CONFIGURATION # To include support for VGA VESA video modes options VESA # Turn on extra debugging checks and output for VESA support. options VESA_DEBUG device dpms # DPMS suspend & resume via VESA BIOS # x86 real mode BIOS emulator, required by atkbdc/dpms/vesa options X86BIOS # -# The Numeric Processing eXtension driver. This is non-optional. -device npx +# Hints for the non-optional Numeric Processing eXtension driver. hint.npx.0.flags="0x0" hint.npx.0.irq="13" # # `flags' for npx0: # 0x01 don't use the npx registers to optimize bcopy. # 0x02 don't use the npx registers to optimize bzero. # 0x04 don't use the npx registers to optimize copyin or copyout. # The npx registers are normally used to optimize copying and zeroing when # all of the following conditions are satisfied: # I586_CPU is an option # the cpu is an i586 (perhaps not a Pentium) # the probe for npx0 succeeds # INT 16 exception handling works. # Then copying and zeroing using the npx registers is normally 30-100% faster. # The flags can be used to control cases where it doesn't work or is slower. # Setting them at boot time using hints works right (the optimizations # are not used until later in the bootstrap when npx0 is attached). # Flag 0x08 automatically disables the i586 optimized routines. # # # Optional devices: # # PS/2 mouse device psm hint.psm.0.at="atkbdc" hint.psm.0.irq="12" # Options for psm: options PSM_HOOKRESUME #hook the system resume event, useful #for some laptops options PSM_RESETAFTERSUSPEND #reset the device at the resume event # The keyboard controller; it controls the keyboard and the PS/2 mouse. device atkbdc hint.atkbdc.0.at="isa" hint.atkbdc.0.port="0x060" # The AT keyboard device atkbd hint.atkbd.0.at="atkbdc" hint.atkbd.0.irq="1" # Options for atkbd: options ATKBD_DFLT_KEYMAP # specify the built-in keymap makeoptions ATKBD_DFLT_KEYMAP=fr.dvorak # `flags' for atkbd: # 0x01 Force detection of keyboard, else we always assume a keyboard # 0x02 Don't reset keyboard, useful for some newer ThinkPads # 0x03 Force detection and avoid reset, might help with certain # docking stations # 0x04 Old-style (XT) keyboard support, useful for older ThinkPads # Video card driver for VGA adapters. device vga hint.vga.0.at="isa" # Options for vga: # Try the following option if the mouse pointer is not drawn correctly # or the font does not seem to be loaded properly. May cause flicker on # some systems. options VGA_ALT_SEQACCESS # If you can dispense with some vga driver features, you may want to # use the following options to save some memory. #options VGA_NO_FONT_LOADING # don't save/load font #options VGA_NO_MODE_CHANGE # don't change video modes # Older video cards may require this option for proper operation.
options VGA_SLOW_IOACCESS # do byte-wide i/o's to TS and GDC regs # The following option probably won't work with the LCD displays. options VGA_WIDTH90 # support 90 column modes # Debugging. options VGA_DEBUG # vt(4) drivers. device vt_vga # Linear framebuffer driver for S3 VESA 1.2 cards. Works on top of VESA. device s3pci # 3Dfx Voodoo Graphics, Voodoo II /dev/3dfx CDEV support. This will create # the /dev/3dfx0 device to work with glide implementations. This should get # linked to /dev/3dfx and /dev/voodoo. Note that this is not the same as # the tdfx DRI module from XFree86 and is completely unrelated. # # To enable Linuxulator support, one must also include COMPAT_LINUX in the # config as well. The other option is to load both as modules. device tdfx # Enable 3Dfx Voodoo support device tdfx_linux # Enable Linuxulator support # # ACPI support using the Intel ACPI Component Architecture reference # implementation. # # ACPI_DEBUG enables the use of the debug.acpi.level and debug.acpi.layer # kernel environment variables to select initial debugging levels for the # Intel ACPICA code. (Note that the Intel code must also have USE_DEBUGGER # defined when it is built). device acpi options ACPI_DEBUG options ACPI_DMAR # ACPI WMI Mapping driver device acpi_wmi # ACPI Asus Extras (LCD backlight/brightness, video output, etc.) device acpi_asus # ACPI Fujitsu Extras (Buttons) device acpi_fujitsu # ACPI extras driver for HP laptops device acpi_hp # ACPI extras driver for IBM laptops device acpi_ibm # ACPI Panasonic Extras (LCD backlight/brightness, video output, etc.) device acpi_panasonic # ACPI Sony extra (LCD brightness) device acpi_sony # ACPI Toshiba Extras (LCD backlight/brightness, video output, etc.) device acpi_toshiba # ACPI Video Extensions (LCD backlight/brightness, video output, etc.) device acpi_video # ACPI Docking Station device acpi_dock # ACPI ASOC ATK0110 ASUSTeK AI Booster (voltage, temperature and fan sensors) device aibs # The cpufreq(4) driver provides support for non-ACPI CPU frequency control device cpufreq # Direct Rendering modules for 3D acceleration. device drm # DRM core module required by DRM drivers device i915drm # Intel i830 through i915 device mach64drm # ATI Rage Pro, Rage Mobility P/M, Rage XL device mgadrm # AGP Matrox G200, G400, G450, G550 device r128drm # ATI Rage 128 device radeondrm # ATI Radeon device savagedrm # S3 Savage3D, Savage4 device sisdrm # SiS 300/305, 540, 630 device tdfxdrm # 3dfx Voodoo 3/4/5 and Banshee device viadrm # VIA options DRM_DEBUG # Include debug printfs (slow) # # mse: Logitech and ATI InPort bus mouse ports device mse hint.mse.0.at="isa" hint.mse.0.port="0x23c" hint.mse.0.irq="5" # # Network interfaces: # # bxe: Broadcom NetXtreme II (BCM5771X/BCM578XX) PCIe 10Gb Ethernet # adapters. 
# ce: Cronyx Tau-PCI/32 sync single/dual port G.703/E1 serial adaptor # with 32 HDLC subchannels (requires sppp (default), or NETGRAPH if # NETGRAPH_CRONYX is configured) # cp: Cronyx Tau-PCI sync single/dual/four port # V.35/RS-232/RS-530/RS-449/X.21/G.703/E1/E3/T3/STS-1 # serial adaptor (requires sppp (default), or NETGRAPH if # NETGRAPH_CRONYX is configured) # cs: IBM Etherjet and other Crystal Semi CS89x0-based adapters # ctau: Cronyx Tau sync dual port V.35/RS-232/RS-530/RS-449/X.21/G.703/E1 # serial adaptor (requires sppp (default), or NETGRAPH if # NETGRAPH_CRONYX is configured) # ed: Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503 # HP PC Lan+, various PC Card devices # (requires miibus) # ie: AT&T StarLAN 10 and EN100; 3Com 3C507; unknown NI5210; # Intel EtherExpress # ipw: Intel PRO/Wireless 2100 IEEE 802.11 adapter # iwi: Intel PRO/Wireless 2200BG/2225BG/2915ABG IEEE 802.11 adapters # Requires the iwi firmware module # iwn: Intel Wireless WiFi Link 1000/105/135/2000/4965/5000/6000/6050 abgn # 802.11 network adapters # Requires the iwn firmware module # mlx4ib: Mellanox ConnectX HCA InfiniBand # mlx4en: Mellanox ConnectX HCA Ethernet # mthca: Mellanox HCA InfiniBand # nfe: nVidia nForce MCP on-board Ethernet Networking (BSD open source) # sbni: Granch SBNI12-xx ISA and PCI adapters # vmx: VMware VMXNET3 Ethernet (BSD open source) # wl: Lucent Wavelan (ISA card only). # wpi: Intel 3945ABG Wireless LAN controller # Requires the wpi firmware module # Order for ISA/EISA devices is important here device bxe # Broadcom NetXtreme II BCM5771X/BCM578XX 10GbE device ce device cp device cs # Crystal Semiconductor CS89x0 NIC hint.cs.0.at="isa" hint.cs.0.port="0x300" device ctau hint.ctau.0.at="isa" hint.ctau.0.port="0x240" hint.ctau.0.irq="15" hint.ctau.0.drq="7" #options NETGRAPH_CRONYX # Enable NETGRAPH support for Cronyx adapter(s) device ed # NE[12]000, SMC Ultra, 3c503, DS8390 cards options ED_3C503 options ED_HPP options ED_SIC hint.ed.0.at="isa" hint.ed.0.port="0x280" hint.ed.0.irq="5" hint.ed.0.maddr="0xd8000" device ipw # Intel 2100 wireless NICs. device iwi # Intel 2200BG/2225BG/2915ABG wireless NICs. device iwn # Intel 4965/1000/5000/6000 wireless NICs. # Hint for the i386-only ISA front-end of le(4). hint.le.0.at="isa" hint.le.0.port="0x280" hint.le.0.irq="10" hint.le.0.drq="0" device mlx4 # Shared code module between IB and Ethernet device mlx4ib # Mellanox ConnectX HCA InfiniBand device mlx4en # Mellanox ConnectX HCA Ethernet device mthca # Mellanox HCA InfiniBand device nfe # nVidia nForce MCP on-board Ethernet device sbni hint.sbni.0.at="isa" hint.sbni.0.port="0x210" hint.sbni.0.irq="0xefdead" hint.sbni.0.flags="0" device vmx # VMware VMXNET3 Ethernet device wpi # Intel 3945ABG wireless NICs. 
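# Note: rather than compiling a firmware image into the kernel, the
# corresponding module can be loaded at boot. As a minimal sketch
# (assuming the stock loader.conf(5) "_load" convention and the module
# names shipped with the drivers above), e.g.:
#
#	if_iwn_load="YES"	# iwn(4) driver
#	iwn5000fw_load="YES"	# 5000-series firmware image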
# IEEE 802.11 adapter firmware modules # Intel PRO/Wireless 2100 firmware: # ipwfw: BSS/IBSS/monitor mode firmware # ipwbssfw: BSS mode firmware # ipwibssfw: IBSS mode firmware # ipwmonitorfw: Monitor mode firmware # Intel PRO/Wireless 2200BG/2225BG/2915ABG firmware: # iwifw: BSS/IBSS/monitor mode firmware # iwibssfw: BSS mode firmware # iwiibssfw: IBSS mode firmware # iwimonitorfw: Monitor mode firmware # Intel Wireless WiFi Link 4965/1000/5000/6000 series firmware: # iwnfw: Single module to support all devices # iwn1000fw: Specific module for the 1000 only # iwn105fw: Specific module for the 105 only # iwn135fw: Specific module for the 135 only # iwn2000fw: Specific module for the 2000 only # iwn2030fw: Specific module for the 2030 only # iwn4965fw: Specific module for the 4965 only # iwn5000fw: Specific module for the 5000 only # iwn5150fw: Specific module for the 5150 only # iwn6000fw: Specific module for the 6000 only # iwn6000g2afw: Specific module for the 6000g2a only # iwn6000g2bfw: Specific module for the 6000g2b only # iwn6050fw: Specific module for the 6050 only # wpifw: Intel 3945ABG Wireless LAN Controller firmware device iwifw device iwibssfw device iwiibssfw device iwimonitorfw device ipwfw device ipwbssfw device ipwibssfw device ipwmonitorfw device iwnfw device iwn1000fw device iwn105fw device iwn135fw device iwn2000fw device iwn2030fw device iwn4965fw device iwn5000fw device iwn5150fw device iwn6000fw device iwn6000g2afw device iwn6000g2bfw device iwn6050fw device wpifw # # ATA RAID adapters # device pst # # Areca 11xx and 12xx series of SATA II RAID controllers. # CAM is required. # device arcmsr # Areca SATA II RAID # # 3ware 9000 series PATA/SATA RAID controller driver and options. # The driver is implemented as a SIM, and so needs the CAM infrastructure. # options TWA_DEBUG # 0-10; 10 prints the most messages. options TWA_FLASH_FIRMWARE # firmware image bundled when defined. device twa # 3ware 9000 series PATA/SATA RAID # # SCSI host adapters: # # ncv: NCR 53C500 based SCSI host adapters. # nsp: Workbit Ninja SCSI-3 based PC Card SCSI host adapters. # stg: TMC 18C30, 18C50 based SCSI host adapters. device ncv device nsp device stg hint.stg.0.at="isa" hint.stg.0.port="0x140" hint.stg.0.irq="11" # # Adaptec FSA RAID controllers, including integrated DELL controllers, # the Dell PERC 2/QC and the HP NetRAID-4M device aac device aacp # SCSI Passthrough interface (optional, CAM required) # # Adaptec by PMC RAID controllers, Series 6/7/8 and upcoming families device aacraid # Container interface, CAM required # # Highpoint RocketRAID 27xx. device hpt27xx # # Highpoint RocketRAID 182x. device hptmv # # Highpoint DC7280 and R750. device hptnr # # Highpoint RocketRAID. Supports RR172x, RR222x, RR2240, RR232x, RR2340, # RR2210, RR174x, RR2522, RR231x, RR230x.
device hptrr # # Highpoint RocketRAID 3xxx series SATA RAID device hptiop # # IBM (now Adaptec) ServeRAID controllers device ips # # Intel C600 (Patsburg) integrated SAS controller device isci options ISCI_LOGGING # enable debugging in isci HAL # # NVM Express (NVMe) support device nvme # base NVMe driver device nvd # expose NVMe namespaces as disks, depends on nvme # # PMC-Sierra SAS/SATA controller device pmspcv # # SafeNet crypto driver: can be moved to the MI NOTES as soon as # it's tested on a big-endian machine # device safe # SafeNet 1141 options SAFE_DEBUG # enable debugging support: hw.safe.debug options SAFE_RNDTEST # enable rndtest support # # glxiic is an I2C driver for the AMD Geode LX CS5536 System Management Bus # controller. Requires 'device iicbus'. # device glxiic # AMD Geode LX CS5536 System Management Bus # # glxsb is a driver for the Security Block in AMD Geode LX processors. # Requires 'device crypto'. # device glxsb # AMD Geode LX Security Block # # VirtIO support # # The virtio entry provides a generic bus for use by the device drivers. # It must be combined with an interface that communicates with the host. # Multiple such interfaces are defined by the VirtIO specification. FreeBSD # only has support for PCI. Therefore, virtio_pci must be statically # compiled in or loaded as a module for the device drivers to function. # device virtio # Generic VirtIO bus (required) device virtio_pci # VirtIO PCI Interface device vtnet # VirtIO Ethernet device device virtio_blk # VirtIO Block device device virtio_scsi # VirtIO SCSI device device virtio_balloon # VirtIO Memory Balloon device device virtio_random # VirtIO Entropy device device virtio_console # VirtIO Console device device hyperv # HyperV drivers ##################################################################### # # Miscellaneous hardware: # # apm: Laptop Advanced Power Management (experimental) # ipmi: Intelligent Platform Management Interface # smapi: System Management Application Program Interface driver # smbios: DMI/SMBIOS entry point # vpd: Vital Product Data kernel interface # pmtimer: Adjust system timer at wakeup time # pbio: Parallel (8255 PPI) basic I/O (mode 0) port (e.g. Advantech PCL-724) # asmc: Apple System Management Controller # si: Specialix International SI/XIO or SX intelligent serial card driver # tpm: Trusted Platform Module # Notes on APM # The flags take the following meaning for apm0: # 0x0020 Statclock is broken. # Notes on the Specialix SI/XIO driver: # The host card is memory, not IO mapped. # The Rev 1 host cards use a 64K chunk, on a 32K boundary. # The Rev 2 host cards use a 32K chunk, on a 32K boundary. # The cards can use an IRQ of 11, 12 or 15. # Notes on the Sony Programmable I/O controller # This is a temporary driver that should someday be replaced by something # that hooks into the ACPI layer. The device is hooked to the PIIX4's # General Device 10 decoder, which means you have to fiddle with PCI # registers to map it in, even though it is otherwise treated here as # an ISA device. At the moment, the driver polls, although the device # is capable of generating interrupts. It is largely undocumented. # The port location in the hint is where you WANT the device to be # mapped. 0x10a0 seems to be traditional. At the moment the jogdial # is the only thing truly supported, but apparently a fair percentage # of the Vaio extra features are controlled by this device.
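# (The device hints below need not be compiled in; as a sketch, assuming
# the standard loader(8)/device.hints(5) mechanism, the same values can
# be supplied at the loader prompt, e.g. "set hint.pbio.0.port=0x360".)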
device apm hint.apm.0.flags="0x20" device ipmi device smapi device smbios device vpd device pmtimer device pbio hint.pbio.0.at="isa" hint.pbio.0.port="0x360" device asmc device tpm device padlock_rng # VIA Padlock RNG device rdrand_rng # Intel Bull Mountain RNG device aesni # AES-NI OpenCrypto module # # Laptop/Notebook options: # # See also: # apm under `Miscellaneous hardware' # above. # For older notebooks that signal a powerfail condition (external # power supply dropped, or battery state low) by issuing an NMI: options POWERFAIL_NMI # make it beep instead of panicking # # I2C Bus # # Philips i2c bus support is provided by the `iicbus' device. # # Supported interfaces: # pcf Philips PCF8584 ISA-bus controller # device pcf hint.pcf.0.at="isa" hint.pcf.0.port="0x320" hint.pcf.0.irq="5" # # Hardware watchdog timers: # # ichwd: Intel ICH watchdog timer # amdsbwd: AMD SB7xx watchdog timer # viawd: VIA south bridge watchdog timer # wbwd: Winbond watchdog timer # device ichwd device amdsbwd device viawd device wbwd # # Temperature sensors: # # coretemp: on-die sensor on Intel Core and newer CPUs # amdtemp: on-die sensor on AMD K8/K10/K11 CPUs # device coretemp device amdtemp # # CPU control pseudo-device. Provides access to MSRs, CPUID info and # microcode update feature. # device cpuctl # # System Management Bus (SMB) # options ENABLE_ALART # Control alarm on Intel intpm driver # # Set the number of PV entries per process. Increasing this can # stop panics related to heavy use of shared memory. However, that can # (combined with large amounts of physical memory) cause panics at # boot time due to the kernel running out of VM space. # # If you're tweaking this, you might also want to increase the sysctls # "vm.v_free_min", "vm.v_free_reserved", and "vm.v_free_target". # # The value below is one more than the default. # options PMAP_SHPGPERPROC=201 # # Change the size of the kernel virtual address space. Due to # constraints in loader(8) on i386, this must be a multiple of 4. # 256 = 1 GB of kernel address space. Increasing this also causes # a reduction of the address space in user processes. 512 splits # the 4GB cpu address space in half (2GB user, 2GB kernel). For PAE # kernels, the value will need to be double the non-PAE value. A value of 1024 # for PAE kernels is necessary to split the address space in half. # This will likely need to be increased to handle memory sizes >4GB. # PAE kernels default to a value of 512. # options KVA_PAGES=260 # # Number of initial kernel page table pages used for early bootstrap. # This number should include enough pages to map the kernel, any # modules or other data loaded with the kernel by the loader, and data # structures allocated before the VM system is initialized such as the # vm_page_t array. Each page table page maps 4MB (2MB with PAE). # options NKPT=31 ##################################################################### # ABI Emulation # Enable iBCS2 runtime support for SCO and ISC binaries #options IBCS2 # Emulate spx device for client side of SVR3 local X interface options SPX_HACK # Enable 32-bit runtime support for CloudABI binaries.
options COMPAT_CLOUDABI32 # Enable Linux ABI emulation options COMPAT_LINUX # Enable i386 a.out binary support options COMPAT_AOUT # Enable the linux-like proc filesystem support (requires COMPAT_LINUX # and PSEUDOFS) options LINPROCFS # Enable the linux-like sys filesystem support (requires COMPAT_LINUX # and PSEUDOFS) options LINSYSFS # # SysVR4 ABI emulation # # The svr4 ABI emulator can be statically compiled into the kernel or loaded as # a KLD module. # The STREAMS network emulation code can also be compiled statically or as a # module. If loaded as a module, it must be loaded before the svr4 module # (the /usr/sbin/svr4 script does this for you). If compiling statically, # the `streams' device must be configured into any kernel which also # specifies COMPAT_SVR4. It is possible to have a statically-configured # STREAMS device and a dynamically loadable svr4 emulator; the /usr/sbin/svr4 # script understands that it doesn't need to load the `streams' module under # those circumstances. # Caveat: At this time, `options KTRACE' is required for the svr4 emulator # (whether static or dynamic). # options COMPAT_SVR4 # build emulator statically options DEBUG_SVR4 # enable verbose debugging device streams # STREAMS network driver (required for svr4). # Enable NDIS binary driver support options NDISAPI device ndis ##################################################################### # VM OPTIONS # Disable the 4 MByte page PSE CPU feature. The PSE feature allows the # kernel to use 4 MByte pages to map the kernel instead of 4k pages. # This saves on the amount of memory needed for the page tables used to # map the kernel. You should only disable this feature as a temporary # workaround if you are having problems with it enabled. # #options DISABLE_PSE # Disable the global pages PGE CPU feature. The PGE feature allows pages # to be marked with the PG_G bit. TLB entries for these pages are not # flushed from the cache when %cr3 is reloaded. This can make context # switches less expensive. You should only disable this feature as a # temporary workaround if you are having problems with it enabled. # #options DISABLE_PG_G # KSTACK_PAGES is the number of memory pages to assign to the kernel # stack of each thread. options KSTACK_PAGES=3 # Enable detailed accounting by the PV entry allocator. options PV_STATS ##################################################################### # More undocumented options for linting. # Note that documenting these is not considered an affront. options FB_INSTALL_CDEV # install a CDEV entry in /dev options I586_PMC_GUPROF=0x70000 options KBDIO_DEBUG=2 options KBD_MAXRETRY=4 options KBD_MAXWAIT=6 options KBD_RESETDELAY=201 options PSM_DEBUG=1 options TIMER_FREQ=((14318182+6)/12) options VM_KMEM_SIZE options VM_KMEM_SIZE_MAX options VM_KMEM_SIZE_SCALE Index: projects/ipsec/sys/i386/i386/elf_machdep.c =================================================================== --- projects/ipsec/sys/i386/i386/elf_machdep.c (revision 313186) +++ projects/ipsec/sys/i386/i386/elf_machdep.c (revision 313187) @@ -1,289 +1,281 @@ /*- * Copyright 1996-1998 John D. Polstra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) -#define CPU_ENABLE_SSE -#endif - struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 | SV_SHP | SV_TIMEKEEP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); static Elf32_Brandinfo freebsd_brand_oinfo = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/usr/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_oinfo); static Elf32_Brandinfo kfreebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_386, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/lib/ld.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = NULL, .brand_note = &elf32_kfreebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY }; SYSINIT(kelf32, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t) 
elf32_insert_brand_entry, &kfreebsd_brand_info); void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { -#ifdef CPU_ENABLE_SSE void *buf; -#endif size_t len; len = 0; -#ifdef CPU_ENABLE_SSE if (use_xsave) { if (dst != NULL) { npxgetregs(td); len += elf32_populate_note(NT_X86_XSTATE, get_pcb_user_save_td(td), dst, cpu_max_ext_state_size, &buf); *(uint64_t *)((char *)buf + X86_XSTATE_XCR0_OFFSET) = xsave_mask; } else len += elf32_populate_note(NT_X86_XSTATE, NULL, NULL, cpu_max_ext_state_size, NULL); } -#endif *off = len; } /* Process one elf relocation with addend. */ static int elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, int local, elf_lookup_fn lookup) { Elf_Addr *where; Elf_Addr addr; Elf_Addr addend; Elf_Word rtype, symidx; const Elf_Rel *rel; const Elf_Rela *rela; int error; switch (type) { case ELF_RELOC_REL: rel = (const Elf_Rel *)data; where = (Elf_Addr *) (relocbase + rel->r_offset); addend = *where; rtype = ELF_R_TYPE(rel->r_info); symidx = ELF_R_SYM(rel->r_info); break; case ELF_RELOC_RELA: rela = (const Elf_Rela *)data; where = (Elf_Addr *) (relocbase + rela->r_offset); addend = rela->r_addend; rtype = ELF_R_TYPE(rela->r_info); symidx = ELF_R_SYM(rela->r_info); break; default: panic("unknown reloc type %d\n", type); } if (local) { if (rtype == R_386_RELATIVE) { /* A + B */ addr = elf_relocaddr(lf, relocbase + addend); if (*where != addr) *where = addr; } return (0); } switch (rtype) { case R_386_NONE: /* none */ break; case R_386_32: /* S + A */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; addr += addend; if (*where != addr) *where = addr; break; case R_386_PC32: /* S + A - P */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; addr += addend - (Elf_Addr)where; if (*where != addr) *where = addr; break; case R_386_COPY: /* none */ /* * There shouldn't be copy relocations in kernel * objects. */ printf("kldload: unexpected R_COPY relocation\n"); return -1; break; case R_386_GLOB_DAT: /* S */ error = lookup(lf, symidx, 1, &addr); if (error != 0) return -1; if (*where != addr) *where = addr; break; case R_386_RELATIVE: break; default: printf("kldload: unexpected relocation type %d\n", rtype); return -1; } return(0); } int elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup)); } int elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data, int type, elf_lookup_fn lookup) { return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup)); } int elf_cpu_load_file(linker_file_t lf __unused) { return (0); } int elf_cpu_unload_file(linker_file_t lf __unused) { return (0); } Index: projects/ipsec/sys/i386/i386/exception.s =================================================================== --- projects/ipsec/sys/i386/i386/exception.s (revision 313186) +++ projects/ipsec/sys/i386/i386/exception.s (revision 313187) @@ -1,482 +1,481 @@ /*- * Copyright (c) 1989, 1990 William F. Jolitz. * Copyright (c) 1990 The Regents of the University of California. * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_apic.h" #include "opt_atpic.h" #include "opt_hwpmc_hooks.h" -#include "opt_npx.h" #include #include #include #include "assym.s" #define SEL_RPL_MASK 0x0003 #define GSEL_KPL 0x0020 /* GSEL(GCODE_SEL, SEL_KPL) */ #ifdef KDTRACE_HOOKS .bss .globl dtrace_invop_jump_addr .align 4 .type dtrace_invop_jump_addr, @object .size dtrace_invop_jump_addr, 4 dtrace_invop_jump_addr: .zero 4 .globl dtrace_invop_calltrap_addr .align 4 .type dtrace_invop_calltrap_addr, @object .size dtrace_invop_calltrap_addr, 4 dtrace_invop_calltrap_addr: .zero 8 #endif .text #ifdef HWPMC_HOOKS ENTRY(start_exceptions) #endif /*****************************************************************************/ /* Trap handling */ /*****************************************************************************/ /* * Trap and fault vector routines. * * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on * the stack that mostly looks like an interrupt, but does not disable * interrupts. A few of the traps we use are interrupt gates, * SDT_SYS386IGT, which are nearly the same thing except interrupts are * disabled on entry. * * The cpu will push a certain amount of state onto the kernel stack for * the current process. The amount of state depends on the type of trap * and whether the trap crossed rings or not. See i386/include/frame.h. * At the very least the current EFLAGS (status register, which includes * the interrupt disable state prior to the trap), the code segment register, * and the return instruction pointer are pushed by the cpu. The cpu * will also push an 'error' code for certain traps. We push a dummy * error code for those traps where the cpu doesn't, in order to maintain * a consistent frame. We also push a contrived 'trap number'. * * The cpu does not push the general registers, we must do that, and we * must restore them prior to calling 'iret'. The cpu adjusts the %cs and * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we * must load them with appropriate values for supervisor mode operation.
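 *
 * As a rough sketch of the result (see i386/include/frame.h for the
 * authoritative layout), the completed trap frame, from low to high
 * addresses, is:
 *
 *	tf_fs, tf_es, tf_ds		pushed below, reloaded for the kernel
 *	tf_edi ... tf_eax		pushed by pushal
 *	tf_trapno			the contrived trap number we push
 *	tf_err				CPU error code, or the dummy we push
 *	tf_eip, tf_cs, tf_eflags	pushed by the cpu
 *	tf_esp, tf_ss			only if the trap crossed rings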
*/ MCOUNT_LABEL(user) MCOUNT_LABEL(btrap) #define TRAP(a) pushl $(a) ; jmp alltraps IDTVEC(div) pushl $0; TRAP(T_DIVIDE) IDTVEC(dbg) pushl $0; TRAP(T_TRCTRAP) IDTVEC(nmi) pushl $0; TRAP(T_NMI) IDTVEC(bpt) pushl $0; TRAP(T_BPTFLT) IDTVEC(dtrace_ret) pushl $0; TRAP(T_DTRACE_RET) IDTVEC(ofl) pushl $0; TRAP(T_OFLOW) IDTVEC(bnd) pushl $0; TRAP(T_BOUND) #ifndef KDTRACE_HOOKS IDTVEC(ill) pushl $0; TRAP(T_PRIVINFLT) #endif IDTVEC(dna) pushl $0; TRAP(T_DNA) IDTVEC(fpusegm) pushl $0; TRAP(T_FPOPFLT) IDTVEC(tss) TRAP(T_TSSFLT) IDTVEC(missing) TRAP(T_SEGNPFLT) IDTVEC(stk) TRAP(T_STKFLT) IDTVEC(prot) TRAP(T_PROTFLT) IDTVEC(page) TRAP(T_PAGEFLT) IDTVEC(mchk) pushl $0; TRAP(T_MCHK) IDTVEC(rsvd) pushl $0; TRAP(T_RESERVED) IDTVEC(fpu) pushl $0; TRAP(T_ARITHTRAP) IDTVEC(align) TRAP(T_ALIGNFLT) IDTVEC(xmm) pushl $0; TRAP(T_XMMFLT) /* * All traps except ones for syscalls jump to alltraps. If * interrupts were enabled when the trap occurred, then interrupts * are enabled now if the trap was through a trap gate, else * disabled if the trap was through an interrupt gate. Note that * int0x80_syscall is a trap gate. Interrupt gates are used by * page faults, non-maskable interrupts, debug and breakpoint * exceptions. */ SUPERALIGN_TEXT .globl alltraps .type alltraps,@function alltraps: pushal pushl $0 movw %ds,(%esp) pushl $0 movw %es,(%esp) pushl $0 movw %fs,(%esp) alltraps_with_regs_pushed: SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) calltrap: pushl %esp call trap add $4, %esp /* * Return via doreti to handle ASTs. */ MEXITCOUNT jmp doreti /* * Privileged instruction fault. */ #ifdef KDTRACE_HOOKS SUPERALIGN_TEXT IDTVEC(ill) /* Check if there is no DTrace hook registered. */ cmpl $0,dtrace_invop_jump_addr je norm_ill /* Check if this is a user fault. */ cmpl $GSEL_KPL, 4(%esp) /* Check the code segment. */ /* If so, just handle it as a normal trap. */ jne norm_ill /* * This is a kernel instruction fault that might have been caused * by a DTrace provider. */ pushal /* Push all registers onto the stack. */ /* * Set our jump address for the jump back in the event that * the exception wasn't caused by DTrace at all. */ movl $norm_ill, dtrace_invop_calltrap_addr /* Jump to the code hooked in by DTrace. */ jmpl *dtrace_invop_jump_addr /* * Process the instruction fault in the normal way. */ norm_ill: pushl $0 TRAP(T_PRIVINFLT) #endif /* * Call gate entry for syscalls (lcall 7,0). * This is used by FreeBSD 1.x a.out executables and "old" NetBSD executables. * * The intersegment call has been set up to specify one dummy parameter. * This leaves a place to put eflags so that the call frame can be * converted to a trap frame. Note that the eflags is (semi-)bogusly * pushed into (what will be) tf_err and then copied later into the * final spot. It has to be done this way because esp can't be just * temporarily altered for the pushfl - an interrupt might come in * and clobber the saved cs/eip. */ SUPERALIGN_TEXT IDTVEC(lcall_syscall) pushfl /* save eflags */ popl 8(%esp) /* shuffle into tf_eflags */ pushl $7 /* sizeof "lcall 7,0" */ pushl $0 /* tf_trapno */ pushal pushl $0 movw %ds,(%esp) pushl $0 movw %es,(%esp) pushl $0 movw %fs,(%esp) SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) pushl %esp call syscall add $4, %esp MEXITCOUNT jmp doreti /* * Trap gate entry for syscalls (int 0x80). * This is used by FreeBSD ELF executables, "new" NetBSD executables, and all * Linux executables. * * Even though the name says 'int0x80', this is actually a trap gate, not an * interrupt gate. 
Thus interrupts are enabled on entry just as they are for * a normal syscall. */ SUPERALIGN_TEXT IDTVEC(int0x80_syscall) pushl $2 /* sizeof "int 0x80" */ pushl $0 /* tf_trapno */ pushal pushl $0 movw %ds,(%esp) pushl $0 movw %es,(%esp) pushl $0 movw %fs,(%esp) SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) pushl %esp call syscall add $4, %esp MEXITCOUNT jmp doreti ENTRY(fork_trampoline) pushl %esp /* trapframe pointer */ pushl %ebx /* arg1 */ pushl %esi /* function */ call fork_exit addl $12,%esp /* cut from syscall */ /* * Return via doreti to handle ASTs. */ MEXITCOUNT jmp doreti /* * To efficiently implement classification of trap and interrupt handlers * for profiling, there must be only trap handlers between the labels btrap * and bintr, and only interrupt handlers between the labels bintr and * eintr. This is implemented (partly) by including files that contain * some of the handlers. Before including the files, set up a normal asm * environment so that the included files don't need to know that they are * included. */ .data .p2align 4 .text SUPERALIGN_TEXT MCOUNT_LABEL(bintr) #ifdef DEV_ATPIC #include #endif #if defined(DEV_APIC) && defined(DEV_ATPIC) .data .p2align 4 .text SUPERALIGN_TEXT #endif #ifdef DEV_APIC #include #endif .data .p2align 4 .text SUPERALIGN_TEXT #include .text MCOUNT_LABEL(eintr) /* * void doreti(struct trapframe) * * Handle return from interrupts, traps and syscalls. */ .text SUPERALIGN_TEXT .type doreti,@function .globl doreti doreti: FAKE_MCOUNT($bintr) /* init "from" bintr -> doreti */ doreti_next: /* * Check if ASTs can be handled now. ASTs cannot be safely * processed when returning from an NMI. */ cmpb $T_NMI,TF_TRAPNO(%esp) #ifdef HWPMC_HOOKS je doreti_nmi #else je doreti_exit #endif /* * PSL_VM must be checked first since segment registers only * have an RPL in non-VM86 mode. * ASTs cannot be handled now if we are in a vm86 call. */ testl $PSL_VM,TF_EFLAGS(%esp) jz doreti_notvm86 movl PCPU(CURPCB),%ecx testl $PCB_VM86CALL,PCB_FLAGS(%ecx) jz doreti_ast jmp doreti_exit doreti_notvm86: testb $SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */ jz doreti_exit /* can't handle ASTs now if not */ doreti_ast: /* * Check for ASTs atomically with returning. Disabling CPU * interrupts provides sufficient locking even in the SMP case, * since we will be informed of any new ASTs by an IPI. */ cli movl PCPU(CURTHREAD),%eax testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%eax) je doreti_exit sti pushl %esp /* pass a pointer to the trapframe */ call ast add $4,%esp jmp doreti_ast /* * doreti_exit: pop registers, iret. * * The segment register pop is a special case, since it may * fault if (for example) a sigreturn specifies bad segment * registers. The fault is handled in trap.c. */ doreti_exit: MEXITCOUNT .globl doreti_popl_fs doreti_popl_fs: popl %fs .globl doreti_popl_es doreti_popl_es: popl %es .globl doreti_popl_ds doreti_popl_ds: popl %ds popal addl $8,%esp .globl doreti_iret doreti_iret: iret /* * doreti_iret_fault and friends. Alternative return code for * the case where we get a fault in the doreti_exit code * above. trap() (i386/i386/trap.c) catches this specific * case, sends the process a signal and continues in the * corresponding place in the code below.
*/ ALIGN_TEXT .globl doreti_iret_fault doreti_iret_fault: subl $8,%esp pushal pushl $0 movw %ds,(%esp) .globl doreti_popl_ds_fault doreti_popl_ds_fault: pushl $0 movw %es,(%esp) .globl doreti_popl_es_fault doreti_popl_es_fault: pushl $0 movw %fs,(%esp) .globl doreti_popl_fs_fault doreti_popl_fs_fault: sti movl $0,TF_ERR(%esp) /* XXX should be the error code */ movl $T_PROTFLT,TF_TRAPNO(%esp) jmp alltraps_with_regs_pushed #ifdef HWPMC_HOOKS doreti_nmi: /* * Since we are returning from an NMI, check if the current trap * was from user mode and if so whether the current thread * needs a user call chain capture. */ testb $SEL_RPL_MASK,TF_CS(%esp) jz doreti_exit movl PCPU(CURTHREAD),%eax /* curthread present? */ orl %eax,%eax jz doreti_exit testl $TDP_CALLCHAIN,TD_PFLAGS(%eax) /* flagged for capture? */ jz doreti_exit /* * Take the processor out of NMI mode by executing a fake "iret". */ pushfl pushl %cs pushl $outofnmi iret outofnmi: /* * Call the callchain capture hook after turning interrupts back on. */ movl pmc_hook,%ecx orl %ecx,%ecx jz doreti_exit pushl %esp /* frame pointer */ pushl $PMC_FN_USER_CALLCHAIN /* command */ movl PCPU(CURTHREAD),%eax pushl %eax /* curthread */ sti call *%ecx addl $12,%esp jmp doreti_ast ENTRY(end_exceptions) #endif Index: projects/ipsec/sys/i386/i386/initcpu.c =================================================================== --- projects/ipsec/sys/i386/i386/initcpu.c (revision 313186) +++ projects/ipsec/sys/i386/i386/initcpu.c (revision 313187) @@ -1,989 +1,983 @@ /*- * Copyright (c) KATO Takenori, 1997, 1998. * * All rights reserved. Unpublished rights reserved under the copyright * laws of Japan. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer as * the first lines of this file unmodified. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include -#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) -#define CPU_ENABLE_SSE -#endif - #ifdef I486_CPU static void init_5x86(void); static void init_bluelightning(void); static void init_486dlc(void); static void init_cy486dx(void); #ifdef CPU_I486_ON_386 static void init_i486_on_386(void); #endif static void init_6x86(void); #endif /* I486_CPU */ #if defined(I586_CPU) && defined(CPU_WT_ALLOC) static void enable_K5_wt_alloc(void); static void enable_K6_wt_alloc(void); static void enable_K6_2_wt_alloc(void); #endif #ifdef I686_CPU static void init_6x86MX(void); static void init_ppro(void); static void init_mendocino(void); #endif static int hw_instruction_sse; SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD, &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU"); /* * -1: automatic (default) * 0: keep CLFLUSH enabled * 1: force CLFLUSH disabled */ static int hw_clflush_disable = -1; u_int cyrix_did; /* Device ID of Cyrix CPU */ #ifdef I486_CPU /* * IBM Blue Lightning */ static void init_bluelightning(void) { register_t saveintr; saveintr = intr_disable(); load_cr0(rcr0() | CR0_CD | CR0_NW); invd(); #ifdef CPU_BLUELIGHTNING_FPU_OP_CACHE wrmsr(0x1000, 0x9c92LL); /* FP operand can be cacheable on Cyrix FPU */ #else wrmsr(0x1000, 0x1c92LL); /* Intel FPU */ #endif /* Enables 13MB and 0-640KB cache. */ wrmsr(0x1001, (0xd0LL << 32) | 0x3ff); #ifdef CPU_BLUELIGHTNING_3X wrmsr(0x1002, 0x04000000LL); /* Enables triple-clock mode. */ #else wrmsr(0x1002, 0x03000000LL); /* Enables double-clock mode. */ #endif /* Enable caching in CR0. */ load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ invd(); intr_restore(saveintr); } /* * Cyrix 486SLC/DLC/SR/DR series */ static void init_486dlc(void) { register_t saveintr; u_char ccr0; saveintr = intr_disable(); invd(); ccr0 = read_cyrix_reg(CCR0); #ifndef CYRIX_CACHE_WORKS ccr0 |= CCR0_NC1 | CCR0_BARB; write_cyrix_reg(CCR0, ccr0); invd(); #else ccr0 &= ~CCR0_NC0; #ifndef CYRIX_CACHE_REALLY_WORKS ccr0 |= CCR0_NC1 | CCR0_BARB; #else ccr0 |= CCR0_NC1; #endif #ifdef CPU_DIRECT_MAPPED_CACHE ccr0 |= CCR0_CO; /* Direct mapped mode. */ #endif write_cyrix_reg(CCR0, ccr0); /* Clear non-cacheable region. */ write_cyrix_reg(NCR1+2, NCR_SIZE_0K); write_cyrix_reg(NCR2+2, NCR_SIZE_0K); write_cyrix_reg(NCR3+2, NCR_SIZE_0K); write_cyrix_reg(NCR4+2, NCR_SIZE_0K); write_cyrix_reg(0, 0); /* dummy write */ /* Enable caching in CR0. */ load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ invd(); #endif /* !CYRIX_CACHE_WORKS */ intr_restore(saveintr); } /* * Cyrix 486S/DX series */ static void init_cy486dx(void) { register_t saveintr; u_char ccr2; saveintr = intr_disable(); invd(); ccr2 = read_cyrix_reg(CCR2); #ifdef CPU_SUSP_HLT ccr2 |= CCR2_SUSP_HLT; #endif write_cyrix_reg(CCR2, ccr2); intr_restore(saveintr); } /* * Cyrix 5x86 */ static void init_5x86(void) { register_t saveintr; u_char ccr2, ccr3, ccr4, pcr0; saveintr = intr_disable(); load_cr0(rcr0() | CR0_CD | CR0_NW); wbinvd(); (void)read_cyrix_reg(CCR3); /* dummy */ /* Initialize CCR2. */ ccr2 = read_cyrix_reg(CCR2); ccr2 |= CCR2_WB; #ifdef CPU_SUSP_HLT ccr2 |= CCR2_SUSP_HLT; #else ccr2 &= ~CCR2_SUSP_HLT; #endif ccr2 |= CCR2_WT1; write_cyrix_reg(CCR2, ccr2); /* Initialize CCR4.
*/ ccr3 = read_cyrix_reg(CCR3); write_cyrix_reg(CCR3, CCR3_MAPEN0); ccr4 = read_cyrix_reg(CCR4); ccr4 |= CCR4_DTE; ccr4 |= CCR4_MEM; #ifdef CPU_FASTER_5X86_FPU ccr4 |= CCR4_FASTFPE; #else ccr4 &= ~CCR4_FASTFPE; #endif ccr4 &= ~CCR4_IOMASK; /******************************************************************** * WARNING: The "BIOS Writers Guide" mentions that I/O recovery time * should be 0 for the errata fix. ********************************************************************/ #ifdef CPU_IORT ccr4 |= CPU_IORT & CCR4_IOMASK; #endif write_cyrix_reg(CCR4, ccr4); /* Initialize PCR0. */ /**************************************************************** * WARNING: RSTK_EN and LOOP_EN could make your system unstable. * BTB_EN might make your system unstable. ****************************************************************/ pcr0 = read_cyrix_reg(PCR0); #ifdef CPU_RSTK_EN pcr0 |= PCR0_RSTK; #else pcr0 &= ~PCR0_RSTK; #endif #ifdef CPU_BTB_EN pcr0 |= PCR0_BTB; #else pcr0 &= ~PCR0_BTB; #endif #ifdef CPU_LOOP_EN pcr0 |= PCR0_LOOP; #else pcr0 &= ~PCR0_LOOP; #endif /**************************************************************** * WARNING: if you use a memory mapped I/O device, don't use the * DISABLE_5X86_LSSER option, which may reorder memory mapped * I/O access. * IF YOUR MOTHERBOARD HAS PCI BUS, DON'T DISABLE LSSER. ****************************************************************/ #ifdef CPU_DISABLE_5X86_LSSER pcr0 &= ~PCR0_LSSER; #else pcr0 |= PCR0_LSSER; #endif write_cyrix_reg(PCR0, pcr0); /* Restore CCR3. */ write_cyrix_reg(CCR3, ccr3); (void)read_cyrix_reg(0x80); /* dummy */ /* Unlock NW bit in CR0. */ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW); load_cr0((rcr0() & ~CR0_CD) | CR0_NW); /* CD = 0, NW = 1 */ /* Lock NW bit in CR0. */ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW); intr_restore(saveintr); } #ifdef CPU_I486_ON_386 /* * There are i486 based upgrade products for i386 machines. * In this case, the BIOS doesn't enable the CPU cache. */ static void init_i486_on_386(void) { register_t saveintr; saveintr = intr_disable(); load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0, NW = 0 */ intr_restore(saveintr); } #endif /* * Cyrix 6x86 * * XXX - What should I do here? Please let me know. */ static void init_6x86(void) { register_t saveintr; u_char ccr3, ccr4; saveintr = intr_disable(); load_cr0(rcr0() | CR0_CD | CR0_NW); wbinvd(); /* Initialize CCR0. */ write_cyrix_reg(CCR0, read_cyrix_reg(CCR0) | CCR0_NC1); /* Initialize CCR1. */ #ifdef CPU_CYRIX_NO_LOCK write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) | CCR1_NO_LOCK); #else write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) & ~CCR1_NO_LOCK); #endif /* Initialize CCR2. */ #ifdef CPU_SUSP_HLT write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_SUSP_HLT); #else write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_SUSP_HLT); #endif ccr3 = read_cyrix_reg(CCR3); write_cyrix_reg(CCR3, CCR3_MAPEN0); /* Initialize CCR4. */ ccr4 = read_cyrix_reg(CCR4); ccr4 |= CCR4_DTE; ccr4 &= ~CCR4_IOMASK; #ifdef CPU_IORT write_cyrix_reg(CCR4, ccr4 | (CPU_IORT & CCR4_IOMASK)); #else write_cyrix_reg(CCR4, ccr4 | 7); #endif /* Initialize CCR5. */ #ifdef CPU_WT_ALLOC write_cyrix_reg(CCR5, read_cyrix_reg(CCR5) | CCR5_WT_ALLOC); #endif /* Restore CCR3. */ write_cyrix_reg(CCR3, ccr3); /* Unlock NW bit in CR0. */ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW); /* * Earlier revisions of the 6x86 CPU could crash the system if * the L1 cache is in write-back mode.
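 * (As a sketch of the check below, assuming the revision is kept in the
 * high byte of cyrix_did: values above 0x16xx correspond to revision 2.7
 * and later, which are safe for write-back caching.)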
*/ if ((cyrix_did & 0xff00) > 0x1600) load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ else { /* Revision 2.6 and lower. */ #ifdef CYRIX_CACHE_REALLY_WORKS load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ #else load_cr0((rcr0() & ~CR0_CD) | CR0_NW); /* CD = 0 and NW = 1 */ #endif } /* Lock NW bit in CR0. */ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW); intr_restore(saveintr); } #endif /* I486_CPU */ #ifdef I586_CPU /* * Rise mP6 */ static void init_rise(void) { /* * The CMPXCHG8B instruction is always available but hidden. */ cpu_feature |= CPUID_CX8; } /* * IDT WinChip C6/2/2A/2B/3 * * http://www.centtech.com/winchip_bios_writers_guide_v4_0.pdf */ static void init_winchip(void) { u_int regs[4]; uint64_t fcr; fcr = rdmsr(0x0107); /* * Set ECX8, DSMC, DTLOCK/EDCTLB, EMMX, and ERETSTK and clear DPDC. */ fcr |= (1 << 1) | (1 << 7) | (1 << 8) | (1 << 9) | (1 << 16); fcr &= ~(1ULL << 11); /* * Additionally, set EBRPRED, E2MMX and EAMD3D for WinChip 2 and 3. */ if (CPUID_TO_MODEL(cpu_id) >= 8) fcr |= (1 << 12) | (1 << 19) | (1 << 20); wrmsr(0x0107, fcr); do_cpuid(1, regs); cpu_feature = regs[3]; } #endif #ifdef I686_CPU /* * Cyrix 6x86MX (code-named M2) * * XXX - What should I do here? Please let me know. */ static void init_6x86MX(void) { register_t saveintr; u_char ccr3, ccr4; saveintr = intr_disable(); load_cr0(rcr0() | CR0_CD | CR0_NW); wbinvd(); /* Initialize CCR0. */ write_cyrix_reg(CCR0, read_cyrix_reg(CCR0) | CCR0_NC1); /* Initialize CCR1. */ #ifdef CPU_CYRIX_NO_LOCK write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) | CCR1_NO_LOCK); #else write_cyrix_reg(CCR1, read_cyrix_reg(CCR1) & ~CCR1_NO_LOCK); #endif /* Initialize CCR2. */ #ifdef CPU_SUSP_HLT write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_SUSP_HLT); #else write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_SUSP_HLT); #endif ccr3 = read_cyrix_reg(CCR3); write_cyrix_reg(CCR3, CCR3_MAPEN0); /* Initialize CCR4. */ ccr4 = read_cyrix_reg(CCR4); ccr4 &= ~CCR4_IOMASK; #ifdef CPU_IORT write_cyrix_reg(CCR4, ccr4 | (CPU_IORT & CCR4_IOMASK)); #else write_cyrix_reg(CCR4, ccr4 | 7); #endif /* Initialize CCR5. */ #ifdef CPU_WT_ALLOC write_cyrix_reg(CCR5, read_cyrix_reg(CCR5) | CCR5_WT_ALLOC); #endif /* Restore CCR3. */ write_cyrix_reg(CCR3, ccr3); /* Unlock NW bit in CR0. */ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) & ~CCR2_LOCK_NW); load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); /* CD = 0 and NW = 0 */ /* Lock NW bit in CR0. */ write_cyrix_reg(CCR2, read_cyrix_reg(CCR2) | CCR2_LOCK_NW); intr_restore(saveintr); } static int ppro_apic_used = -1; static void init_ppro(void) { u_int64_t apicbase; /* * Local APIC should be disabled if it is not going to be used. */ if (ppro_apic_used != 1) { apicbase = rdmsr(MSR_APICBASE); apicbase &= ~APICBASE_ENABLED; wrmsr(MSR_APICBASE, apicbase); ppro_apic_used = 0; } } /* * If the local APIC is going to be used after being disabled above, * re-enable it and don't disable it in the future. */ void ppro_reenable_apic(void) { u_int64_t apicbase; if (ppro_apic_used == 0) { apicbase = rdmsr(MSR_APICBASE); apicbase |= APICBASE_ENABLED; wrmsr(MSR_APICBASE, apicbase); ppro_apic_used = 1; } } /* * Initialize BBL_CR_CTL3 (Control register 3: used to configure the * L2 cache). */ static void init_mendocino(void) { #ifdef CPU_PPRO2CELERON register_t saveintr; u_int64_t bbl_cr_ctl3; saveintr = intr_disable(); load_cr0(rcr0() | CR0_CD | CR0_NW); wbinvd(); bbl_cr_ctl3 = rdmsr(MSR_BBL_CR_CTL3); /* If the L2 cache is configured, do nothing. 
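 * Otherwise program it.  As a sketch of the write below: bit 0 of
 * BBL_CR_CTL3 is treated as the "configured" flag, and the latency
 * value is shifted left by one so that it lands in the bits above it.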
*/ if (!(bbl_cr_ctl3 & 1)) { bbl_cr_ctl3 = 0x134052bLL; /* Set L2 Cache Latency (Default: 5). */ #ifdef CPU_CELERON_L2_LATENCY #if CPU_L2_LATENCY > 15 #error invalid CPU_L2_LATENCY. #endif bbl_cr_ctl3 |= CPU_L2_LATENCY << 1; #else bbl_cr_ctl3 |= 5 << 1; #endif wrmsr(MSR_BBL_CR_CTL3, bbl_cr_ctl3); } load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); intr_restore(saveintr); #endif /* CPU_PPRO2CELERON */ } /* * Initialize special VIA features */ static void init_via(void) { u_int regs[4], val; uint64_t fcr; /* * Explicitly enable CX8 and PGE on C3. * * http://www.via.com.tw/download/mainboards/6/13/VIA_C3_EBGA%20datasheet110.pdf */ if (CPUID_TO_MODEL(cpu_id) <= 9) fcr = (1 << 1) | (1 << 7); else fcr = 0; /* * Check extended CPUID for PadLock features. * * http://www.via.com.tw/en/downloads/whitepapers/initiatives/padlock/programming_guide.pdf */ do_cpuid(0xc0000000, regs); if (regs[0] >= 0xc0000001) { do_cpuid(0xc0000001, regs); val = regs[3]; } else val = 0; /* Enable RNG if present. */ if ((val & VIA_CPUID_HAS_RNG) != 0) { via_feature_rng = VIA_HAS_RNG; wrmsr(0x110B, rdmsr(0x110B) | VIA_CPUID_DO_RNG); } /* Enable PadLock if present. */ if ((val & VIA_CPUID_HAS_ACE) != 0) via_feature_xcrypt |= VIA_HAS_AES; if ((val & VIA_CPUID_HAS_ACE2) != 0) via_feature_xcrypt |= VIA_HAS_AESCTR; if ((val & VIA_CPUID_HAS_PHE) != 0) via_feature_xcrypt |= VIA_HAS_SHA; if ((val & VIA_CPUID_HAS_PMM) != 0) via_feature_xcrypt |= VIA_HAS_MM; if (via_feature_xcrypt != 0) fcr |= 1 << 28; wrmsr(0x1107, rdmsr(0x1107) | fcr); } #endif /* I686_CPU */ #if defined(I586_CPU) || defined(I686_CPU) static void init_transmeta(void) { u_int regs[4]; /* do_cpuid() fills all four words */ /* Expose all hidden features. */ wrmsr(0x80860004, rdmsr(0x80860004) | ~0UL); do_cpuid(1, regs); cpu_feature = regs[3]; } #endif extern int elf32_nxstack; void initializecpu(void) { switch (cpu) { #ifdef I486_CPU case CPU_BLUE: init_bluelightning(); break; case CPU_486DLC: init_486dlc(); break; case CPU_CY486DX: init_cy486dx(); break; case CPU_M1SC: init_5x86(); break; #ifdef CPU_I486_ON_386 case CPU_486: init_i486_on_386(); break; #endif case CPU_M1: init_6x86(); break; #endif /* I486_CPU */ #ifdef I586_CPU case CPU_586: switch (cpu_vendor_id) { case CPU_VENDOR_AMD: #ifdef CPU_WT_ALLOC if (((cpu_id & 0x0f0) > 0) && ((cpu_id & 0x0f0) < 0x60) && ((cpu_id & 0x00f) > 3)) enable_K5_wt_alloc(); else if (((cpu_id & 0x0f0) > 0x80) || (((cpu_id & 0x0f0) == 0x80) && (cpu_id & 0x00f) > 0x07)) enable_K6_2_wt_alloc(); else if ((cpu_id & 0x0f0) > 0x50) enable_K6_wt_alloc(); #endif if ((cpu_id & 0xf0) == 0xa0) /* * Make sure the TSC runs through * suspension, otherwise we can't use * it as a timecounter. */ wrmsr(0x1900, rdmsr(0x1900) | 0x20ULL); break; case CPU_VENDOR_CENTAUR: init_winchip(); break; case CPU_VENDOR_TRANSMETA: init_transmeta(); break; case CPU_VENDOR_RISE: init_rise(); break; } break; #endif #ifdef I686_CPU case CPU_M2: init_6x86MX(); break; case CPU_686: switch (cpu_vendor_id) { case CPU_VENDOR_INTEL: switch (cpu_id & 0xff0) { case 0x610: init_ppro(); break; case 0x660: init_mendocino(); break; } break; #ifdef CPU_ATHLON_SSE_HACK case CPU_VENDOR_AMD: /* * Sometimes the BIOS doesn't enable SSE instructions. * According to AMD document 20734, the mobile * Duron, the (mobile) Athlon 4 and the Athlon MP * support SSE. These correspond to cpu_id 0x66X, * 0x67X or 0x68X.
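 *
 * Editor's aside (illustrative, not part of this revision): masking
 * the stepping nibble with ~0xf is what turns a concrete cpu_id such
 * as 0x662 into the 0x66X family/model group tested below:
 */
#if 0	/* Worked example only. */
	/* cpu_id 0x662: family 6, model 6, stepping 2. */
	/* 0x662 & ~0xf == 0x660, so the stepping is ignored. */
	int is_athlon4_class = ((0x662 & ~0xf) == 0x660);	/* true */
#endif
/*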
*/ if ((cpu_feature & CPUID_XMM) == 0 && ((cpu_id & ~0xf) == 0x660 || (cpu_id & ~0xf) == 0x670 || (cpu_id & ~0xf) == 0x680)) { u_int regs[4]; wrmsr(MSR_HWCR, rdmsr(MSR_HWCR) & ~0x08000); do_cpuid(1, regs); cpu_feature = regs[3]; } break; #endif case CPU_VENDOR_CENTAUR: init_via(); break; case CPU_VENDOR_TRANSMETA: init_transmeta(); break; } break; #endif default: break; } -#if defined(CPU_ENABLE_SSE) if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) { load_cr4(rcr4() | CR4_FXSR | CR4_XMM); cpu_fxsr = hw_instruction_sse = 1; } -#endif #if defined(PAE) || defined(PAE_TABLES) if ((amd_feature & AMDID_NX) != 0) { uint64_t msr; msr = rdmsr(MSR_EFER) | EFER_NXE; wrmsr(MSR_EFER, msr); pg_nx = PG_NX; elf32_nxstack = 1; } #endif } void initializecpucache(void) { /* * CPUID with %eax = 1, %ebx returns * Bits 15-8: CLFLUSH line size * (Value * 8 = cache line size in bytes) */ if ((cpu_feature & CPUID_CLFSH) != 0) cpu_clflush_line_size = ((cpu_procinfo >> 8) & 0xff) * 8; /* * XXXKIB: (temporary) hack to work around traps generated * when CLFLUSHing APIC register window under virtualization * environments. These environments tend to disable the * CPUID_SS feature even though the native CPU supports it. */ TUNABLE_INT_FETCH("hw.clflush_disable", &hw_clflush_disable); if (vm_guest != VM_GUEST_NO && hw_clflush_disable == -1) { cpu_feature &= ~CPUID_CLFSH; cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT; } /* * The kernel's use of CLFLUSH{,OPT} can be disabled manually * by setting the hw.clflush_disable tunable. */ if (hw_clflush_disable == 1) { cpu_feature &= ~CPUID_CLFSH; cpu_stdext_feature &= ~CPUID_STDEXT_CLFLUSHOPT; } } #if defined(I586_CPU) && defined(CPU_WT_ALLOC) /* * Enable the write allocate feature of AMD processors. * The following two functions require Maxmem to be set. */ static void enable_K5_wt_alloc(void) { u_int64_t msr; register_t saveintr; /* * Write allocate is supported only on models 1, 2, and 3, with * a stepping of 4 or greater. */ if (((cpu_id & 0xf0) > 0) && ((cpu_id & 0x0f) > 3)) { saveintr = intr_disable(); msr = rdmsr(0x83); /* HWCR */ wrmsr(0x83, msr & ~0x10); /* disable write allocate while configuring */ /* * We have to tell the chip where the top of memory is, * since video cards could have frame buffers there, * memory-mapped I/O could be there, etc. */ if (Maxmem > 0) msr = Maxmem / 16; else msr = 0; msr |= AMD_WT_ALLOC_TME | AMD_WT_ALLOC_FRE; /* * There is no way to know whether the 15-16M hole exists or not. * Therefore, we disable write allocate for this range. */ wrmsr(0x86, 0x0ff00f0); msr |= AMD_WT_ALLOC_PRE; wrmsr(0x85, msr); msr = rdmsr(0x83); wrmsr(0x83, msr | 0x10); /* enable write allocate */ intr_restore(saveintr); } } static void enable_K6_wt_alloc(void) { quad_t size; u_int64_t whcr; register_t saveintr; saveintr = intr_disable(); wbinvd(); #ifdef CPU_DISABLE_CACHE /* * Certain K6-2 boxes become unstable when write allocation is * enabled. */ /* * The AMD-K6 processor provides the 64-bit Test Register 12 (TR12), * but only the Cache Inhibit (CI) bit (bit 3 of TR12) is supported. * All other bits in TR12 have no effect on the processor's operation. * The I/O Trap Restart function (bit 9 of TR12) is always enabled * on the AMD-K6. */ wrmsr(0x0000000e, (u_int64_t)0x0008); #endif /* Don't assume that memory size is aligned to 4M. */ if (Maxmem > 0) size = ((Maxmem >> 8) + 3) >> 2; else size = 0; /* Limit is 508M bytes.
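 *
 * Editor's aside (illustrative, not part of this revision): Maxmem
 * counts 4 KB pages, so Maxmem >> 8 is whole megabytes, and the
 * "+ 3 >> 2" rounds up to the 4 MB units the WHCR limit field uses:
 */
#if 0	/* Worked example with a hypothetical helper name. */
static quad_t
whcr_units(long maxmem_pages)
{
	/* 256 pages = 1 MB; round megabytes up to 4 MB units. */
	return ((((quad_t)maxmem_pages >> 8) + 3) >> 2);
}
	/* e.g. 16384 pages (64 MB) -> (64 + 3) >> 2 = 16 units. */
#endif
/*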
*/ if (size > 0x7f) size = 0x7f; whcr = (rdmsr(0xc0000082) & ~(0x7fLL << 1)) | (size << 1); #if defined(NO_MEMORY_HOLE) if (whcr & (0x7fLL << 1)) whcr |= 0x0001LL; #else /* * There is no way to know whether the 15-16M hole exists or not. * Therefore, we disable write allocate for this range. */ whcr &= ~0x0001LL; #endif wrmsr(0x0c0000082, whcr); intr_restore(saveintr); } static void enable_K6_2_wt_alloc(void) { quad_t size; u_int64_t whcr; register_t saveintr; saveintr = intr_disable(); wbinvd(); #ifdef CPU_DISABLE_CACHE /* * Certain K6-2 boxes become unstable when write allocation is * enabled. */ /* * The AMD-K6 processor provides the 64-bit Test Register 12 (TR12), * but only the Cache Inhibit (CI) bit (bit 3 of TR12) is supported. * All other bits in TR12 have no effect on the processor's operation. * The I/O Trap Restart function (bit 9 of TR12) is always enabled * on the AMD-K6. */ wrmsr(0x0000000e, (u_int64_t)0x0008); #endif /* Don't assume that memory size is aligned to 4M. */ if (Maxmem > 0) size = ((Maxmem >> 8) + 3) >> 2; else size = 0; /* Limit is 4092M bytes. */ if (size > 0x3ff) size = 0x3ff; /* field is 10 bits wide */ whcr = (rdmsr(0xc0000082) & ~(0x3ffLL << 22)) | (size << 22); #if defined(NO_MEMORY_HOLE) if (whcr & (0x3ffLL << 22)) whcr |= 1LL << 16; #else /* * There is no way to know whether the 15-16M hole exists or not. * Therefore, we disable write allocate for this range. */ whcr &= ~(1LL << 16); #endif wrmsr(0x0c0000082, whcr); intr_restore(saveintr); } #endif /* I586_CPU && CPU_WT_ALLOC */ #include "opt_ddb.h" #ifdef DDB #include DB_SHOW_COMMAND(cyrixreg, cyrixreg) { register_t saveintr; u_int cr0; u_char ccr1, ccr2, ccr3; u_char ccr0 = 0, ccr4 = 0, ccr5 = 0, pcr0 = 0; cr0 = rcr0(); if (cpu_vendor_id == CPU_VENDOR_CYRIX) { saveintr = intr_disable(); if ((cpu != CPU_M1SC) && (cpu != CPU_CY486DX)) { ccr0 = read_cyrix_reg(CCR0); } ccr1 = read_cyrix_reg(CCR1); ccr2 = read_cyrix_reg(CCR2); ccr3 = read_cyrix_reg(CCR3); if ((cpu == CPU_M1SC) || (cpu == CPU_M1) || (cpu == CPU_M2)) { write_cyrix_reg(CCR3, CCR3_MAPEN0); ccr4 = read_cyrix_reg(CCR4); if ((cpu == CPU_M1) || (cpu == CPU_M2)) ccr5 = read_cyrix_reg(CCR5); else pcr0 = read_cyrix_reg(PCR0); write_cyrix_reg(CCR3, ccr3); /* Restore CCR3. */ } intr_restore(saveintr); if ((cpu != CPU_M1SC) && (cpu != CPU_CY486DX)) printf("CCR0=%x, ", (u_int)ccr0); printf("CCR1=%x, CCR2=%x, CCR3=%x", (u_int)ccr1, (u_int)ccr2, (u_int)ccr3); if ((cpu == CPU_M1SC) || (cpu == CPU_M1) || (cpu == CPU_M2)) { printf(", CCR4=%x, ", (u_int)ccr4); if (cpu == CPU_M1SC) printf("PCR0=%x\n", pcr0); else printf("CCR5=%x\n", ccr5); } } printf("CR0=%x\n", cr0); } #endif /* DDB */ Index: projects/ipsec/sys/i386/i386/machdep.c =================================================================== --- projects/ipsec/sys/i386/i386/machdep.c (revision 313186) +++ projects/ipsec/sys/i386/i386/machdep.c (revision 313187) @@ -1,3137 +1,3094 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_atpic.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_mp_watchdog.h" -#include "opt_npx.h" #include "opt_perfmon.h" #include "opt_platform.h" #include "opt_xbox.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! 
#endif #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #ifdef SMP #include #endif #ifdef FDT #include #endif #ifdef DEV_APIC #include #endif #ifdef DEV_ISA #include #endif #ifdef XBOX #include int arch_i386_is_xbox = 0; uint32_t arch_i386_xbox_memsize = 0; #endif /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); extern register_t init386(int first); extern void dblfault_handler(void); -#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) -#define CPU_ENABLE_SSE -#endif - static void cpu_startup(void *); static void fpstate_drop(struct thread *td); static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len); static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); /* Intel ICH registers */ #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 int _udatasel, _ucodesel; u_int basemem; int cold = 1; #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif long Maxmem = 0; long realmem = 0; #ifdef PAE FEATURE(pae, "Physical Address Extensions"); #endif /* * The number of PHYSMAP entries must be one less than the number of * PHYSSEG entries because the PHYSMAP entry that spans the largest * physical address that is accessible by ISA DMA is split into two * PHYSSEG entries. */ #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END (nitems(phys_avail) - 2) #define DUMP_AVAIL_ARRAY_END (nitems(dump_avail) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; struct mem_range_softc mem_range_softc; /* Default init_ops implementation. */ struct init_ops init_ops = { .early_clock_source_init = i8254_init, .early_delay = i8254_delay, #ifdef DEV_APIC .msi_init = msi_init, #endif }; static void cpu_startup(dummy) void *dummy; { uintmax_t memsize; char *sysenv; /* * On MacBooks, we need to disallow the legacy USB circuit to * generate an SMI# because this can cause several problems, * namely: incorrect CPU frequency detection and failure to * start the APs. * We do this by disabling a bit in the SMI_EN (SMI Control and * Enable register) of the Intel ICH LPC Interface Bridge. */ sysenv = kern_getenv("smbios.system.product"); if (sysenv != NULL) { if (strncmp(sysenv, "MacBook1,1", 10) == 0 || strncmp(sysenv, "MacBook3,1", 10) == 0 || strncmp(sysenv, "MacBook4,1", 10) == 0 || strncmp(sysenv, "MacBookPro1,1", 13) == 0 || strncmp(sysenv, "MacBookPro1,2", 13) == 0 || strncmp(sysenv, "MacBookPro3,1", 13) == 0 || strncmp(sysenv, "MacBookPro4,1", 13) == 0 || strncmp(sysenv, "Macmini1,1", 10) == 0) { if (bootverbose) printf("Disabling LEGACY_USB_EN bit on " "Intel ICH.\n"); outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); } freeenv(sysenv); } /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif /* * Display physical memory if SMBIOS reports reasonable amount. 
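 *
 * Editor's aside (illustrative, not part of this revision): the code
 * below treats smbios.memory.enabled as a decimal count of kilobytes,
 * shifting it left by 10 to produce bytes:
 */
#if 0	/* Worked example only. */
	/* "8388608" KB << 10 == 8589934592 bytes == 8 GB. */
	uintmax_t example = (uintmax_t)strtoul("8388608", NULL, 10) << 10;
#endif
/*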
*/ memsize = 0; sysenv = kern_getenv("smbios.memory.enabled"); if (sysenv != NULL) { memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; freeenv(sysenv); } if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count)) memsize = ptoa((uintmax_t)Maxmem); printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); realmem = atop(memsize); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)vm_cnt.v_free_count), ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); cpu_setregs(); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by call * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct osigframe sf, *fp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct osigframe)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct osigframe *)regs->tf_esp - 1; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; bzero(&sf.sf_siginfo, sizeof(sf.sf_siginfo)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = ksi->ksi_code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; sf.sf_addr = 0; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 
1 : 0; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)fp; if (p->p_sysent->sv_sigcode_base != 0) { regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode - szosigcode; } else { /* a.out sysentvec does not use shared page */ regs->tf_eip = p->p_sysent->sv_psstrings - szosigcode; } regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe4 sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); bzero(sf.sf_uc.uc_mcontext.mc_fpregs, sizeof(sf.sf_uc.uc_mcontext.mc_fpregs)); bzero(sf.sf_uc.uc_mcontext.__spare__, sizeof(sf.sf_uc.uc_mcontext.__spare__)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe4 *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe4)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sfp = (struct sigframe4 *)regs->tf_esp - 1; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. 
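 *
 * Editor's aside (illustrative, not part of this revision): the frame
 * is shaped for one of two userland dispatch conventions, roughly:
 */
#if 0	/* Userland view; prototypes are for illustration only. */
	void modern(int sig, siginfo_t *si, void *uc);	/* SA_SIGINFO */
	void classic(int sig, int code, void *arg);	/* old FreeBSD style */
#endif
/*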
*/ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; sf.sf_si.si_addr = ksi->ksi_addr; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode - szfreebsd4_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_FREEBSD4 */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; struct segment_descriptor *sdp; char *xfpusave; size_t xfpusave_len; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); #ifdef COMPAT_FREEBSD4 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { freebsd4_sendsig(catcher, ksi, mask); return; } #endif #ifdef COMPAT_43 if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, ksi, mask); return; } #endif regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); -#ifdef CPU_ENABLE_SSE if (cpu_max_ext_state_size > sizeof(union savefpu) && use_xsave) { xfpusave_len = cpu_max_ext_state_size - sizeof(union savefpu); xfpusave = __builtin_alloca(xfpusave_len); } else { -#else - { -#endif xfpusave_len = 0; xfpusave = NULL; } /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); /* * Unconditionally fill the fsbase and gsbase into the mcontext. 
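 *
 * Editor's aside (illustrative, not part of this revision): an i386
 * segment descriptor stores its 32-bit base split across two fields,
 * which the code below stitches back together:
 */
#if 0	/* Worked example only. */
	/* base = high 8 bits (sd_hibase) : low 24 bits (sd_lobase) */
	u_int base = (sdp->sd_hibase << 24) | sdp->sd_lobase;
#endif
/*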
*/ sdp = &td->td_pcb->pcb_fsd; sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; bzero(sf.sf_uc.uc_mcontext.mc_spare2, sizeof(sf.sf_uc.uc_mcontext.mc_spare2)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size; #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_esp - 128; if (xfpusave != NULL) { sp -= xfpusave_len; sp = (char *)((unsigned int)sp & ~0x3F); sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; } sp -= sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned int)sp & ~0xF); /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || (xfpusave != NULL && copyout(xfpusave, (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len) != 0)) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = p->p_sysent->sv_sigcode_base; if (regs->tf_eip == 0) regs->tf_eip = p->p_sysent->sv_psstrings - szsigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. 
* * MPSAFE */ #ifdef COMPAT_43 int osigreturn(td, uap) struct thread *td; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct osigcontext sc; struct trapframe *regs; struct osigcontext *scp; int eflags, error; ksiginfo_t ksi; regs = td->td_frame; error = copyin(uap->sigcntxp, &sc, sizeof(sc)); if (error != 0) return (error); scp = ≻ eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_eflags)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. */ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; #if defined(COMPAT_43) if (scp->sc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, (sigset_t *)&scp->sc_mask, NULL, SIGPROCMASK_OLD); return (EJUSTRETURN); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 /* * MPSAFE */ int freebsd4_sigreturn(td, uap) struct thread *td; struct freebsd4_sigreturn_args /* { const ucontext4 *sigcntxp; } */ *uap; { struct ucontext4 uc; struct trapframe *regs; struct ucontext4 *ucp; int cs, eflags, error; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. 
*/ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_eflags)) { uprintf("pid %d (%s): freebsd4_sigreturn eflags = 0x%x\n", td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): freebsd4_sigreturn cs = 0x%x\n", td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); return (EJUSTRETURN); } #endif /* COMPAT_FREEBSD4 */ /* * MPSAFE */ int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct proc *p; struct trapframe *regs; ucontext_t *ucp; char *xfpustate; size_t xfpustate_len; int cs, eflags, error, ret; ksiginfo_t ksi; p = td->td_proc; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) { uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid, td->td_name, ucp->uc_mcontext.mc_flags); return (EINVAL); } regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. 
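 *
 * Editor's aside (illustrative, not part of this revision): in the
 * vm86 paths below only the user-changeable eflags bits are taken
 * from the supplied context; privileged bits keep their old values:
 */
#if 0	/* Sketch of the mask-merge used below; hypothetical helper. */
static int
merge_eflags(int old_ef, int new_ef, int user_mask)
{
	return ((old_ef & ~user_mask) | (new_ef & user_mask) | PSL_VM);
}
#endif
/*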
*/ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_eflags)) { uprintf("pid %d (%s): sigreturn eflags = 0x%x\n", td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): sigreturn cs = 0x%x\n", td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) { xfpustate_len = uc.uc_mcontext.mc_xfpustate_len; if (xfpustate_len > cpu_max_ext_state_size - sizeof(union savefpu)) { uprintf( "pid %d (%s): sigreturn xfpusave_len = 0x%zx\n", p->p_pid, td->td_name, xfpustate_len); return (EINVAL); } xfpustate = __builtin_alloca(xfpustate_len); error = copyin((const void *)uc.uc_mcontext.mc_xfpustate, xfpustate, xfpustate_len); if (error != 0) { uprintf( "pid %d (%s): sigreturn copying xfpustate failed\n", p->p_pid, td->td_name); return (error); } } else { xfpustate = NULL; xfpustate_len = 0; } ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Reset registers to default values on exec. */ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ pcb->pcb_gs = _udatasel; load_gs(_udatasel); mtx_lock_spin(&dt_lock); if (td->td_proc->p_md.md_ldt) user_ldt_free(td); else mtx_unlock_spin(&dt_lock); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = imgp->entry_addr; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = imgp->ps_strings; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. 
if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } pcb->pcb_initial_npxcw = __INITIAL_NPXCW__; /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); /* * XXX - Linux emulator * Make sure edx is 0x0 on entry. Linux binaries depend * on it. */ td->td_retval[1] = 0; } void cpu_setregs(void) { unsigned int cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support: * * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT * instructions. We must set the CR0_MP bit and use the CR0_TS * bit to control the trap, because setting the CR0_EM bit does * not cause WAIT instructions to trap. It's important to trap * WAIT instructions - otherwise the "wait" variants of no-wait * control instructions would degenerate to the "no-wait" variants * after FP context switches but work correctly otherwise. It's * particularly important to trap WAITs when there is no NPX - * otherwise the "wait" variants would always degenerate. * * Try setting CR0_NE to get correct error reporting on 486DX's. * Setting it should fail or do nothing on lesser processors. */ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); load_gs(_udatasel); } u_long bootdev; /* not a struct cdev *- encoding is different */ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)"); static char bootmethod[16] = "BIOS"; SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0, "System firmware boot method"); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ struct region_descriptor r_gdt, r_idt; /* table descriptors */ struct mtx dt_lock; /* lock for GDT and LDT */ static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern vm_offset_t proc0kstack; /* * software prototypes -- in more palatable form.
* * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it) */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = SEL_KPL, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPRIV_SEL 1 SMP Per-Processor Private Data Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUFS_SEL 2 %fs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUGS_SEL 3 %gs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GCODE_SEL 4 Code Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GDATA_SEL 5 Data Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUCODE_SEL 6 Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUDATA_SEL 7 Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { .ssd_base = 0x400, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { .ssd_base = 0x0, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GLDT_SEL 10 LDT Descriptor */ { .ssd_base = (int) ldt, .ssd_limit = sizeof(ldt)-1, .ssd_type = SDT_SYSLDT, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 11 User LDT Descriptor per process */ { .ssd_base = (int) ldt, .ssd_limit = (512 * sizeof(union descriptor)-1), .ssd_type = SDT_SYSLDT, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPANIC_SEL 12 Panic Tss Descriptor */ { .ssd_base = (int) &dblfault_tss, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSUTIL_SEL 16 BIOS 16-bit 
interface (Utility) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GNDIS_SEL 18 NDIS Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), #ifdef KDTRACE_HOOKS IDTVEC(dtrace_ret), #endif #ifdef XENHVM IDTVEC(xen_intr_upcall), #endif IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); #ifdef DDB /* * Display the index and function name of any IDT entries that don't use * the default 'rsvd' entry point. */ DB_SHOW_COMMAND(idt, db_show_idt) { struct gate_descriptor *ip; int idx; uintptr_t func; ip = idt; for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { func = (ip->gd_hioffset << 16 | ip->gd_looffset); if (func != (uintptr_t)&IDTVEC(rsvd)) { db_printf("%3d\t", idx); db_printsym(func, DB_STGY_PROC); db_printf("\n"); } ip++; } } /* Show privileged registers. 
*/ DB_SHOW_COMMAND(sysregs, db_show_sysregs) { uint64_t idtr, gdtr; idtr = ridt(); db_printf("idtr\t0x%08x/%04x\n", (u_int)(idtr >> 16), (u_int)idtr & 0xffff); gdtr = rgdt(); db_printf("gdtr\t0x%08x/%04x\n", (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff); db_printf("ldtr\t0x%04x\n", rldt()); db_printf("tr\t0x%04x\n", rtr()); db_printf("cr0\t0x%08x\n", rcr0()); db_printf("cr2\t0x%08x\n", rcr2()); db_printf("cr3\t0x%08x\n", rcr3()); db_printf("cr4\t0x%08x\n", rcr4()); if (rcr4() & CR4_XSAVE) db_printf("xcr0\t0x%016llx\n", rxcr(0)); if (amd_feature & (AMDID_NX | AMDID_LM)) db_printf("EFER\t0x%016llx\n", rdmsr(MSR_EFER)); if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX)) db_printf("FEATURES_CTL\t0x%016llx\n", rdmsr(MSR_IA32_FEATURE_CONTROL)); if ((cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD) && CPUID_TO_FAMILY(cpu_id) >= 6) db_printf("DEBUG_CTL\t0x%016llx\n", rdmsr(MSR_DEBUGCTLMSR)); if (cpu_feature & CPUID_PAT) db_printf("PAT\t0x%016llx\n", rdmsr(MSR_PAT)); } DB_SHOW_COMMAND(dbregs, db_show_dbregs) { db_printf("dr0\t0x%08x\n", rdr0()); db_printf("dr1\t0x%08x\n", rdr1()); db_printf("dr2\t0x%08x\n", rdr2()); db_printf("dr3\t0x%08x\n", rdr3()); db_printf("dr6\t0x%08x\n", rdr6()); db_printf("dr7\t0x%08x\n", rdr7()); } #endif void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, int *physmap_idxp) { int i, insert_idx, physmap_idx; physmap_idx = *physmap_idxp; if (length == 0) return (1); #ifndef PAE if (base > 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(length / 1024)); return (1); } #endif /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. */ insert_idx = physmap_idx + 2; for (i = 0; i <= physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } physmap_idx += 2; *physmap_idxp = physmap_idx; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. 
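 *
 * Editor's aside (illustrative, not part of this revision): physmap[]
 * is a flat array of [base, bound) pairs kept sorted by address, so
 * entry i occupies physmap[2*i] and physmap[2*i + 1], e.g.:
 */
#if 0	/* Worked example only. */
	/* Two regions: 0-640K and 1M-256M. */
	vm_paddr_t example_map[4] = {
		0x0,      0xa0000,	/* base memory */
		0x100000, 0x10000000,	/* extended memory */
	};
#endif
/*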
physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; return (1); } static int add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp) { if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016llx len=%016llx\n", smap->type, smap->base, smap->length); if (smap->type != SMAP_TYPE_MEMORY) return (1); return (add_physmap_entry(smap->base, smap->length, physmap, physmap_idxp)); } static void add_smap_entries(struct bios_smap *smapbase, vm_paddr_t *physmap, int *physmap_idxp) { struct bios_smap *smap, *smapend; u_int32_t smapsize; /* * Memory map from INT 15:E820. * * subr_module.c says: * "Consumer may safely assume that size value precedes data." * i.e., an int32_t immediately precedes the SMAP. */ smapsize = *((u_int32_t *)smapbase - 1); smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); for (smap = smapbase; smap < smapend; smap++) if (!add_smap_entry(smap, physmap, physmap_idxp)) break; } static void basemem_setup(void) { vm_paddr_t pa; pt_entry_t *pte; int i; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); /* * Map pages between basemem and ISA_HOLE_START, if any, r/w into * the vm86 page table so that vm86 can scribble on them using * the vm86 map too. XXX: why 2 ways for this and only 1 way for * page 0, at least as initialized here? */ pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; } /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use the * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(int first) { int has_smap, off, physmap_idx, pa_indx, da_indx; u_long memtest; vm_paddr_t physmap[PHYSMAP_SIZE]; pt_entry_t *pte; quad_t dcons_addr, dcons_size, physmem_tunable; int hasbrokenint12, i, res; u_int extmem; struct vm86frame vmf; struct vm86context vmc; vm_paddr_t pa; struct bios_smap *smap, *smapbase; caddr_t kmdp; has_smap = 0; #ifdef XBOX if (arch_i386_is_xbox) { /* * We queried the memory size before, so chop off 4MB for * the framebuffer and inform the OS of this.
physmap[0] = 0; physmap[1] = (arch_i386_xbox_memsize * 1024 * 1024) - XBOX_FB_SIZE; physmap_idx = 0; goto physmap_done; } #endif bzero(&vmf, sizeof(vmf)); bzero(physmap, sizeof(physmap)); basemem = 0; /* * Check if the loader supplied an SMAP memory map. If so, * use that and do not make any VM86 calls. */ physmap_idx = 0; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf32 kernel"); smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase != NULL) { add_smap_entries(smapbase, physmap, &physmap_idx); has_smap = 1; goto have_smap; } /* * Some newer BIOSes have a broken INT 12H implementation * which causes a kernel panic immediately. In this case, we * need to use the SMAP to determine the base memory size. */ hasbrokenint12 = 0; TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); if (hasbrokenint12 == 0) { /* Use INT12 to determine base memory size. */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; basemem_setup(); } /* * Fetch the memory map with INT 15:E820. Map page 1 R/W into * the kernel page table so we can use it as a buffer. The * kernel will unmap this page later. */ pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT); vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT)); res = vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); KASSERT(res != 0, ("vm86_getptr() failed: address not found")); vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = sizeof(struct bios_smap); i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; has_smap = 1; if (!add_smap_entry(smap, physmap, &physmap_idx)) break; } while (vmf.vmf_ebx != 0); have_smap: /* * If we didn't fetch the "base memory" size from INT12, * figure it out from the SMAP (or just guess). */ if (basemem == 0) { for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } /* XXX: If we couldn't find basemem from SMAP, just guess. */ if (basemem == 0) basemem = 640; basemem_setup(); } if (physmap[1] != 0) goto physmap_done; /* * If we failed to find an SMAP, figure out the extended * memory size. We will then build a simple memory map with * two segments, one for "base memory" and the second for * "extended memory". Note that "extended memory" starts at a * physical address of 1MB and that both basemem and extmem * are in units of 1KB. * * First, try to fetch the extended memory size via INT 15:E801. */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { /* * If INT15:E801 fails, this is our last-ditch effort * to determine the extended memory size. Currently * we prefer the RTC value over INT15:88. */ #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #else extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people who * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB.
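 *
 * Editor's aside (illustrative, not part of this revision): extmem is
 * in kilobytes, so the clamp below compares against 15 * 1024 KB:
 */
#if 0	/* Worked example only. */
	/* 15.5 MB reported: 15872 KB is between 15360 and 16384 KB. */
	u_int ext = 15872;
	if (ext > 15 * 1024 && ext < 16 * 1024)
		ext = 15 * 1024;	/* chopped to 15 MB */
#endif
/*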
*/ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1]); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. * * This is especially confusing when it is much larger than the * memory size and is displayed as "realmem". */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_QUAD_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * If we have an SMAP, don't allow MAXMEM or hw.physmem to extend * the amount of memory in the system. */ if (has_smap && Maxmem > atop(physmap[physmap_idx + 1])) Maxmem = atop(physmap[physmap_idx + 1]); /* * By default enable the memory test on real hardware, and disable * it if we appear to be running in a VM. This avoids touching all * pages unnecessarily, which doesn't matter on real hardware but is * bad for shared VM hosts. Use a general name so that * one could eventually do more with the code than just disable it. */ memtest = (vm_guest > VM_GUEST_NO) ? 0 : 1; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP3; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR3; full = FALSE; /* * block out kernel memory as not available. 
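			 * (KERNLOAD..first covers the kernel image plus the
			 * early boot-time allocations made before this
			 * point; none of it may be handed to the VM system
			 * via phys_avail[].)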
*/ if (pa >= KERNLOAD && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; if (memtest == 0) goto skip_memtest; /* * map page into kernel: valid, read/write, non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; skip_memtest: /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we are at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(msgbufsize) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(msgbufsize); /* Map the message buffer. */ for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] + off); } static void i386_kdb_init(void) { #ifdef DDB db_fetch_ksymtab(bootinfo.bi_symtab, bootinfo.bi_esymtab); #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } register_t init386(int first) { struct gate_descriptor *gdp; int gsel_tss, metadata_missing, x, pa; struct pcpu *pc; -#ifdef CPU_ENABLE_SSE struct xstate_hdr *xhdr; -#endif int late_console; thread0.td_kstack = proc0kstack; thread0.td_kstack_pages = TD0_KSTACK_PAGES; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here.
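	 * (proc0 and thread0 are statically allocated, so this only wires
	 * the two structures together; no allocator is available yet.)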
*/ proc_linkup0(&proc0, &thread0); metadata_missing = 0; if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } else { metadata_missing = 1; } if (bootinfo.bi_envp != 0) init_static_kenv((char *)bootinfo.bi_envp + KERNBASE, 0); else init_static_kenv(NULL, 0); /* Init basic tunables, hz etc */ init_param1(); /* * Make gdt memory segments. All segments cover the full 4GB * of address space and permissions are enforced at page level. */ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1); pc = &__pcpu[0]; gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1); gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); lgdt(&r_gdt); pcpu_init(pc, 0, sizeof(struct pcpu)); for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE) pmap_kenter(pa + KERNBASE, pa); dpcpu_init((void *)(first + KERNBASE), 0); first += DPCPU_SIZE; PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); /* Non-late cninit() and printf() can be moved up to here. */ /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); /* make ldt memory segments */ ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); for (x = 0; x < nitems(ldt_segs); x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DE, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL , GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 
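	/*
	 * (A note on the gate types used above and below: SDT_SYS386TGT
	 * is a trap gate, which leaves interrupts enabled on entry,
	 * while SDT_SYS386IGT is an interrupt gate, which clears PSL_I
	 * first; hence the interrupt gates on #DB, #NMI and #PF, where
	 * taking another interrupt before the handler has saved state
	 * would be unsafe.  SEL_UPL on #BP, #OF and the 0x80 syscall
	 * vector lets user code raise them with int $n.)
	 */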
setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #ifdef KDTRACE_HOOKS setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #endif #ifdef XENHVM setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #endif r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); #ifdef XBOX /* * The following code queries the PCI ID of 0:0:0. For the XBOX, * This should be 0x10de / 0x02a5. * * This is exactly what Linux does. */ outl(0xcf8, 0x80000000); if (inl(0xcfc) == 0x02a510de) { arch_i386_is_xbox = 1; pic16l_setled(XBOX_LED_GREEN); /* * We are an XBOX, but we may have either 64MB or 128MB of * memory. The PCI host bridge should be programmed for this, * so we just query it. */ outl(0xcf8, 0x80000084); arch_i386_xbox_memsize = (inl(0xcfc) == 0x7FFFFFF) ? 128 : 64; } #endif /* XBOX */ /* * Initialize the clock before the console so that console * initialization can use DELAY(). */ clock_init(); finishidentcpu(); /* Final stage of CPU initialization */ setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ initializecpucache(); /* pointer to selector slot for %fs/%gs */ PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); #if defined(PAE) || defined(PAE_TABLES) dblfault_tss.tss_cr3 = (int)IdlePDPT; #else dblfault_tss.tss_cr3 = (int)IdlePTD; #endif dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); /* Initialize the tss (except for the final esp0) early for vm86. */ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + thread0.td_kstack_pages * PAGE_SIZE - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); ltr(gsel_tss); /* Initialize the PIC early for vm86 calls. */ #ifdef DEV_ISA #ifdef DEV_ATPIC elcr_probe(); atpic_startup(); #else /* Reset and mask the atpics and leave them shut down. */ atpic_reset(); /* * Point the ICU spurious interrupt vectors at the APIC spurious * interrupt handler. */ setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif #endif /* * The console and kdb should be initialized even earlier than here, * but some console drivers don't work until after getmemsize(). * Default to late console initialization to support these drivers. * This loses mainly printf()s in getmemsize() and early debugging. 
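	 * (debug.late_console is a loader tunable; setting it to 0
	 * restores the earlier initialization when the console driver in
	 * use can cope with it.)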
*/ late_console = 1; TUNABLE_INT_FETCH("debug.late_console", &late_console); if (!late_console) { cninit(); i386_kdb_init(); } vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured, and u/iom is accessible */ if (late_console) cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); if (late_console) i386_kdb_init(); msgbufinit(msgbufp, msgbufsize); -#ifdef DEV_NPX npxinit(true); -#endif /* * Set up thread0 pcb after npxinit calculated pcb + fpu save * area size. Zero out the extended state header in fpu save * area. */ thread0.td_pcb = get_pcb_td(&thread0); bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size); -#ifdef CPU_ENABLE_SSE if (use_xsave) { xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + 1); xhdr->xstate_bv = xsave_mask; } -#endif PCPU_SET(curpcb, thread0.td_pcb); /* Move esp0 in the tss to its final place. */ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16); gdt[GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; /* clear busy bit */ ltr(gsel_tss); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(lcall_syscall); gdp->gd_looffset = x; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = x >> 16; /* XXX does this work? */ /* XXX yes! */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; #if defined(PAE) || defined(PAE_TABLES) thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; #else thread0.td_pcb->pcb_cr3 = (int)IdlePTD; #endif thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; cpu_probe_amdc1e(); #ifdef FDT x86_init_fdt(); #endif /* Location of kernel stack for locore */ return ((register_t)thread0.td_pcb); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } static int smap_sysctl_handler(SYSCTL_HANDLER_ARGS) { struct bios_smap *smapbase; struct bios_smap_xattr smap; caddr_t kmdp; uint32_t *smapattr; int count, error, i; /* Retrieve the system memory map from the loader.
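	 * As in add_smap_entries() above, a u_int32_t byte count
	 * immediately precedes the SMAP array; that is how "count" is
	 * derived below.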
*/ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf32 kernel"); smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase == NULL) return (0); smapattr = (uint32_t *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP_XATTR); count = *((u_int32_t *)smapbase - 1) / sizeof(*smapbase); error = 0; for (i = 0; i < count; i++) { smap.base = smapbase[i].base; smap.length = smapbase[i].length; smap.type = smapbase[i].type; if (smapattr != NULL) smap.xattr = smapattr[i]; else smap.xattr = 0; error = SYSCTL_OUT(req, &smap, sizeof(smap)); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0, smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data"); void spinlock_enter(void) { struct thread *td; register_t flags; td = curthread; if (td->td_md.md_spinlock_count == 0) { flags = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_flags = flags; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t flags; td = curthread; critical_exit(); flags = td->td_md.md_saved_flags; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(flags); } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; vm_offset_t tmp; if (!has_f00f_bug) return; GIANT_REQUIRED; printf("Intel Pentium detected, installing workaround for F00F bug\n"); tmp = kmem_malloc(kernel_arena, PAGE_SIZE * 2, M_WAITOK | M_ZERO); if (tmp == 0) panic("kmem_malloc returned 0"); /* Put the problematic entry (#6) at the end of the lower page. */ new_idt = (struct gate_descriptor*) (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (u_int)new_idt; lidt(&r_idt); idt = new_idt; pmap_protect(kernel_pmap, tmp, tmp + PAGE_SIZE, VM_PROT_READ); } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_edi = tf->tf_edi; pcb->pcb_esi = tf->tf_esi; pcb->pcb_ebp = tf->tf_ebp; pcb->pcb_ebx = tf->tf_ebx; pcb->pcb_eip = tf->tf_eip; pcb->pcb_esp = (ISPL(tf->tf_cs)) ? 
tf->tf_esp : (int)(tf + 1) - 8; pcb->pcb_gs = rgs(); } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_eip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_eflags |= PSL_T; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_eflags &= ~PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; pcb = td->td_pcb; regs->r_gs = pcb->pcb_gs; return (fill_frame_regs(tp, regs)); } int fill_frame_regs(struct trapframe *tp, struct reg *regs) { regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); pcb = td->td_pcb; tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb->pcb_gs = regs->r_gs; return (0); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { KASSERT(td == curthread || TD_IS_SUSPENDED(td) || P_SHOULDSTOP(td->td_proc), ("not suspended thread %p", td)); -#ifdef DEV_NPX npxgetregs(td); -#else - bzero(fpregs, sizeof(*fpregs)); -#endif -#ifdef CPU_ENABLE_SSE if (cpu_fxsr) npx_fill_fpregs_xmm(&get_pcb_user_save_td(td)->sv_xmm, (struct save87 *)fpregs); else -#endif /* CPU_ENABLE_SSE */ bcopy(&get_pcb_user_save_td(td)->sv_87, fpregs, sizeof(*fpregs)); return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { -#ifdef CPU_ENABLE_SSE if (cpu_fxsr) npx_set_fpregs_xmm((struct save87 *)fpregs, &get_pcb_user_save_td(td)->sv_xmm); else -#endif /* CPU_ENABLE_SSE */ bcopy(fpregs, &get_pcb_user_save_td(td)->sv_87, sizeof(*fpregs)); -#ifdef DEV_NPX npxuserinited(td); -#endif return (0); } /* * Get machine context. 
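 * (GET_MC_CLEAR_RET zeroes %eax/%edx and the carry flag in the copied
 * context, so a context resumed via setcontext(2) observes a successful
 * return from the call that saved it.)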
*/ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct trapframe *tp; struct segment_descriptor *sdp; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_esp); PROC_UNLOCK(curthread->td_proc); mcp->mc_gs = td->td_pcb->pcb_gs; mcp->mc_fs = tp->tf_fs; mcp->mc_es = tp->tf_es; mcp->mc_ds = tp->tf_ds; mcp->mc_edi = tp->tf_edi; mcp->mc_esi = tp->tf_esi; mcp->mc_ebp = tp->tf_ebp; mcp->mc_isp = tp->tf_isp; mcp->mc_eflags = tp->tf_eflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_eax = 0; mcp->mc_edx = 0; mcp->mc_eflags &= ~PSL_C; } else { mcp->mc_eax = tp->tf_eax; mcp->mc_edx = tp->tf_edx; } mcp->mc_ebx = tp->tf_ebx; mcp->mc_ecx = tp->tf_ecx; mcp->mc_eip = tp->tf_eip; mcp->mc_cs = tp->tf_cs; mcp->mc_esp = tp->tf_esp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp, NULL, 0); sdp = &td->td_pcb->pcb_fsd; mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; mcp->mc_flags = 0; mcp->mc_xfpustate = 0; mcp->mc_xfpustate_len = 0; bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2)); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tp; char *xfpustate; int eflags, ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp) || (mcp->mc_flags & ~_MC_FLAG_MASK) != 0) return (EINVAL); eflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_eflags & ~PSL_USERCHANGE); if (mcp->mc_flags & _MC_HASFPXSTATE) { if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - sizeof(union savefpu)) return (EINVAL); xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); ret = copyin((void *)mcp->mc_xfpustate, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); } else xfpustate = NULL; ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); tp->tf_fs = mcp->mc_fs; tp->tf_es = mcp->mc_es; tp->tf_ds = mcp->mc_ds; tp->tf_edi = mcp->mc_edi; tp->tf_esi = mcp->mc_esi; tp->tf_ebp = mcp->mc_ebp; tp->tf_ebx = mcp->mc_ebx; tp->tf_edx = mcp->mc_edx; tp->tf_ecx = mcp->mc_ecx; tp->tf_eax = mcp->mc_eax; tp->tf_eip = mcp->mc_eip; tp->tf_eflags = eflags; tp->tf_esp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; td->td_pcb->pcb_gs = mcp->mc_gs; return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len) { -#ifdef CPU_ENABLE_SSE size_t max_len, len; -#endif -#ifndef DEV_NPX - mcp->mc_fpformat = _MC_FPFMT_NODEV; - mcp->mc_ownedfp = _MC_FPOWNED_NONE; - bzero(mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); -#else mcp->mc_ownedfp = npxgetregs(td); bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = npxformat(); -#ifdef CPU_ENABLE_SSE if (!use_xsave || xfpusave_len == 0) return; max_len = cpu_max_ext_state_size - sizeof(union savefpu); len = xfpusave_len; if (len > max_len) { len = max_len; bzero(xfpusave + max_len, len - max_len); } mcp->mc_flags |= _MC_HASFPXSTATE; mcp->mc_xfpustate_len = len; bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); -#endif -#endif } static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len) { union savefpu *fpstate; int error; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_387 && mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) { /* We don't care 
what state is left in the FPU or PCB. */ fpstate_drop(td); error = 0; } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { -#ifdef DEV_NPX fpstate = (union savefpu *)&mcp->mc_fpstate; -#ifdef CPU_ENABLE_SSE if (cpu_fxsr) fpstate->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask; -#endif error = npxsetregs(td, fpstate, xfpustate, xfpustate_len); -#else - error = EINVAL; -#endif } else return (EINVAL); return (error); } static void fpstate_drop(struct thread *td) { KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); critical_enter(); -#ifdef DEV_NPX if (PCPU_GET(fpcurthread) == td) npxdrop(); -#endif /* * XXX force a full drop of the npx. The above only drops it if we * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. * * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE); critical_exit(); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[4] = rdr4(); dbregs->dr[5] = rdr5(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr4(dbregs->dr[4]); load_dr5(dbregs->dr[5]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP. */ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02) return (EINVAL); } pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? 
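		 * (Roughly: yes; a kernel-mode store into a watched user
		 * address can still trigger the breakpoint, so the trap
		 * handler has to cope with debug traps taken from kernel
		 * mode as well.)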
*/ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; pcb->pcb_flags |= PCB_DBREGS; } return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. */ int user_dbreg_trap(void) { u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only available as * inline functions, thus cannot be called from the debugger. */ /* silence compiler warnings */ u_char inb_(u_short); void outb_(u_short, u_char); u_char inb_(u_short port) { return inb(port); } void outb_(u_short port, u_char data) { outb(port, data); } #endif /* KDB */ Index: projects/ipsec/sys/i386/i386/mp_machdep.c =================================================================== --- projects/ipsec/sys/i386/i386/mp_machdep.c (revision 313186) +++ projects/ipsec/sys/i386/i386/mp_machdep.c (revision 313187) @@ -1,472 +1,469 @@ /*- * Copyright (c) 1996, by Steve Passe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_cpu.h" #include "opt_kstack_pages.h" #include "opt_pmap.h" #include "opt_sched.h" #include "opt_smp.h" #if !defined(lint) #if !defined(SMP) #error How did you get here? #endif #ifndef DEV_APIC #error The apic device is required for SMP, add "device apic" to your config file. #endif -#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT) -#error SMP not supported with CPU_DISABLE_CMPXCHG -#endif #endif /* not lint */ #include #include #include #include /* cngetc() */ #include #ifdef GPROF #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define WARMBOOT_TARGET 0 #define WARMBOOT_OFF (KERNBASE + 0x0467) #define WARMBOOT_SEG (KERNBASE + 0x0469) #define CMOS_REG (0x70) #define CMOS_DATA (0x71) #define BIOS_RESET (0x0f) #define BIOS_WARM (0x0a) /* * this code MUST be enabled here and in mpboot.s. * it follows the very early stages of AP boot by placing values in CMOS ram. * it NORMALLY will never be needed and thus the primitive method for enabling. * #define CHECK_POINTS */ #if defined(CHECK_POINTS) #define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) #define CHECK_INIT(D); \ CHECK_WRITE(0x34, (D)); \ CHECK_WRITE(0x35, (D)); \ CHECK_WRITE(0x36, (D)); \ CHECK_WRITE(0x37, (D)); \ CHECK_WRITE(0x38, (D)); \ CHECK_WRITE(0x39, (D)); #define CHECK_PRINT(S); \ printf("%s: %d, %d, %d, %d, %d, %d\n", \ (S), \ CHECK_READ(0x34), \ CHECK_READ(0x35), \ CHECK_READ(0x36), \ CHECK_READ(0x37), \ CHECK_READ(0x38), \ CHECK_READ(0x39)); #else /* CHECK_POINTS */ #define CHECK_INIT(D) #define CHECK_PRINT(S) #define CHECK_WRITE(A, D) #endif /* CHECK_POINTS */ extern struct pcpu __pcpu[]; /* * Local data and functions. */ static void install_ap_tramp(void); static int start_all_aps(void); static int start_ap(int apic_id); static u_int boot_address; /* * Calculate usable address in base memory for AP trampoline code. */ u_int mp_bootaddress(u_int basemem) { boot_address = trunc_page(basemem); /* round down to 4k boundary */ if ((basemem - boot_address) < bootMP_size) boot_address -= PAGE_SIZE; /* not enough, lower by 4k */ return boot_address; } /* * Initialize the IPI handlers and start up the AP's. */ void cpu_mp_start(void) { int i; /* Initialize the logical ID to APIC ID table. */ for (i = 0; i < MAXCPU; i++) { cpu_apic_ids[i] = -1; cpu_ipi_pending[i] = 0; } /* Install an inter-CPU IPI for TLB invalidation */ setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for cache invalidation. 
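	 * (All of the IPI handlers here are installed as interrupt gates
	 * (SDT_SYS386IGT), so they run with interrupts disabled.)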
*/ setidt(IPI_INVLCACHE, IDTVEC(invlcache), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for all-CPU rendezvous */ setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install generic inter-CPU IPI handler */ setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for CPU stop/restart */ setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for CPU suspend/resume */ setidt(IPI_SUSPEND, IDTVEC(cpususpend), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Set boot_cpu_id if needed. */ if (boot_cpu_id == -1) { boot_cpu_id = PCPU_GET(apic_id); cpu_info[boot_cpu_id].cpu_bsp = 1; } else KASSERT(boot_cpu_id == PCPU_GET(apic_id), ("BSP's APIC ID doesn't match boot_cpu_id")); /* Probe logical/physical core configuration. */ topo_probe(); assign_cpu_ids(); /* Start each Application Processor */ start_all_aps(); set_interrupt_apic_ids(); } /* * AP CPU's call this to initialize themselves. */ void init_secondary(void) { struct pcpu *pc; vm_offset_t addr; int gsel_tss; int x, myid; u_int cr0; /* bootAP is set in start_ap() to our ID. */ myid = bootAP; /* Get per-cpu data */ pc = &__pcpu[myid]; /* prime data page for it to use */ pcpu_init(pc, myid, sizeof(struct pcpu)); dpcpu_init(dpcpu, myid); pc->pc_apic_id = cpu_apic_ids[myid]; pc->pc_prvspace = pc; pc->pc_curthread = 0; fix_cpuid(); gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; for (x = 0; x < NGDT; x++) { ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); } r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) &gdt[myid * NGDT]; lgdt(&r_gdt); /* does magic intra-segment return */ lidt(&r_idt); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); ltr(gsel_tss); PCPU_SET(fsgs_gdt, &gdt[myid * NGDT + GUFS_SEL].sd); /* * Set to a known state: * Set by mpboot.s: CR0_PG, CR0_PE * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM */ cr0 = rcr0(); cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); load_cr0(cr0); CHECK_WRITE(0x38, 5); /* signal our startup to the BSP. */ mp_naps++; CHECK_WRITE(0x39, 6); /* Spin until the BSP releases the AP's. 
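	 * (aps_ready is written by the BSP once the kernel is far enough
	 * along for the APs to proceed; the acquire load pairs with that
	 * store, so each AP sees the BSP's earlier initialization.)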
*/ while (atomic_load_acq_int(&aps_ready) == 0) ia32_pause(); /* BSP may have changed PTD while we were waiting */ invltlb(); for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE) invlpg(addr); #if defined(I586_CPU) && !defined(NO_F00F_HACK) lidt(&r_idt); #endif init_secondary_tail(); } /* * start each AP in our list */ /* Lowest 1MB is already mapped: don't touch*/ #define TMPMAP_START 1 static int start_all_aps(void) { u_char mpbiosreason; u_int32_t mpbioswarmvec; int apic_id, cpu, i; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); /* install the AP 1st level boot code */ install_ap_tramp(); /* save the current value of the warm-start vector */ mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); outb(CMOS_REG, BIOS_RESET); mpbiosreason = inb(CMOS_DATA); /* set up temporary P==V mapping for AP boot */ /* XXX this is a hack, we should boot the AP on its own stack/PTD */ for (i = TMPMAP_START; i < NKPT; i++) PTD[i] = PTD[KPTDI + i]; invltlb(); /* start each AP */ for (cpu = 1; cpu < mp_ncpus; cpu++) { apic_id = cpu_apic_ids[cpu]; /* allocate and set up a boot stack data page */ bootstacks[cpu] = (char *)kmem_malloc(kernel_arena, kstack_pages * PAGE_SIZE, M_WAITOK | M_ZERO); dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); /* setup a vector to our boot code */ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ bootSTK = (char *)bootstacks[cpu] + kstack_pages * PAGE_SIZE - 4; bootAP = cpu; /* attempt to start the Application Processor */ CHECK_INIT(99); /* setup checkpoints */ if (!start_ap(apic_id)) { printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); CHECK_PRINT("trace"); /* show checkpoints */ /* better panic as the AP may be running loose */ printf("panic y/n? [y] "); if (cngetc() != 'n') panic("bye-bye"); } CHECK_PRINT("trace"); /* show checkpoints */ CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); /* Undo V==P hack from above */ for (i = TMPMAP_START; i < NKPT; i++) PTD[i] = 0; pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); /* number of APs actually started */ return mp_naps; } /* * load the 1st level AP boot code into base memory. */ /* targets for relocation */ extern void bigJump(void); extern void bootCodeSeg(void); extern void bootDataSeg(void); extern void MPentry(void); extern u_int MP_GDT; extern u_int mp_gdtbase; static void install_ap_tramp(void) { int x; int size = *(int *) ((u_long) & bootMP_size); vm_offset_t va = boot_address + KERNBASE; u_char *src = (u_char *) ((u_long) bootMP); u_char *dst = (u_char *) va; u_int boot_base = (u_int) bootMP; u_int8_t *dst8; u_int16_t *dst16; u_int32_t *dst32; KASSERT (size <= PAGE_SIZE, ("'size' do not fit into PAGE_SIZE, as expected.")); pmap_kenter(va, boot_address); pmap_invalidate_page (kernel_pmap, va); for (x = 0; x < size; ++x) *dst++ = *src++; /* * modify addresses in code we just moved to basemem. unfortunately we * need fairly detailed info about mpboot.s for this to work. changes * to mpboot.s might require changes here. 
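	 *
	 * A sketch of what gets patched, assuming mpboot.s is unchanged:
	 * the lgdt argument, the ljmp target into MPentry(), and the
	 * 24-bit descriptor bases for the boot code/data segments
	 * (stored below as a 16-bit low word plus the byte above it) are
	 * all rewritten to refer to boot_address instead of the
	 * link-time addresses.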
*/ /* boot code is located in KERNEL space */ dst = (u_char *) va; /* modify the lgdt arg */ dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); *dst32 = boot_address + ((u_int) & MP_GDT - boot_base); /* modify the ljmp target for MPentry() */ dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); *dst32 = ((u_int) MPentry - KERNBASE); /* modify the target for boot code segment */ dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); dst8 = (u_int8_t *) (dst16 + 1); *dst16 = (u_int) boot_address & 0xffff; *dst8 = ((u_int) boot_address >> 16) & 0xff; /* modify the target for boot data segment */ dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); dst8 = (u_int8_t *) (dst16 + 1); *dst16 = (u_int) boot_address & 0xffff; *dst8 = ((u_int) boot_address >> 16) & 0xff; } /* * This function starts the AP (application processor) identified * by the APIC ID 'physicalCpu'. It does quite a "song and dance" * to accomplish this. This is necessary because of the nuances * of the different hardware we might encounter. It isn't pretty, * but it seems to work. */ static int start_ap(int apic_id) { int vector, ms; int cpus; /* calculate the vector */ vector = (boot_address >> 12) & 0xff; /* used as a watchpoint to signal AP startup */ cpus = mp_naps; ipi_startup(apic_id, vector); /* Wait up to 5 seconds for it to start. */ for (ms = 0; ms < 5000; ms++) { if (mp_naps > cpus) return 1; /* return SUCCESS */ DELAY(1000); } return 0; /* return FAILURE */ } Index: projects/ipsec/sys/i386/i386/pmap.c =================================================================== --- projects/ipsec/sys/i386/i386/pmap.c (revision 313186) +++ projects/ipsec/sys/i386/i386/pmap.c (revision 313187) @@ -1,5681 +1,5675 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2005-2010 Alan L. Cox * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. 
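 * (On SMP the expensive case is the TLB shootdown: an invalidation of
 * a mapping that other processors may have cached must be propagated
 * to each of them, which is why this module batches and defers
 * invalidations whenever the interface permits.)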
*/ #include "opt_apic.h" #include "opt_cpu.h" #include "opt_pmap.h" #include "opt_smp.h" #include "opt_xbox.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_APIC #include #include #include #endif #include #include #include #include #include #ifdef SMP #include #endif #ifdef XBOX #include #endif -#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) -#define CPU_ENABLE_SSE -#endif - #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 #endif #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline #else #define PMAP_INLINE extern inline #endif #else #define PMAP_INLINE #endif #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif #define pa_index(pa) ((pa) >> PDRSHIFT) #define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) /* * Get PDEs and PTEs for user/kernel address space */ #define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) #define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) #define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) #define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) #define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_set_w(pte, v) ((v) ? atomic_set_int((u_int *)(pte), PG_W) : \ atomic_clear_int((u_int *)(pte), PG_W)) #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) struct pmap kernel_pmap_store; LIST_HEAD(pmaplist, pmap); static struct pmaplist allpmaps; static struct mtx allpmaps_lock; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ int pgeflag = 0; /* PG_G or-in */ int pseflag = 0; /* PG_PS or-in */ static int nkpt = NKPT; vm_offset_t kernel_vm_end = KERNBASE + NKPT * NBPDR; extern u_int32_t KERNend; extern u_int32_t KPTphys; #if defined(PAE) || defined(PAE_TABLES) pt_entry_t pg_nx; static uma_zone_t pdptzone; #endif static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); static int pat_works = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD, &pat_works, 1, "Is page attribute table fully functional?"); static int pg_ps_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pg_ps_enabled, 0, "Are large page mappings enabled?"); #define PAT_INDEX_SIZE 8 static int pat_index[PAT_INDEX_SIZE]; /* cache mode to PAT index conversion */ /* * pmap_mapdev support pre initialization (i.e. 
console) */ #define PMAP_PREINIT_MAPPING_COUNT 8 static struct pmap_preinit_mapping { vm_paddr_t pa; vm_offset_t va; vm_size_t sz; int mode; } pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT]; static int pmap_initialized; static struct rwlock_padalign pvh_global_lock; /* * Data for the pv entry allocation mechanism */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; static struct md_page *pv_table; static int shpgperproc = PMAP_SHPGPERPROC; struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */ int pv_maxchunks; /* How many chunks we have KVA for */ vm_offset_t pv_vafree; /* freelist stored in the PTE */ /* * All those kernel PT submaps that BSD is so fond of */ pt_entry_t *CMAP3; static pd_entry_t *KPTD; caddr_t ptvmmap = 0; caddr_t CADDR3; struct msgbuf *msgbufp = NULL; /* * Crashdump maps. */ static caddr_t crashdumpmap; static pt_entry_t *PMAP1 = NULL, *PMAP2; static pt_entry_t *PADDR1 = NULL, *PADDR2; #ifdef SMP static int PMAP1cpu; static int PMAP1changedcpu; SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, &PMAP1changedcpu, 0, "Number of times pmap_pte_quick changed CPU with same PMAP1"); #endif static int PMAP1changed; SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, &PMAP1changed, 0, "Number of times pmap_pte_quick changed PMAP1"); static int PMAP1unchanged; SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, &PMAP1unchanged, 0, "Number of times pmap_pte_quick didn't change PMAP1"); static struct mtx PMAP2mutex; static void free_pv_chunk(struct pv_chunk *pc); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try); static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static int pmap_pvh_wired_mappings(struct md_page *pvh, int count); static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte); static void pmap_flush_page(vm_page_t m); static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); static boolean_t pmap_is_modified_pvh(struct md_page *pvh); static boolean_t pmap_is_referenced_pvh(struct md_page *pvh); static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde); static void pmap_pde_attr(pd_entry_t *pde, int cache_bits); static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot); static void pmap_pte_attr(pt_entry_t *pte, int cache_bits); static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, struct spglist *free); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, struct spglist *free); static vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va); static void pmap_remove_page(struct pmap *pmap, vm_offset_t va, struct spglist 
*free); static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va); static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde); static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags); static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags); static void _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, struct spglist *free); static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va); static void pmap_pte_release(pt_entry_t *pte); static int pmap_unuse_pt(pmap_t, vm_offset_t, struct spglist *); #if defined(PAE) || defined(PAE_TABLES) static void *pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait); #endif static void pmap_set_pg(void); static __inline void pagezero(void *page); CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); /* * If you get an error here, then you set KVA_PAGES wrong! See the * description of KVA_PAGES in sys/i386/include/pmap.h. It must be * multiple of 4 for a normal kernel, or a multiple of 8 for a PAE. */ CTASSERT(KERNBASE % (1 << 24) == 0); /* * Bootstrap the system enough to run with virtual memory. * * On the i386 this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address "KERNBASE" to the actual * (physical) address starting relative to 0] */ void pmap_bootstrap(vm_paddr_t firstaddr) { vm_offset_t va; pt_entry_t *pte, *unused; struct pcpu *pc; int i; /* * Add a physical memory segment (vm_phys_seg) corresponding to the * preallocated kernel page table pages so that vm_page structures * representing these pages will be created. The vm_page structures * are required for promotion of the corresponding kernel virtual * addresses to superpage mappings. */ vm_phys_add_seg(KPTphys, KPTphys + ptoa(nkpt)); /* * Initialize the first available kernel virtual address. However, * using "firstaddr" may waste a few pages of the kernel virtual * address space, because locore may not have mapped every physical * page that it allocated. Preferably, locore would provide a first * unused virtual address in addition to "firstaddr". */ virtual_avail = (vm_offset_t) KERNBASE + firstaddr; virtual_end = VM_MAX_KERNEL_ADDRESS; /* * Initialize the kernel pmap (which is statically allocated). */ PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); #if defined(PAE) || defined(PAE_TABLES) kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); #endif CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); /* * Initialize the global pv list lock. */ rw_init(&pvh_global_lock, "pmap pv global"); LIST_INIT(&allpmaps); /* * Request a spin mutex so that changes to allpmaps cannot be * preempted by smp_rendezvous_cpus(). Otherwise, * pmap_update_pde_kernel() could access allpmaps while it is * being changed. 
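	 * (With an ordinary sleep mutex the holder could be preempted
	 * while allpmaps is locked, and a CPU spinning inside
	 * smp_rendezvous_cpus() could then wait on the list forever.)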
*/ mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ #define SYSMAP(c, p, v, n) \ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; pte = vtopte(va); /* * Initialize temporary map objects on the current CPU for use * during early boot. * CMAP1/CMAP2 are used for zeroing and copying pages. * CMAP3 is used for the boot-time memory test. */ pc = get_pcpu(); mtx_init(&pc->pc_cmap_lock, "SYSMAPS", NULL, MTX_DEF); SYSMAP(caddr_t, pc->pc_cmap_pte1, pc->pc_cmap_addr1, 1) SYSMAP(caddr_t, pc->pc_cmap_pte2, pc->pc_cmap_addr2, 1) SYSMAP(vm_offset_t, pte, pc->pc_qmap_addr, 1) SYSMAP(caddr_t, CMAP3, CADDR3, 1); /* * Crashdump maps. */ SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS) /* * ptvmmap is used for reading arbitrary physical pages via /dev/mem. */ SYSMAP(caddr_t, unused, ptvmmap, 1) /* * msgbufp is used to map the system message buffer. */ SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(msgbufsize))) /* * KPTmap is used by pmap_kextract(). * * KPTmap is first initialized by locore. However, that initial * KPTmap can only support NKPT page table pages. Here, a larger * KPTmap is created that can support KVA_PAGES page table pages. */ SYSMAP(pt_entry_t *, KPTD, KPTmap, KVA_PAGES) for (i = 0; i < NKPT; i++) KPTD[i] = (KPTphys + (i << PAGE_SHIFT)) | pgeflag | PG_RW | PG_V; /* * Adjust the start of the KPTD and KPTmap so that the implementation * of pmap_kextract() and pmap_growkernel() can be made simpler. */ KPTD -= KPTDI; KPTmap -= i386_btop(KPTDI << PDRSHIFT); /* * PADDR1 and PADDR2 are used by pmap_pte_quick() and pmap_pte(), * respectively. */ SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1) SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1) mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF); virtual_avail = va; /* * Leave in place an identity mapping (virt == phys) for the low 1 MB * physical memory region that is used by the ACPI wakeup code. This * mapping must not have PG_G set. */ #ifdef XBOX /* FIXME: This is gross, but needed for the XBOX. Since we are in such * an early stadium, we cannot yet neatly map video memory ... :-( * Better fixes are very welcome! */ if (!arch_i386_is_xbox) #endif for (i = 1; i < NKPT; i++) PTD[i] = 0; /* * Initialize the PAT MSR if present. * pmap_init_pat() clears and sets CR4_PGE, which, as a * side-effect, invalidates stale PG_G TLB entries that might * have been created in our pre-boot environment. We assume * that PAT support implies PGE and in reverse, PGE presence * comes with PAT. Both features were added for Pentium Pro. */ pmap_init_pat(); /* Turn on PG_G on kernel page(s) */ pmap_set_pg(); } static void pmap_init_reserved_pages(void) { struct pcpu *pc; vm_offset_t pages; int i; CPU_FOREACH(i) { pc = pcpu_find(i); /* * Skip if the mapping has already been initialized, * i.e. this is the BSP. */ if (pc->pc_cmap_addr1 != 0) continue; mtx_init(&pc->pc_cmap_lock, "SYSMAPS", NULL, MTX_DEF); pages = kva_alloc(PAGE_SIZE * 3); if (pages == 0) panic("%s: unable to allocate KVA", __func__); pc->pc_cmap_pte1 = vtopte(pages); pc->pc_cmap_pte2 = vtopte(pages + PAGE_SIZE); pc->pc_cmap_addr1 = (caddr_t)pages; pc->pc_cmap_addr2 = (caddr_t)(pages + PAGE_SIZE); pc->pc_qmap_addr = pages + (PAGE_SIZE * 2); } } SYSINIT(rpages_init, SI_SUB_CPU, SI_ORDER_ANY, pmap_init_reserved_pages, NULL); /* * Setup the PAT MSR. 
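 * (The PAT gives each mapping an indexed choice of memory type:
 * write-back, write-through, uncacheable, write-combining, and so on.
 * pat_index[] maps the VM layer's cache modes onto the MSR slots
 * programmed here.)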
*/ void pmap_init_pat(void) { int pat_table[PAT_INDEX_SIZE]; uint64_t pat_msr; u_long cr0, cr4; int i; /* Set default PAT index table. */ for (i = 0; i < PAT_INDEX_SIZE; i++) pat_table[i] = -1; pat_table[PAT_WRITE_BACK] = 0; pat_table[PAT_WRITE_THROUGH] = 1; pat_table[PAT_UNCACHEABLE] = 3; pat_table[PAT_WRITE_COMBINING] = 3; pat_table[PAT_WRITE_PROTECTED] = 3; pat_table[PAT_UNCACHED] = 3; /* * Bail if this CPU doesn't implement PAT. * We assume that PAT support implies PGE. */ if ((cpu_feature & CPUID_PAT) == 0) { for (i = 0; i < PAT_INDEX_SIZE; i++) pat_index[i] = pat_table[i]; pat_works = 0; return; } /* * Due to some Intel errata, we can only safely use the lower 4 * PAT entries. * * Intel Pentium III Processor Specification Update * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B * or Mode C Paging) * * Intel Pentium IV Processor Specification Update * Errata N46 (PAT Index MSB May Be Calculated Incorrectly) */ if (cpu_vendor_id == CPU_VENDOR_INTEL && !(CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe)) pat_works = 0; /* Initialize default PAT entries. */ pat_msr = PAT_VALUE(0, PAT_WRITE_BACK) | PAT_VALUE(1, PAT_WRITE_THROUGH) | PAT_VALUE(2, PAT_UNCACHED) | PAT_VALUE(3, PAT_UNCACHEABLE) | PAT_VALUE(4, PAT_WRITE_BACK) | PAT_VALUE(5, PAT_WRITE_THROUGH) | PAT_VALUE(6, PAT_UNCACHED) | PAT_VALUE(7, PAT_UNCACHEABLE); if (pat_works) { /* * Leave the indices 0-3 at the default of WB, WT, UC-, and UC. * Program 5 and 6 as WP and WC. * Leave 4 and 7 as WB and UC. */ pat_msr &= ~(PAT_MASK(5) | PAT_MASK(6)); pat_msr |= PAT_VALUE(5, PAT_WRITE_PROTECTED) | PAT_VALUE(6, PAT_WRITE_COMBINING); pat_table[PAT_UNCACHED] = 2; pat_table[PAT_WRITE_PROTECTED] = 5; pat_table[PAT_WRITE_COMBINING] = 6; } else { /* * Just replace PAT Index 2 with WC instead of UC-. */ pat_msr &= ~PAT_MASK(2); pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING); pat_table[PAT_WRITE_COMBINING] = 2; } /* Disable PGE. */ cr4 = rcr4(); load_cr4(cr4 & ~CR4_PGE); /* Disable caches (CD = 1, NW = 0). */ cr0 = rcr0(); load_cr0((cr0 & ~CR0_NW) | CR0_CD); /* Flushes caches and TLBs. */ wbinvd(); invltlb(); /* Update PAT and index table. */ wrmsr(MSR_PAT, pat_msr); for (i = 0; i < PAT_INDEX_SIZE; i++) pat_index[i] = pat_table[i]; /* Flush caches and TLBs again. */ wbinvd(); invltlb(); /* Restore caches and PGE. */ load_cr0(cr0); load_cr4(cr4); } /* * Set PG_G on kernel pages. Only the BSP calls this when SMP is turned on. */ static void pmap_set_pg(void) { pt_entry_t *pte; vm_offset_t va, endva; if (pgeflag == 0) return; endva = KERNBASE + KERNend; if (pseflag) { va = KERNBASE + KERNLOAD; while (va < endva) { pdir_pde(PTD, va) |= pgeflag; invltlb(); /* Flush non-PG_G entries. */ va += NBPDR; } } else { va = (vm_offset_t)btext; while (va < endva) { pte = vtopte(va); if (*pte) *pte |= pgeflag; invltlb(); /* Flush non-PG_G entries. */ va += PAGE_SIZE; } } } /* * Initialize a vm_page's machine-dependent fields. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); m->md.pat_mode = PAT_WRITE_BACK; } #if defined(PAE) || defined(PAE_TABLES) static void * pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait) { /* Inform UMA that this allocator uses kernel_map/object. */ *flags = UMA_SLAB_KERNEL; return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait, 0x0ULL, 0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT)); } #endif /* * Abuse the pte nodes for unmapped kva to thread a kva freelist through. 
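 * Each unmapped page's pte slot simply stores the va of the next free
 * page, so the freelist consumes no memory beyond the page table
 * itself.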
* Requirements: * - Must deal with pages in order to ensure that none of the PG_* bits * are ever set, PG_V in particular. * - Assumes we can write to ptes without pte_store() atomic ops, even * on PAE systems. This should be ok. * - Assumes nothing will ever test these addresses for 0 to indicate * no mapping instead of correctly checking PG_V. * - Assumes a vm_offset_t will fit in a pte (true for i386). * Because PG_V is never set, there can be no mappings to invalidate. */ static vm_offset_t pmap_ptelist_alloc(vm_offset_t *head) { pt_entry_t *pte; vm_offset_t va; va = *head; if (va == 0) panic("pmap_ptelist_alloc: exhausted ptelist KVA"); pte = vtopte(va); *head = *pte; if (*head & PG_V) panic("pmap_ptelist_alloc: va with PG_V set!"); *pte = 0; return (va); } static void pmap_ptelist_free(vm_offset_t *head, vm_offset_t va) { pt_entry_t *pte; if (va & PG_V) panic("pmap_ptelist_free: freeing va with PG_V set!"); pte = vtopte(va); *pte = *head; /* virtual! PG_V is 0 though */ *head = va; } static void pmap_ptelist_init(vm_offset_t *head, void *base, int npages) { int i; vm_offset_t va; *head = 0; for (i = npages - 1; i >= 0; i--) { va = (vm_offset_t)base + i * PAGE_SIZE; pmap_ptelist_free(head, va); } } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { struct pmap_preinit_mapping *ppim; vm_page_t mpte; vm_size_t s; int i, pv_npg; /* * Initialize the vm page array entries for the kernel pmap's * page table pages. */ for (i = 0; i < NKPT; i++) { mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT)); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_init: page table page is out of range")); mpte->pindex = i + KPTDI; mpte->phys_addr = KPTphys + (i << PAGE_SHIFT); } /* * Initialize the address space (zone) for the pv entries. Set a * high water mark so that the system can recover from excessive * numbers of pv entries. */ TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count; TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); pv_entry_max = roundup(pv_entry_max, _NPCPV); pv_entry_high_water = 9 * (pv_entry_max / 10); /* * If the kernel is running on a virtual machine, then it must assume * that MCA is enabled by the hypervisor. Moreover, the kernel must * be prepared for the hypervisor changing the vendor and family that * are reported by CPUID. Consequently, the workaround for AMD Family * 10h Erratum 383 is enabled if the processor's feature set does not * include at least one feature that is only supported by older Intel * or newer AMD processors. */ if (vm_guest != VM_GUEST_NO && (cpu_feature & CPUID_SS) == 0 && (cpu_feature2 & (CPUID2_SSSE3 | CPUID2_SSE41 | CPUID2_AESNI | CPUID2_AVX | CPUID2_XSAVE)) == 0 && (amd_feature2 & (AMDID2_XOP | AMDID2_FMA4)) == 0) workaround_erratum383 = 1; /* * Are large page mappings supported and enabled? */ TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled); if (pseflag == 0) pg_ps_enabled = 0; else if (pg_ps_enabled) { KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, ("pmap_init: can't assign to pagesizes[1]")); pagesizes[1] = NBPDR; } /* * Calculate the size of the pv head table for superpages. * Handle the possibility that "vm_phys_segs[...].end" is zero. */ pv_npg = trunc_4mpage(vm_phys_segs[vm_phys_nsegs - 1].end - PAGE_SIZE) / NBPDR + 1; /* * Allocate memory for the pv head table for superpages. 
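 * One entry is needed for each NBPDR-sized (2/4MB) region of physical
 * memory below the end of the last vm_phys segment, hence pv_npg
 * entries in total.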
*/ s = (vm_size_t)(pv_npg * sizeof(struct md_page)); s = round_page(s); pv_table = (struct md_page *)kmem_malloc(kernel_arena, s, M_WAITOK | M_ZERO); for (i = 0; i < pv_npg; i++) TAILQ_INIT(&pv_table[i].pv_list); pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc); pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks); if (pv_chunkbase == NULL) panic("pmap_init: not enough kvm for pv chunks"); pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks); #if defined(PAE) || defined(PAE_TABLES) pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, UMA_ZONE_VM | UMA_ZONE_NOFREE); uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf); #endif pmap_initialized = 1; if (!bootverbose) return; for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == 0) continue; printf("PPIM %u: PA=%#jx, VA=%#x, size=%#x, mode=%#x\n", i, (uintmax_t)ppim->pa, ppim->va, ppim->sz, ppim->mode); } } SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, "Max number of PV entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, "Page share factor per proc"); static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0, "2/4MB page mapping counters"); static u_long pmap_pde_demotions; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD, &pmap_pde_demotions, 0, "2/4MB page demotions"); static u_long pmap_pde_mappings; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, &pmap_pde_mappings, 0, "2/4MB page mappings"); static u_long pmap_pde_p_failures; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD, &pmap_pde_p_failures, 0, "2/4MB page promotion failures"); static u_long pmap_pde_promotions; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD, &pmap_pde_promotions, 0, "2/4MB page promotions"); /*************************************************** * Low level helper routines..... ***************************************************/ /* * Determine the appropriate bits to set in a PTE or PDE for a specified * caching mode. */ int pmap_cache_bits(int mode, boolean_t is_pde) { int cache_bits, pat_flag, pat_idx; if (mode < 0 || mode >= PAT_INDEX_SIZE || pat_index[mode] < 0) panic("Unknown caching mode %d\n", mode); /* The PAT bit is different for PTE's and PDE's. */ pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT; /* Map the caching mode to a PAT index. */ pat_idx = pat_index[mode]; /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */ cache_bits = 0; if (pat_idx & 0x4) cache_bits |= pat_flag; if (pat_idx & 0x2) cache_bits |= PG_NC_PCD; if (pat_idx & 0x1) cache_bits |= PG_NC_PWT; return (cache_bits); } /* * The caller is responsible for maintaining TLB consistency. */ static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde) { pd_entry_t *pde; pmap_t pmap; boolean_t PTD_updated; PTD_updated = FALSE; mtx_lock_spin(&allpmaps_lock); LIST_FOREACH(pmap, &allpmaps, pm_list) { if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)) PTD_updated = TRUE; pde = pmap_pde(pmap, va); pde_store(pde, newpde); } mtx_unlock_spin(&allpmaps_lock); KASSERT(PTD_updated, ("pmap_kenter_pde: current page table is not in allpmaps")); } /* * After changing the page size for the specified virtual address in the page * table, flush the corresponding entries from the processor's TLB. Only the * calling processor's TLB is affected. * * The calling thread must be pinned to a processor. 
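 * Pinning matters because a migration between the page-size change and
 * the flush below would leave the stale entry in the original
 * processor's TLB.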
*/
static void
pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
{
	u_long cr4;

	if ((newpde & PG_PS) == 0)
		/* Demotion: flush a specific 2MB page mapping. */
		invlpg(va);
	else if ((newpde & PG_G) == 0)
		/*
		 * Promotion: flush every 4KB page mapping from the TLB
		 * because there are too many to flush individually.
		 */
		invltlb();
	else {
		/*
		 * Promotion: flush every 4KB page mapping from the TLB,
		 * including any global (PG_G) mappings.
		 */
		cr4 = rcr4();
		load_cr4(cr4 & ~CR4_PGE);
		/*
		 * Although preemption at this point could be detrimental to
		 * performance, it would not lead to an error.  PG_G is simply
		 * ignored if CR4.PGE is clear.  Moreover, in case this block
		 * is re-entered, the load_cr4() either above or below will
		 * modify CR4.PGE flushing the TLB.
		 */
		load_cr4(cr4 | CR4_PGE);
	}
}

void
invltlb_glob(void)
{
	uint64_t cr4;

	if (pgeflag == 0) {
		invltlb();
	} else {
		cr4 = rcr4();
		load_cr4(cr4 & ~CR4_PGE);
		load_cr4(cr4 | CR4_PGE);
	}
}

#ifdef SMP
/*
 * For SMP, these functions have to use the IPI mechanism for coherence.
 *
 * N.B.: Before calling any of the following TLB invalidation functions,
 * the calling processor must ensure that all stores updating a non-
 * kernel page table are globally performed.  Otherwise, another
 * processor could cache an old, pre-update entry without being
 * invalidated.  This can happen one of two ways: (1) The pmap becomes
 * active on another processor after its pm_active field is checked by
 * one of the following functions but before a store updating the page
 * table is globally performed.  (2) The pmap becomes active on another
 * processor before its pm_active field is checked but due to
 * speculative loads one of the following functions still reads the
 * pmap as inactive on the other processor.
 *
 * The kernel page table is exempt because its pm_active field is
 * immutable.  The kernel page table is always active on every
 * processor.
*/ void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { cpuset_t *mask, other_cpus; u_int cpuid; sched_pin(); if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invlpg(va); mask = &all_cpus; } else { cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); if (CPU_ISSET(cpuid, &pmap->pm_active)) invlpg(va); CPU_AND(&other_cpus, &pmap->pm_active); mask = &other_cpus; } smp_masked_invlpg(*mask, va); sched_unpin(); } /* 4k PTEs -- Chosen to exceed the total size of Broadwell L2 TLB */ #define PMAP_INVLPG_THRESHOLD (4 * 1024 * PAGE_SIZE) void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { cpuset_t *mask, other_cpus; vm_offset_t addr; u_int cpuid; if (eva - sva >= PMAP_INVLPG_THRESHOLD) { pmap_invalidate_all(pmap); return; } sched_pin(); if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); mask = &all_cpus; } else { cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); if (CPU_ISSET(cpuid, &pmap->pm_active)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); CPU_AND(&other_cpus, &pmap->pm_active); mask = &other_cpus; } smp_masked_invlpg_range(*mask, sva, eva); sched_unpin(); } void pmap_invalidate_all(pmap_t pmap) { cpuset_t *mask, other_cpus; u_int cpuid; sched_pin(); if (pmap == kernel_pmap) { invltlb_glob(); mask = &all_cpus; } else if (!CPU_CMP(&pmap->pm_active, &all_cpus)) { invltlb(); mask = &all_cpus; } else { cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); if (CPU_ISSET(cpuid, &pmap->pm_active)) invltlb(); CPU_AND(&other_cpus, &pmap->pm_active); mask = &other_cpus; } smp_masked_invltlb(*mask, pmap); sched_unpin(); } void pmap_invalidate_cache(void) { sched_pin(); wbinvd(); smp_cache_flush(); sched_unpin(); } struct pde_action { cpuset_t invalidate; /* processors that invalidate their TLB */ vm_offset_t va; pd_entry_t *pde; pd_entry_t newpde; u_int store; /* processor that updates the PDE */ }; static void pmap_update_pde_kernel(void *arg) { struct pde_action *act = arg; pd_entry_t *pde; pmap_t pmap; if (act->store == PCPU_GET(cpuid)) { /* * Elsewhere, this operation requires allpmaps_lock for * synchronization. Here, it does not because it is being * performed in the context of an all_cpus rendezvous. */ LIST_FOREACH(pmap, &allpmaps, pm_list) { pde = pmap_pde(pmap, act->va); pde_store(pde, act->newpde); } } } static void pmap_update_pde_user(void *arg) { struct pde_action *act = arg; if (act->store == PCPU_GET(cpuid)) pde_store(act->pde, act->newpde); } static void pmap_update_pde_teardown(void *arg) { struct pde_action *act = arg; if (CPU_ISSET(PCPU_GET(cpuid), &act->invalidate)) pmap_update_pde_invalidate(act->va, act->newpde); } /* * Change the page size for the specified virtual address in a way that * prevents any possibility of the TLB ever having two entries that map the * same virtual address using different page sizes. This is the recommended * workaround for Erratum 383 on AMD Family 10h processors. It prevents a * machine check exception for a TLB state that is improperly diagnosed as a * hardware error. 
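 * The PDE update is done in a rendezvous: the processor designated by
 * act->store rewrites the entry while every processor that might hold
 * the old mapping invalidates its TLB, so no TLB can transiently hold
 * entries of both page sizes for the same va.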
*/ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { struct pde_action act; cpuset_t active, other_cpus; u_int cpuid; sched_pin(); cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); if (pmap == kernel_pmap) active = all_cpus; else active = pmap->pm_active; if (CPU_OVERLAP(&active, &other_cpus)) { act.store = cpuid; act.invalidate = active; act.va = va; act.pde = pde; act.newpde = newpde; CPU_SET(cpuid, &active); smp_rendezvous_cpus(active, smp_no_rendevous_barrier, pmap == kernel_pmap ? pmap_update_pde_kernel : pmap_update_pde_user, pmap_update_pde_teardown, &act); } else { if (pmap == kernel_pmap) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); if (CPU_ISSET(cpuid, &active)) pmap_update_pde_invalidate(va, newpde); } sched_unpin(); } #else /* !SMP */ /* * Normal, non-SMP, 486+ invalidation functions. * We inline these within pmap.c for speed. */ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invlpg(va); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { if (pmap == kernel_pmap) invltlb_glob(); else if (!CPU_EMPTY(&pmap->pm_active)) invltlb(); } PMAP_INLINE void pmap_invalidate_cache(void) { wbinvd(); } static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { if (pmap == kernel_pmap) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) pmap_update_pde_invalidate(va, newpde); } #endif /* !SMP */ #define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024) void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force) { if (force) { sva &= ~(vm_offset_t)cpu_clflush_line_size; } else { KASSERT((sva & PAGE_MASK) == 0, ("pmap_invalidate_cache_range: sva not page-aligned")); KASSERT((eva & PAGE_MASK) == 0, ("pmap_invalidate_cache_range: eva not page-aligned")); } if ((cpu_feature & CPUID_SS) != 0 && !force) ; /* If "Self Snoop" is supported and allowed, do nothing. */ else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 && eva - sva < PMAP_CLFLUSH_THRESHOLD) { #ifdef DEV_APIC /* * XXX: Some CPUs fault, hang, or trash the local APIC * registers if we use CLFLUSH on the local APIC * range. The local APIC is always uncached, so we * don't need to flush for that range anyway. */ if (pmap_kextract(sva) == lapic_paddr) return; #endif /* * Otherwise, do per-cache line flush. Use the sfence * instruction to insure that previous stores are * included in the write-back. The processor * propagates flush to other processors in the cache * coherence domain. */ sfence(); for (; sva < eva; sva += cpu_clflush_line_size) clflushopt(sva); sfence(); } else if ((cpu_feature & CPUID_CLFSH) != 0 && eva - sva < PMAP_CLFLUSH_THRESHOLD) { #ifdef DEV_APIC if (pmap_kextract(sva) == lapic_paddr) return; #endif /* * Writes are ordered by CLFLUSH on Intel CPUs. */ if (cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); for (; sva < eva; sva += cpu_clflush_line_size) clflush(sva); if (cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); } else { /* * No targeted cache flush methods are supported by CPU, * or the supplied range is bigger than 2MB. * Globally invalidate cache. 
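 * Above PMAP_CLFLUSH_THRESHOLD, a full WBINVD is expected to be
 * cheaper than flushing the range one cache line at a time.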
*/ pmap_invalidate_cache(); } } void pmap_invalidate_cache_pages(vm_page_t *pages, int count) { int i; if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE || (cpu_feature & CPUID_CLFSH) == 0) { pmap_invalidate_cache(); } else { for (i = 0; i < count; i++) pmap_flush_page(pages[i]); } } /* * Are we current address space or kernel? */ static __inline int pmap_is_current(pmap_t pmap) { return (pmap == kernel_pmap || pmap == vmspace_pmap(curthread->td_proc->p_vmspace)); } /* * If the given pmap is not the current or kernel pmap, the returned pte must * be released by passing it to pmap_pte_release(). */ pt_entry_t * pmap_pte(pmap_t pmap, vm_offset_t va) { pd_entry_t newpf; pd_entry_t *pde; pde = pmap_pde(pmap, va); if (*pde & PG_PS) return (pde); if (*pde != 0) { /* are we current address space or kernel? */ if (pmap_is_current(pmap)) return (vtopte(va)); mtx_lock(&PMAP2mutex); newpf = *pde & PG_FRAME; if ((*PMAP2 & PG_FRAME) != newpf) { *PMAP2 = newpf | PG_RW | PG_V | PG_A | PG_M; pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2); } return (PADDR2 + (i386_btop(va) & (NPTEPG - 1))); } return (NULL); } /* * Releases a pte that was obtained from pmap_pte(). Be prepared for the pte * being NULL. */ static __inline void pmap_pte_release(pt_entry_t *pte) { if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) mtx_unlock(&PMAP2mutex); } /* * NB: The sequence of updating a page table followed by accesses to the * corresponding pages is subject to the situation described in the "AMD64 * Architecture Programmer's Manual Volume 2: System Programming" rev. 3.23, * "7.3.1 Special Coherency Considerations". Therefore, issuing the INVLPG * right after modifying the PTE bits is crucial. */ static __inline void invlcaddr(void *caddr) { invlpg((u_int)caddr); } /* * Super fast pmap_pte routine best used when scanning * the pv lists. This eliminates many coarse-grained * invltlb calls. Note that many of the pv list * scans are across different pmaps. It is very wasteful * to do an entire invltlb for checking a single mapping. * * If the given pmap is not the current pmap, pvh_global_lock * must be held and curthread pinned to a CPU. */ static pt_entry_t * pmap_pte_quick(pmap_t pmap, vm_offset_t va) { pd_entry_t newpf; pd_entry_t *pde; pde = pmap_pde(pmap, va); if (*pde & PG_PS) return (pde); if (*pde != 0) { /* are we current address space or kernel? */ if (pmap_is_current(pmap)) return (vtopte(va)); rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); newpf = *pde & PG_FRAME; if ((*PMAP1 & PG_FRAME) != newpf) { *PMAP1 = newpf | PG_RW | PG_V | PG_A | PG_M; #ifdef SMP PMAP1cpu = PCPU_GET(cpuid); #endif invlcaddr(PADDR1); PMAP1changed++; } else #ifdef SMP if (PMAP1cpu != PCPU_GET(cpuid)) { PMAP1cpu = PCPU_GET(cpuid); invlcaddr(PADDR1); PMAP1changedcpu++; } else #endif PMAP1unchanged++; return (PADDR1 + (i386_btop(va) & (NPTEPG - 1))); } return (0); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. 
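 *	A PG_PS directory entry maps its range directly, so the
 *	physical address is computed from the PDE; otherwise the PTE is
 *	consulted via pmap_pte().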
*/ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { vm_paddr_t rtval; pt_entry_t *pte; pd_entry_t pde; rtval = 0; PMAP_LOCK(pmap); pde = pmap->pm_pdir[va >> PDRSHIFT]; if (pde != 0) { if ((pde & PG_PS) != 0) rtval = (pde & PG_PS_FRAME) | (va & PDRMASK); else { pte = pmap_pte(pmap, va); rtval = (*pte & PG_FRAME) | (va & PAGE_MASK); pmap_pte_release(pte); } } PMAP_UNLOCK(pmap); return (rtval); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pd_entry_t pde; pt_entry_t pte, *ptep; vm_page_t m; vm_paddr_t pa; pa = 0; m = NULL; PMAP_LOCK(pmap); retry: pde = *pmap_pde(pmap, va); if (pde != 0) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | (va & PDRMASK), &pa)) goto retry; m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | (va & PDRMASK)); vm_page_hold(m); } } else { ptep = pmap_pte(pmap, va); pte = *ptep; pmap_pte_release(ptep); if (pte != 0 && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) goto retry; m = PHYS_TO_VM_PAGE(pte & PG_FRAME); vm_page_hold(m); } } } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } /*************************************************** * Low level mapping routines..... ***************************************************/ /* * Add a wired page to the kva. * Note: not SMP coherent. * * This function may be used before pmap_bootstrap() is called. */ PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { pt_entry_t *pte; pte = vtopte(va); pte_store(pte, pa | PG_RW | PG_V | pgeflag); } static __inline void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode) { pt_entry_t *pte; pte = vtopte(va); pte_store(pte, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0)); } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. * * This function may be used before pmap_bootstrap() is called. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *pte; pte = vtopte(va); pte_clear(pte); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { vm_offset_t va, sva; vm_paddr_t superpage_offset; pd_entry_t newpde; va = *virt; /* * Does the physical address range's size and alignment permit at * least one superpage mapping to be created? */ superpage_offset = start & PDRMASK; if ((end - start) - ((NBPDR - superpage_offset) & PDRMASK) >= NBPDR) { /* * Increase the starting virtual address so that its alignment * does not preclude the use of superpage mappings. 
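 * For example, with 4MB superpages (PDRMASK == 0x3fffff), a start
 * address of 0x00580000 has superpage_offset 0x180000; a suggested va
 * of 0xc0100000 is advanced to 0xc0180000 so that va and start then
 * cross superpage boundaries together.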
*/ if ((va & PDRMASK) < superpage_offset) va = (va & ~PDRMASK) + superpage_offset; else if ((va & PDRMASK) > superpage_offset) va = ((va + PDRMASK) & ~PDRMASK) + superpage_offset; } sva = va; while (start < end) { if ((start & PDRMASK) == 0 && end - start >= NBPDR && pseflag) { KASSERT((va & PDRMASK) == 0, ("pmap_map: misaligned va %#x", va)); newpde = start | PG_PS | pgeflag | PG_RW | PG_V; pmap_kenter_pde(va, newpde); va += NBPDR; start += NBPDR; } else { pmap_kenter(va, start); va += PAGE_SIZE; start += PAGE_SIZE; } } pmap_invalidate_range(kernel_pmap, sva, va); *virt = va; return (sva); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { pt_entry_t *endpte, oldpte, pa, *pte; vm_page_t m; oldpte = 0; pte = vtopte(sva); endpte = pte + count; while (pte < endpte) { m = *ma++; pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0); if ((*pte & (PG_FRAME | PG_PTE_CACHE)) != pa) { oldpte |= *pte; pte_store(pte, pa | pgeflag | PG_RW | PG_V); } pte++; } if (__predict_false((oldpte & PG_V) != 0)) pmap_invalidate_range(kernel_pmap, sva, sva + count * PAGE_SIZE); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qremove(vm_offset_t sva, int count) { vm_offset_t va; va = sva; while (count-- > 0) { pmap_kremove(va); va += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /*************************************************** * Page table page management routines..... ***************************************************/ static __inline void pmap_free_zero_pages(struct spglist *free) { vm_page_t m; while ((m = SLIST_FIRST(free)) != NULL) { SLIST_REMOVE_HEAD(free, plinks.s.ss); /* Preserve the page's PG_ZERO setting. */ vm_page_free_toq(m); } } /* * Schedule the specified unused page table page to be freed. Specifically, * add the page to the specified list of pages that will be released to the * physical memory manager after the TLB has been updated. */ static __inline void pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, boolean_t set_PG_ZERO) { if (set_PG_ZERO) m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; SLIST_INSERT_HEAD(free, m, plinks.s.ss); } /* * Inserts the specified page table page into the specified pmap's collection * of idle page table pages. Each of a pmap's page table pages is responsible * for mapping a distinct range of virtual addresses. The pmap's collection is * ordered by this virtual address range. */ static __inline int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); return (vm_radix_insert(&pmap->pm_root, mpte)); } /* * Removes the page table page mapping the specified virtual address from the * specified pmap's collection of idle page table pages, and returns it. * Otherwise, returns NULL if there is no page table page corresponding to the * specified virtual address. */ static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); return (vm_radix_remove(&pmap->pm_root, va >> PDRSHIFT)); } /* * Decrements a page table page's wire count, which is used to record the * number of valid page table entries within the page. 
If the wire count * drops to zero, then the page table page is unmapped. Returns TRUE if the * page table page was unmapped and FALSE otherwise. */ static inline boolean_t pmap_unwire_ptp(pmap_t pmap, vm_page_t m, struct spglist *free) { --m->wire_count; if (m->wire_count == 0) { _pmap_unwire_ptp(pmap, m, free); return (TRUE); } else return (FALSE); } static void _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, struct spglist *free) { vm_offset_t pteva; /* * unmap the page table page */ pmap->pm_pdir[m->pindex] = 0; --pmap->pm_stats.resident_count; /* * This is a release store so that the ordinary store unmapping * the page table page is globally performed before TLB shoot- * down is begun. */ atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); /* * Do an invltlb to make the invalidated mapping * take effect immediately. */ pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); pmap_invalidate_page(pmap, pteva); /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done */ pmap_add_delayed_free_list(m, free, TRUE); } /* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int pmap_unuse_pt(pmap_t pmap, vm_offset_t va, struct spglist *free) { pd_entry_t ptepde; vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (0); ptepde = *pmap_pde(pmap, va); mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); return (pmap_unwire_ptp(pmap, mpte, free)); } /* * Initialize the pmap for the swapper process. */ void pmap_pinit0(pmap_t pmap) { PMAP_LOCK_INIT(pmap); /* * Since the page table directory is shared with the kernel pmap, * which is already included in the list "allpmaps", this pmap does * not need to be inserted into that list. */ pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); #if defined(PAE) || defined(PAE_TABLES) pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); #endif pmap->pm_root.rt_root = 0; CPU_ZERO(&pmap->pm_active); PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ int pmap_pinit(pmap_t pmap) { vm_page_t m, ptdpg[NPGPTD]; vm_paddr_t pa; int i; /* * No need to allocate page table space yet but we do need a valid * page directory table. */ if (pmap->pm_pdir == NULL) { pmap->pm_pdir = (pd_entry_t *)kva_alloc(NBPTD); if (pmap->pm_pdir == NULL) return (0); #if defined(PAE) || defined(PAE_TABLES) pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); KASSERT(((vm_offset_t)pmap->pm_pdpt & ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, ("pmap_pinit: pdpt misaligned")); KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), ("pmap_pinit: pdpt above 4g")); #endif pmap->pm_root.rt_root = 0; } KASSERT(vm_radix_is_empty(&pmap->pm_root), ("pmap_pinit: pmap has reserved page table page(s)")); /* * allocate the page directory page(s) */ for (i = 0; i < NPGPTD;) { m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (m == NULL) VM_WAIT; else { ptdpg[i++] = m; } } pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); for (i = 0; i < NPGPTD; i++) if ((ptdpg[i]->flags & PG_ZERO) == 0) pagezero(pmap->pm_pdir + (i * NPDEPG)); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); /* Copy the kernel page table directory entries. 
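 * All pmaps share the kernel's page table pages, so the kernel
 * portion of every page directory is identical; it is copied from the
 * reference PTD while allpmaps_lock prevents a concurrent update.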
*/ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); mtx_unlock_spin(&allpmaps_lock); /* install self-referential address mapping entry(s) */ for (i = 0; i < NPGPTD; i++) { pa = VM_PAGE_TO_PHYS(ptdpg[i]); pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M; #if defined(PAE) || defined(PAE_TABLES) pmap->pm_pdpt[i] = pa | PG_V; #endif } CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); return (1); } /* * this routine is called if the page table page is not * mapped correctly. */ static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags) { vm_paddr_t ptepa; vm_page_t m; /* * Allocate a page table page. */ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { if ((flags & PMAP_ENTER_NOSLEEP) == 0) { PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); VM_WAIT; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); } /* * Indicate the need to retry. While waiting, the page table * page may have been allocated. */ return (NULL); } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); /* * Map the pagetable page into the process address space, if * it isn't already there. */ pmap->pm_stats.resident_count++; ptepa = VM_PAGE_TO_PHYS(m); pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); return (m); } static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags) { u_int ptepindex; pd_entry_t ptepa; vm_page_t m; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; retry: /* * Get the page directory entry */ ptepa = pmap->pm_pdir[ptepindex]; /* * This supports switching from a 4MB page to a * normal 4K page. */ if (ptepa & PG_PS) { (void)pmap_demote_pde(pmap, &pmap->pm_pdir[ptepindex], va); ptepa = pmap->pm_pdir[ptepindex]; } /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (ptepa) { m = PHYS_TO_VM_PAGE(ptepa & PG_FRAME); m->wire_count++; } else { /* * Here if the pte page isn't mapped, or if it has * been deallocated. */ m = _pmap_allocpte(pmap, ptepindex, flags); if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0) goto retry; } return (m); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. 
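 * The KASSERTs below enforce this: the resident count must be zero,
 * no reserved page table pages may remain, and the pmap may not be
 * active on any processor.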
*/ void pmap_release(pmap_t pmap) { vm_page_t m, ptdpg[NPGPTD]; int i; KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); KASSERT(vm_radix_is_empty(&pmap->pm_root), ("pmap_release: pmap has reserved page table page(s)")); KASSERT(CPU_EMPTY(&pmap->pm_active), ("releasing active pmap %p", pmap)); mtx_lock_spin(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); for (i = 0; i < NPGPTD; i++) ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i] & PG_FRAME); bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) * sizeof(*pmap->pm_pdir)); pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); for (i = 0; i < NPGPTD; i++) { m = ptdpg[i]; #if defined(PAE) || defined(PAE_TABLES) KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), ("pmap_release: got wrong ptd page")); #endif m->wire_count--; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); } } static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; return (sysctl_handle_long(oidp, &ksize, 0, req)); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "IU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return (sysctl_handle_long(oidp, &kfree, 0, req)); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "IU", "Amount of KVM free"); /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { vm_paddr_t ptppaddr; vm_page_t nkpg; pd_entry_t newpdir; mtx_assert(&kernel_map->system_mtx, MA_OWNED); addr = roundup2(addr, NBPDR); if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } continue; } nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); nkpt++; if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); ptppaddr = VM_PAGE_TO_PHYS(nkpg); newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); pdir_pde(KPTD, kernel_vm_end) = pgeflag | newpdir; pmap_kenter_pde(kernel_vm_end, newpdir); kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } } } /*************************************************** * page management routines. 
***************************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 11); CTASSERT(_NPCPV == 336); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */ #define PC_FREE10 0x0000fffful /* Free values for index 10 */ static const uint32_t pc_freemask[_NPCM] = { PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE10 }; SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); #endif /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. */ static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap) { struct pch newtail; struct pv_chunk *pc; struct md_page *pvh; pd_entry_t *pde; pmap_t pmap; pt_entry_t *pte, tpte; pv_entry_t pv; vm_offset_t va; vm_page_t m, m_pc; struct spglist free; uint32_t inuse; int bit, field, freed; PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); pmap = NULL; m_pc = NULL; SLIST_INIT(&free); TAILQ_INIT(&newtail); while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 || SLIST_EMPTY(&free))) { TAILQ_REMOVE(&pv_chunks, pc, pc_lru); if (pmap != pc->pc_pmap) { if (pmap != NULL) { pmap_invalidate_all(pmap); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } pmap = pc->pc_pmap; /* Avoid deadlock and lock recursion. */ if (pmap > locked_pmap) PMAP_LOCK(pmap); else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) { pmap = NULL; TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); continue; } } /* * Destroy every non-wired, 4 KB page mapping in the chunk. 
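 * In pc_map[], a set bit marks a free pv entry, so the in-use entries
 * of a field are (~pc_map[field] & pc_freemask[field]); for example,
 * pc_map[0] == 0xfffffffe means that only entry 0 is in use.  Wired
 * (PG_W) mappings are skipped.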
*/ freed = 0; for (field = 0; field < _NPCM; field++) { for (inuse = ~pc->pc_map[field] & pc_freemask[field]; inuse != 0; inuse &= ~(1UL << bit)) { bit = bsfl(inuse); pv = &pc->pc_pventry[field * 32 + bit]; va = pv->pv_va; pde = pmap_pde(pmap, va); if ((*pde & PG_PS) != 0) continue; pte = pmap_pte(pmap, va); tpte = *pte; if ((tpte & PG_W) == 0) tpte = pte_load_clear(pte); pmap_pte_release(pte); if ((tpte & PG_W) != 0) continue; KASSERT(tpte != 0, ("pmap_pv_reclaim: pmap %p va %x zero pte", pmap, va)); if ((tpte & PG_G) != 0) pmap_invalidate_page(pmap, va); m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if ((tpte & PG_A) != 0) vm_page_aflag_set(m, PGA_REFERENCED); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) { vm_page_aflag_clear(m, PGA_WRITEABLE); } } pc->pc_map[field] |= 1UL << bit; pmap_unuse_pt(pmap, va, &free); freed++; } } if (freed == 0) { TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); continue; } /* Every freed mapping is for a 4 KB page. */ pmap->pm_stats.resident_count -= freed; PV_STAT(pv_entry_frees += freed); PV_STAT(pv_entry_spare += freed); pv_entry_count -= freed; TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); for (field = 0; field < _NPCM; field++) if (pc->pc_map[field] != pc_freemask[field]) { TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); /* * One freed pv entry in locked_pmap is * sufficient. */ if (pmap == locked_pmap) goto out; break; } if (field == _NPCM) { PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); /* Entire chunk is free; return it. */ m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); break; } } out: TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru); if (pmap != NULL) { pmap_invalidate_all(pmap); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } if (m_pc == NULL && pv_vafree != 0 && SLIST_EMPTY(&free)) { m_pc = SLIST_FIRST(&free); SLIST_REMOVE_HEAD(&free, plinks.s.ss); /* Recycle a freed page table page. */ m_pc->wire_count = 1; atomic_add_int(&vm_cnt.v_wire_count, 1); } pmap_free_zero_pages(&free); return (m_pc); } /* * free the pv_entry back to the free list */ static void free_pv_entry(pmap_t pmap, pv_entry_t pv) { struct pv_chunk *pc; int idx, field, bit; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(pv_entry_frees++); PV_STAT(pv_entry_spare++); pv_entry_count--; pc = pv_to_chunk(pv); idx = pv - &pc->pc_pventry[0]; field = idx / 32; bit = idx % 32; pc->pc_map[field] |= 1ul << bit; for (idx = 0; idx < _NPCM; idx++) if (pc->pc_map[idx] != pc_freemask[idx]) { /* * 98% of the time, pc is already at the head of the * list. If it isn't already, move it to the head. 
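 * Keeping partially full chunks at the head lets get_pv_entry()
 * satisfy most allocations from TAILQ_FIRST() without a search.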
*/ if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) != pc)) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); } return; } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } static void free_pv_chunk(struct pv_chunk *pc) { vm_page_t m; TAILQ_REMOVE(&pv_chunks, pc, pc_lru); PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); vm_page_unwire(m, PQ_NONE); vm_page_free(m); pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); } /* * get a new pv_entry, allocating a block from the system * when needed. */ static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try) { static const struct timeval printinterval = { 60, 0 }; static struct timeval lastprint; int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(pv_entry_allocs++); pv_entry_count++; if (pv_entry_count > pv_entry_high_water) if (ratecheck(&lastprint, &printinterval)) printf("Approaching the limit on PV entries, consider " "increasing either the vm.pmap.shpgperproc or the " "vm.pmap.pv_entry_max tunable.\n"); retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = bsfl(pc->pc_map[field]); break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 32 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ for (field = 0; field < _NPCM; field++) if (pc->pc_map[field] != 0) { PV_STAT(pv_entry_spare--); return (pv); /* not full, return */ } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(pv_entry_spare--); return (pv); } } /* * Access to the ptelist "pv_vafree" is synchronized by the pvh * global lock. If "pv_vafree" is currently non-empty, it will * remain non-empty until pmap_ptelist_alloc() completes. */ if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { if (try) { pv_entry_count--; PV_STAT(pc_chunk_tryfail++); return (NULL); } m = pmap_pv_reclaim(pmap); if (m == NULL) goto retry; } PV_STAT(pc_chunk_count++); PV_STAT(pc_chunk_allocs++); pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree); pmap_qenter((vm_offset_t)pc, &m, 1); pc->pc_pmap = pmap; pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ for (field = 1; field < _NPCM; field++) pc->pc_map[field] = pc_freemask[field]; TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(pv_entry_spare += _NPCPV - 1); return (pv); } static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); break; } } return (pv); } static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT((pa & PDRMASK) == 0, ("pmap_pv_demote_pde: pa is not 4mpage aligned")); /* * Transfer the 4mpage's pv entry for this mapping to the first * page's pv list. 
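 * Reusing this pv entry for the first 4KB page saves one call to
 * get_pv_entry(); the remaining NPTEPG - 1 entries are allocated
 * below.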
*/ pvh = pa_to_pvh(pa); va = trunc_4mpage(va); pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); m = PHYS_TO_VM_PAGE(pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); /* Instantiate the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { m++; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_pv_demote_pde: page %p is not managed", m)); va += PAGE_SIZE; pmap_insert_entry(pmap, va, m); } while (va < va_last); } static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT((pa & PDRMASK) == 0, ("pmap_pv_promote_pde: pa is not 4mpage aligned")); /* * Transfer the first page's pv entry for this mapping to the * 4mpage's pv list. Aside from avoiding the cost of a call * to get_pv_entry(), a transfer avoids the possibility that * get_pv_entry() calls pmap_collect() and that pmap_collect() * removes one of the mappings that is being promoted. */ m = PHYS_TO_VM_PAGE(pa); va = trunc_4mpage(va); pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found")); pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); /* Free the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { m++; va += PAGE_SIZE; pmap_pvh_free(&m->md, pmap, va); } while (va < va_last); } static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); free_pv_entry(pmap, pv); } static void pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) { struct md_page *pvh; rw_assert(&pvh_global_lock, RA_WLOCKED); pmap_pvh_free(&m->md, pmap, va); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } } /* * Create a pv entry for page at pa for * (pmap, va). */ static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); } /* * Conditionally create a pv entry. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); return (TRUE); } else return (FALSE); } /* * Create the pv entries for each of the pages within a superpage. */ static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); return (TRUE); } else return (FALSE); } /* * Fills a page table page with mappings to consecutive physical pages. */ static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte) { pt_entry_t *pte; for (pte = firstpte; pte < firstpte + NPTEPG; pte++) { *pte = newpte; newpte += PAGE_SIZE; } } /* * Tries to demote a 2- or 4MB page mapping. If demotion fails, the * 2- or 4MB page mapping is invalidated. 
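 * A successful demotion replaces the PDE with a reference to a page
 * table page whose NPTEPG entries reproduce the same translation in
 * 4KB pages.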
*/ static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde, oldpde; pt_entry_t *firstpte, newpte; vm_paddr_t mptepa; vm_page_t mpte; struct spglist free; vm_offset_t sva; PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpde = *pde; KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); if ((oldpde & PG_A) == 0 || (mpte = pmap_remove_pt_page(pmap, va)) == NULL) { KASSERT((oldpde & PG_W) == 0, ("pmap_demote_pde: page table page for a wired mapping" " is missing")); /* * Invalidate the 2- or 4MB page mapping and return * "failure" if the mapping was never accessed or the * allocation of the new page table page fails. */ if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL, va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED)) == NULL) { SLIST_INIT(&free); sva = trunc_4mpage(va); pmap_remove_pde(pmap, pde, sva, &free); pmap_invalidate_range(pmap, sva, sva + NBPDR - 1); pmap_free_zero_pages(&free); CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x" " in pmap %p", va, pmap); return (FALSE); } if (va < VM_MAXUSER_ADDRESS) pmap->pm_stats.resident_count++; } mptepa = VM_PAGE_TO_PHYS(mpte); /* * If the page mapping is in the kernel's address space, then the * KPTmap can provide access to the page table page. Otherwise, * temporarily map the page table page (mpte) into the kernel's * address space at either PADDR1 or PADDR2. */ if (va >= KERNBASE) firstpte = &KPTmap[i386_btop(trunc_4mpage(va))]; else if (curthread->td_pinned > 0 && rw_wowned(&pvh_global_lock)) { if ((*PMAP1 & PG_FRAME) != mptepa) { *PMAP1 = mptepa | PG_RW | PG_V | PG_A | PG_M; #ifdef SMP PMAP1cpu = PCPU_GET(cpuid); #endif invlcaddr(PADDR1); PMAP1changed++; } else #ifdef SMP if (PMAP1cpu != PCPU_GET(cpuid)) { PMAP1cpu = PCPU_GET(cpuid); invlcaddr(PADDR1); PMAP1changedcpu++; } else #endif PMAP1unchanged++; firstpte = PADDR1; } else { mtx_lock(&PMAP2mutex); if ((*PMAP2 & PG_FRAME) != mptepa) { *PMAP2 = mptepa | PG_RW | PG_V | PG_A | PG_M; pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2); } firstpte = PADDR2; } newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; KASSERT((oldpde & PG_A) != 0, ("pmap_demote_pde: oldpde is missing PG_A")); KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW, ("pmap_demote_pde: oldpde is missing PG_M")); newpte = oldpde & ~PG_PS; if ((newpte & PG_PDE_PAT) != 0) newpte ^= PG_PDE_PAT | PG_PTE_PAT; /* * If the page table page is new, initialize it. */ if (mpte->wire_count == 1) { mpte->wire_count = NPTEPG; pmap_fill_ptp(firstpte, newpte); } KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME), ("pmap_demote_pde: firstpte and newpte map different physical" " addresses")); /* * If the mapping has changed attributes, update the page table * entries. */ if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE)) pmap_fill_ptp(firstpte, newpte); /* * Demote the mapping. This pmap is locked. The old PDE has * PG_A set. If the old PDE has PG_RW set, it also has PG_M * set. Thus, there is no danger of a race with another * processor changing the setting of PG_A and/or PG_M between * the read above and the store below. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, newpde); else if (pmap == kernel_pmap) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); if (firstpte == PADDR2) mtx_unlock(&PMAP2mutex); /* * Invalidate the recursive mapping of the page table page. */ pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); /* * Demote the pv entry. 
This depends on the earlier demotion * of the mapping. Specifically, the (re)creation of a per- * page pv entry might trigger the execution of pmap_collect(), * which might reclaim a newly (re)created per-page pv entry * and destroy the associated mapping. In order to destroy * the mapping, the PDE must have already changed from mapping * the 2mpage to referencing the page table page. */ if ((oldpde & PG_MANAGED) != 0) pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME); pmap_pde_demotions++; CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#x" " in pmap %p", va, pmap); return (TRUE); } /* * Removes a 2- or 4MB page mapping from the kernel pmap. */ static void pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde; vm_paddr_t mptepa; vm_page_t mpte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); mpte = pmap_remove_pt_page(pmap, va); if (mpte == NULL) panic("pmap_remove_kernel_pde: Missing pt page."); mptepa = VM_PAGE_TO_PHYS(mpte); newpde = mptepa | PG_M | PG_A | PG_RW | PG_V; /* * Initialize the page table page. */ pagezero((void *)&KPTmap[i386_btop(trunc_4mpage(va))]); /* * Remove the mapping. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, newpde); else pmap_kenter_pde(va, newpde); /* * Invalidate the recursive mapping of the page table page. */ pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); } /* * pmap_remove_pde: do the things to unmap a superpage in a process */ static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, struct spglist *free) { struct md_page *pvh; pd_entry_t oldpde; vm_offset_t eva, va; vm_page_t m, mpte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PDRMASK) == 0, ("pmap_remove_pde: sva is not 4mpage aligned")); oldpde = pte_load_clear(pdq); if (oldpde & PG_W) pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; /* * Machines that don't support invlpg, also don't support * PG_G. * * When workaround_erratum383 is false, a promotion to a 2M/4M * page mapping does not invalidate the 512/1024 4K page mappings * from the TLB. Consequently, at this point, the TLB may * hold both 4K and 2M/4M page mappings. Therefore, the entire * range of addresses must be invalidated here. In contrast, * when workaround_erratum383 is true, a promotion does * invalidate the 512/1024 4K page mappings, and so a single INVLPG * suffices to invalidate the 2M/4M page mapping. 
*/ if ((oldpde & PG_G) != 0) { if (workaround_erratum383) pmap_invalidate_page(kernel_pmap, sva); else pmap_invalidate_range(kernel_pmap, sva, sva + NBPDR - 1); } pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; if (oldpde & PG_MANAGED) { pvh = pa_to_pvh(oldpde & PG_PS_FRAME); pmap_pvh_free(pvh, pmap, sva); eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); va < eva; va += PAGE_SIZE, m++) { if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if (oldpde & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); if (TAILQ_EMPTY(&m->md.pv_list) && TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } } if (pmap == kernel_pmap) { pmap_remove_kernel_pde(pmap, pdq, sva); } else { mpte = pmap_remove_pt_page(pmap, sva); if (mpte != NULL) { pmap->pm_stats.resident_count--; KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pde: pte page wire count error")); mpte->wire_count = 0; pmap_add_delayed_free_list(mpte, free, FALSE); atomic_subtract_int(&vm_cnt.v_wire_count, 1); } } } /* * pmap_remove_pte: do the things to unmap a page in a process */ static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, struct spglist *free) { pt_entry_t oldpte; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpte = pte_load_clear(ptq); KASSERT(oldpte != 0, ("pmap_remove_pte: pmap %p va %x zero pte", pmap, va)); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; /* * Machines that don't support invlpg, also don't support * PG_G. */ if (oldpte & PG_G) pmap_invalidate_page(kernel_pmap, va); pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME); if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if (oldpte & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); pmap_remove_entry(pmap, m, va); } return (pmap_unuse_pt(pmap, va, free)); } /* * Remove a single page from a process address space */ static void pmap_remove_page(pmap_t pmap, vm_offset_t va, struct spglist *free) { pt_entry_t *pte; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0) return; pmap_remove_pte(pmap, pte, va, free); pmap_invalidate_page(pmap, va); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t pdnxt; pd_entry_t ptpaddr; pt_entry_t *pte; struct spglist free; int anyvalid; /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; anyvalid = 0; SLIST_INIT(&free); rw_wlock(&pvh_global_lock); sched_pin(); PMAP_LOCK(pmap); /* * special handling of removing one page. a very * common operation and easy to short circuit some * code. */ if ((sva + PAGE_SIZE == eva) && ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { pmap_remove_page(pmap, sva, &free); goto out; } for (; sva < eva; sva = pdnxt) { u_int pdirindex; /* * Calculate index for next page table. */ pdnxt = (sva + NBPDR) & ~PDRMASK; if (pdnxt < sva) pdnxt = eva; if (pmap->pm_stats.resident_count == 0) break; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; /* * Check for large page. 
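 * A PG_PS entry covers the whole 2/4MB range: it is removed in one
 * step if the range permits, or demoted to 4KB mappings first.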
*/ if ((ptpaddr & PG_PS) != 0) { /* * Are we removing the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == pdnxt && eva >= pdnxt) { /* * The TLB entry for a PG_G mapping is * invalidated by pmap_remove_pde(). */ if ((ptpaddr & PG_G) == 0) anyvalid = 1; pmap_remove_pde(pmap, &pmap->pm_pdir[pdirindex], sva, &free); continue; } else if (!pmap_demote_pde(pmap, &pmap->pm_pdir[pdirindex], sva)) { /* The large page mapping was destroyed. */ continue; } } /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. */ if (pdnxt > eva) pdnxt = eva; for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, sva += PAGE_SIZE) { if (*pte == 0) continue; /* * The TLB entry for a PG_G mapping is invalidated * by pmap_remove_pte(). */ if ((*pte & PG_G) == 0) anyvalid = 1; if (pmap_remove_pte(pmap, pte, sva, &free)) break; } } out: sched_unpin(); if (anyvalid) pmap_invalidate_all(pmap); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { struct md_page *pvh; pv_entry_t pv; pmap_t pmap; pt_entry_t *pte, tpte; pd_entry_t *pde; vm_offset_t va; struct spglist free; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); (void)pmap_demote_pde(pmap, pde, va); PMAP_UNLOCK(pmap); } small_mappings: while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap->pm_stats.resident_count--; pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found" " a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); tpte = pte_load_clear(pte); KASSERT(tpte != 0, ("pmap_remove_all: pmap %p va %x zero pte", pmap, pv->pv_va)); if (tpte & PG_W) pmap->pm_stats.wired_count--; if (tpte & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. 
*/ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); pmap_unuse_pt(pmap, pv->pv_va, &free); pmap_invalidate_page(pmap, pv->pv_va); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); sched_unpin(); rw_wunlock(&pvh_global_lock); pmap_free_zero_pages(&free); } /* * pmap_protect_pde: do the things to protect a 4mpage in a process */ static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot) { pd_entry_t newpde, oldpde; vm_offset_t eva, va; vm_page_t m; boolean_t anychanged; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PDRMASK) == 0, ("pmap_protect_pde: sva is not 4mpage aligned")); anychanged = FALSE; retry: oldpde = newpde = *pde; if (oldpde & PG_MANAGED) { eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); va < eva; va += PAGE_SIZE, m++) if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); } if ((prot & VM_PROT_WRITE) == 0) newpde &= ~(PG_RW | PG_M); #if defined(PAE) || defined(PAE_TABLES) if ((prot & VM_PROT_EXECUTE) == 0) newpde |= pg_nx; #endif if (newpde != oldpde) { if (!pde_cmpset(pde, oldpde, newpde)) goto retry; if (oldpde & PG_G) { /* See pmap_remove_pde() for explanation. */ if (workaround_erratum383) pmap_invalidate_page(kernel_pmap, sva); else pmap_invalidate_range(kernel_pmap, sva, sva + NBPDR - 1); } else anychanged = TRUE; } return (anychanged); } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t pdnxt; pd_entry_t ptpaddr; pt_entry_t *pte; boolean_t anychanged, pv_lists_locked; KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot)); if (prot == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } #if defined(PAE) || defined(PAE_TABLES) if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) == (VM_PROT_WRITE|VM_PROT_EXECUTE)) return; #else if (prot & VM_PROT_WRITE) return; #endif if (pmap_is_current(pmap)) pv_lists_locked = FALSE; else { pv_lists_locked = TRUE; resume: rw_wlock(&pvh_global_lock); sched_pin(); } anychanged = FALSE; PMAP_LOCK(pmap); for (; sva < eva; sva = pdnxt) { pt_entry_t obits, pbits; u_int pdirindex; pdnxt = (sva + NBPDR) & ~PDRMASK; if (pdnxt < sva) pdnxt = eva; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { /* * Are we protecting the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == pdnxt && eva >= pdnxt) { /* * The TLB entry for a PG_G mapping is * invalidated by pmap_protect_pde(). */ if (pmap_protect_pde(pmap, &pmap->pm_pdir[pdirindex], sva, prot)) anychanged = TRUE; continue; } else { if (!pv_lists_locked) { pv_lists_locked = TRUE; if (!rw_try_wlock(&pvh_global_lock)) { if (anychanged) pmap_invalidate_all( pmap); PMAP_UNLOCK(pmap); goto resume; } sched_pin(); } if (!pmap_demote_pde(pmap, &pmap->pm_pdir[pdirindex], sva)) { /* * The large page mapping was * destroyed. */ continue; } } } if (pdnxt > eva) pdnxt = eva; for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, sva += PAGE_SIZE) { vm_page_t m; retry: /* * Regardless of whether a pte is 32 or 64 bits in * size, PG_RW, PG_A, and PG_M are among the least * significant 32 bits. 
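 *
 * Editor's sketch (assumes the PAE pte layout): because PG_RW, PG_A
 * and PG_M all sit in bits 0..31, a permission-only downgrade of a
 * 64-bit pte can in principle use a 32-bit compare-and-swap:
 *
 *	u_int lo = *(u_int *)pte;
 *	atomic_cmpset_int((u_int *)pte, lo, lo & ~(PG_RW | PG_M));
 *
 * The loop below nevertheless uses atomic_cmpset_64() under PAE
 * because pbits may also gain pg_nx, which is bit 63.
 *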
*/ obits = pbits = *pte; if ((pbits & PG_V) == 0) continue; if ((prot & VM_PROT_WRITE) == 0) { if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == (PG_MANAGED | PG_M | PG_RW)) { m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); vm_page_dirty(m); } pbits &= ~(PG_RW | PG_M); } #if defined(PAE) || defined(PAE_TABLES) if ((prot & VM_PROT_EXECUTE) == 0) pbits |= pg_nx; #endif if (pbits != obits) { #if defined(PAE) || defined(PAE_TABLES) if (!atomic_cmpset_64(pte, obits, pbits)) goto retry; #else if (!atomic_cmpset_int((u_int *)pte, obits, pbits)) goto retry; #endif if (obits & PG_G) pmap_invalidate_page(pmap, sva); else anychanged = TRUE; } } } if (anychanged) pmap_invalidate_all(pmap); if (pv_lists_locked) { sched_unpin(); rw_wunlock(&pvh_global_lock); } PMAP_UNLOCK(pmap); } /* * Tries to promote the 512 or 1024, contiguous 4KB page mappings that are * within a single page table page (PTP) to a single 2- or 4MB page mapping. * For promotion to occur, two conditions must be met: (1) the 4KB page * mappings must map aligned, contiguous physical memory and (2) the 4KB page * mappings must have identical characteristics. * * Managed (PG_MANAGED) mappings within the kernel address space are not * promoted. The reason is that kernel PDEs are replicated in each pmap but * pmap_clear_ptes() and pmap_ts_referenced() only read the PDE from the kernel * pmap. */ static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde; pt_entry_t *firstpte, oldpte, pa, *pte; vm_offset_t oldpteva; vm_page_t mpte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Examine the first PTE in the specified PTP. Abort if this PTE is * either invalid, unused, or does not map the first 4KB physical page * within a 2- or 4MB page. */ firstpte = pmap_pte_quick(pmap, trunc_4mpage(va)); setpde: newpde = *firstpte; if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" " in pmap %p", va, pmap); return; } if ((*firstpte & PG_MANAGED) != 0 && pmap == kernel_pmap) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" " in pmap %p", va, pmap); return; } if ((newpde & (PG_M | PG_RW)) == PG_RW) { /* * When PG_M is already clear, PG_RW can be cleared without * a TLB invalidation. */ if (!atomic_cmpset_int((u_int *)firstpte, newpde, newpde & ~PG_RW)) goto setpde; newpde &= ~PG_RW; } /* * Examine each of the other PTEs in the specified PTP. Abort if this * PTE maps an unexpected 4KB physical page or does not have identical * characteristics to the first PTE. */ pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE; for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) { setpte: oldpte = *pte; if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" " in pmap %p", va, pmap); return; } if ((oldpte & (PG_M | PG_RW)) == PG_RW) { /* * When PG_M is already clear, PG_RW can be cleared * without a TLB invalidation. 
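 *
 * Editor's note, spelling out the safety argument: a stale TLB
 * entry may still claim the page is writeable, but the first write
 * through a mapping whose cached PG_M is clear forces the processor
 * to re-walk and atomically update the pte in memory; at that point
 * it sees PG_RW clear and faults instead of setting PG_M.  The
 * cmpset below retries if another CPU sets PG_M concurrently.
 *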
*/ if (!atomic_cmpset_int((u_int *)pte, oldpte, oldpte & ~PG_RW)) goto setpte; oldpte &= ~PG_RW; oldpteva = (oldpte & PG_FRAME & PDRMASK) | (va & ~PDRMASK); CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#x" " in pmap %p", oldpteva, pmap); } if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" " in pmap %p", va, pmap); return; } pa -= PAGE_SIZE; } /* * Save the page table page in its current state until the PDE * mapping the superpage is demoted by pmap_demote_pde() or * destroyed by pmap_remove_pde(). */ mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_promote_pde: page table page is out of range")); KASSERT(mpte->pindex == va >> PDRSHIFT, ("pmap_promote_pde: page table page's pindex is wrong")); if (pmap_insert_pt_page(pmap, mpte)) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x in pmap %p", va, pmap); return; } /* * Promote the pv entries. */ if ((newpde & PG_MANAGED) != 0) pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME); /* * Propagate the PAT index to its proper position. */ if ((newpde & PG_PTE_PAT) != 0) newpde ^= PG_PDE_PAT | PG_PTE_PAT; /* * Map the superpage. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, PG_PS | newpde); else if (pmap == kernel_pmap) pmap_kenter_pde(va, PG_PS | newpde); else pde_store(pde, PG_PS | newpde); pmap_pde_promotions++; CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#x" " in pmap %p", va, pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind) { pd_entry_t *pde; pt_entry_t *pte; pt_entry_t newpte, origpte; pv_entry_t pv; vm_paddr_t opa, pa; vm_page_t mpte, om; boolean_t invlva, wired; va = trunc_page(va); mpte = NULL; wired = (flags & PMAP_ENTER_WIRED) != 0; KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va)); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); sched_pin(); pde = pmap_pde(pmap, va); if (va < VM_MAXUSER_ADDRESS) { /* * va is for UVA. * In the case that a page table page is not resident, * we are creating it here. pmap_allocpte() handles * demotion. */ mpte = pmap_allocpte(pmap, va, flags); if (mpte == NULL) { KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0, ("pmap_allocpte failed with sleep allowed")); sched_unpin(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_RESOURCE_SHORTAGE); } } else { /* * va is for KVA, so pmap_demote_pde() will never fail * to install a page table page. PG_V is also * asserted by pmap_demote_pde(). */ KASSERT(pde != NULL && (*pde & PG_V) != 0, ("KVA %#x invalid pde pdir %#jx", va, (uintmax_t)pmap->pm_pdir[PTDPTDI])); if ((*pde & PG_PS) != 0) pmap_demote_pde(pmap, pde, va); } pte = pmap_pte_quick(pmap, va); /* * Page Directory table entry is not valid, which should not * happen. 
We should have either allocated the page table * page or demoted the existing mapping above. */ if (pte == NULL) { panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x", (uintmax_t)pmap->pm_pdir[PTDPTDI], va); } pa = VM_PAGE_TO_PHYS(m); om = NULL; origpte = *pte; opa = origpte & PG_FRAME; /* * Mapping has not changed, must be protection or wiring change. */ if (origpte && (opa == pa)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if (wired && ((origpte & PG_W) == 0)) pmap->pm_stats.wired_count++; else if (!wired && (origpte & PG_W)) pmap->pm_stats.wired_count--; /* * Remove extra pte reference */ if (mpte) mpte->wire_count--; if (origpte & PG_MANAGED) { om = m; pa |= PG_MANAGED; } goto validate; } pv = NULL; /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ if (opa) { if (origpte & PG_W) pmap->pm_stats.wired_count--; if (origpte & PG_MANAGED) { om = PHYS_TO_VM_PAGE(opa); pv = pmap_pvh_remove(&om->md, pmap, va); } if (mpte != NULL) { mpte->wire_count--; KASSERT(mpte->wire_count > 0, ("pmap_enter: missing reference to page table page," " va: 0x%x", va)); } } else pmap->pm_stats.resident_count++; /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0) { KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, ("pmap_enter: managed mapping within the clean submap")); if (pv == NULL) pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); pa |= PG_MANAGED; } else if (pv != NULL) free_pv_entry(pmap, pv); /* * Increment counters */ if (wired) pmap->pm_stats.wired_count++; validate: /* * Now validate mapping with desired protection/wiring. */ newpte = (pt_entry_t)(pa | pmap_cache_bits(m->md.pat_mode, 0) | PG_V); if ((prot & VM_PROT_WRITE) != 0) { newpte |= PG_RW; if ((newpte & PG_MANAGED) != 0) vm_page_aflag_set(m, PGA_WRITEABLE); } #if defined(PAE) || defined(PAE_TABLES) if ((prot & VM_PROT_EXECUTE) == 0) newpte |= pg_nx; #endif if (wired) newpte |= PG_W; if (va < VM_MAXUSER_ADDRESS) newpte |= PG_U; if (pmap == kernel_pmap) newpte |= pgeflag; /* * if the mapping or permission bits are different, we need * to update the pte. */ if ((origpte & ~(PG_M|PG_A)) != newpte) { newpte |= PG_A; if ((flags & VM_PROT_WRITE) != 0) newpte |= PG_M; if (origpte & PG_V) { invlva = FALSE; origpte = pte_load_store(pte, newpte); if (origpte & PG_A) { if (origpte & PG_MANAGED) vm_page_aflag_set(om, PGA_REFERENCED); if (opa != VM_PAGE_TO_PHYS(m)) invlva = TRUE; #if defined(PAE) || defined(PAE_TABLES) if ((origpte & PG_NX) == 0 && (newpte & PG_NX) != 0) invlva = TRUE; #endif } if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if ((origpte & PG_MANAGED) != 0) vm_page_dirty(om); if ((prot & VM_PROT_WRITE) == 0) invlva = TRUE; } if ((origpte & PG_MANAGED) != 0 && TAILQ_EMPTY(&om->md.pv_list) && ((om->flags & PG_FICTITIOUS) != 0 || TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) vm_page_aflag_clear(om, PGA_WRITEABLE); if (invlva) pmap_invalidate_page(pmap, va); } else pte_store(pte, newpte); } /* * If both the page table page and the reservation are fully * populated, then attempt promotion. 
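 *
 * Editor's note: "fully populated" is tested cheaply below:
 * mpte->wire_count == NPTEPG means every 4KB pte in the page table
 * page is in use (mpte is NULL for KVA, which skips that check),
 * and vm_reserv_level_iffullpop(m) == 0 means the reservation
 * backing "m" has all of its 2/4MB of physical pages resident.
 * Only then is the pte-by-pte scan in pmap_promote_pde() worth
 * attempting.
 *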
*/ if ((mpte == NULL || mpte->wire_count == NPTEPG) && pg_ps_enabled && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) pmap_promote_pde(pmap, pde, va); sched_unpin(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } /* * Tries to create a 2- or 4MB page mapping. Returns TRUE if successful and * FALSE otherwise. Fails if (1) a page table page cannot be allocated without * blocking, (2) a mapping already exists at the specified virtual address, or * (3) a pv entry cannot be allocated without reclaiming another pv entry. */ static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { pd_entry_t *pde, newpde; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); pde = pmap_pde(pmap, va); if (*pde != 0) { CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 1) | PG_PS | PG_V; if ((m->oflags & VPO_UNMANAGED) == 0) { newpde |= PG_MANAGED; /* * Abort this mapping if its PV entry could not be created. */ if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m))) { CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } } #if defined(PAE) || defined(PAE_TABLES) if ((prot & VM_PROT_EXECUTE) == 0) newpde |= pg_nx; #endif if (va < VM_MAXUSER_ADDRESS) newpde |= PG_U; /* * Increment counters. */ pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; /* * Map the superpage. */ pde_store(pde, newpde); pmap_pde_mappings++; CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx" " in pmap %p", va, pmap); return (TRUE); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_offset_t va; vm_page_t m, mpte; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); mpte = NULL; m = m_start; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); if ((va & PDRMASK) == 0 && va + NBPDR <= end && m->psind == 1 && pg_ps_enabled && pmap_enter_pde(pmap, va, m, prot)) m = &m[NBPDR / PAGE_SIZE - 1]; else mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte); m = TAILQ_NEXT(m, listq); } rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... 
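 *
 * Editor's note (hypothetical call, for illustration): this is the
 * sort of entry made while prefaulting read-only neighbours of a
 * faulted page, e.g.
 *
 *	pmap_enter_quick(pmap, va, m, prot);
 *
 * A failure to allocate the pv entry or the page table page is
 * silent; the caller simply takes a real fault later.
 *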
*/ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte) { pt_entry_t *pte; vm_paddr_t pa; struct spglist free; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { u_int ptepindex; pd_entry_t ptepa; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; if (mpte && (mpte->pindex == ptepindex)) { mpte->wire_count++; } else { /* * Get the page directory entry */ ptepa = pmap->pm_pdir[ptepindex]; /* * If the page table page is mapped, we just increment * the hold count, and activate it. */ if (ptepa) { if (ptepa & PG_PS) return (NULL); mpte = PHYS_TO_VM_PAGE(ptepa & PG_FRAME); mpte->wire_count++; } else { mpte = _pmap_allocpte(pmap, ptepindex, PMAP_ENTER_NOSLEEP); if (mpte == NULL) return (mpte); } } } else { mpte = NULL; } /* * This call to vtopte makes the assumption that we are * entering the page into the current pmap. In order to support * quick entry into any pmap, one would likely use pmap_pte_quick. * But that isn't as quick as vtopte. */ pte = vtopte(va); if (*pte) { if (mpte != NULL) { mpte->wire_count--; mpte = NULL; } return (mpte); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0 && !pmap_try_insert_pv_entry(pmap, va, m)) { if (mpte != NULL) { SLIST_INIT(&free); if (pmap_unwire_ptp(pmap, mpte, &free)) { pmap_invalidate_page(pmap, va); pmap_free_zero_pages(&free); } mpte = NULL; } return (mpte); } /* * Increment counters */ pmap->pm_stats.resident_count++; pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0); #if defined(PAE) || defined(PAE_TABLES) if ((prot & VM_PROT_EXECUTE) == 0) pa |= pg_nx; #endif /* * Now validate mapping with RO protection */ if ((m->oflags & VPO_UNMANAGED) != 0) pte_store(pte, pa | PG_V | PG_U); else pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); return (mpte); } /* * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. */ void * pmap_kenter_temporary(vm_paddr_t pa, int i) { vm_offset_t va; va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); pmap_kenter(va, pa); invlpg(va); return ((void *)crashdumpmap); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { pd_entry_t *pde; vm_paddr_t pa, ptepa; vm_page_t p; int pat_mode; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); if (pseflag && (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { if (!vm_object_populate(object, pindex, pindex + atop(size))) return; p = vm_page_lookup(object, pindex); KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); pat_mode = p->md.pat_mode; /* * Abort the mapping if the first page is not physically * aligned to a 2/4MB page boundary. 
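 *
 * Worked example (editor's note): with 4MB superpages,
 * NBPDR - 1 == 0x3fffff, so a run starting at physical address
 * 0x00c00000 passes the check below ((0x00c00000 & 0x3fffff) == 0),
 * while one starting at 0x00c01000 does not, and the routine
 * declines to map anything.
 *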
*/ ptepa = VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) return; /* * Skip the first page. Abort the mapping if the rest of * the pages are not physically contiguous or have differing * memory attributes. */ p = TAILQ_NEXT(p, listq); for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; pa += PAGE_SIZE) { KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); if (pa != VM_PAGE_TO_PHYS(p) || pat_mode != p->md.pat_mode) return; p = TAILQ_NEXT(p, listq); } /* * Map using 2/4MB pages. Since "ptepa" is 2/4M aligned and * "size" is a multiple of 2/4M, adding the PAT setting to * "pa" will not affect the termination of this loop. */ PMAP_LOCK(pmap); for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa + size; pa += NBPDR) { pde = pmap_pde(pmap, addr); if (*pde == 0) { pde_store(pde, pa | PG_PS | PG_M | PG_A | PG_U | PG_RW | PG_V); pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; pmap_pde_mappings++; } /* Else continue on if the PDE is already valid. */ addr += NBPDR; } PMAP_UNLOCK(pmap); } } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range * must have the wired attribute set. In contrast, invalid mappings * cannot have the wired attribute set, so they are ignored. * * The wired attribute of the page table entry is not a hardware feature, * so there is no need to invalidate any TLB entries. */ void pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t pdnxt; pd_entry_t *pde; pt_entry_t *pte; boolean_t pv_lists_locked; if (pmap_is_current(pmap)) pv_lists_locked = FALSE; else { pv_lists_locked = TRUE; resume: rw_wlock(&pvh_global_lock); sched_pin(); } PMAP_LOCK(pmap); for (; sva < eva; sva = pdnxt) { pdnxt = (sva + NBPDR) & ~PDRMASK; if (pdnxt < sva) pdnxt = eva; pde = pmap_pde(pmap, sva); if ((*pde & PG_V) == 0) continue; if ((*pde & PG_PS) != 0) { if ((*pde & PG_W) == 0) panic("pmap_unwire: pde %#jx is missing PG_W", (uintmax_t)*pde); /* * Are we unwiring the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == pdnxt && eva >= pdnxt) { /* * Regardless of whether a pde (or pte) is 32 * or 64 bits in size, PG_W is among the least * significant 32 bits. */ atomic_clear_int((u_int *)pde, PG_W); pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; continue; } else { if (!pv_lists_locked) { pv_lists_locked = TRUE; if (!rw_try_wlock(&pvh_global_lock)) { PMAP_UNLOCK(pmap); /* Repeat sva. */ goto resume; } sched_pin(); } if (!pmap_demote_pde(pmap, pde, sva)) panic("pmap_unwire: demotion failed"); } } if (pdnxt > eva) pdnxt = eva; for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, sva += PAGE_SIZE) { if ((*pte & PG_V) == 0) continue; if ((*pte & PG_W) == 0) panic("pmap_unwire: pte %#jx is missing PG_W", (uintmax_t)*pte); /* * PG_W must be cleared atomically. Although the pmap * lock synchronizes access to PG_W, another processor * could be setting PG_M and/or PG_A concurrently. * * PG_W is among the least significant 32 bits. */ atomic_clear_int((u_int *)pte, PG_W); pmap->pm_stats.wired_count--; } } if (pv_lists_locked) { sched_unpin(); rw_wunlock(&pvh_global_lock); } PMAP_UNLOCK(pmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. 
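 *
 * Editor's note: pmap_copy() below must lock two pmaps at once, so
 * it acquires the PMAP_LOCKs in ascending order of pmap address.
 * Any two threads copying between the same pair of pmaps therefore
 * lock them in the same order, ruling out an AB/BA deadlock.
 *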
*/ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { struct spglist free; vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t pdnxt; if (dst_addr != src_addr) return; if (!pmap_is_current(src_pmap)) return; rw_wlock(&pvh_global_lock); if (dst_pmap < src_pmap) { PMAP_LOCK(dst_pmap); PMAP_LOCK(src_pmap); } else { PMAP_LOCK(src_pmap); PMAP_LOCK(dst_pmap); } sched_pin(); for (addr = src_addr; addr < end_addr; addr = pdnxt) { pt_entry_t *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; pd_entry_t srcptepaddr; u_int ptepindex; KASSERT(addr < UPT_MIN_ADDRESS, ("pmap_copy: invalid to pmap_copy page tables")); pdnxt = (addr + NBPDR) & ~PDRMASK; if (pdnxt < addr) pdnxt = end_addr; ptepindex = addr >> PDRSHIFT; srcptepaddr = src_pmap->pm_pdir[ptepindex]; if (srcptepaddr == 0) continue; if (srcptepaddr & PG_PS) { if ((addr & PDRMASK) != 0 || addr + NBPDR > end_addr) continue; if (dst_pmap->pm_pdir[ptepindex] == 0 && ((srcptepaddr & PG_MANAGED) == 0 || pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr & PG_PS_FRAME))) { dst_pmap->pm_pdir[ptepindex] = srcptepaddr & ~PG_W; dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; pmap_pde_mappings++; } continue; } srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME); KASSERT(srcmpte->wire_count > 0, ("pmap_copy: source page table page is unused")); if (pdnxt > end_addr) pdnxt = end_addr; src_pte = vtopte(addr); while (addr < pdnxt) { pt_entry_t ptetemp; ptetemp = *src_pte; /* * we only virtually copy managed pages */ if ((ptetemp & PG_MANAGED) != 0) { dstmpte = pmap_allocpte(dst_pmap, addr, PMAP_ENTER_NOSLEEP); if (dstmpte == NULL) goto out; dst_pte = pmap_pte_quick(dst_pmap, addr); if (*dst_pte == 0 && pmap_try_insert_pv_entry(dst_pmap, addr, PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) { /* * Clear the wired, modified, and * accessed (referenced) bits * during the copy. */ *dst_pte = ptetemp & ~(PG_W | PG_M | PG_A); dst_pmap->pm_stats.resident_count++; } else { SLIST_INIT(&free); if (pmap_unwire_ptp(dst_pmap, dstmpte, &free)) { pmap_invalidate_page(dst_pmap, addr); pmap_free_zero_pages(&free); } goto out; } if (dstmpte->wire_count >= srcmpte->wire_count) break; } addr += PAGE_SIZE; src_pte++; } } out: sched_unpin(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(src_pmap); PMAP_UNLOCK(dst_pmap); } /* * Zero 1 page of virtual memory mapped from a hardware page by the caller. */ static __inline void pagezero(void *page) { #if defined(I686_CPU) if (cpu_class == CPUCLASS_686) { -#if defined(CPU_ENABLE_SSE) if (cpu_feature & CPUID_SSE2) sse2_pagezero(page); else -#endif i686_pagezero(page); } else #endif bzero(page, PAGE_SIZE); } /* * Zero the specified hardware page. */ void pmap_zero_page(vm_page_t m) { pt_entry_t *cmap_pte2; struct pcpu *pc; sched_pin(); pc = get_pcpu(); cmap_pte2 = pc->pc_cmap_pte2; mtx_lock(&pc->pc_cmap_lock); if (*cmap_pte2) panic("pmap_zero_page: CMAP2 busy"); *cmap_pte2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0); invlcaddr(pc->pc_cmap_addr2); pagezero(pc->pc_cmap_addr2); *cmap_pte2 = 0; /* * Unpin the thread before releasing the lock. Otherwise the thread * could be rescheduled while still bound to the current CPU, only * to unpin itself immediately upon resuming execution. */ sched_unpin(); mtx_unlock(&pc->pc_cmap_lock); } /* * Zero an area within a single hardware page. off and size must not * cover an area beyond a single hardware page.
*/ void pmap_zero_page_area(vm_page_t m, int off, int size) { pt_entry_t *cmap_pte2; struct pcpu *pc; sched_pin(); pc = get_pcpu(); cmap_pte2 = pc->pc_cmap_pte2; mtx_lock(&pc->pc_cmap_lock); if (*cmap_pte2) panic("pmap_zero_page_area: CMAP2 busy"); *cmap_pte2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0); invlcaddr(pc->pc_cmap_addr2); if (off == 0 && size == PAGE_SIZE) pagezero(pc->pc_cmap_addr2); else bzero(pc->pc_cmap_addr2 + off, size); *cmap_pte2 = 0; sched_unpin(); mtx_unlock(&pc->pc_cmap_lock); } /* * Copy 1 specified hardware page to another. */ void pmap_copy_page(vm_page_t src, vm_page_t dst) { pt_entry_t *cmap_pte1, *cmap_pte2; struct pcpu *pc; sched_pin(); pc = get_pcpu(); cmap_pte1 = pc->pc_cmap_pte1; cmap_pte2 = pc->pc_cmap_pte2; mtx_lock(&pc->pc_cmap_lock); if (*cmap_pte1) panic("pmap_copy_page: CMAP1 busy"); if (*cmap_pte2) panic("pmap_copy_page: CMAP2 busy"); *cmap_pte1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A | pmap_cache_bits(src->md.pat_mode, 0); invlcaddr(pc->pc_cmap_addr1); *cmap_pte2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M | pmap_cache_bits(dst->md.pat_mode, 0); invlcaddr(pc->pc_cmap_addr2); bcopy(pc->pc_cmap_addr1, pc->pc_cmap_addr2, PAGE_SIZE); *cmap_pte1 = 0; *cmap_pte2 = 0; sched_unpin(); mtx_unlock(&pc->pc_cmap_lock); } int unmapped_buf_allowed = 1; void pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], vm_offset_t b_offset, int xfersize) { vm_page_t a_pg, b_pg; char *a_cp, *b_cp; vm_offset_t a_pg_offset, b_pg_offset; pt_entry_t *cmap_pte1, *cmap_pte2; struct pcpu *pc; int cnt; sched_pin(); pc = get_pcpu(); cmap_pte1 = pc->pc_cmap_pte1; cmap_pte2 = pc->pc_cmap_pte2; mtx_lock(&pc->pc_cmap_lock); if (*cmap_pte1 != 0) panic("pmap_copy_pages: CMAP1 busy"); if (*cmap_pte2 != 0) panic("pmap_copy_pages: CMAP2 busy"); while (xfersize > 0) { a_pg = ma[a_offset >> PAGE_SHIFT]; a_pg_offset = a_offset & PAGE_MASK; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); b_pg = mb[b_offset >> PAGE_SHIFT]; b_pg_offset = b_offset & PAGE_MASK; cnt = min(cnt, PAGE_SIZE - b_pg_offset); *cmap_pte1 = PG_V | VM_PAGE_TO_PHYS(a_pg) | PG_A | pmap_cache_bits(a_pg->md.pat_mode, 0); invlcaddr(pc->pc_cmap_addr1); *cmap_pte2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(b_pg) | PG_A | PG_M | pmap_cache_bits(b_pg->md.pat_mode, 0); invlcaddr(pc->pc_cmap_addr2); a_cp = pc->pc_cmap_addr1 + a_pg_offset; b_cp = pc->pc_cmap_addr2 + b_pg_offset; bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } *cmap_pte1 = 0; *cmap_pte2 = 0; sched_unpin(); mtx_unlock(&pc->pc_cmap_lock); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. 
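 *
 * Editor's note: "loops" is shared across the scans of both the 4KB
 * and the 2/4MB pv lists below, so at most 16 pv entries in total
 * are examined before the routine gives up and reports FALSE.
 *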
*/ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { struct md_page *pvh; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } } rw_wunlock(&pvh_global_lock); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { int count; count = 0; if ((m->oflags & VPO_UNMANAGED) != 0) return (count); rw_wlock(&pvh_global_lock); count = pmap_pvh_wired_mappings(&m->md, count); if ((m->flags & PG_FICTITIOUS) == 0) { count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), count); } rw_wunlock(&pvh_global_lock); return (count); } /* * pmap_pvh_wired_mappings: * * Return the updated number "count" of managed mappings that are wired. */ static int pmap_pvh_wired_mappings(struct md_page *pvh, int count) { pmap_t pmap; pt_entry_t *pte; pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); if ((*pte & PG_W) != 0) count++; PMAP_UNLOCK(pmap); } sched_unpin(); return (count); } /* * Returns TRUE if the given page is mapped individually or as part of * a 4mpage. Otherwise, returns FALSE. */ boolean_t pmap_page_is_mapped(vm_page_t m) { boolean_t rv; if ((m->oflags & VPO_UNMANAGED) != 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = !TAILQ_EMPTY(&m->md.pv_list) || ((m->flags & PG_FICTITIOUS) == 0 && !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); rw_wunlock(&pvh_global_lock); return (rv); } /* * Remove all pages from the specified address space; this aids process * exit speeds. Also, this code is special cased for the current process * only, but can have the more generic (and slightly slower) mode enabled. * This is much faster than pmap_remove in the case of running down an * entire address space.
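 *
 * Editor's note: the current-process restriction exists because the
 * loop below reads ptes through vtopte(), i.e. through the recursive
 * mapping of the *current* page tables; for any other pmap those
 * addresses would name the wrong tables, hence the printf-and-return
 * at the top of the function.
 *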
*/ void pmap_remove_pages(pmap_t pmap) { pt_entry_t *pte, tpte; vm_page_t m, mpte, mt; pv_entry_t pv; struct md_page *pvh; struct pv_chunk *pc, *npc; struct spglist free; int field, idx; int32_t bit; uint32_t inuse, bitmask; int allfree; if (pmap != PCPU_GET(curpmap)) { printf("warning: pmap_remove_pages called with non-current pmap\n"); return; } SLIST_INIT(&free); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); sched_pin(); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { KASSERT(pc->pc_pmap == pmap, ("Wrong pmap %p %p", pmap, pc->pc_pmap)); allfree = 1; for (field = 0; field < _NPCM; field++) { inuse = ~pc->pc_map[field] & pc_freemask[field]; while (inuse != 0) { bit = bsfl(inuse); bitmask = 1UL << bit; idx = field * 32 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; pte = pmap_pde(pmap, pv->pv_va); tpte = *pte; if ((tpte & PG_PS) == 0) { pte = vtopte(pv->pv_va); tpte = *pte & ~PG_PTE_PAT; } if (tpte == 0) { printf( "TPTE at %p IS ZERO @ VA %08x\n", pte, pv->pv_va); panic("bad pte"); } /* * We cannot remove wired pages from a process' mapping at this time */ if (tpte & PG_W) { allfree = 0; continue; } m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); KASSERT(m->phys_addr == (tpte & PG_FRAME), ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); pte_clear(pte); /* * Update the vm_page_t clean/reference bits. */ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if ((tpte & PG_PS) != 0) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) vm_page_dirty(mt); } else vm_page_dirty(m); } /* Mark free */ PV_STAT(pv_entry_frees++); PV_STAT(pv_entry_spare++); pv_entry_count--; pc->pc_map[field] |= bitmask; if ((tpte & PG_PS) != 0) { pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; pvh = pa_to_pvh(tpte & PG_PS_FRAME); TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) if (TAILQ_EMPTY(&mt->md.pv_list)) vm_page_aflag_clear(mt, PGA_WRITEABLE); } mpte = pmap_remove_pt_page(pmap, pv->pv_va); if (mpte != NULL) { pmap->pm_stats.resident_count--; KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pages: pte page wire count error")); mpte->wire_count = 0; pmap_add_delayed_free_list(mpte, &free, FALSE); atomic_subtract_int(&vm_cnt.v_wire_count, 1); } } else { pmap->pm_stats.resident_count--; TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } pmap_unuse_pt(pmap, pv->pv_va, &free); } } } if (allfree) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } } sched_unpin(); pmap_invalidate_all(pmap); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_modified: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PG_M set. 
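 *
 * Editor's note, making the implication explicit: PG_M can only be
 * set via a writeable mapping, creating a writeable mapping of a
 * managed page sets PGA_WRITEABLE, and PGA_WRITEABLE can only be set
 * while the page is exclusive busied or its object lock (held here)
 * is held.  So a clear PGA_WRITEABLE really does prove that no PG_M
 * bit exists, with no pv list walk needed.
 *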
*/ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = pmap_is_modified_pvh(&m->md) || ((m->flags & PG_FICTITIOUS) == 0 && pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); rw_wunlock(&pvh_global_lock); return (rv); } /* * Returns TRUE if any of the given mappings were used to modify * physical memory. Otherwise, returns FALSE. Both page and 2mpage * mappings are supported. */ static boolean_t pmap_is_modified_pvh(struct md_page *pvh) { pv_entry_t pv; pt_entry_t *pte; pmap_t pmap; boolean_t rv; rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); rv = (*pte & (PG_M | PG_RW)) == (PG_M | PG_RW); PMAP_UNLOCK(pmap); if (rv) break; } sched_unpin(); return (rv); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is eligible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { pd_entry_t *pde; pt_entry_t *pte; boolean_t rv; rv = FALSE; PMAP_LOCK(pmap); pde = pmap_pde(pmap, addr); if (*pde != 0 && (*pde & PG_PS) == 0) { pte = vtopte(addr); rv = *pte == 0; } PMAP_UNLOCK(pmap); return (rv); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. */ boolean_t pmap_is_referenced(vm_page_t m) { boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_referenced: page %p is not managed", m)); rw_wlock(&pvh_global_lock); rv = pmap_is_referenced_pvh(&m->md) || ((m->flags & PG_FICTITIOUS) == 0 && pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); rw_wunlock(&pvh_global_lock); return (rv); } /* * Returns TRUE if any of the given mappings were referenced and FALSE * otherwise. Both page and 4mpage mappings are supported. */ static boolean_t pmap_is_referenced_pvh(struct md_page *pvh) { pv_entry_t pv; pt_entry_t *pte; pmap_t pmap; boolean_t rv; rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V); PMAP_UNLOCK(pmap); if (rv) break; } sched_unpin(); return (rv); } /* * Clear the write and modified bits in each of the given page's mappings. */ void pmap_remove_write(vm_page_t m) { struct md_page *pvh; pv_entry_t next_pv, pv; pmap_t pmap; pd_entry_t *pde; pt_entry_t oldpte, *pte; vm_offset_t va; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating.
*/ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); if ((*pde & PG_RW) != 0) (void)pmap_demote_pde(pmap, pde, va); PMAP_UNLOCK(pmap); } small_mappings: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_clear_write: found" " a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); retry: oldpte = *pte; if ((oldpte & PG_RW) != 0) { /* * Regardless of whether a pte is 32 or 64 bits * in size, PG_RW and PG_M are among the least * significant 32 bits. */ if (!atomic_cmpset_int((u_int *)pte, oldpte, oldpte & ~(PG_RW | PG_M))) goto retry; if ((oldpte & PG_M) != 0) vm_page_dirty(m); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); sched_unpin(); rw_wunlock(&pvh_global_lock); } /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * As an optimization, update the page's dirty field if a modified bit is * found while counting reference bits. This opportunistic update can be * performed at low cost and can eliminate the need for some future calls * to pmap_is_modified(). However, since this function stops after * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some * dirty pages. Those dirty pages will only be detected by a future call * to pmap_is_modified(). */ int pmap_ts_referenced(vm_page_t m) { struct md_page *pvh; pv_entry_t pv, pvf; pmap_t pmap; pd_entry_t *pde; pt_entry_t *pte; vm_paddr_t pa; int rtval = 0; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); pa = VM_PAGE_TO_PHYS(m); pvh = pa_to_pvh(pa); rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0 || (pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) goto small_mappings; pv = pvf; do { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); if ((*pde & (PG_M | PG_RW)) == (PG_M | PG_RW)) { /* * Although "*pde" is mapping a 2/4MB page, because * this function is called at a 4KB page granularity, * we only update the 4KB page under test. */ vm_page_dirty(m); } if ((*pde & PG_A) != 0) { /* * Since this reference bit is shared by either 1024 * or 512 4KB pages, it should not be cleared every * time it is tested. Apply a simple "hash" function * on the physical page number, the virtual superpage * number, and the pmap address to select one 4KB page * out of the 1024 or 512 on which testing the * reference bit will result in clearing that bit. * This function is designed to avoid the selection of * the same 4KB page for every 2- or 4MB page mapping. * * On demotion, a mapping that hasn't been referenced * is simply destroyed. To avoid the possibility of a * subsequent page fault on a demoted wired mapping, * always leave its reference bit set. Moreover, * since the superpage is wired, the current state of * its reference bit won't affect page replacement. 
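 *
 * Worked example (editor's note): NPTEPG - 1 is 0x3ff without PAE,
 * and as "pa" steps through the 1024 4KB pages of one 4MB superpage,
 * (pa >> PAGE_SHIFT) & 0x3ff takes each value exactly once; so for a
 * given (va, pmap) pair exactly one page of the superpage hashes to
 * zero and gets the shared PG_A bit cleared, spreading both the cost
 * and the lost reference information across the superpage.
 *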
*/ if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> PDRSHIFT) ^ (uintptr_t)pmap) & (NPTEPG - 1)) == 0 && (*pde & PG_W) == 0) { atomic_clear_int((u_int *)pde, PG_A); pmap_invalidate_page(pmap, pv->pv_va); } rtval++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); } if (rtval >= PMAP_TS_REFERENCED_MAX) goto out; } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); small_mappings: if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; do { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced: found a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if ((*pte & PG_A) != 0) { atomic_clear_int((u_int *)pte, PG_A); pmap_invalidate_page(pmap, pv->pv_va); rtval++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); } } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && rtval < PMAP_TS_REFERENCED_MAX); out: sched_unpin(); rw_wunlock(&pvh_global_lock); return (rtval); } /* * Apply the given advice to the specified range of addresses within the * given pmap. Depending on the advice, clear the referenced and/or * modified flags in each mapping and set the mapped page's dirty field. */ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) { pd_entry_t oldpde, *pde; pt_entry_t *pte; vm_offset_t va, pdnxt; vm_page_t m; boolean_t anychanged, pv_lists_locked; if (advice != MADV_DONTNEED && advice != MADV_FREE) return; if (pmap_is_current(pmap)) pv_lists_locked = FALSE; else { pv_lists_locked = TRUE; resume: rw_wlock(&pvh_global_lock); sched_pin(); } anychanged = FALSE; PMAP_LOCK(pmap); for (; sva < eva; sva = pdnxt) { pdnxt = (sva + NBPDR) & ~PDRMASK; if (pdnxt < sva) pdnxt = eva; pde = pmap_pde(pmap, sva); oldpde = *pde; if ((oldpde & PG_V) == 0) continue; else if ((oldpde & PG_PS) != 0) { if ((oldpde & PG_MANAGED) == 0) continue; if (!pv_lists_locked) { pv_lists_locked = TRUE; if (!rw_try_wlock(&pvh_global_lock)) { if (anychanged) pmap_invalidate_all(pmap); PMAP_UNLOCK(pmap); goto resume; } sched_pin(); } if (!pmap_demote_pde(pmap, pde, sva)) { /* * The large page mapping was destroyed. */ continue; } /* * Unless the page mappings are wired, remove the * mapping to a single page so that a subsequent * access may repromote. Since the underlying page * table page is fully populated, this removal never * frees a page table page. */ if ((oldpde & PG_W) == 0) { pte = pmap_pte_quick(pmap, sva); KASSERT((*pte & PG_V) != 0, ("pmap_advise: invalid PTE")); pmap_remove_pte(pmap, pte, sva, NULL); anychanged = TRUE; } } if (pdnxt > eva) pdnxt = eva; va = pdnxt; for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, sva += PAGE_SIZE) { if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED | PG_V)) goto maybe_invlrng; else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if (advice == MADV_DONTNEED) { /* * Future calls to pmap_is_modified() * can be avoided by making the page * dirty now. 
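 *
 * Editor's note: the dirty state is deliberately *not* preserved for
 * MADV_FREE; the caller has declared the contents disposable, so in
 * that case PG_M is simply discarded below along with PG_A.
 *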
*/ m = PHYS_TO_VM_PAGE(*pte & PG_FRAME); vm_page_dirty(m); } atomic_clear_int((u_int *)pte, PG_M | PG_A); } else if ((*pte & PG_A) != 0) atomic_clear_int((u_int *)pte, PG_A); else goto maybe_invlrng; if ((*pte & PG_G) != 0) { if (va == pdnxt) va = sva; } else anychanged = TRUE; continue; maybe_invlrng: if (va != pdnxt) { pmap_invalidate_range(pmap, va, sva); va = pdnxt; } } if (va != pdnxt) pmap_invalidate_range(pmap, va, sva); } if (anychanged) pmap_invalidate_all(pmap); if (pv_lists_locked) { sched_unpin(); rw_wunlock(&pvh_global_lock); } PMAP_UNLOCK(pmap); } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { struct md_page *pvh; pv_entry_t next_pv, pv; pmap_t pmap; pd_entry_t oldpde, *pde; pt_entry_t oldpte, *pte; vm_offset_t va; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); oldpde = *pde; if ((oldpde & PG_RW) != 0) { if (pmap_demote_pde(pmap, pde, va)) { if ((oldpde & PG_W) == 0) { /* * Write protect the mapping to a * single page so that a subsequent * write access may repromote. */ va += VM_PAGE_TO_PHYS(m) - (oldpde & PG_PS_FRAME); pte = pmap_pte_quick(pmap, va); oldpte = *pte; if ((oldpte & PG_V) != 0) { /* * Regardless of whether a pte is 32 or 64 bits * in size, PG_RW and PG_M are among the least * significant 32 bits. */ while (!atomic_cmpset_int((u_int *)pte, oldpte, oldpte & ~(PG_M | PG_RW))) oldpte = *pte; vm_page_dirty(m); pmap_invalidate_page(pmap, va); } } } } PMAP_UNLOCK(pmap); } small_mappings: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found" " a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { /* * Regardless of whether a pte is 32 or 64 bits * in size, PG_M is among the least significant * 32 bits. */ atomic_clear_int((u_int *)pte, PG_M); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } sched_unpin(); rw_wunlock(&pvh_global_lock); } /* * Miscellaneous support routines follow */ /* Adjust the cache mode for a 4KB page mapped via a PTE. */ static __inline void pmap_pte_attr(pt_entry_t *pte, int cache_bits) { u_int opte, npte; /* * The cache mode bits are all in the low 32-bits of the * PTE, so we can just spin on updating the low 32-bits. */ do { opte = *(u_int *)pte; npte = opte & ~PG_PTE_CACHE; npte |= cache_bits; } while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte)); } /* Adjust the cache mode for a 2/4MB page mapped via a PDE. */ static __inline void pmap_pde_attr(pd_entry_t *pde, int cache_bits) { u_int opde, npde; /* * The cache mode bits are all in the low 32-bits of the * PDE, so we can just spin on updating the low 32-bits. 
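 *
 * Editor's note: PG_PDE_CACHE is PG_PDE_PAT | PG_NC_PCD | PG_NC_PWT,
 * all of which are in bits 0..31 even under PAE, so the same 32-bit
 * cmpset trick as in pmap_pte_attr() above applies and the high word
 * (which may hold PG_NX) is never disturbed.
 *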
*/ do { opde = *(u_int *)pde; npde = opde & ~PG_PDE_CACHE; npde |= cache_bits; } while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde)); } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. */ void * pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode) { struct pmap_preinit_mapping *ppim; vm_offset_t va, offset; vm_size_t tmpsize; int i; offset = pa & PAGE_MASK; size = round_page(offset + size); pa = pa & PG_FRAME; if (pa < KERNLOAD && pa + size <= KERNLOAD) va = KERNBASE + pa; else if (!pmap_initialized) { va = 0; for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == 0) { ppim->pa = pa; ppim->sz = size; ppim->mode = mode; ppim->va = virtual_avail; virtual_avail += size; va = ppim->va; break; } } if (va == 0) panic("%s: too many preinit mappings", __func__); } else { /* * If we have a preinit mapping, re-use it. */ for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->pa == pa && ppim->sz == size && ppim->mode == mode) return ((void *)(ppim->va + offset)); } va = kva_alloc(size); if (va == 0) panic("%s: Couldn't allocate KVA", __func__); } for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode); pmap_invalidate_range(kernel_pmap, va, va + tmpsize); pmap_invalidate_cache_range(va, va + size, FALSE); return ((void *)(va + offset)); } void * pmap_mapdev(vm_paddr_t pa, vm_size_t size) { return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE)); } void * pmap_mapbios(vm_paddr_t pa, vm_size_t size) { return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK)); } void pmap_unmapdev(vm_offset_t va, vm_size_t size) { struct pmap_preinit_mapping *ppim; vm_offset_t offset; int i; if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD) return; offset = va & PAGE_MASK; size = round_page(offset + size); va = trunc_page(va); for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == va && ppim->sz == size) { if (pmap_initialized) return; ppim->pa = 0; ppim->va = 0; ppim->sz = 0; ppim->mode = 0; if (va + size == virtual_avail) virtual_avail = va; return; } } if (pmap_initialized) kva_free(va, size); } /* * Sets the memory attribute for the specified page. */ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { m->md.pat_mode = ma; if ((m->flags & PG_FICTITIOUS) != 0) return; /* * If "m" is a normal page, flush it from the cache. * See pmap_invalidate_cache_range(). * * First, try to find an existing mapping of the page by sf * buffer. sf_buf_invalidate_cache() modifies mapping and * flushes the cache. */ if (sf_buf_invalidate_cache(m)) return; /* * If page is not mapped by sf buffer, but CPU does not * support self snoop, map the page transient and do * invalidation. In the worst case, whole cache is flushed by * pmap_invalidate_cache_range(). 
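 *
 * Editor's note: CPUID_SS is the "self snoop" feature bit; a CPU
 * advertising it keeps its caches coherent across aliased mappings
 * with different memory types, so the explicit CLFLUSH pass in
 * pmap_flush_page() is needed only when the bit is absent.
 *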
*/ if ((cpu_feature & CPUID_SS) == 0) pmap_flush_page(m); } static void pmap_flush_page(vm_page_t m) { pt_entry_t *cmap_pte2; struct pcpu *pc; vm_offset_t sva, eva; bool useclflushopt; useclflushopt = (cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0; if (useclflushopt || (cpu_feature & CPUID_CLFSH) != 0) { sched_pin(); pc = get_pcpu(); cmap_pte2 = pc->pc_cmap_pte2; mtx_lock(&pc->pc_cmap_lock); if (*cmap_pte2) panic("pmap_flush_page: CMAP2 busy"); *cmap_pte2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0); invlcaddr(pc->pc_cmap_addr2); sva = (vm_offset_t)pc->pc_cmap_addr2; eva = sva + PAGE_SIZE; /* * Use mfence or sfence despite the ordering implied by * mtx_{un,}lock() because clflush on non-Intel CPUs * and clflushopt are not guaranteed to be ordered by * any other instruction. */ if (useclflushopt) sfence(); else if (cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); for (; sva < eva; sva += cpu_clflush_line_size) { if (useclflushopt) clflushopt(sva); else clflush(sva); } if (useclflushopt) sfence(); else if (cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); *cmap_pte2 = 0; sched_unpin(); mtx_unlock(&pc->pc_cmap_lock); } else pmap_invalidate_cache(); } /* * Changes the specified virtual address range's memory type to that given by * the parameter "mode". The specified virtual address range must be * completely contained within the kernel map. * * Returns zero if the change completed successfully, and either EINVAL or * ENOMEM if the change failed. Specifically, EINVAL is returned if some part * of the virtual address range was not mapped, and ENOMEM is returned if * there was insufficient memory available to complete the change. */ int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) { vm_offset_t base, offset, tmpva; pd_entry_t *pde; pt_entry_t *pte; int cache_bits_pte, cache_bits_pde; boolean_t changed; base = trunc_page(va); offset = va & PAGE_MASK; size = round_page(offset + size); /* * Only supported on kernel virtual addresses above the recursive map. */ if (base < VM_MIN_KERNEL_ADDRESS) return (EINVAL); cache_bits_pde = pmap_cache_bits(mode, 1); cache_bits_pte = pmap_cache_bits(mode, 0); changed = FALSE; /* * Pages that aren't mapped aren't supported. Also break down * 2/4MB pages into 4KB pages if required. */ PMAP_LOCK(kernel_pmap); for (tmpva = base; tmpva < base + size; ) { pde = pmap_pde(kernel_pmap, tmpva); if (*pde == 0) { PMAP_UNLOCK(kernel_pmap); return (EINVAL); } if (*pde & PG_PS) { /* * If the current 2/4MB page already has * the required memory type, then we need not * demote this page. Just increment tmpva to * the next 2/4MB page frame. */ if ((*pde & PG_PDE_CACHE) == cache_bits_pde) { tmpva = trunc_4mpage(tmpva) + NBPDR; continue; } /* * If the current offset aligns with a 2/4MB * page frame and there is at least 2/4MB left * within the range, then we need not break * down this page into 4KB pages. */ if ((tmpva & PDRMASK) == 0 && tmpva + PDRMASK < base + size) { tmpva += NBPDR; continue; } if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) { PMAP_UNLOCK(kernel_pmap); return (ENOMEM); } } pte = vtopte(tmpva); if (*pte == 0) { PMAP_UNLOCK(kernel_pmap); return (EINVAL); } tmpva += PAGE_SIZE; } PMAP_UNLOCK(kernel_pmap); /* * Ok, all the pages exist, so run through them updating their * cache mode if required.
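 *
 * Usage sketch (editor's note; "fb_va" and "fb_size" are
 * hypothetical): a typical caller maps a framebuffer
 * write-combining and checks for partial failure:
 *
 *	if (pmap_change_attr(fb_va, fb_size, PAT_WRITE_COMBINING) != 0)
 *		printf("could not set WC on framebuffer\n");
 *
 * EINVAL means part of the range was unmapped; ENOMEM means a
 * required 2/4MB demotion could not allocate a page table page.
 *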
*/ for (tmpva = base; tmpva < base + size; ) { pde = pmap_pde(kernel_pmap, tmpva); if (*pde & PG_PS) { if ((*pde & PG_PDE_CACHE) != cache_bits_pde) { pmap_pde_attr(pde, cache_bits_pde); changed = TRUE; } tmpva = trunc_4mpage(tmpva) + NBPDR; } else { pte = vtopte(tmpva); if ((*pte & PG_PTE_CACHE) != cache_bits_pte) { pmap_pte_attr(pte, cache_bits_pte); changed = TRUE; } tmpva += PAGE_SIZE; } } /* * Flush CPU caches to make sure any data isn't cached that * shouldn't be, etc. */ if (changed) { pmap_invalidate_range(kernel_pmap, base, tmpva); pmap_invalidate_cache_range(base, tmpva, FALSE); } return (0); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pd_entry_t *pdep; pt_entry_t *ptep, pte; vm_paddr_t pa; int val; PMAP_LOCK(pmap); retry: pdep = pmap_pde(pmap, addr); if (*pdep != 0) { if (*pdep & PG_PS) { pte = *pdep; /* Compute the physical address of the 4KB page. */ pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) & PG_FRAME; val = MINCORE_SUPER; } else { ptep = pmap_pte(pmap, addr); pte = *ptep; pmap_pte_release(ptep); pa = pte & PG_FRAME; val = 0; } } else { pte = 0; pa = 0; val = 0; } if ((pte & PG_V) != 0) { val |= MINCORE_INCORE; if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((pte & PG_A) != 0) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; } if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) { /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) goto retry; } else PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); return (val); } void pmap_activate(struct thread *td) { pmap_t pmap, oldpmap; u_int cpuid; u_int32_t cr3; critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); cpuid = PCPU_GET(cpuid); #if defined(SMP) CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active); CPU_SET_ATOMIC(cpuid, &pmap->pm_active); #else CPU_CLR(cpuid, &oldpmap->pm_active); CPU_SET(cpuid, &pmap->pm_active); #endif #if defined(PAE) || defined(PAE_TABLES) cr3 = vtophys(pmap->pm_pdpt); #else cr3 = vtophys(pmap->pm_pdir); #endif /* * pmap_activate is for the current thread on the current cpu */ td->td_pcb->pcb_cr3 = cr3; load_cr3(cr3); PCPU_SET(curpmap, pmap); critical_exit(); } void pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) { } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. 
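 *
 * Worked example (illustrative): with 4MB superpages, NBPDR is 4MB
 * and PDRMASK is 0x3fffff.  Suppose the object's superpage-relative
 * offset works out to 1MB and the caller proposed
 *
 *	addr             = 0x40300000
 *	superpage_offset = 0x00100000
 *
 * (addr & PDRMASK) is 0x300000, which is not below the offset, so
 * the address is rounded up to the next superpage boundary plus the
 * offset:
 *
 *	addr = ((addr + PDRMASK) & ~PDRMASK) + superpage_offset
 *	     = 0x40500000
 *
 * after which whole 4MB chunks of the object line up with 4MB page
 * frames.
 *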
*/ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { vm_offset_t superpage_offset; if (size < NBPDR) return; if (object != NULL && (object->flags & OBJ_COLORED) != 0) offset += ptoa(object->pg_color); superpage_offset = offset & PDRMASK; if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR || (*addr & PDRMASK) == superpage_offset) return; if ((*addr & PDRMASK) < superpage_offset) *addr = (*addr & ~PDRMASK) + superpage_offset; else *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset; } vm_offset_t pmap_quick_enter_page(vm_page_t m) { vm_offset_t qaddr; pt_entry_t *pte; critical_enter(); qaddr = PCPU_GET(qmap_addr); pte = vtopte(qaddr); KASSERT(*pte == 0, ("pmap_quick_enter_page: PTE busy")); *pte = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(pmap_page_get_memattr(m), 0); invlpg(qaddr); return (qaddr); } void pmap_quick_remove_page(vm_offset_t addr) { vm_offset_t qaddr; pt_entry_t *pte; qaddr = PCPU_GET(qmap_addr); pte = vtopte(qaddr); KASSERT(*pte != 0, ("pmap_quick_remove_page: PTE not in use")); KASSERT(addr == qaddr, ("pmap_quick_remove_page: invalid address")); *pte = 0; critical_exit(); } #if defined(PMAP_DEBUG) pmap_pid_dump(int pid) { pmap_t pmap; struct proc *p; int npte = 0; int index; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { if (p->p_pid != pid) continue; if (p->p_vmspace) { int i,j; index = 0; pmap = vmspace_pmap(p->p_vmspace); for (i = 0; i < NPDEPTD; i++) { pd_entry_t *pde; pt_entry_t *pte; vm_offset_t base = i << PDRSHIFT; pde = &pmap->pm_pdir[i]; if (pde && pmap_pde_v(pde)) { for (j = 0; j < NPTEPG; j++) { vm_offset_t va = base + (j << PAGE_SHIFT); if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { if (index) { index = 0; printf("\n"); } sx_sunlock(&allproc_lock); return (npte); } pte = pmap_pte(pmap, va); if (pte && pmap_pte_v(pte)) { pt_entry_t pa; vm_page_t m; pa = *pte; m = PHYS_TO_VM_PAGE(pa & PG_FRAME); printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", va, pa, m->hold_count, m->wire_count, m->flags); npte++; index++; if (index >= 2) { index = 0; printf("\n"); } else { printf(" "); } } } } } } } sx_sunlock(&allproc_lock); return (npte); } #endif Index: projects/ipsec/sys/i386/i386/ptrace_machdep.c =================================================================== --- projects/ipsec/sys/i386/i386/ptrace_machdep.c (revision 313186) +++ projects/ipsec/sys/i386/i386/ptrace_machdep.c (revision 313187) @@ -1,206 +1,196 @@ /*- * Copyright (c) 2005 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include #include #include #include #include #include #include #include -#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) -#define CPU_ENABLE_SSE -#endif - -#ifdef CPU_ENABLE_SSE static int cpu_ptrace_xstate(struct thread *td, int req, void *addr, int data) { struct ptrace_xstate_info info; char *savefpu; int error; if (!use_xsave) return (EOPNOTSUPP); switch (req) { case PT_GETXSTATE_OLD: npxgetregs(td); savefpu = (char *)(get_pcb_user_save_td(td) + 1); error = copyout(savefpu, addr, cpu_max_ext_state_size - sizeof(union savefpu)); break; case PT_SETXSTATE_OLD: if (data > cpu_max_ext_state_size - sizeof(union savefpu)) { error = EINVAL; break; } savefpu = malloc(data, M_TEMP, M_WAITOK); error = copyin(addr, savefpu, data); if (error == 0) { npxgetregs(td); error = npxsetxstate(td, savefpu, data); } free(savefpu, M_TEMP); break; case PT_GETXSTATE_INFO: if (data != sizeof(info)) { error = EINVAL; break; } info.xsave_len = cpu_max_ext_state_size; info.xsave_mask = xsave_mask; error = copyout(&info, addr, data); break; case PT_GETXSTATE: npxgetregs(td); savefpu = (char *)(get_pcb_user_save_td(td)); error = copyout(savefpu, addr, cpu_max_ext_state_size); break; case PT_SETXSTATE: if (data < sizeof(union savefpu) || data > cpu_max_ext_state_size) { error = EINVAL; break; } savefpu = malloc(data, M_TEMP, M_WAITOK); error = copyin(addr, savefpu, data); if (error == 0) error = npxsetregs(td, (union savefpu *)savefpu, savefpu + sizeof(union savefpu), data - sizeof(union savefpu)); free(savefpu, M_TEMP); break; default: error = EINVAL; break; } return (error); } -#endif static int cpu_ptrace_xmm(struct thread *td, int req, void *addr, int data) { -#ifdef CPU_ENABLE_SSE struct savexmm *fpstate; int error; if (!cpu_fxsr) return (EINVAL); fpstate = &get_pcb_user_save_td(td)->sv_xmm; switch (req) { case PT_GETXMMREGS: npxgetregs(td); error = copyout(fpstate, addr, sizeof(*fpstate)); break; case PT_SETXMMREGS: npxgetregs(td); error = copyin(addr, fpstate, sizeof(*fpstate)); fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask; break; case PT_GETXSTATE_OLD: case PT_SETXSTATE_OLD: case PT_GETXSTATE_INFO: case PT_GETXSTATE: case PT_SETXSTATE: error = cpu_ptrace_xstate(td, req, addr, data); break; default: return (EINVAL); } return (error); -#else - return (EINVAL); -#endif } int cpu_ptrace(struct thread *td, int req, void *addr, int data) { struct segment_descriptor *sdp, sd; register_t r; int error; switch (req) { case PT_GETXMMREGS: case PT_SETXMMREGS: case PT_GETXSTATE_OLD: case PT_SETXSTATE_OLD: case PT_GETXSTATE_INFO: case PT_GETXSTATE: case PT_SETXSTATE: error = cpu_ptrace_xmm(td, req, addr, data); break; case PT_GETFSBASE: case PT_GETGSBASE: sdp = req == PT_GETFSBASE ? 
&td->td_pcb->pcb_fsd : &td->td_pcb->pcb_gsd; r = sdp->sd_hibase << 24 | sdp->sd_lobase; error = copyout(&r, addr, sizeof(r)); break; case PT_SETFSBASE: case PT_SETGSBASE: error = copyin(addr, &r, sizeof(r)); if (error != 0) break; fill_based_sd(&sd, r); if (req == PT_SETFSBASE) { td->td_pcb->pcb_fsd = sd; td->td_frame->tf_fs = GSEL(GUFS_SEL, SEL_UPL); } else { td->td_pcb->pcb_gsd = sd; td->td_pcb->pcb_gs = GSEL(GUGS_SEL, SEL_UPL); } break; default: return (EINVAL); } return (error); } Index: projects/ipsec/sys/i386/i386/support.s =================================================================== --- projects/ipsec/sys/i386/i386/support.s (revision 313186) +++ projects/ipsec/sys/i386/i386/support.s (revision 313187) @@ -1,841 +1,839 @@ /*- * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ -#include "opt_npx.h" - #include #include #include #include #include "assym.s" #define IDXSHIFT 10 .text /* * bcopy family * void bzero(void *buf, u_int len) */ ENTRY(bzero) pushl %edi movl 8(%esp),%edi movl 12(%esp),%ecx xorl %eax,%eax shrl $2,%ecx cld rep stosl movl 12(%esp),%ecx andl $3,%ecx rep stosb popl %edi ret END(bzero) ENTRY(sse2_pagezero) pushl %ebx movl 8(%esp),%ecx movl %ecx,%eax addl $4096,%eax xor %ebx,%ebx jmp 1f /* * The loop takes 14 bytes. Ensure that it doesn't cross a 16-byte * cache line. 
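 *
 * (movnti performs a non-temporal store that bypasses the caches;
 * the sfence after the loop orders those stores with respect to
 * anything the caller does next.)
 *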
*/ .p2align 4,0x90 1: movnti %ebx,(%ecx) movnti %ebx,4(%ecx) addl $8,%ecx cmpl %ecx,%eax jne 1b sfence popl %ebx ret END(sse2_pagezero) ENTRY(i686_pagezero) pushl %edi pushl %ebx movl 12(%esp),%edi movl $1024,%ecx cld ALIGN_TEXT 1: xorl %eax,%eax repe scasl jnz 2f popl %ebx popl %edi ret ALIGN_TEXT 2: incl %ecx subl $4,%edi movl %ecx,%edx cmpl $16,%ecx jge 3f movl %edi,%ebx andl $0x3f,%ebx shrl %ebx shrl %ebx movl $16,%ecx subl %ebx,%ecx 3: subl %ecx,%edx rep stosl movl %edx,%ecx testl %edx,%edx jnz 1b popl %ebx popl %edi ret END(i686_pagezero) /* fillw(pat, base, cnt) */ ENTRY(fillw) pushl %edi movl 8(%esp),%eax movl 12(%esp),%edi movl 16(%esp),%ecx cld rep stosw popl %edi ret END(fillw) ENTRY(bcopyb) pushl %esi pushl %edi movl 12(%esp),%esi movl 16(%esp),%edi movl 20(%esp),%ecx movl %edi,%eax subl %esi,%eax cmpl %ecx,%eax /* overlapping && src < dst? */ jb 1f cld /* nope, copy forwards */ rep movsb popl %edi popl %esi ret ALIGN_TEXT 1: addl %ecx,%edi /* copy backwards. */ addl %ecx,%esi decl %edi decl %esi std rep movsb popl %edi popl %esi cld ret END(bcopyb) /* * bcopy(src, dst, cnt) * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ ENTRY(bcopy) pushl %ebp movl %esp,%ebp pushl %esi pushl %edi movl 8(%ebp),%esi movl 12(%ebp),%edi movl 16(%ebp),%ecx movl %edi,%eax subl %esi,%eax cmpl %ecx,%eax /* overlapping && src < dst? */ jb 1f shrl $2,%ecx /* copy by 32-bit words */ cld /* nope, copy forwards */ rep movsl movl 16(%ebp),%ecx andl $3,%ecx /* any bytes left? */ rep movsb popl %edi popl %esi popl %ebp ret ALIGN_TEXT 1: addl %ecx,%edi /* copy backwards */ addl %ecx,%esi decl %edi decl %esi andl $3,%ecx /* any fractional bytes? */ std rep movsb movl 16(%ebp),%ecx /* copy remainder by 32-bit words */ shrl $2,%ecx subl $3,%esi subl $3,%edi rep movsl popl %edi popl %esi cld popl %ebp ret END(bcopy) /* * Note: memcpy does not support overlapping copies */ ENTRY(memcpy) pushl %edi pushl %esi movl 12(%esp),%edi movl 16(%esp),%esi movl 20(%esp),%ecx movl %edi,%eax shrl $2,%ecx /* copy by 32-bit words */ cld /* nope, copy forwards */ rep movsl movl 20(%esp),%ecx andl $3,%ecx /* any bytes left? */ rep movsb popl %esi popl %edi ret END(memcpy) /*****************************************************************************/ /* copyout and fubyte family */ /*****************************************************************************/ /* * Access user memory from inside the kernel. These routines and possibly * the math- and DOS emulators should be the only places that do this. * * We have to access the memory with user's permissions, so use a segment * selector with RPL 3. For writes to user space we have to additionally * check the PTE for write permission, because the 386 does not check * write permissions when we are executing with EPL 0. The 486 does check * this if the WP bit is set in CR0, so we can use a simpler version here. * * These routines set curpcb->pcb_onfault for the time they execute. When a * protection violation occurs inside the functions, the trap handler * returns to *curpcb->pcb_onfault instead of the function. */ /* * copyout(from_kernel, to_user, len) - MP SAFE */ ENTRY(copyout) movl PCPU(CURPCB),%eax movl $copyout_fault,PCB_ONFAULT(%eax) pushl %esi pushl %edi pushl %ebx movl 16(%esp),%esi movl 20(%esp),%edi movl 24(%esp),%ebx testl %ebx,%ebx /* anything to do? */ jz done_copyout /* * Check explicitly for non-user addresses. 
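 *
 * In C terms, the two checks that follow amount to (illustrative
 * rendering, with "uva" the user address in %edi and "len" the
 * count in %ebx):
 *
 *	if (uva + len < uva)			wraps around
 *		goto copyout_fault;
 *	if (uva + len > VM_MAXUSER_ADDRESS)	ends past user space
 *		goto copyout_fault;
 *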
If 486 write protection * is being used, this check is essential because we are in kernel * mode so the h/w does not provide any protection against writing * kernel addresses. */ /* * First, prevent address wrapping. */ movl %edi,%eax addl %ebx,%eax jc copyout_fault /* * XXX STOP USING VM_MAXUSER_ADDRESS. * It is an end address, not a max, so every time it is used correctly it * looks like there is an off by one error, and of course it caused an off * by one error in several places. */ cmpl $VM_MAXUSER_ADDRESS,%eax ja copyout_fault /* bcopy(%esi, %edi, %ebx) */ movl %ebx,%ecx shrl $2,%ecx cld rep movsl movb %bl,%cl andb $3,%cl rep movsb done_copyout: popl %ebx popl %edi popl %esi xorl %eax,%eax movl PCPU(CURPCB),%edx movl %eax,PCB_ONFAULT(%edx) ret END(copyout) ALIGN_TEXT copyout_fault: popl %ebx popl %edi popl %esi movl PCPU(CURPCB),%edx movl $0,PCB_ONFAULT(%edx) movl $EFAULT,%eax ret /* * copyin(from_user, to_kernel, len) - MP SAFE */ ENTRY(copyin) movl PCPU(CURPCB),%eax movl $copyin_fault,PCB_ONFAULT(%eax) pushl %esi pushl %edi movl 12(%esp),%esi /* caddr_t from */ movl 16(%esp),%edi /* caddr_t to */ movl 20(%esp),%ecx /* size_t len */ /* * make sure address is valid */ movl %esi,%edx addl %ecx,%edx jc copyin_fault cmpl $VM_MAXUSER_ADDRESS,%edx ja copyin_fault movb %cl,%al shrl $2,%ecx /* copy longword-wise */ cld rep movsl movb %al,%cl andb $3,%cl /* copy remaining bytes */ rep movsb popl %edi popl %esi xorl %eax,%eax movl PCPU(CURPCB),%edx movl %eax,PCB_ONFAULT(%edx) ret END(copyin) ALIGN_TEXT copyin_fault: popl %edi popl %esi movl PCPU(CURPCB),%edx movl $0,PCB_ONFAULT(%edx) movl $EFAULT,%eax ret /* * casueword. Compare and set user word. Returns -1 on fault, * 0 on non-faulting access. The current value is in *oldp. */ ALTENTRY(casueword32) ENTRY(casueword) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx /* dst */ movl 8(%esp),%eax /* old */ movl 16(%esp),%ecx /* new */ cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ ja fusufault #ifdef SMP lock #endif cmpxchgl %ecx,(%edx) /* Compare and set. */ /* * The old value is in %eax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. */ movl PCPU(CURPCB),%ecx movl $0,PCB_ONFAULT(%ecx) movl 12(%esp),%edx /* oldp */ movl %eax,(%edx) xorl %eax,%eax ret END(casueword32) END(casueword) /* * Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user * memory. */ ALTENTRY(fueword32) ENTRY(fueword) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx /* from */ cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ ja fusufault movl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) movl 8(%esp),%edx movl %eax,(%edx) xorl %eax,%eax ret END(fueword32) END(fueword) /* * fuswintr() and suswintr() are specialized variants of fuword16() and * suword16(), respectively. They are called from the profiling code, * potentially at interrupt time. If they fail, that's okay; good things * will happen later. They always fail for now, until the trap code is * able to deal with this. 
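 *
 * In C terms the stubs below are simply (illustrative rendering)
 *
 *	int fuswintr(void *base)	{ return (-1); }
 *	int suswintr(void *base, int v)	{ return (-1); }
 *
 * so callers must treat the access as having faulted and redo it
 * later from a context where the fault can be handled.
 *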
*/ ALTENTRY(suswintr) ENTRY(fuswintr) movl $-1,%eax ret END(suswintr) END(fuswintr) ENTRY(fuword16) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-2,%edx ja fusufault movzwl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret END(fuword16) ENTRY(fubyte) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-1,%edx ja fusufault movzbl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret END(fubyte) ALIGN_TEXT fusufault: movl PCPU(CURPCB),%ecx xorl %eax,%eax movl %eax,PCB_ONFAULT(%ecx) decl %eax ret /* * Store a 32-bit word, a 16-bit word, or an 8-bit byte to user memory. * All these functions are MPSAFE. */ ALTENTRY(suword32) ENTRY(suword) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address validity */ ja fusufault movl 8(%esp),%eax movl %eax,(%edx) xorl %eax,%eax movl PCPU(CURPCB),%ecx movl %eax,PCB_ONFAULT(%ecx) ret END(suword32) END(suword) ENTRY(suword16) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-2,%edx /* verify address validity */ ja fusufault movw 8(%esp),%ax movw %ax,(%edx) xorl %eax,%eax movl PCPU(CURPCB),%ecx /* restore trashed register */ movl %eax,PCB_ONFAULT(%ecx) ret END(suword16) ENTRY(subyte) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-1,%edx /* verify address validity */ ja fusufault movb 8(%esp),%al movb %al,(%edx) xorl %eax,%eax movl PCPU(CURPCB),%ecx /* restore trashed register */ movl %eax,PCB_ONFAULT(%ecx) ret END(subyte) /* * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE * * copy a string from from to to, stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and * EFAULT on protection violations. If lencopied is non-zero, * return the actual length in *lencopied. 
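 *
 * Typical use from C, as an illustrative sketch ("uaddr" is a
 * hypothetical user pointer):
 *
 *	char path[MAXPATHLEN];
 *	size_t done;
 *	int error;
 *
 *	error = copyinstr(uaddr, path, sizeof(path), &done);
 *
 * where "done" counts the terminating NUL on success.
 *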
*/ ENTRY(copyinstr) pushl %esi pushl %edi movl PCPU(CURPCB),%ecx movl $cpystrflt,PCB_ONFAULT(%ecx) movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ movl $VM_MAXUSER_ADDRESS,%eax /* make sure 'from' is within bounds */ subl %esi,%eax jbe cpystrflt /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ cmpl %edx,%eax jae 1f movl %eax,%edx movl %eax,20(%esp) 1: incl %edx cld 2: decl %edx jz 3f lodsb stosb orb %al,%al jnz 2b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp cpystrflt_x 3: /* edx is zero - return ENAMETOOLONG or EFAULT */ cmpl $VM_MAXUSER_ADDRESS,%esi jae cpystrflt 4: movl $ENAMETOOLONG,%eax jmp cpystrflt_x cpystrflt: movl $EFAULT,%eax cpystrflt_x: /* set *lencopied and return %eax */ movl PCPU(CURPCB),%ecx movl $0,PCB_ONFAULT(%ecx) movl 20(%esp),%ecx subl %edx,%ecx movl 24(%esp),%edx testl %edx,%edx jz 1f movl %ecx,(%edx) 1: popl %edi popl %esi ret END(copyinstr) /* * copystr(from, to, maxlen, int *lencopied) - MP SAFE */ ENTRY(copystr) pushl %esi pushl %edi movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ incl %edx cld 1: decl %edx jz 4f lodsb stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp 6f 4: /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG,%eax 6: /* set *lencopied and return %eax */ movl 20(%esp),%ecx subl %edx,%ecx movl 24(%esp),%edx testl %edx,%edx jz 7f movl %ecx,(%edx) 7: popl %edi popl %esi ret END(copystr) ENTRY(bcmp) pushl %edi pushl %esi movl 12(%esp),%edi movl 16(%esp),%esi movl 20(%esp),%edx movl %edx,%ecx shrl $2,%ecx cld /* compare forwards */ repe cmpsl jne 1f movl %edx,%ecx andl $3,%ecx repe cmpsb 1: setne %al movsbl %al,%eax popl %esi popl %edi ret END(bcmp) /* * Handling of special 386 registers and descriptor tables etc */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) /* reload the descriptor table */ movl 4(%esp),%eax lgdt (%eax) /* flush the prefetch q */ jmp 1f nop 1: /* reload "stale" selectors */ movl $KDSEL,%eax movl %eax,%ds movl %eax,%es movl %eax,%gs movl %eax,%ss movl $KPSEL,%eax movl %eax,%fs /* reload code selector by turning return into intersegmental return */ movl (%esp),%eax pushl %eax movl $KCSEL,4(%esp) MEXITCOUNT lret END(lgdt) /* ssdtosd(*ssdp,*sdp) */ ENTRY(ssdtosd) pushl %ebx movl 8(%esp),%ecx movl 8(%ecx),%ebx shll $16,%ebx movl (%ecx),%edx roll $16,%edx movb %dh,%bl movb %dl,%bh rorl $8,%ebx movl 4(%ecx),%eax movw %ax,%dx andl $0xf0000,%eax orl %eax,%ebx movl 12(%esp),%ecx movl %edx,(%ecx) movl %ebx,4(%ecx) popl %ebx ret END(ssdtosd) /* void reset_dbregs() */ ENTRY(reset_dbregs) movl $0,%eax movl %eax,%dr7 /* disable all breakpoints first */ movl %eax,%dr0 movl %eax,%dr1 movl %eax,%dr2 movl %eax,%dr3 movl %eax,%dr6 ret END(reset_dbregs) /*****************************************************************************/ /* setjump, longjump */ /*****************************************************************************/ ENTRY(setjmp) movl 4(%esp),%eax movl %ebx,(%eax) /* save ebx */ movl %esp,4(%eax) /* save esp */ movl %ebp,8(%eax) /* save ebp */ movl %esi,12(%eax) /* save esi */ movl %edi,16(%eax) /* save edi */ movl (%esp),%edx /* get rta */ movl %edx,20(%eax) /* save eip */ xorl %eax,%eax /* return(0); */ ret END(setjmp) ENTRY(longjmp) movl 4(%esp),%eax movl (%eax),%ebx /* restore ebx */ movl 4(%eax),%esp /* restore esp */ movl 8(%eax),%ebp /* restore ebp */ movl 12(%eax),%esi /* restore esi */ movl 16(%eax),%edi /* restore edi */ movl 
20(%eax),%edx /* get rta */ movl %edx,(%esp) /* put in return frame */ xorl %eax,%eax /* return(1); */ incl %eax ret END(longjmp) /* * Support for reading MSRs in the safe manner. */ ENTRY(rdmsr_safe) /* int rdmsr_safe(u_int msr, uint64_t *data) */ movl PCPU(CURPCB),%ecx movl $msr_onfault,PCB_ONFAULT(%ecx) movl 4(%esp),%ecx rdmsr movl 8(%esp),%ecx movl %eax,(%ecx) movl %edx,4(%ecx) xorl %eax,%eax movl PCPU(CURPCB),%ecx movl %eax,PCB_ONFAULT(%ecx) ret /* * Support for writing MSRs in the safe manner. */ ENTRY(wrmsr_safe) /* int wrmsr_safe(u_int msr, uint64_t data) */ movl PCPU(CURPCB),%ecx movl $msr_onfault,PCB_ONFAULT(%ecx) movl 4(%esp),%ecx movl 8(%esp),%eax movl 12(%esp),%edx wrmsr xorl %eax,%eax movl PCPU(CURPCB),%ecx movl %eax,PCB_ONFAULT(%ecx) ret /* * MSR operations fault handler */ ALIGN_TEXT msr_onfault: movl PCPU(CURPCB),%ecx movl $0,PCB_ONFAULT(%ecx) movl $EFAULT,%eax ret Index: projects/ipsec/sys/i386/i386/swtch.s =================================================================== --- projects/ipsec/sys/i386/i386/swtch.s (revision 313186) +++ projects/ipsec/sys/i386/i386/swtch.s (revision 313187) @@ -1,471 +1,468 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ -#include "opt_npx.h" #include "opt_sched.h" #include #include "assym.s" #if defined(SMP) && defined(SCHED_ULE) #define SETOP xchgl #define BLOCK_SPIN(reg) \ movl $blocked_lock,%eax ; \ 100: ; \ lock ; \ cmpxchgl %eax,TD_LOCK(reg) ; \ jne 101f ; \ pause ; \ jmp 100b ; \ 101: #else #define SETOP movl #define BLOCK_SPIN(reg) #endif /*****************************************************************************/ /* Scheduling */ /*****************************************************************************/ .text /* * cpu_throw() * * This is the second half of cpu_switch(). It is used when the current * thread is either a dummy or slated to die, and we no longer care * about its state. This is only a slight optimization and is probably * not worth it anymore. 
Note that we need to clear the pm_active bits so * we do need the old proc if it still exists. * 0(%esp) = ret * 4(%esp) = oldtd * 8(%esp) = newtd */ ENTRY(cpu_throw) movl PCPU(CPUID), %esi movl 4(%esp),%ecx /* Old thread */ testl %ecx,%ecx /* no thread? */ jz 1f /* release bit from old pm_active */ movl PCPU(CURPMAP), %ebx #ifdef SMP lock #endif btrl %esi, PM_ACTIVE(%ebx) /* clear old */ 1: movl 8(%esp),%ecx /* New thread */ movl TD_PCB(%ecx),%edx movl PCB_CR3(%edx),%eax movl %eax,%cr3 /* set bit in new pm_active */ movl TD_PROC(%ecx),%eax movl P_VMSPACE(%eax), %ebx addl $VM_PMAP, %ebx movl %ebx, PCPU(CURPMAP) #ifdef SMP lock #endif btsl %esi, PM_ACTIVE(%ebx) /* set new */ jmp sw1 END(cpu_throw) /* * cpu_switch(old, new) * * Save the current thread state, then select the next thread to run * and load its state. * 0(%esp) = ret * 4(%esp) = oldtd * 8(%esp) = newtd * 12(%esp) = newlock */ ENTRY(cpu_switch) /* Switch to new thread. First, save context. */ movl 4(%esp),%ecx #ifdef INVARIANTS testl %ecx,%ecx /* no thread? */ jz badsw2 /* no, panic */ #endif movl TD_PCB(%ecx),%edx movl (%esp),%eax /* Hardware registers */ movl %eax,PCB_EIP(%edx) movl %ebx,PCB_EBX(%edx) movl %esp,PCB_ESP(%edx) movl %ebp,PCB_EBP(%edx) movl %esi,PCB_ESI(%edx) movl %edi,PCB_EDI(%edx) mov %gs,PCB_GS(%edx) /* Test if debug registers should be saved. */ testl $PCB_DBREGS,PCB_FLAGS(%edx) jz 1f /* no, skip over */ movl %dr7,%eax /* yes, do the save */ movl %eax,PCB_DR7(%edx) andl $0x0000fc00, %eax /* disable all watchpoints */ movl %eax,%dr7 movl %dr6,%eax movl %eax,PCB_DR6(%edx) movl %dr3,%eax movl %eax,PCB_DR3(%edx) movl %dr2,%eax movl %eax,PCB_DR2(%edx) movl %dr1,%eax movl %eax,PCB_DR1(%edx) movl %dr0,%eax movl %eax,PCB_DR0(%edx) 1: -#ifdef DEV_NPX /* have we used fp, and need a save? */ cmpl %ecx,PCPU(FPCURTHREAD) jne 1f pushl PCB_SAVEFPU(%edx) /* h/w bugs make saving complicated */ call npxsave /* do it in a big C function */ popl %eax 1: -#endif /* Save is done. Now fire up new thread. Leave old vmspace. */ movl 4(%esp),%edi movl 8(%esp),%ecx /* New thread */ movl 12(%esp),%esi /* New lock */ #ifdef INVARIANTS testl %ecx,%ecx /* no thread? */ jz badsw3 /* no, panic */ #endif movl TD_PCB(%ecx),%edx /* switch address space */ movl PCB_CR3(%edx),%eax movl %cr3,%ebx /* The same address space? */ cmpl %ebx,%eax je sw0 movl %eax,%cr3 /* new address space */ movl %esi,%eax movl PCPU(CPUID),%esi SETOP %eax,TD_LOCK(%edi) /* Switchout td_lock */ /* Release bit from old pmap->pm_active */ movl PCPU(CURPMAP), %ebx #ifdef SMP lock #endif btrl %esi, PM_ACTIVE(%ebx) /* clear old */ /* Set bit in new pmap->pm_active */ movl TD_PROC(%ecx),%eax /* newproc */ movl P_VMSPACE(%eax), %ebx addl $VM_PMAP, %ebx movl %ebx, PCPU(CURPMAP) #ifdef SMP lock #endif btsl %esi, PM_ACTIVE(%ebx) /* set new */ jmp sw1 sw0: SETOP %esi,TD_LOCK(%edi) /* Switchout td_lock */ sw1: BLOCK_SPIN(%ecx) /* * At this point, we've switched address spaces and are ready * to load up the rest of the next context. */ cmpl $0, PCB_EXT(%edx) /* has pcb extension? */ je 1f /* If not, use the default */ movl $1, PCPU(PRIVATE_TSS) /* mark use of private tss */ movl PCB_EXT(%edx), %edi /* new tss descriptor */ jmp 2f /* Load it up */ 1: /* * Use the common default TSS instead of our own. * Set our stack pointer into the TSS, it's set to just * below the PCB. In C, common_tss.tss_esp0 = &pcb - 16; */ leal -16(%edx), %ebx /* leave space for vm86 */ movl %ebx, PCPU(COMMON_TSS) + TSS_ESP0 /* * Test this CPU's bit in the bitmap to see if this * CPU was using a private TSS. 
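 *
 * (In this version the state tested is the per-CPU PRIVATE_TSS flag
 * set above, not a global bitmap.)
 *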
*/ cmpl $0, PCPU(PRIVATE_TSS) /* Already using the common? */ je 3f /* if so, skip reloading */ movl $0, PCPU(PRIVATE_TSS) PCPU_ADDR(COMMON_TSSD, %edi) 2: /* Move correct tss descriptor into GDT slot, then reload tr. */ movl PCPU(TSS_GDT), %ebx /* entry in GDT */ movl 0(%edi), %eax movl 4(%edi), %esi movl %eax, 0(%ebx) movl %esi, 4(%ebx) movl $GPROC0_SEL*8, %esi /* GSEL(GPROC0_SEL, SEL_KPL) */ ltr %si 3: /* Copy the %fs and %gs selectors into this pcpu gdt */ leal PCB_FSD(%edx), %esi movl PCPU(FSGS_GDT), %edi movl 0(%esi), %eax /* %fs selector */ movl 4(%esi), %ebx movl %eax, 0(%edi) movl %ebx, 4(%edi) movl 8(%esi), %eax /* %gs selector, comes straight after */ movl 12(%esi), %ebx movl %eax, 8(%edi) movl %ebx, 12(%edi) /* Restore context. */ movl PCB_EBX(%edx),%ebx movl PCB_ESP(%edx),%esp movl PCB_EBP(%edx),%ebp movl PCB_ESI(%edx),%esi movl PCB_EDI(%edx),%edi movl PCB_EIP(%edx),%eax movl %eax,(%esp) movl %edx, PCPU(CURPCB) movl TD_TID(%ecx),%eax movl %ecx, PCPU(CURTHREAD) /* into next thread */ /* * Determine the LDT to use and load it if is the default one and * that is not the current one. */ movl TD_PROC(%ecx),%eax cmpl $0,P_MD+MD_LDT(%eax) jnz 1f movl _default_ldt,%eax cmpl PCPU(CURRENTLDT),%eax je 2f lldt _default_ldt movl %eax,PCPU(CURRENTLDT) jmp 2f 1: /* Load the LDT when it is not the default one. */ pushl %edx /* Preserve pointer to pcb. */ addl $P_MD,%eax /* Pointer to mdproc is arg. */ pushl %eax call set_user_ldt addl $4,%esp popl %edx 2: /* This must be done after loading the user LDT. */ .globl cpu_switch_load_gs cpu_switch_load_gs: mov PCB_GS(%edx),%gs /* Test if debug registers should be restored. */ testl $PCB_DBREGS,PCB_FLAGS(%edx) jz 1f /* * Restore debug registers. The special code for dr7 is to * preserve the current values of its reserved bits. */ movl PCB_DR6(%edx),%eax movl %eax,%dr6 movl PCB_DR3(%edx),%eax movl %eax,%dr3 movl PCB_DR2(%edx),%eax movl %eax,%dr2 movl PCB_DR1(%edx),%eax movl %eax,%dr1 movl PCB_DR0(%edx),%eax movl %eax,%dr0 movl %dr7,%eax andl $0x0000fc00,%eax movl PCB_DR7(%edx),%ecx andl $~0x0000fc00,%ecx orl %ecx,%eax movl %eax,%dr7 1: ret #ifdef INVARIANTS badsw1: pushal pushl $sw0_1 call panic sw0_1: .asciz "cpu_throw: no newthread supplied" badsw2: pushal pushl $sw0_2 call panic sw0_2: .asciz "cpu_switch: no curthread supplied" badsw3: pushal pushl $sw0_3 call panic sw0_3: .asciz "cpu_switch: no newthread supplied" #endif END(cpu_switch) /* * savectx(pcb) * Update pcb, saving current processor state. */ ENTRY(savectx) /* Fetch PCB. */ movl 4(%esp),%ecx /* Save caller's return address. Child won't execute this routine. */ movl (%esp),%eax movl %eax,PCB_EIP(%ecx) movl %cr3,%eax movl %eax,PCB_CR3(%ecx) movl %ebx,PCB_EBX(%ecx) movl %esp,PCB_ESP(%ecx) movl %ebp,PCB_EBP(%ecx) movl %esi,PCB_ESI(%ecx) movl %edi,PCB_EDI(%ecx) mov %gs,PCB_GS(%ecx) movl %cr0,%eax movl %eax,PCB_CR0(%ecx) movl %cr2,%eax movl %eax,PCB_CR2(%ecx) movl %cr4,%eax movl %eax,PCB_CR4(%ecx) movl %dr0,%eax movl %eax,PCB_DR0(%ecx) movl %dr1,%eax movl %eax,PCB_DR1(%ecx) movl %dr2,%eax movl %eax,PCB_DR2(%ecx) movl %dr3,%eax movl %eax,PCB_DR3(%ecx) movl %dr6,%eax movl %eax,PCB_DR6(%ecx) movl %dr7,%eax movl %eax,PCB_DR7(%ecx) mov %ds,PCB_DS(%ecx) mov %es,PCB_ES(%ecx) mov %fs,PCB_FS(%ecx) mov %ss,PCB_SS(%ecx) sgdt PCB_GDT(%ecx) sidt PCB_IDT(%ecx) sldt PCB_LDT(%ecx) str PCB_TR(%ecx) movl $1,%eax ret END(savectx) /* * resumectx(pcb) __fastcall * Resuming processor state from pcb. */ ENTRY(resumectx) /* Restore GDT. 
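 * (The pcb pointer arrives fastcall-style in %ecx, as the header
 * comment above notes; resumectx undoes the state saved by
 * savectx().)
 *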
*/ lgdt PCB_GDT(%ecx) /* Restore segment registers */ movzwl PCB_DS(%ecx),%eax mov %ax,%ds movzwl PCB_ES(%ecx),%eax mov %ax,%es movzwl PCB_FS(%ecx),%eax mov %ax,%fs movzwl PCB_GS(%ecx),%eax movw %ax,%gs movzwl PCB_SS(%ecx),%eax mov %ax,%ss /* Restore CR2, CR4, CR3 and CR0 */ movl PCB_CR2(%ecx),%eax movl %eax,%cr2 movl PCB_CR4(%ecx),%eax movl %eax,%cr4 movl PCB_CR3(%ecx),%eax movl %eax,%cr3 movl PCB_CR0(%ecx),%eax movl %eax,%cr0 jmp 1f 1: /* Restore descriptor tables */ lidt PCB_IDT(%ecx) lldt PCB_LDT(%ecx) #define SDT_SYS386TSS 9 #define SDT_SYS386BSY 11 /* Clear "task busy" bit and reload TR */ movl PCPU(TSS_GDT),%eax andb $(~SDT_SYS386BSY | SDT_SYS386TSS),5(%eax) movzwl PCB_TR(%ecx),%eax ltr %ax #undef SDT_SYS386TSS #undef SDT_SYS386BSY /* Restore debug registers */ movl PCB_DR0(%ecx),%eax movl %eax,%dr0 movl PCB_DR1(%ecx),%eax movl %eax,%dr1 movl PCB_DR2(%ecx),%eax movl %eax,%dr2 movl PCB_DR3(%ecx),%eax movl %eax,%dr3 movl PCB_DR6(%ecx),%eax movl %eax,%dr6 movl PCB_DR7(%ecx),%eax movl %eax,%dr7 /* Restore other registers */ movl PCB_EDI(%ecx),%edi movl PCB_ESI(%ecx),%esi movl PCB_EBP(%ecx),%ebp movl PCB_ESP(%ecx),%esp movl PCB_EBX(%ecx),%ebx /* reload code selector by turning return into intersegmental return */ pushl PCB_EIP(%ecx) movl $KCSEL,4(%esp) xorl %eax,%eax lret END(resumectx) Index: projects/ipsec/sys/i386/i386/trap.c =================================================================== --- projects/ipsec/sys/i386/i386/trap.c (revision 313186) +++ projects/ipsec/sys/i386/i386/trap.c (revision 313187) @@ -1,1137 +1,1124 @@ /*- * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 */ #include __FBSDID("$FreeBSD$"); /* * 386 Trap and System call handling */ #include "opt_clock.h" #include "opt_cpu.h" #include "opt_hwpmc_hooks.h" #include "opt_isa.h" #include "opt_kdb.h" -#include "opt_npx.h" #include "opt_stack.h" #include "opt_trap.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , page_fault, all); PMC_SOFT_DEFINE( , , page_fault, read); PMC_SOFT_DEFINE( , , page_fault, write); #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #ifdef POWERFAIL_NMI #include #include #endif #ifdef KDTRACE_HOOKS #include #endif extern void trap(struct trapframe *frame); extern void syscall(struct trapframe *frame); static int trap_pfault(struct trapframe *, int, vm_offset_t); static void trap_fatal(struct trapframe *, vm_offset_t); void dblfault_handler(void); extern inthand_t IDTVEC(lcall_syscall); #define MAX_TRAP_MSG 32 static char *trap_msg[] = { "", /* 0 unused */ "privileged instruction fault", /* 1 T_PRIVINFLT */ "", /* 2 unused */ "breakpoint instruction fault", /* 3 T_BPTFLT */ "", /* 4 unused */ "", /* 5 unused */ "arithmetic trap", /* 6 T_ARITHTRAP */ "", /* 7 unused */ "", /* 8 unused */ "general protection fault", /* 9 T_PROTFLT */ "trace trap", /* 10 T_TRCTRAP */ "", /* 11 unused */ "page fault", /* 12 T_PAGEFLT */ "", /* 13 unused */ "alignment fault", /* 14 T_ALIGNFLT */ "", /* 15 unused */ "", /* 16 unused */ "", /* 17 unused */ "integer divide fault", /* 18 T_DIVIDE */ "non-maskable interrupt trap", /* 19 T_NMI */ "overflow trap", /* 20 T_OFLOW */ "FPU bounds check fault", /* 21 T_BOUND */ "FPU device not available", /* 22 T_DNA */ "double fault", /* 23 T_DOUBLEFLT */ "FPU operand fetch fault", /* 24 T_FPOPFLT */ "invalid TSS fault", /* 25 T_TSSFLT */ "segment not present fault", /* 26 T_SEGNPFLT */ "stack fault", /* 27 T_STKFLT */ "machine check trap", /* 28 T_MCHK */ "SIMD floating-point exception", /* 29 T_XMMFLT */ "reserved (unknown) fault", /* 30 T_RESERVED */ "", /* 31 unused (reserved) */ "DTrace pid return trap", /* 32 T_DTRACE_RET */ }; #if defined(I586_CPU) && !defined(NO_F00F_HACK) int has_f00f_bug = 0; /* Initialized so that it can be patched. */ #endif static int prot_fault_translation = 0; SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW, &prot_fault_translation, 0, "Select signal to deliver on protection fault"); static int uprintf_signal; SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RW, &uprintf_signal, 0, "Print debugging information on trap signal to ctty"); /* * Exception, fault, and trap interface to the FreeBSD kernel. * This common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. */ void trap(struct trapframe *frame) { #ifdef KDTRACE_HOOKS struct reg regs; #endif struct thread *td = curthread; struct proc *p = td->td_proc; #ifdef KDB register_t dr6; #endif int i = 0, ucode = 0; u_int type; register_t addr = 0; vm_offset_t eva; ksiginfo_t ksi; #ifdef POWERFAIL_NMI static int lastalert = 0; #endif PCPU_INC(cnt.v_trap); type = frame->tf_trapno; #ifdef SMP /* Handler for NMI IPIs used for stopping CPUs. 
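 *
 * (A zero return from ipi_nmi_handler() means this CPU was the
 * target of a stop IPI and the NMI is fully handled; any other NMI
 * falls through to the handlers below.)
 *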
*/ if (type == T_NMI) { if (ipi_nmi_handler() == 0) goto out; } #endif /* SMP */ #ifdef KDB if (kdb_active) { kdb_reenter(); goto out; } #endif if (type == T_RESERVED) { trap_fatal(frame, 0); goto out; } if (type == T_NMI) { #ifdef HWPMC_HOOKS /* * CPU PMCs interrupt using an NMI so we check for that first. * If the HWPMC module is active, 'pmc_hook' will point to * the function to be called. A non-zero return value from the * hook means that the NMI was consumed by it and that we can * return immediately. */ if (pmc_intr != NULL && (*pmc_intr)(PCPU_GET(cpuid), frame) != 0) goto out; #endif #ifdef STACK if (stack_nmi_handler(frame) != 0) goto out; #endif } if (type == T_MCHK) { mca_intr(); goto out; } #ifdef KDTRACE_HOOKS /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets * a flag in its per-cpu flags to indicate that it doesn't * want to fault. On returning from the probe, the no-fault * flag is cleared and finally re-scheduling is enabled. */ if ((type == T_PROTFLT || type == T_PAGEFLT) && dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type)) goto out; #endif if ((frame->tf_eflags & PSL_I) == 0) { /* * Buggy application or kernel code has disabled * interrupts and then trapped. Enabling interrupts * now is wrong, but it is better than running with * interrupts disabled until they are accidentally * enabled later. */ if (TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0) uprintf( "pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curthread->td_name, type); else if (type != T_NMI && type != T_BPTFLT && type != T_TRCTRAP && frame->tf_eip != (int)cpu_switch_load_gs) { /* * XXX not quite right, since this may be for a * multiple fault in user mode. */ printf("kernel trap %d with interrupts disabled\n", type); /* * Page faults need interrupts disabled until later, * and we shouldn't enable interrupts while holding * a spin lock. */ if (type != T_PAGEFLT && td->td_md.md_spinlock_count == 0) enable_intr(); } } eva = 0; if (type == T_PAGEFLT) { /* * For some Cyrix CPUs, %cr2 is clobbered by * interrupts. This problem is worked around by using * an interrupt gate for the pagefault handler. We * are finally ready to read %cr2 and conditionally * reenable interrupts. If we hold a spin lock, then * we must not reenable interrupts. This might be a * spurious page fault. */ eva = rcr2(); if (td->td_md.md_spinlock_count == 0) enable_intr(); } if (TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0) { /* user trap */ td->td_pticks = 0; td->td_frame = frame; addr = frame->tf_eip; if (td->td_cowgen != p->p_cowgen) thread_cow_update(td); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ i = SIGILL; ucode = ILL_PRVOPC; break; case T_BPTFLT: /* bpt instruction fault */ case T_TRCTRAP: /* trace trap */ enable_intr(); #ifdef KDTRACE_HOOKS if (type == T_BPTFLT) { fill_frame_regs(frame, ®s); if (dtrace_pid_probe_ptr != NULL && dtrace_pid_probe_ptr(®s) == 0) goto out; } #endif user_trctrap_out: frame->tf_eflags &= ~PSL_T; i = SIGTRAP; ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT); break; case T_ARITHTRAP: /* arithmetic trap */ -#ifdef DEV_NPX ucode = npxtrap_x87(); if (ucode == -1) goto userout; -#else - ucode = 0; -#endif i = SIGFPE; break; /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle * them specially. 
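 *
 * (vm86_emulate() returns 0 when it fully emulated the faulting
 * instruction, SIGTRAP when a debug trap must be reflected back to
 * the debugger, or the number of the signal to deliver when the
 * instruction could not be emulated.)
 *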
*/ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ if (frame->tf_eflags & PSL_VM) { i = vm86_emulate((struct vm86frame *)frame); if (i == SIGTRAP) { type = T_TRCTRAP; load_dr6(rdr6() | 0x4000); goto user_trctrap_out; } if (i == 0) goto user; break; } i = SIGBUS; ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR; break; case T_SEGNPFLT: /* segment not present fault */ i = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ i = SIGBUS; ucode = BUS_OBJERR; break; case T_ALIGNFLT: i = SIGBUS; ucode = BUS_ADRALN; break; case T_DOUBLEFLT: /* double fault */ default: i = SIGBUS; ucode = BUS_OBJERR; break; case T_PAGEFLT: /* page fault */ i = trap_pfault(frame, TRUE, eva); #if defined(I586_CPU) && !defined(NO_F00F_HACK) if (i == -2) { /* * The f00f hack workaround has triggered, so * treat the fault as an illegal instruction * (T_PRIVINFLT) instead of a page fault. */ type = frame->tf_trapno = T_PRIVINFLT; /* Proceed as in that case. */ ucode = ILL_PRVOPC; i = SIGILL; break; } #endif if (i == -1) goto userout; if (i == 0) goto user; if (i == SIGSEGV) ucode = SEGV_MAPERR; else { if (prot_fault_translation == 0) { /* * Autodetect. * This check also covers the images * without the ABI-tag ELF note. */ if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && p->p_osrel >= P_OSREL_SIGSEGV) { i = SIGSEGV; ucode = SEGV_ACCERR; } else { i = SIGBUS; ucode = BUS_PAGE_FAULT; } } else if (prot_fault_translation == 1) { /* * Always compat mode. */ i = SIGBUS; ucode = BUS_PAGE_FAULT; } else { /* * Always SIGSEGV mode. */ i = SIGSEGV; ucode = SEGV_ACCERR; } } addr = eva; break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; i = SIGFPE; break; #ifdef DEV_ISA case T_NMI: #ifdef POWERFAIL_NMI #ifndef TIMER_FREQ # define TIMER_FREQ 1193182 #endif if (time_second - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(880, hz); lastalert = time_second; } goto userout; #else /* !POWERFAIL_NMI */ nmi_handle_intr(type, frame); break; #endif /* POWERFAIL_NMI */ #endif /* DEV_ISA */ case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; i = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; i = SIGFPE; break; case T_DNA: -#ifdef DEV_NPX KASSERT(PCB_USER_FPU(td->td_pcb), ("kernel FPU ctx has leaked")); /* transparent fault (due to context switch "late") */ if (npxdna()) goto userout; -#endif uprintf("pid %d killed due to lack of floating point\n", p->p_pid); i = SIGKILL; ucode = 0; break; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; i = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ -#if defined(DEV_NPX) && !defined(CPU_DISABLE_SSE) && defined(I686_CPU) ucode = npxtrap_sse(); if (ucode == -1) goto userout; -#else - ucode = 0; -#endif i = SIGFPE; break; #ifdef KDTRACE_HOOKS case T_DTRACE_RET: enable_intr(); fill_frame_regs(frame, ®s); if (dtrace_return_probe_ptr != NULL && dtrace_return_probe_ptr(®s) == 0) goto out; break; #endif } } else { /* kernel trap */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(frame, FALSE, eva); goto out; case T_DNA: -#ifdef DEV_NPX if (PCB_USER_FPU(td->td_pcb)) panic("Unregistered use of FPU in kernel"); if (npxdna()) goto out; -#endif break; case T_ARITHTRAP: /* arithmetic trap */ case T_XMMFLT: /* SIMD floating-point exception */ case T_FPOPFLT: /* FPU operand fetch fault */ /* * XXXKIB for now disable any FPU traps in kernel * handler registration seems to be 
overkill */ trap_fatal(frame, 0); goto out; /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle * them specially. */ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ if (frame->tf_eflags & PSL_VM) { i = vm86_emulate((struct vm86frame *)frame); if (i == SIGTRAP) { type = T_TRCTRAP; load_dr6(rdr6() | 0x4000); goto kernel_trctrap; } if (i != 0) /* * returns to original process */ vm86_trap((struct vm86frame *)frame); goto out; } if (type == T_STKFLT) break; /* FALL THROUGH */ case T_SEGNPFLT: /* segment not present fault */ if (curpcb->pcb_flags & PCB_VM86CALL) break; /* * Invalid %fs's and %gs's can be created using * procfs or PT_SETREGS or by invalidating the * underlying LDT entry. This causes a fault * in kernel mode when the kernel attempts to * switch contexts. Lose the bad context * (XXX) so that we can continue, and generate * a signal. */ if (frame->tf_eip == (int)cpu_switch_load_gs) { curpcb->pcb_gs = 0; #if 0 PROC_LOCK(p); kern_psignal(p, SIGBUS); PROC_UNLOCK(p); #endif goto out; } if (td->td_intr_nesting_level != 0) break; /* * Invalid segment selectors and out of bounds * %eip's and %esp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. */ if (frame->tf_eip == (int)doreti_iret) { frame->tf_eip = (int)doreti_iret_fault; goto out; } if (frame->tf_eip == (int)doreti_popl_ds) { frame->tf_eip = (int)doreti_popl_ds_fault; goto out; } if (frame->tf_eip == (int)doreti_popl_es) { frame->tf_eip = (int)doreti_popl_es_fault; goto out; } if (frame->tf_eip == (int)doreti_popl_fs) { frame->tf_eip = (int)doreti_popl_fs_fault; goto out; } if (curpcb->pcb_onfault != NULL) { frame->tf_eip = (int)curpcb->pcb_onfault; goto out; } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. */ if (frame->tf_eflags & PSL_NT) { frame->tf_eflags &= ~PSL_NT; goto out; } break; case T_TRCTRAP: /* trace trap */ kernel_trctrap: if (frame->tf_eip == (int)IDTVEC(lcall_syscall)) { /* * We've just entered system mode via the * syscall lcall. Continue single stepping * silently until the syscall handler has * saved the flags. */ goto out; } if (frame->tf_eip == (int)IDTVEC(lcall_syscall) + 1) { /* * The syscall handler has now saved the * flags. Stop single stepping it. */ frame->tf_eflags &= ~PSL_T; goto out; } /* * Ignore debug register trace traps due to * accesses in the user's address space, which * can happen under several conditions such as * if a user sets a watchpoint on a buffer and * then passes that buffer to a system call. * We still want to get TRCTRAPS for addresses * in kernel space because that is useful when * debugging the kernel. */ if (user_dbreg_trap() && !(curpcb->pcb_flags & PCB_VM86CALL)) { /* * Reset breakpoint bits because the * processor doesn't */ load_dr6(rdr6() & ~0xf); goto out; } /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ case T_BPTFLT: /* * If KDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef KDB /* XXX %dr6 is not quite reentrant. 
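 *
 * (Bit 0x4000 in %dr6 is BS, the single-step status bit; it is
 * cleared before calling the debugger so a stale value is not taken
 * for a fresh single-step trap.)
 *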
*/ dr6 = rdr6(); load_dr6(dr6 & ~0x4000); if (kdb_trap(type, dr6, frame)) goto out; #endif break; #ifdef DEV_ISA case T_NMI: #ifdef POWERFAIL_NMI if (time_second - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(880, hz); lastalert = time_second; } goto out; #else /* !POWERFAIL_NMI */ nmi_handle_intr(type, frame); goto out; #endif /* POWERFAIL_NMI */ #endif /* DEV_ISA */ } trap_fatal(frame, eva); goto out; } /* Translate fault for emulators (e.g. Linux) */ if (*p->p_sysent->sv_transtrap) i = (*p->p_sysent->sv_transtrap)(i, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = i; ksi.ksi_code = ucode; ksi.ksi_addr = (void *)addr; ksi.ksi_trapno = type; if (uprintf_signal) { uprintf("pid %d comm %s: signal %d err %x code %d type %d " "addr 0x%x esp 0x%08x eip 0x%08x " "<%02x %02x %02x %02x %02x %02x %02x %02x>\n", p->p_pid, p->p_comm, i, frame->tf_err, ucode, type, addr, frame->tf_esp, frame->tf_eip, fubyte((void *)(frame->tf_eip + 0)), fubyte((void *)(frame->tf_eip + 1)), fubyte((void *)(frame->tf_eip + 2)), fubyte((void *)(frame->tf_eip + 3)), fubyte((void *)(frame->tf_eip + 4)), fubyte((void *)(frame->tf_eip + 5)), fubyte((void *)(frame->tf_eip + 6)), fubyte((void *)(frame->tf_eip + 7))); } KASSERT((read_eflags() & PSL_I) != 0, ("interrupts disabled")); trapsignal(td, &ksi); #ifdef DEBUG if (type <= MAX_TRAP_MSG) { uprintf("fatal process exception: %s", trap_msg[type]); if ((type == T_PAGEFLT) || (type == T_PROTFLT)) uprintf(", fault VA = 0x%lx", (u_long)eva); uprintf("\n"); } #endif user: userret(td, frame); KASSERT(PCB_USER_FPU(td->td_pcb), ("Return from trap with kernel FPU ctx leaked")); userout: out: return; } static int trap_pfault(frame, usermode, eva) struct trapframe *frame; int usermode; vm_offset_t eva; { vm_offset_t va; vm_map_t map; int rv = 0; vm_prot_t ftype; struct thread *td = curthread; struct proc *p = td->td_proc; if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) { /* * Due to both processor errata and lazy TLB invalidation when * access restrictions are removed from virtual pages, memory * accesses that are allowed by the physical mapping layer may * nonetheless cause one spurious page fault per virtual page. * When the thread is executing a "no faulting" section that * is bracketed by vm_fault_{disable,enable}_pagefaults(), * every page fault is treated as a spurious page fault, * unless it accesses the same virtual address as the most * recent page fault within the same "no faulting" section. */ if (td->td_md.md_spurflt_addr != eva || (td->td_pflags & TDP_RESETSPUR) != 0) { /* * Do nothing to the TLB. A stale TLB entry is * flushed automatically by a page fault. */ td->td_md.md_spurflt_addr = eva; td->td_pflags &= ~TDP_RESETSPUR; return (0); } } else { /* * If we get a page fault while in a critical section, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to * do the VM lookup, so just consider it a fatal trap so the * kernel can print out a useful trap message and even get * to the debugger. * * If we get a page fault while holding a non-sleepable * lock, then it is most likely a fatal kernel page fault. * If WITNESS is enabled, then it's going to whine about * bogus LORs with various VM locks, so just skip to the * fatal trap handling directly. 
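 *
 * (The "no faulting" sections mentioned above are bracketed by their
 * callers roughly as, illustratively,
 *
 *	save = vm_fault_disable_pagefaults();
 *	error = copyin(uaddr, &v, sizeof(v));
 *	vm_fault_enable_pagefaults(save);
 *
 * so that a fault comes back as an error instead of sleeping in the
 * VM.)
 *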
*/ if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { trap_fatal(frame, eva); return (-1); } } va = trunc_page(eva); if (va >= KERNBASE) { /* * Don't allow user-mode faults in kernel address space. * An exception: if the faulting address is the invalid * instruction entry in the IDT, then the Intel Pentium * F00F bug workaround was triggered, and we need to * treat it is as an illegal instruction, and not a page * fault. */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) return (-2); #endif if (usermode) goto nogo; map = kernel_map; } else { map = &p->p_vmspace->vm_map; /* * When accessing a user-space address, kernel must be * ready to accept the page fault, and provide a * handling routine. Since accessing the address * without the handler is a bug, do not try to handle * it normally, and panic immediately. */ if (!usermode && (td->td_intr_nesting_level != 0 || curpcb->pcb_onfault == NULL)) { trap_fatal(frame, eva); return (-1); } } /* * If the trap was caused by errant bits in the PTE then panic. */ if (frame->tf_err & PGEX_RSV) { trap_fatal(frame, eva); return (-1); } /* * PGEX_I is defined only if the execute disable bit capability is * supported and enabled. */ if (frame->tf_err & PGEX_W) ftype = VM_PROT_WRITE; #if defined(PAE) || defined(PAE_TABLES) else if ((frame->tf_err & PGEX_I) && pg_nx != 0) ftype = VM_PROT_EXECUTE; #endif else ftype = VM_PROT_READ; /* Fault in the page. */ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); if (rv == KERN_SUCCESS) { #ifdef HWPMC_HOOKS if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { PMC_SOFT_CALL_TF( , , page_fault, all, frame); if (ftype == VM_PROT_READ) PMC_SOFT_CALL_TF( , , page_fault, read, frame); else PMC_SOFT_CALL_TF( , , page_fault, write, frame); } #endif return (0); } nogo: if (!usermode) { if (td->td_intr_nesting_level == 0 && curpcb->pcb_onfault != NULL) { frame->tf_eip = (int)curpcb->pcb_onfault; return (0); } trap_fatal(frame, eva); return (-1); } return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); } static void trap_fatal(frame, eva) struct trapframe *frame; vm_offset_t eva; { int code, ss, esp; u_int type; struct soft_segment_descriptor softseg; char *msg; code = frame->tf_err; type = frame->tf_trapno; sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg); if (type <= MAX_TRAP_MSG) msg = trap_msg[type]; else msg = "UNKNOWN"; printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg, frame->tf_eflags & PSL_VM ? "vm86" : ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%x\n", eva); printf("fault code = %s %s%s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", #if defined(PAE) || defined(PAE_TABLES) pg_nx != 0 ? (code & PGEX_I ? " instruction" : " data") : #endif "", code & PGEX_RSV ? "reserved bits in PTE" : code & PGEX_P ? 
"protection violation" : "page not present"); } printf("instruction pointer = 0x%x:0x%x\n", frame->tf_cs & 0xffff, frame->tf_eip); if (TF_HAS_STACKREGS(frame)) { ss = frame->tf_ss & 0xffff; esp = frame->tf_esp; } else { ss = GSEL(GDATA_SEL, SEL_KPL); esp = (int)&frame->tf_esp; } printf("stack pointer = 0x%x:0x%x\n", ss, esp); printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp); printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); printf(" = DPL %d, pres %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, softseg.ssd_gran); printf("processor eflags = "); if (frame->tf_eflags & PSL_T) printf("trace trap, "); if (frame->tf_eflags & PSL_I) printf("interrupt enabled, "); if (frame->tf_eflags & PSL_NT) printf("nested task, "); if (frame->tf_eflags & PSL_RF) printf("resume, "); if (frame->tf_eflags & PSL_VM) printf("vm86, "); printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12); printf("current process = %d (%s)\n", curproc->p_pid, curthread->td_name); #ifdef KDB if (debugger_on_panic || kdb_active) { frame->tf_err = eva; /* smuggle fault address to ddb */ if (kdb_trap(type, 0, frame)) { frame->tf_err = code; /* restore error code */ return; } frame->tf_err = code; /* restore error code */ } #endif printf("trap number = %d\n", type); if (type <= MAX_TRAP_MSG) panic("%s", trap_msg[type]); else panic("unknown/reserved trap"); } /* * Double fault handler. Called when a fault occurs while writing * a frame for a trap/exception onto the stack. This usually occurs * when the stack overflows (such is the case with infinite recursion, * for example). * * XXX Note that the current PTD gets replaced by IdlePTD when the * task switch occurs. This means that the stack that was active at * the time of the double fault is not available at unless * the machine was idle when the double fault occurred. The downside * of this is that "trace " in ddb won't work. */ void dblfault_handler() { #ifdef KDTRACE_HOOKS if (dtrace_doubletrap_func != NULL) (*dtrace_doubletrap_func)(); #endif printf("\nFatal double fault:\n"); printf("eip = 0x%x\n", PCPU_GET(common_tss.tss_eip)); printf("esp = 0x%x\n", PCPU_GET(common_tss.tss_esp)); printf("ebp = 0x%x\n", PCPU_GET(common_tss.tss_ebp)); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif panic("double fault"); } int cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) { struct proc *p; struct trapframe *frame; caddr_t params; long tmp; int error; p = td->td_proc; frame = td->td_frame; params = (caddr_t)frame->tf_esp + sizeof(int); sa->code = frame->tf_eax; /* * Need to check if this is a 32 bit or 64 bit syscall. */ if (sa->code == SYS_syscall) { /* * Code is first argument, followed by actual args. */ error = fueword(params, &tmp); if (error == -1) return (EFAULT); sa->code = tmp; params += sizeof(int); } else if (sa->code == SYS___syscall) { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. 
*/ error = fueword(params, &tmp); if (error == -1) return (EFAULT); sa->code = tmp; params += sizeof(quad_t); } if (p->p_sysent->sv_mask) sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; if (params != NULL && sa->narg != 0) error = copyin(params, (caddr_t)sa->args, (u_int)(sa->narg * sizeof(int))); else error = 0; if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = frame->tf_edx; } return (error); } #include "../../kern/subr_syscall.c" /* * syscall - system call request C handler. A system call is * essentially treated as a trap by reusing the frame layout. */ void syscall(struct trapframe *frame) { struct thread *td; struct syscall_args sa; register_t orig_tf_eflags; int error; ksiginfo_t ksi; #ifdef DIAGNOSTIC if (!(TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0)) { panic("syscall"); /* NOT REACHED */ } #endif orig_tf_eflags = frame->tf_eflags; td = curthread; td->td_frame = frame; error = syscallenter(td, &sa); /* * Traced syscall. */ if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) { frame->tf_eflags &= ~PSL_T; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)frame->tf_eip; trapsignal(td, &ksi); } KASSERT(PCB_USER_FPU(td->td_pcb), ("System call %s returning with kernel FPU ctx leaked", syscallname(td->td_proc, sa.code))); KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), ("System call %s returning with mangled pcb_save", syscallname(td->td_proc, sa.code))); syscallret(td, error, &sa); } Index: projects/ipsec/sys/i386/i386/vm86bios.s =================================================================== --- projects/ipsec/sys/i386/i386/vm86bios.s (revision 313186) +++ projects/ipsec/sys/i386/i386/vm86bios.s (revision 313187) @@ -1,177 +1,173 @@ /*- * Copyright (c) 1998 Jonathan Lemon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ -#include "opt_npx.h" - #include /* miscellaneous asm macros */ #include #include "assym.s" #define SCR_NEWPTD PCB_ESI /* readability macros */ #define SCR_VMFRAME PCB_EBP /* see vm86.c for explanation */ #define SCR_STACK PCB_ESP #define SCR_PGTABLE PCB_EBX #define SCR_ARGFRAME PCB_EIP #define SCR_TSS0 PCB_VM86 #define SCR_TSS1 (PCB_VM86+4) .data ALIGN_DATA .globl vm86pcb vm86pcb: .long 0 .text /* * vm86_bioscall(struct trapframe_vm86 *vm86) */ ENTRY(vm86_bioscall) movl vm86pcb,%edx /* scratch data area */ movl 4(%esp),%eax movl %eax,SCR_ARGFRAME(%edx) /* save argument pointer */ pushl %ebx pushl %ebp pushl %esi pushl %edi pushl %gs -#ifdef DEV_NPX pushfl cli movl PCPU(CURTHREAD),%ecx cmpl %ecx,PCPU(FPCURTHREAD) /* do we need to save fp? */ jne 1f pushl %edx movl TD_PCB(%ecx),%ecx pushl PCB_SAVEFPU(%ecx) call npxsave addl $4,%esp popl %edx /* recover our pcb */ 1: popfl -#endif movl SCR_VMFRAME(%edx),%ebx /* target frame location */ movl %ebx,%edi /* destination */ movl SCR_ARGFRAME(%edx),%esi /* source (set on entry) */ movl $VM86_FRAMESIZE/4,%ecx /* sizeof(struct vm86frame)/4 */ cld rep movsl /* copy frame to new stack */ movl PCPU(CURPCB),%eax pushl %eax /* save curpcb */ movl %edx,PCPU(CURPCB) /* set curpcb to vm86pcb */ movl PCPU(TSS_GDT),%ebx /* entry in GDT */ movl 0(%ebx),%eax movl %eax,SCR_TSS0(%edx) /* save first word */ movl 4(%ebx),%eax andl $~0x200, %eax /* flip 386BSY -> 386TSS */ movl %eax,SCR_TSS1(%edx) /* save second word */ movl PCB_EXT(%edx),%edi /* vm86 tssd entry */ movl 0(%edi),%eax movl %eax,0(%ebx) movl 4(%edi),%eax movl %eax,4(%ebx) movl $GPROC0_SEL*8,%esi /* GSEL(entry, SEL_KPL) */ ltr %si movl %cr3,%eax pushl %eax /* save address space */ movl IdlePTD,%ecx movl %ecx,%ebx addl $KERNBASE,%ebx /* va of Idle PTD */ movl 0(%ebx),%eax pushl %eax /* old ptde != 0 when booting */ pushl %ebx /* keep for reuse */ movl %esp,SCR_STACK(%edx) /* save current stack location */ movl SCR_NEWPTD(%edx),%eax /* mapping for vm86 page table */ movl %eax,0(%ebx) /* ... 
install as PTD entry 0 */ #if defined(PAE) || defined(PAE_TABLES) movl IdlePDPT,%ecx #endif movl %ecx,%cr3 /* new page tables */ movl SCR_VMFRAME(%edx),%esp /* switch to new stack */ pushl %esp call vm86_prepcall /* finish setup */ add $4, %esp /* * Return via doreti */ MEXITCOUNT jmp doreti /* * vm86_biosret(struct trapframe_vm86 *vm86) */ ENTRY(vm86_biosret) movl vm86pcb,%edx /* data area */ movl 4(%esp),%esi /* source */ movl SCR_ARGFRAME(%edx),%edi /* destination */ movl $VM86_FRAMESIZE/4,%ecx /* size */ cld rep movsl /* copy frame to original frame */ movl SCR_STACK(%edx),%esp /* back to old stack */ popl %ebx /* saved va of Idle PTD */ popl %eax movl %eax,0(%ebx) /* restore old pte */ popl %eax movl %eax,%cr3 /* install old page table */ movl PCPU(TSS_GDT),%ebx /* entry in GDT */ movl SCR_TSS0(%edx),%eax movl %eax,0(%ebx) /* restore first word */ movl SCR_TSS1(%edx),%eax movl %eax,4(%ebx) /* restore second word */ movl $GPROC0_SEL*8,%esi /* GSEL(entry, SEL_KPL) */ ltr %si popl PCPU(CURPCB) /* restore curpcb/curproc */ movl SCR_ARGFRAME(%edx),%edx /* original stack frame */ movl TF_TRAPNO(%edx),%eax /* return (trapno) */ popl %gs popl %edi popl %esi popl %ebp popl %ebx ret /* back to our normal program */ Index: projects/ipsec/sys/i386/i386/vm_machdep.c =================================================================== --- projects/ipsec/sys/i386/i386/vm_machdep.c (revision 313186) +++ projects/ipsec/sys/i386/i386/vm_machdep.c (revision 313187) @@ -1,871 +1,855 @@ /*- * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ */ #include __FBSDID("$FreeBSD$"); #include "opt_isa.h" #include "opt_npx.h" #include "opt_reset.h" #include "opt_cpu.h" #include "opt_xbox.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef CPU_ELAN #include #endif #include #include #include #include #include #include #include #ifdef XBOX #include #endif #ifndef NSFBUFS #define NSFBUFS (512 + maxusers * 16) #endif -#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) -#define CPU_ENABLE_SSE -#endif - _Static_assert(OFFSETOF_CURTHREAD == offsetof(struct pcpu, pc_curthread), "OFFSETOF_CURTHREAD does not correspond with offset of pc_curthread."); _Static_assert(OFFSETOF_CURPCB == offsetof(struct pcpu, pc_curpcb), "OFFSETOF_CURPCB does not correspond with offset of pc_curpcb."); _Static_assert(__OFFSETOF_MONITORBUF == offsetof(struct pcpu, pc_monitorbuf), "__OFFSETOF_MONINORBUF does not correspond with offset of pc_monitorbuf."); static void cpu_reset_real(void); #ifdef SMP static void cpu_reset_proxy(void); static u_int cpu_reset_proxyid; static volatile u_int cpu_reset_proxy_active; #endif union savefpu * get_pcb_user_save_td(struct thread *td) { vm_offset_t p; p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN); KASSERT((p % XSAVE_AREA_ALIGN) == 0, ("Unaligned pcb_user_save area")); return ((union savefpu *)p); } union savefpu * get_pcb_user_save_pcb(struct pcb *pcb) { vm_offset_t p; p = (vm_offset_t)(pcb + 1); return ((union savefpu *)p); } struct pcb * get_pcb_td(struct thread *td) { vm_offset_t p; p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - roundup2(cpu_max_ext_state_size, XSAVE_AREA_ALIGN) - sizeof(struct pcb); return ((struct pcb *)p); } void * alloc_fpusave(int flags) { void *res; -#ifdef CPU_ENABLE_SSE struct savefpu_ymm *sf; -#endif res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags); -#ifdef CPU_ENABLE_SSE if (use_xsave) { sf = (struct savefpu_ymm *)res; bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd)); sf->sv_xstate.sx_hd.xstate_bv = xsave_mask; } -#endif return (res); } /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) { register struct proc *p1; struct pcb *pcb2; struct mdproc *mdp2; p1 = td1->td_proc; if ((flags & RFPROC) == 0) { if ((flags & RFMEM) == 0) { /* unshare user LDT */ struct mdproc *mdp1 = &p1->p_md; struct proc_ldt *pldt, *pldt1; mtx_lock_spin(&dt_lock); if ((pldt1 = mdp1->md_ldt) != NULL && pldt1->ldt_refcnt > 1) { pldt = user_ldt_alloc(mdp1, pldt1->ldt_len); if (pldt == NULL) panic("could not copy LDT"); mdp1->md_ldt = pldt; set_user_ldt(mdp1); user_ldt_deref(pldt1); } else mtx_unlock_spin(&dt_lock); } return; } /* Ensure that td1's pcb is up to date. 
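The get_pcb_td() and get_pcb_user_save_td() helpers above carve the pcb and the 64-byte-aligned FPU save area off the top of the kernel stack. A hypothetical standalone model of that layout arithmetic (all sizes and the stack base are invented for illustration; the real values come from td_kstack, cpu_max_ext_state_size, and sizeof(struct pcb)):

#include <stdint.h>
#include <stdio.h>

#define	ROUNDUP2(x, a)	(((x) + ((a) - 1)) & ~((uintptr_t)(a) - 1))

int
main(void)
{
	uintptr_t kstack = 0xc0400000;	/* invented, page-aligned stack base */
	size_t stacksz = 2 * 4096;	/* td_kstack_pages * PAGE_SIZE */
	size_t fpusz = 832;		/* e.g. sizeof(struct savefpu_ymm) */
	size_t pcbsz = 448;		/* invented; a multiple of 64, per the CTASSERT */

	/* FPU save area sits at the very top, rounded to XSAVE alignment. */
	uintptr_t save = kstack + stacksz - ROUNDUP2(fpusz, 64);
	/* The pcb sits immediately below it, so pcb + 1 == save area. */
	uintptr_t pcb = save - pcbsz;

	printf("fpu save @ %#lx (64-aligned: %d), pcb @ %#lx\n",
	    (unsigned long)save, (save % 64) == 0, (unsigned long)pcb);
	return (0);
}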
*/ if (td1 == curthread) td1->td_pcb->pcb_gs = rgs(); -#ifdef DEV_NPX critical_enter(); if (PCPU_GET(fpcurthread) == td1) npxsave(td1->td_pcb->pcb_save); critical_exit(); -#endif /* Point the pcb to the top of the stack */ pcb2 = get_pcb_td(td2); td2->td_pcb = pcb2; /* Copy td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Properly initialize pcb_save */ pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); bcopy(get_pcb_user_save_td(td1), get_pcb_user_save_pcb(pcb2), cpu_max_ext_state_size); /* Point mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); /* * Create a new fresh stack for the new process. * Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. * The -16 is so we can expand the trapframe if we go to vm86. */ td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb - 16) - 1; bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe)); td2->td_frame->tf_eax = 0; /* Child returns zero */ td2->td_frame->tf_eflags &= ~PSL_C; /* success */ td2->td_frame->tf_edx = 1; /* * If the parent process has the trap bit set (i.e. a debugger had * single stepped the process to the system call), we need to clear * the trap flag from the new frame unless the debugger had set PF_FORK * on the parent. Otherwise, the child will receive a (likely * unexpected) SIGTRAP when it executes the first instruction after * returning to userland. */ if ((p1->p_pfsflags & PF_FORK) == 0) td2->td_frame->tf_eflags &= ~PSL_T; /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ #if defined(PAE) || defined(PAE_TABLES) pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdpt); #else pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir); #endif pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */ pcb2->pcb_ebp = 0; pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *); pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */ pcb2->pcb_eip = (int)fork_trampoline; /*- * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_gs: cloned above. * pcb2->pcb_ext: cleared below. */ /* * XXX don't copy the i/o pages. this should probably be fixed. */ pcb2->pcb_ext = 0; /* Copy the LDT, if necessary. */ mtx_lock_spin(&dt_lock); if (mdp2->md_ldt != NULL) { if (flags & RFMEM) { mdp2->md_ldt->ldt_refcnt++; } else { mdp2->md_ldt = user_ldt_alloc(mdp2, mdp2->md_ldt->ldt_len); if (mdp2->md_ldt == NULL) panic("could not copy LDT"); } } mtx_unlock_spin(&dt_lock); /* Setup to release spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I; /* * Now, cpu_switch() can schedule the new process. * pcb_esp is loaded pointing to the cpu_switch() stack frame * containing the return address when exiting cpu_switch. * This will normally be to fork_trampoline(), which will have * %ebx loaded with the new proc's pointer. fork_trampoline() * will set up a stack to call fork_return(p, frame); to complete * the return to user-mode. */ } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. 
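cpu_fork_kthread_handler(), defined next, is the machine-dependent half of kernel-thread creation; the machine-independent consumer is kproc_create(9). A hedged sketch of that consumer side, assuming the stock KPI (the function and thread names are invented, and this compiles only in a kernel context, not as a userland program):

#include <sys/param.h>
#include <sys/kthread.h>
#include <sys/proc.h>

static void
example_kproc(void *arg)	/* entry receives `arg` via pcb_ebx */
{

	kproc_exit(0);		/* kernel threads never return to user mode */
}

static int
example_start(void)
{
	struct proc *p;

	return (kproc_create(example_kproc, NULL, &p, 0, 0, "example"));
}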
*/ void cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg) { /* * Note that the trap frame follows the args, so the function * is really called like this: func(arg, frame); */ td->td_pcb->pcb_esi = (int) func; /* function */ td->td_pcb->pcb_ebx = (int) arg; /* first arg */ } void cpu_exit(struct thread *td) { /* * If this process has a custom LDT, release it. Reset pc->pcb_gs * and %gs before we free it in case they refer to an LDT entry. */ mtx_lock_spin(&dt_lock); if (td->td_proc->p_md.md_ldt) { td->td_pcb->pcb_gs = _udatasel; load_gs(_udatasel); user_ldt_free(td); } else mtx_unlock_spin(&dt_lock); } void cpu_thread_exit(struct thread *td) { -#ifdef DEV_NPX critical_enter(); if (td == PCPU_GET(fpcurthread)) npxdrop(); critical_exit(); -#endif /* Disable any hardware breakpoints. */ if (td->td_pcb->pcb_flags & PCB_DBREGS) { reset_dbregs(); td->td_pcb->pcb_flags &= ~PCB_DBREGS; } } void cpu_thread_clean(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; if (pcb->pcb_ext != NULL) { /* if (pcb->pcb_ext->ext_refcount-- == 1) ?? */ /* * XXX do we need to move the TSS off the allocated pages * before freeing them? (not done here) */ kmem_free(kernel_arena, (vm_offset_t)pcb->pcb_ext, ctob(IOPAGES + 1)); pcb->pcb_ext = NULL; } } void cpu_thread_swapin(struct thread *td) { } void cpu_thread_swapout(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { struct pcb *pcb; -#ifdef CPU_ENABLE_SSE struct xstate_hdr *xhdr; -#endif td->td_pcb = pcb = get_pcb_td(td); td->td_frame = (struct trapframe *)((caddr_t)pcb - 16) - 1; pcb->pcb_ext = NULL; pcb->pcb_save = get_pcb_user_save_pcb(pcb); -#ifdef CPU_ENABLE_SSE if (use_xsave) { xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1); bzero(xhdr, sizeof(*xhdr)); xhdr->xstate_bv = xsave_mask; } -#endif } void cpu_thread_free(struct thread *td) { cpu_thread_clean(td); } void cpu_set_syscall_retval(struct thread *td, int error) { switch (error) { case 0: td->td_frame->tf_eax = td->td_retval[0]; td->td_frame->tf_edx = td->td_retval[1]; td->td_frame->tf_eflags &= ~PSL_C; break; case ERESTART: /* * Reconstruct pc, assuming lcall $X,y is 7 bytes, int * 0x80 is 2 bytes. We saved this in tf_err. */ td->td_frame->tf_eip -= td->td_frame->tf_err; break; case EJUSTRETURN: break; default: td->td_frame->tf_eax = SV_ABI_ERRNO(td->td_proc, error); td->td_frame->tf_eflags |= PSL_C; break; } } /* * Initialize machine state, mostly pcb and trap frame for a new * thread, about to return to userspace. Put enough state in the new * thread's PCB to get it to go back to the fork_return(), which * finalizes the thread state and handles peculiarities of the first * return to userspace for the new thread. */ void cpu_copy_thread(struct thread *td, struct thread *td0) { struct pcb *pcb2; /* Point the pcb to the top of the stack. */ pcb2 = td->td_pcb; /* * Copy the upcall pcb. This loads kernel regs. * Those not loaded individually below get their default * values here. */ bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); pcb2->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE | PCB_KERNNPX); pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save, cpu_max_ext_state_size); /* * Create a new fresh stack for the new thread. */ bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); /* If the current thread has the trap bit set (i.e. a debugger had * single stepped the process to the system call), we need to clear * the trap flag from the new frame. 
Otherwise, the new thread will * receive a (likely unexpected) SIGTRAP when it executes the first * instruction after returning to userland. */ td->td_frame->tf_eflags &= ~PSL_T; /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* trampoline arg */ pcb2->pcb_ebp = 0; pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */ pcb2->pcb_ebx = (int)td; /* trampoline arg */ pcb2->pcb_eip = (int)fork_trampoline; pcb2->pcb_gs = rgs(); /* * If we didn't copy the pcb, we'd need to do the following registers: * pcb2->pcb_cr3: cloned above. * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_gs: cloned above. * pcb2->pcb_ext: cleared below. */ pcb2->pcb_ext = NULL; /* Setup to release spin count in fork_exit(). */ td->td_md.md_spinlock_count = 1; td->td_md.md_saved_flags = PSL_KERNEL | PSL_I; } /* * Set that machine state for performing an upcall that starts * the entry function with the given argument. */ void cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { /* * Do any extra cleaning that needs to be done. * The thread may have optional components * that are not present in a fresh thread. * This may be a recycled thread so make it look * as though it's newly allocated. */ cpu_thread_clean(td); /* * Set the trap frame to point at the beginning of the entry * function. */ td->td_frame->tf_ebp = 0; td->td_frame->tf_esp = (((int)stack->ss_sp + stack->ss_size - 4) & ~0x0f) - 4; td->td_frame->tf_eip = (int)entry; /* Pass the argument to the entry point. */ suword((void *)(td->td_frame->tf_esp + sizeof(void *)), (int)arg); } int cpu_set_user_tls(struct thread *td, void *tls_base) { struct segment_descriptor sd; uint32_t base; /* * Construct a descriptor and store it in the pcb for * the next context switch. Also store it in the gdt * so that the load of tf_fs into %fs will activate it * at return to userland. 
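The descriptor constructed below packs the 32-bit TLS base into a 24-bit low field and an 8-bit high field, with a page-granular limit that wraps the full 4GB address space. A round-trip check of that packing, as a hypothetical standalone program:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t base = 0x12345678;		/* arbitrary TLS base */
	uint32_t lobase = base & 0xffffff;	/* sd_lobase: low 24 bits */
	uint32_t hibase = (base >> 24) & 0xff;	/* sd_hibase: high 8 bits */
	uint32_t lolimit = 0xffff, hilimit = 0xf;

	assert(((hibase << 24) | lobase) == base);
	/* 0xfffff pages at 4KB granularity covers the whole 4GB space. */
	printf("limit = 0x%05x pages\n", (hilimit << 16) | lolimit);
	return (0);
}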
*/ base = (uint32_t)tls_base; sd.sd_lobase = base & 0xffffff; sd.sd_hibase = (base >> 24) & 0xff; sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */ sd.sd_hilimit = 0xf; sd.sd_type = SDT_MEMRWA; sd.sd_dpl = SEL_UPL; sd.sd_p = 1; sd.sd_xx = 0; sd.sd_def32 = 1; sd.sd_gran = 1; critical_enter(); /* set %gs */ td->td_pcb->pcb_gsd = sd; if (td == curthread) { PCPU_GET(fsgs_gdt)[1] = sd; load_gs(GSEL(GUGS_SEL, SEL_UPL)); } critical_exit(); return (0); } /* * Convert kernel VA to physical address */ vm_paddr_t kvtop(void *addr) { vm_paddr_t pa; pa = pmap_kextract((vm_offset_t)addr); if (pa == 0) panic("kvtop: zero page frame"); return (pa); } #ifdef SMP static void cpu_reset_proxy() { cpuset_t tcrp; cpu_reset_proxy_active = 1; while (cpu_reset_proxy_active == 1) ; /* Wait for other cpu to see that we've started */ CPU_SETOF(cpu_reset_proxyid, &tcrp); stop_cpus(tcrp); printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid); DELAY(1000000); cpu_reset_real(); } #endif void cpu_reset() { #ifdef XBOX if (arch_i386_is_xbox) { /* Kick the PIC16L, it can reboot the box */ pic16l_reboot(); for (;;); } #endif #ifdef SMP cpuset_t map; u_int cnt; if (smp_started) { map = all_cpus; CPU_CLR(PCPU_GET(cpuid), &map); CPU_NAND(&map, &stopped_cpus); if (!CPU_EMPTY(&map)) { printf("cpu_reset: Stopping other CPUs\n"); stop_cpus(map); } if (PCPU_GET(cpuid) != 0) { cpu_reset_proxyid = PCPU_GET(cpuid); cpustop_restartfunc = cpu_reset_proxy; cpu_reset_proxy_active = 0; printf("cpu_reset: Restarting BSP\n"); /* Restart CPU #0. */ /* XXX: restart_cpus(1 << 0); */ CPU_SETOF(0, &started_cpus); wmb(); cnt = 0; while (cpu_reset_proxy_active == 0 && cnt < 10000000) cnt++; /* Wait for BSP to announce restart */ if (cpu_reset_proxy_active == 0) printf("cpu_reset: Failed to restart BSP\n"); enable_intr(); cpu_reset_proxy_active = 2; while (1); /* NOTREACHED */ } DELAY(1000000); } #endif cpu_reset_real(); /* NOTREACHED */ } static void cpu_reset_real() { struct region_descriptor null_idt; int b; disable_intr(); #ifdef CPU_ELAN if (elan_mmcr != NULL) elan_mmcr->RESCFG = 1; #endif if (cpu == CPU_GEODE1100) { /* Attempt Geode's own reset */ outl(0xcf8, 0x80009044ul); outl(0xcfc, 0xf); } #if !defined(BROKEN_KEYBOARD_RESET) /* * Attempt to do a CPU reset via the keyboard controller, * do not turn off GateA20, as any machine that fails * to do the reset here would then end up in no man's land. */ outb(IO_KBD + 4, 0xFE); DELAY(500000); /* wait 0.5 sec to see if that did it */ #endif /* * Attempt to force a reset via the Reset Control register at * I/O port 0xcf9. Bit 2 forces a system reset when it * transitions from 0 to 1. Bit 1 selects the type of reset * to attempt: 0 selects a "soft" reset, and 1 selects a * "hard" reset. We try a "hard" reset. The first write sets * bit 1 to select a "hard" reset and clears bit 2. The * second write forces a 0 -> 1 transition in bit 2 to trigger * a reset. */ outb(0xcf9, 0x2); outb(0xcf9, 0x6); DELAY(500000); /* wait 0.5 sec to see if that did it */ /* * Attempt to force a reset via the Fast A20 and Init register * at I/O port 0x92. Bit 1 serves as an alternate A20 gate. * Bit 0 asserts INIT# when set to 1. We are careful to only * preserve bit 1 while setting bit 0. We also must clear bit * 0 before setting it if it isn't already clear. 
*/ b = inb(0x92); if (b != 0xff) { if ((b & 0x1) != 0) outb(0x92, b & 0xfe); outb(0x92, b | 0x1); DELAY(500000); /* wait 0.5 sec to see if that did it */ } printf("No known reset method worked, attempting CPU shutdown\n"); DELAY(1000000); /* wait 1 sec for printf to complete */ /* Wipe the IDT. */ null_idt.rd_limit = 0; null_idt.rd_base = 0; lidt(&null_idt); /* "good night, sweet prince .... " */ breakpoint(); /* NOTREACHED */ while(1); } /* * Get an sf_buf from the freelist. May block if none are available. */ void sf_buf_map(struct sf_buf *sf, int flags) { pt_entry_t opte, *ptep; /* * Update the sf_buf's virtual-to-physical mapping, flushing the * virtual address from the TLB. Since the reference count for * the sf_buf's old mapping was zero, that mapping is not * currently in use. Consequently, there is no need to exchange * the old and new PTEs atomically, even under PAE. */ ptep = vtopte(sf->kva); opte = *ptep; *ptep = VM_PAGE_TO_PHYS(sf->m) | pgeflag | PG_RW | PG_V | pmap_cache_bits(sf->m->md.pat_mode, 0); /* * Avoid unnecessary TLB invalidations: If the sf_buf's old * virtual-to-physical mapping was not used, then any processor * that has invalidated the sf_buf's virtual address from its TLB * since the last used mapping need not invalidate again. */ #ifdef SMP if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) CPU_ZERO(&sf->cpumask); sf_buf_shootdown(sf, flags); #else if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) pmap_invalidate_page(kernel_pmap, sf->kva); #endif } #ifdef SMP void sf_buf_shootdown(struct sf_buf *sf, int flags) { cpuset_t other_cpus; u_int cpuid; sched_pin(); cpuid = PCPU_GET(cpuid); if (!CPU_ISSET(cpuid, &sf->cpumask)) { CPU_SET(cpuid, &sf->cpumask); invlpg(sf->kva); } if ((flags & SFB_CPUPRIVATE) == 0) { other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); CPU_NAND(&other_cpus, &sf->cpumask); if (!CPU_EMPTY(&other_cpus)) { CPU_OR(&sf->cpumask, &other_cpus); smp_masked_invlpg(other_cpus, sf->kva); } } sched_unpin(); } #endif /* * MD part of sf_buf_free(). */ int sf_buf_unmap(struct sf_buf *sf) { return (0); } static void sf_buf_invalidate(struct sf_buf *sf) { vm_page_t m = sf->m; /* * Use pmap_qenter to update the pte for * existing mapping, in particular, the PAT * settings are recalculated. */ pmap_qenter(sf->kva, &m, 1); pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE, FALSE); } /* * Invalidate the cache lines that may belong to the page, if * (possibly old) mapping of the page by sf buffer exists. Returns * TRUE when mapping was found and cache invalidated. */ boolean_t sf_buf_invalidate_cache(vm_page_t m) { return (sf_buf_process_page(m, sf_buf_invalidate)); } /* * Software interrupt handler for queued VM system processing. */ void swi_vm(void *dummy) { if (busdma_swi_pending != 0) busdma_swi(); } /* * Tell whether this address is in some physical memory region. * Currently used by the kernel coredump code in order to avoid * dumping the ``ISA memory hole'' which could cause indefinite hangs, * or other unpredictable behaviour. */ int is_physical_memory(vm_paddr_t addr) { #ifdef DEV_ISA /* The ISA ``memory hole''. */ if (addr >= 0xa0000 && addr < 0x100000) return 0; #endif /* * stuff other tests for known memory-mapped devices (PCI?) 
* here */ return 1; } Index: projects/ipsec/sys/i386/include/atomic.h =================================================================== --- projects/ipsec/sys/i386/include/atomic.h (revision 313186) +++ projects/ipsec/sys/i386/include/atomic.h (revision 313187) @@ -1,846 +1,817 @@ /*- * Copyright (c) 1998 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_ATOMIC_H_ #define _MACHINE_ATOMIC_H_ #ifndef _SYS_CDEFS_H_ #error this file needs sys/cdefs.h as a prerequisite #endif #ifdef _KERNEL #include #include #endif #ifndef __OFFSETOF_MONITORBUF /* * __OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf). * * The open-coded number is used instead of the symbolic expression to * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers. * An assertion in i386/vm_machdep.c ensures that the value is correct. */ #define __OFFSETOF_MONITORBUF 0x180 static __inline void __mbk(void) { __asm __volatile("lock; addl $0,%%fs:%0" : "+m" (*(u_int *)__OFFSETOF_MONITORBUF) : : "memory", "cc"); } static __inline void __mbu(void) { __asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc"); } #endif /* * Various simple operations on memory, each of which is atomic in the * presence of interrupts and multiple processors. 
* * atomic_set_char(P, V) (*(u_char *)(P) |= (V)) * atomic_clear_char(P, V) (*(u_char *)(P) &= ~(V)) * atomic_add_char(P, V) (*(u_char *)(P) += (V)) * atomic_subtract_char(P, V) (*(u_char *)(P) -= (V)) * * atomic_set_short(P, V) (*(u_short *)(P) |= (V)) * atomic_clear_short(P, V) (*(u_short *)(P) &= ~(V)) * atomic_add_short(P, V) (*(u_short *)(P) += (V)) * atomic_subtract_short(P, V) (*(u_short *)(P) -= (V)) * * atomic_set_int(P, V) (*(u_int *)(P) |= (V)) * atomic_clear_int(P, V) (*(u_int *)(P) &= ~(V)) * atomic_add_int(P, V) (*(u_int *)(P) += (V)) * atomic_subtract_int(P, V) (*(u_int *)(P) -= (V)) * atomic_swap_int(P, V) (return (*(u_int *)(P)); *(u_int *)(P) = (V);) * atomic_readandclear_int(P) (return (*(u_int *)(P)); *(u_int *)(P) = 0;) * * atomic_set_long(P, V) (*(u_long *)(P) |= (V)) * atomic_clear_long(P, V) (*(u_long *)(P) &= ~(V)) * atomic_add_long(P, V) (*(u_long *)(P) += (V)) * atomic_subtract_long(P, V) (*(u_long *)(P) -= (V)) * atomic_swap_long(P, V) (return (*(u_long *)(P)); *(u_long *)(P) = (V);) * atomic_readandclear_long(P) (return (*(u_long *)(P)); *(u_long *)(P) = 0;) */ /* * The above functions are expanded inline in the statically-linked * kernel. Lock prefixes are generated if an SMP kernel is being * built. * * Kernel modules call real functions which are built into the kernel. * This allows kernel modules to be portable between UP and SMP systems. */ #if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM) #define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v); \ void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v) int atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src); int atomic_fcmpset_int(volatile u_int *dst, u_int *expect, u_int src); u_int atomic_fetchadd_int(volatile u_int *p, u_int v); int atomic_testandset_int(volatile u_int *p, u_int v); int atomic_testandclear_int(volatile u_int *p, u_int v); void atomic_thread_fence_acq(void); void atomic_thread_fence_acq_rel(void); void atomic_thread_fence_rel(void); void atomic_thread_fence_seq_cst(void); #define ATOMIC_LOAD(TYPE) \ u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p) #define ATOMIC_STORE(TYPE) \ void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) int atomic_cmpset_64(volatile uint64_t *, uint64_t, uint64_t); uint64_t atomic_load_acq_64(volatile uint64_t *); void atomic_store_rel_64(volatile uint64_t *, uint64_t); uint64_t atomic_swap_64(volatile uint64_t *, uint64_t); #else /* !KLD_MODULE && __GNUCLIKE_ASM */ /* * For userland, always use lock prefixes so that the binaries will run * on both SMP and !SMP systems. */ #if defined(SMP) || !defined(_KERNEL) #define MPLOCKED "lock ; " #else #define MPLOCKED #endif /* * The assembly is volatilized to avoid code chunk removal by the compiler. * GCC aggressively reorders operations and memory clobbering is necessary * in order to avoid that for memory barriers. 
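To make the ATOMIC_ASM macro defined next concrete: on an SMP kernel, where MPLOCKED is "lock ; ", ATOMIC_ASM(add, int, "addl %1,%0", "ir", v) expands to roughly the sketch below (the _barr_ twin differs only in adding a "memory" clobber). This relies on u_int and __inline from the surrounding headers.

static __inline void
atomic_add_int(volatile u_int *p, u_int v)
{
	__asm __volatile("lock ; addl %1,%0"
	    : "+m" (*p)		/* read-modify-write target */
	    : "ir" (v)		/* immediate or register addend */
	    : "cc");
}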
*/ #define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ static __inline void \ atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ { \ __asm __volatile(MPLOCKED OP \ : "+m" (*p) \ : CONS (V) \ : "cc"); \ } \ \ static __inline void \ atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ { \ __asm __volatile(MPLOCKED OP \ : "+m" (*p) \ : CONS (V) \ : "memory", "cc"); \ } \ struct __hack /* * Atomic compare and set, used by the mutex functions * * if (*dst == expect) *dst = src (all 32 bit words) * * Returns 0 on failure, non-zero on success */ -#ifdef CPU_DISABLE_CMPXCHG - static __inline int atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src) { u_char res; __asm __volatile( - " pushfl ; " - " cli ; " - " cmpl %3,%1 ; " - " jne 1f ; " - " movl %2,%1 ; " - "1: " - " sete %0 ; " - " popfl ; " - "# atomic_cmpset_int" - : "=q" (res), /* 0 */ - "+m" (*dst) /* 1 */ - : "r" (src), /* 2 */ - "r" (expect) /* 3 */ - : "memory"); - return (res); -} - -#else /* !CPU_DISABLE_CMPXCHG */ - -static __inline int -atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src) -{ - u_char res; - - __asm __volatile( " " MPLOCKED " " " cmpxchgl %3,%1 ; " " sete %0 ; " "# atomic_cmpset_int" : "=q" (res), /* 0 */ "+m" (*dst), /* 1 */ "+a" (expect) /* 2 */ : "r" (src) /* 3 */ : "memory", "cc"); return (res); } static __inline int atomic_fcmpset_int(volatile u_int *dst, u_int *expect, u_int src) { u_char res; __asm __volatile( " " MPLOCKED " " " cmpxchgl %3,%1 ; " " sete %0 ; " "# atomic_cmpset_int" : "=q" (res), /* 0 */ "+m" (*dst), /* 1 */ "+a" (*expect) /* 2 */ : "r" (src) /* 3 */ : "memory", "cc"); return (res); } - -#endif /* CPU_DISABLE_CMPXCHG */ /* * Atomically add the value of v to the integer pointed to by p and return * the previous value of *p. */ static __inline u_int atomic_fetchadd_int(volatile u_int *p, u_int v) { __asm __volatile( " " MPLOCKED " " " xaddl %0,%1 ; " "# atomic_fetchadd_int" : "+r" (v), /* 0 */ "+m" (*p) /* 1 */ : : "cc"); return (v); } static __inline int atomic_testandset_int(volatile u_int *p, u_int v) { u_char res; __asm __volatile( " " MPLOCKED " " " btsl %2,%1 ; " " setc %0 ; " "# atomic_testandset_int" : "=q" (res), /* 0 */ "+m" (*p) /* 1 */ : "Ir" (v & 0x1f) /* 2 */ : "cc"); return (res); } static __inline int atomic_testandclear_int(volatile u_int *p, u_int v) { u_char res; __asm __volatile( " " MPLOCKED " " " btrl %2,%1 ; " " setc %0 ; " "# atomic_testandclear_int" : "=q" (res), /* 0 */ "+m" (*p) /* 1 */ : "Ir" (v & 0x1f) /* 2 */ : "cc"); return (res); } /* * We assume that a = b will do atomic loads and stores. Due to the * IA32 memory model, a simple store guarantees release semantics. * * However, a load may pass a store if they are performed on distinct * addresses, so we need Store/Load barrier for sequentially * consistent fences in SMP kernels. We use "lock addl $0,mem" for a * Store/Load barrier, as recommended by the AMD Software Optimization * Guide, and not mfence. In the kernel, we use a private per-cpu * cache line for "mem", to avoid introducing false data * dependencies. In user space, we use the word at the top of the * stack. * * For UP kernels, however, the memory of the single processor is * always consistent, so we only need to stop the compiler from * reordering accesses in a way that violates the semantics of acquire * and release. 
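The userland flavor of the Store/Load barrier described above ("lock addl $0" on the word at the top of the stack) can be exercised standalone. A minimal i386 sketch mirroring __mbu(); the kernel variant differs only in targeting a private per-CPU cache line instead of the stack top:

#include <stdio.h>

static inline void
fence_seq_cst(void)
{
	/* Locked no-op read-modify-write on the stack top: a full
	 * Store/Load barrier, as the comment above explains. */
	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc");
}

int
main(void)
{
	static volatile int data, flag;

	data = 42;
	fence_seq_cst();	/* the store to data becomes visible first */
	flag = 1;
	printf("%d %d\n", data, flag);
	return (0);
}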
*/ #if defined(_KERNEL) #if defined(SMP) #define __storeload_barrier() __mbk() #else /* _KERNEL && UP */ #define __storeload_barrier() __compiler_membar() #endif /* SMP */ #else /* !_KERNEL */ #define __storeload_barrier() __mbu() #endif /* _KERNEL*/ #define ATOMIC_LOAD(TYPE) \ static __inline u_##TYPE \ atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ { \ u_##TYPE res; \ \ res = *p; \ __compiler_membar(); \ return (res); \ } \ struct __hack #define ATOMIC_STORE(TYPE) \ static __inline void \ atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) \ { \ \ __compiler_membar(); \ *p = v; \ } \ struct __hack static __inline void atomic_thread_fence_acq(void) { __compiler_membar(); } static __inline void atomic_thread_fence_rel(void) { __compiler_membar(); } static __inline void atomic_thread_fence_acq_rel(void) { __compiler_membar(); } static __inline void atomic_thread_fence_seq_cst(void) { __storeload_barrier(); } #ifdef _KERNEL #ifdef WANT_FUNCTIONS int atomic_cmpset_64_i386(volatile uint64_t *, uint64_t, uint64_t); int atomic_cmpset_64_i586(volatile uint64_t *, uint64_t, uint64_t); uint64_t atomic_load_acq_64_i386(volatile uint64_t *); uint64_t atomic_load_acq_64_i586(volatile uint64_t *); void atomic_store_rel_64_i386(volatile uint64_t *, uint64_t); void atomic_store_rel_64_i586(volatile uint64_t *, uint64_t); uint64_t atomic_swap_64_i386(volatile uint64_t *, uint64_t); uint64_t atomic_swap_64_i586(volatile uint64_t *, uint64_t); #endif /* I486 does not support SMP or CMPXCHG8B. */ static __inline int atomic_cmpset_64_i386(volatile uint64_t *dst, uint64_t expect, uint64_t src) { volatile uint32_t *p; u_char res; p = (volatile uint32_t *)dst; __asm __volatile( " pushfl ; " " cli ; " " xorl %1,%%eax ; " " xorl %2,%%edx ; " " orl %%edx,%%eax ; " " jne 1f ; " " movl %4,%1 ; " " movl %5,%2 ; " "1: " " sete %3 ; " " popfl" : "+A" (expect), /* 0 */ "+m" (*p), /* 1 */ "+m" (*(p + 1)), /* 2 */ "=q" (res) /* 3 */ : "r" ((uint32_t)src), /* 4 */ "r" ((uint32_t)(src >> 32)) /* 5 */ : "memory", "cc"); return (res); } static __inline uint64_t atomic_load_acq_64_i386(volatile uint64_t *p) { volatile uint32_t *q; uint64_t res; q = (volatile uint32_t *)p; __asm __volatile( " pushfl ; " " cli ; " " movl %1,%%eax ; " " movl %2,%%edx ; " " popfl" : "=&A" (res) /* 0 */ : "m" (*q), /* 1 */ "m" (*(q + 1)) /* 2 */ : "memory"); return (res); } static __inline void atomic_store_rel_64_i386(volatile uint64_t *p, uint64_t v) { volatile uint32_t *q; q = (volatile uint32_t *)p; __asm __volatile( " pushfl ; " " cli ; " " movl %%eax,%0 ; " " movl %%edx,%1 ; " " popfl" : "=m" (*q), /* 0 */ "=m" (*(q + 1)) /* 1 */ : "A" (v) /* 2 */ : "memory"); } static __inline uint64_t atomic_swap_64_i386(volatile uint64_t *p, uint64_t v) { volatile uint32_t *q; uint64_t res; q = (volatile uint32_t *)p; __asm __volatile( " pushfl ; " " cli ; " " movl %1,%%eax ; " " movl %2,%%edx ; " " movl %4,%2 ; " " movl %3,%1 ; " " popfl" : "=&A" (res), /* 0 */ "+m" (*q), /* 1 */ "+m" (*(q + 1)) /* 2 */ : "r" ((uint32_t)v), /* 3 */ "r" ((uint32_t)(v >> 32))); /* 4 */ return (res); } static __inline int atomic_cmpset_64_i586(volatile uint64_t *dst, uint64_t expect, uint64_t src) { u_char res; __asm __volatile( " " MPLOCKED " " " cmpxchg8b %1 ; " " sete %0" : "=q" (res), /* 0 */ "+m" (*dst), /* 1 */ "+A" (expect) /* 2 */ : "b" ((uint32_t)src), /* 3 */ "c" ((uint32_t)(src >> 32)) /* 4 */ : "memory", "cc"); return (res); } static __inline uint64_t atomic_load_acq_64_i586(volatile uint64_t *p) { uint64_t res; __asm __volatile( " movl %%ebx,%%eax ; " " 
movl %%ecx,%%edx ; " " " MPLOCKED " " " cmpxchg8b %1" : "=&A" (res), /* 0 */ "+m" (*p) /* 1 */ : : "memory", "cc"); return (res); } static __inline void atomic_store_rel_64_i586(volatile uint64_t *p, uint64_t v) { __asm __volatile( " movl %%eax,%%ebx ; " " movl %%edx,%%ecx ; " "1: " " " MPLOCKED " " " cmpxchg8b %0 ; " " jne 1b" : "+m" (*p), /* 0 */ "+A" (v) /* 1 */ : : "ebx", "ecx", "memory", "cc"); } static __inline uint64_t atomic_swap_64_i586(volatile uint64_t *p, uint64_t v) { __asm __volatile( " movl %%eax,%%ebx ; " " movl %%edx,%%ecx ; " "1: " " " MPLOCKED " " " cmpxchg8b %0 ; " " jne 1b" : "+m" (*p), /* 0 */ "+A" (v) /* 1 */ : : "ebx", "ecx", "memory", "cc"); return (v); } static __inline int atomic_cmpset_64(volatile uint64_t *dst, uint64_t expect, uint64_t src) { if ((cpu_feature & CPUID_CX8) == 0) return (atomic_cmpset_64_i386(dst, expect, src)); else return (atomic_cmpset_64_i586(dst, expect, src)); } static __inline uint64_t atomic_load_acq_64(volatile uint64_t *p) { if ((cpu_feature & CPUID_CX8) == 0) return (atomic_load_acq_64_i386(p)); else return (atomic_load_acq_64_i586(p)); } static __inline void atomic_store_rel_64(volatile uint64_t *p, uint64_t v) { if ((cpu_feature & CPUID_CX8) == 0) atomic_store_rel_64_i386(p, v); else atomic_store_rel_64_i586(p, v); } static __inline uint64_t atomic_swap_64(volatile uint64_t *p, uint64_t v) { if ((cpu_feature & CPUID_CX8) == 0) return (atomic_swap_64_i386(p, v)); else return (atomic_swap_64_i586(p, v)); } #endif /* _KERNEL */ #endif /* KLD_MODULE || !__GNUCLIKE_ASM */ ATOMIC_ASM(set, char, "orb %b1,%0", "iq", v); ATOMIC_ASM(clear, char, "andb %b1,%0", "iq", ~v); ATOMIC_ASM(add, char, "addb %b1,%0", "iq", v); ATOMIC_ASM(subtract, char, "subb %b1,%0", "iq", v); ATOMIC_ASM(set, short, "orw %w1,%0", "ir", v); ATOMIC_ASM(clear, short, "andw %w1,%0", "ir", ~v); ATOMIC_ASM(add, short, "addw %w1,%0", "ir", v); ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir", v); ATOMIC_ASM(set, int, "orl %1,%0", "ir", v); ATOMIC_ASM(clear, int, "andl %1,%0", "ir", ~v); ATOMIC_ASM(add, int, "addl %1,%0", "ir", v); ATOMIC_ASM(subtract, int, "subl %1,%0", "ir", v); ATOMIC_ASM(set, long, "orl %1,%0", "ir", v); ATOMIC_ASM(clear, long, "andl %1,%0", "ir", ~v); ATOMIC_ASM(add, long, "addl %1,%0", "ir", v); ATOMIC_ASM(subtract, long, "subl %1,%0", "ir", v); #define ATOMIC_LOADSTORE(TYPE) \ ATOMIC_LOAD(TYPE); \ ATOMIC_STORE(TYPE) ATOMIC_LOADSTORE(char); ATOMIC_LOADSTORE(short); ATOMIC_LOADSTORE(int); ATOMIC_LOADSTORE(long); #undef ATOMIC_ASM #undef ATOMIC_LOAD #undef ATOMIC_STORE #undef ATOMIC_LOADSTORE #ifndef WANT_FUNCTIONS static __inline int atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src) { return (atomic_cmpset_int((volatile u_int *)dst, (u_int)expect, (u_int)src)); } static __inline u_long atomic_fetchadd_long(volatile u_long *p, u_long v) { return (atomic_fetchadd_int((volatile u_int *)p, (u_int)v)); } static __inline int atomic_testandset_long(volatile u_long *p, u_int v) { return (atomic_testandset_int((volatile u_int *)p, v)); } static __inline int atomic_testandclear_long(volatile u_long *p, u_int v) { return (atomic_testandclear_int((volatile u_int *)p, v)); } /* Read the current value and store a new value in the destination. 
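atomic_swap_int, defined next, relies on the fact that xchg with a memory operand is implicitly locked on x86, which is also the classic way to build a test-and-set spinlock. A hypothetical userland equivalent, with a compiler builtin standing in for the kernel primitive:

#include <stdio.h>

static volatile unsigned int lockword;

static void
ts_lock(void)
{
	/* Spin until the swap observes 0, i.e. we flipped 0 -> 1. */
	while (__atomic_exchange_n(&lockword, 1, __ATOMIC_ACQUIRE) != 0)
		;
}

static void
ts_unlock(void)
{
	__atomic_store_n(&lockword, 0, __ATOMIC_RELEASE);
}

int
main(void)
{
	ts_lock();
	printf("in critical section\n");
	ts_unlock();
	return (0);
}

The acquire on the swap and the release on the store pair up the same way the _acq/_rel aliases below do.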
*/ #ifdef __GNUCLIKE_ASM static __inline u_int atomic_swap_int(volatile u_int *p, u_int v) { __asm __volatile( " xchgl %1,%0 ; " "# atomic_swap_int" : "+r" (v), /* 0 */ "+m" (*p)); /* 1 */ return (v); } static __inline u_long atomic_swap_long(volatile u_long *p, u_long v) { return (atomic_swap_int((volatile u_int *)p, (u_int)v)); } #else /* !__GNUCLIKE_ASM */ u_int atomic_swap_int(volatile u_int *p, u_int v); u_long atomic_swap_long(volatile u_long *p, u_long v); #endif /* __GNUCLIKE_ASM */ #define atomic_set_acq_char atomic_set_barr_char #define atomic_set_rel_char atomic_set_barr_char #define atomic_clear_acq_char atomic_clear_barr_char #define atomic_clear_rel_char atomic_clear_barr_char #define atomic_add_acq_char atomic_add_barr_char #define atomic_add_rel_char atomic_add_barr_char #define atomic_subtract_acq_char atomic_subtract_barr_char #define atomic_subtract_rel_char atomic_subtract_barr_char #define atomic_set_acq_short atomic_set_barr_short #define atomic_set_rel_short atomic_set_barr_short #define atomic_clear_acq_short atomic_clear_barr_short #define atomic_clear_rel_short atomic_clear_barr_short #define atomic_add_acq_short atomic_add_barr_short #define atomic_add_rel_short atomic_add_barr_short #define atomic_subtract_acq_short atomic_subtract_barr_short #define atomic_subtract_rel_short atomic_subtract_barr_short #define atomic_set_acq_int atomic_set_barr_int #define atomic_set_rel_int atomic_set_barr_int #define atomic_clear_acq_int atomic_clear_barr_int #define atomic_clear_rel_int atomic_clear_barr_int #define atomic_add_acq_int atomic_add_barr_int #define atomic_add_rel_int atomic_add_barr_int #define atomic_subtract_acq_int atomic_subtract_barr_int #define atomic_subtract_rel_int atomic_subtract_barr_int #define atomic_cmpset_acq_int atomic_cmpset_int #define atomic_cmpset_rel_int atomic_cmpset_int #define atomic_fcmpset_acq_int atomic_fcmpset_int #define atomic_fcmpset_rel_int atomic_fcmpset_int #define atomic_set_acq_long atomic_set_barr_long #define atomic_set_rel_long atomic_set_barr_long #define atomic_clear_acq_long atomic_clear_barr_long #define atomic_clear_rel_long atomic_clear_barr_long #define atomic_add_acq_long atomic_add_barr_long #define atomic_add_rel_long atomic_add_barr_long #define atomic_subtract_acq_long atomic_subtract_barr_long #define atomic_subtract_rel_long atomic_subtract_barr_long #define atomic_cmpset_acq_long atomic_cmpset_long #define atomic_cmpset_rel_long atomic_cmpset_long #define atomic_fcmpset_acq_long atomic_fcmpset_long #define atomic_fcmpset_rel_long atomic_fcmpset_long #define atomic_readandclear_int(p) atomic_swap_int(p, 0) #define atomic_readandclear_long(p) atomic_swap_long(p, 0) /* Operations on 8-bit bytes. */ #define atomic_set_8 atomic_set_char #define atomic_set_acq_8 atomic_set_acq_char #define atomic_set_rel_8 atomic_set_rel_char #define atomic_clear_8 atomic_clear_char #define atomic_clear_acq_8 atomic_clear_acq_char #define atomic_clear_rel_8 atomic_clear_rel_char #define atomic_add_8 atomic_add_char #define atomic_add_acq_8 atomic_add_acq_char #define atomic_add_rel_8 atomic_add_rel_char #define atomic_subtract_8 atomic_subtract_char #define atomic_subtract_acq_8 atomic_subtract_acq_char #define atomic_subtract_rel_8 atomic_subtract_rel_char #define atomic_load_acq_8 atomic_load_acq_char #define atomic_store_rel_8 atomic_store_rel_char /* Operations on 16-bit words. 
*/ #define atomic_set_16 atomic_set_short #define atomic_set_acq_16 atomic_set_acq_short #define atomic_set_rel_16 atomic_set_rel_short #define atomic_clear_16 atomic_clear_short #define atomic_clear_acq_16 atomic_clear_acq_short #define atomic_clear_rel_16 atomic_clear_rel_short #define atomic_add_16 atomic_add_short #define atomic_add_acq_16 atomic_add_acq_short #define atomic_add_rel_16 atomic_add_rel_short #define atomic_subtract_16 atomic_subtract_short #define atomic_subtract_acq_16 atomic_subtract_acq_short #define atomic_subtract_rel_16 atomic_subtract_rel_short #define atomic_load_acq_16 atomic_load_acq_short #define atomic_store_rel_16 atomic_store_rel_short /* Operations on 32-bit double words. */ #define atomic_set_32 atomic_set_int #define atomic_set_acq_32 atomic_set_acq_int #define atomic_set_rel_32 atomic_set_rel_int #define atomic_clear_32 atomic_clear_int #define atomic_clear_acq_32 atomic_clear_acq_int #define atomic_clear_rel_32 atomic_clear_rel_int #define atomic_add_32 atomic_add_int #define atomic_add_acq_32 atomic_add_acq_int #define atomic_add_rel_32 atomic_add_rel_int #define atomic_subtract_32 atomic_subtract_int #define atomic_subtract_acq_32 atomic_subtract_acq_int #define atomic_subtract_rel_32 atomic_subtract_rel_int #define atomic_load_acq_32 atomic_load_acq_int #define atomic_store_rel_32 atomic_store_rel_int #define atomic_cmpset_32 atomic_cmpset_int #define atomic_cmpset_acq_32 atomic_cmpset_acq_int #define atomic_cmpset_rel_32 atomic_cmpset_rel_int #define atomic_fcmpset_32 atomic_fcmpset_int #define atomic_fcmpset_acq_32 atomic_fcmpset_acq_int #define atomic_fcmpset_rel_32 atomic_fcmpset_rel_int #define atomic_swap_32 atomic_swap_int #define atomic_readandclear_32 atomic_readandclear_int #define atomic_fetchadd_32 atomic_fetchadd_int #define atomic_testandset_32 atomic_testandset_int #define atomic_testandclear_32 atomic_testandclear_int /* Operations on pointers. 
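The pointer wrappers that follow are what lock-free structures are typically built on; a cmpset retry loop over a list head is the canonical shape. A hypothetical userland sketch of such a push, using a GCC/Clang builtin in place of atomic_cmpset_ptr():

#include <stdio.h>

struct node {
	struct node	*next;
	int		 val;
};

static struct node *head;

static void
push(struct node *n)
{
	struct node *h;

	do {
		h = head;	/* snapshot the current head */
		n->next = h;
	} while (!__sync_bool_compare_and_swap(&head, h, n));
}

int
main(void)
{
	static struct node a = { .next = NULL, .val = 1 };

	push(&a);
	printf("head->val = %d\n", head->val);
	return (0);
}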
*/ #define atomic_set_ptr(p, v) \ atomic_set_int((volatile u_int *)(p), (u_int)(v)) #define atomic_set_acq_ptr(p, v) \ atomic_set_acq_int((volatile u_int *)(p), (u_int)(v)) #define atomic_set_rel_ptr(p, v) \ atomic_set_rel_int((volatile u_int *)(p), (u_int)(v)) #define atomic_clear_ptr(p, v) \ atomic_clear_int((volatile u_int *)(p), (u_int)(v)) #define atomic_clear_acq_ptr(p, v) \ atomic_clear_acq_int((volatile u_int *)(p), (u_int)(v)) #define atomic_clear_rel_ptr(p, v) \ atomic_clear_rel_int((volatile u_int *)(p), (u_int)(v)) #define atomic_add_ptr(p, v) \ atomic_add_int((volatile u_int *)(p), (u_int)(v)) #define atomic_add_acq_ptr(p, v) \ atomic_add_acq_int((volatile u_int *)(p), (u_int)(v)) #define atomic_add_rel_ptr(p, v) \ atomic_add_rel_int((volatile u_int *)(p), (u_int)(v)) #define atomic_subtract_ptr(p, v) \ atomic_subtract_int((volatile u_int *)(p), (u_int)(v)) #define atomic_subtract_acq_ptr(p, v) \ atomic_subtract_acq_int((volatile u_int *)(p), (u_int)(v)) #define atomic_subtract_rel_ptr(p, v) \ atomic_subtract_rel_int((volatile u_int *)(p), (u_int)(v)) #define atomic_load_acq_ptr(p) \ atomic_load_acq_int((volatile u_int *)(p)) #define atomic_store_rel_ptr(p, v) \ atomic_store_rel_int((volatile u_int *)(p), (v)) #define atomic_cmpset_ptr(dst, old, new) \ atomic_cmpset_int((volatile u_int *)(dst), (u_int)(old), (u_int)(new)) #define atomic_cmpset_acq_ptr(dst, old, new) \ atomic_cmpset_acq_int((volatile u_int *)(dst), (u_int)(old), \ (u_int)(new)) #define atomic_cmpset_rel_ptr(dst, old, new) \ atomic_cmpset_rel_int((volatile u_int *)(dst), (u_int)(old), \ (u_int)(new)) #define atomic_fcmpset_ptr(dst, old, new) \ atomic_fcmpset_int((volatile u_int *)(dst), (u_int *)(old), (u_int)(new)) #define atomic_fcmpset_acq_ptr(dst, old, new) \ atomic_fcmpset_acq_int((volatile u_int *)(dst), (u_int *)(old), \ (u_int)(new)) #define atomic_fcmpset_rel_ptr(dst, old, new) \ atomic_fcmpset_rel_int((volatile u_int *)(dst), (u_int *)(old), \ (u_int)(new)) #define atomic_swap_ptr(p, v) \ atomic_swap_int((volatile u_int *)(p), (u_int)(v)) #define atomic_readandclear_ptr(p) \ atomic_readandclear_int((volatile u_int *)(p)) #endif /* !WANT_FUNCTIONS */ #if defined(_KERNEL) #define mb() __mbk() #define wmb() __mbk() #define rmb() __mbk() #else #define mb() __mbu() #define wmb() __mbu() #define rmb() __mbu() #endif #endif /* !_MACHINE_ATOMIC_H_ */ Index: projects/ipsec/sys/i386/isa/npx.c =================================================================== --- projects/ipsec/sys/i386/isa/npx.c (revision 313186) +++ projects/ipsec/sys/i386/isa/npx.c (revision 313187) @@ -1,1501 +1,1423 @@ /*- * Copyright (c) 1990 William Jolitz. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include "opt_isa.h" #include "opt_npx.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NPX_DEBUG #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_ISA #include #endif -#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) -#define CPU_ENABLE_SSE -#endif - /* * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. */ #if defined(__GNUCLIKE_ASM) && !defined(lint) #define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw)) #define fnclex() __asm __volatile("fnclex") #define fninit() __asm __volatile("fninit") #define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr))) #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr))) #define fp_divide_by_0() __asm __volatile( \ "fldz; fld1; fdiv %st,%st(1); fnop") #define frstor(addr) __asm __volatile("frstor %0" : : "m" (*(addr))) -#ifdef CPU_ENABLE_SSE #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr)) #define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr))) static __inline void xrstor(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi)); } static __inline void xsave(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } static __inline void xsaveopt(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsaveopt %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } -#endif #else /* !(__GNUCLIKE_ASM && !lint) */ void fldcw(u_short cw); void fnclex(void); void fninit(void); void fnsave(caddr_t addr); void fnstcw(caddr_t addr); void fnstsw(caddr_t addr); void fp_divide_by_0(void); void frstor(caddr_t addr); -#ifdef CPU_ENABLE_SSE void fxsave(caddr_t addr); void fxrstor(caddr_t addr); void ldmxcsr(u_int csr); void stmxcsr(u_int *csr); void xrstor(char *addr, uint64_t mask); void xsave(char *addr, uint64_t mask); void xsaveopt(char *addr, uint64_t mask); -#endif #endif /* __GNUCLIKE_ASM && !lint */ #define start_emulating() load_cr0(rcr0() | CR0_TS) #define stop_emulating() clts() -#ifdef CPU_ENABLE_SSE #define GET_FPU_CW(thread) \ (cpu_fxsr ? 
\ (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \ (thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw) #define GET_FPU_SW(thread) \ (cpu_fxsr ? \ (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \ (thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw) #define SET_FPU_CW(savefpu, value) do { \ if (cpu_fxsr) \ (savefpu)->sv_xmm.sv_env.en_cw = (value); \ else \ (savefpu)->sv_87.sv_env.en_cw = (value); \ } while (0) -#else /* CPU_ENABLE_SSE */ -#define GET_FPU_CW(thread) \ - (thread->td_pcb->pcb_save->sv_87.sv_env.en_cw) -#define GET_FPU_SW(thread) \ - (thread->td_pcb->pcb_save->sv_87.sv_env.en_sw) -#define SET_FPU_CW(savefpu, value) \ - (savefpu)->sv_87.sv_env.en_cw = (value) -#endif /* CPU_ENABLE_SSE */ -#ifdef CPU_ENABLE_SSE CTASSERT(sizeof(union savefpu) == 512); CTASSERT(sizeof(struct xstate_hdr) == 64); CTASSERT(sizeof(struct savefpu_ymm) == 832); /* * This requirement is to make it easier for asm code to calculate * offset of the fpu save area from the pcb address. FPU save area * must be 64-byte aligned. */ CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0); /* * Ensure the copy of XCR0 saved in a core is contained in the padding * area. */ CTASSERT(X86_XSTATE_XCR0_OFFSET >= offsetof(struct savexmm, sv_pad) && X86_XSTATE_XCR0_OFFSET + sizeof(uint64_t) <= sizeof(struct savexmm)); static void fpu_clean_state(void); -#endif static void fpusave(union savefpu *); static void fpurstor(union savefpu *); int hw_float; SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &hw_float, 0, "Floating point instructions executed in hardware"); -#ifdef CPU_ENABLE_SSE int use_xsave; uint64_t xsave_mask; -#endif static uma_zone_t fpu_save_area_zone; static union savefpu *npx_initialstate; -#ifdef CPU_ENABLE_SSE struct xsave_area_elm_descr { u_int offset; u_int size; } *xsave_area_desc; static int use_xsaveopt; -#endif static volatile u_int npx_traps_while_probing; alias_for_inthand_t probetrap; __asm(" \n\ .text \n\ .p2align 2,0x90 \n\ .type " __XSTRING(CNAME(probetrap)) ",@function \n\ " __XSTRING(CNAME(probetrap)) ": \n\ ss \n\ incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\ fnclex \n\ iret \n\ "); /* * Determine if an FPU is present and how to use it. */ static int npx_probe(void) { struct gate_descriptor save_idt_npxtrap; u_short control, status; /* * Modern CPUs all have an FPU that uses the INT16 interface * and provide a simple way to verify that, so handle the * common case right away. */ if (cpu_feature & CPUID_FPU) { hw_float = 1; return (1); } save_idt_npxtrap = idt[IDT_MF]; setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* * Don't trap while we're probing. */ stop_emulating(); /* * Finish resetting the coprocessor, if any. If there is an error * pending, then we may get a bogus IRQ13, but npx_intr() will handle * it OK. Bogus halts have never been observed, but we enabled * IRQ13 and cleared the BUSY# latch early to handle them anyway. */ fninit(); /* * Don't use fwait here because it might hang. * Don't use fnop here because it usually hangs if there is no FPU. */ DELAY(1000); /* wait for any IRQ13 */ #ifdef DIAGNOSTIC if (npx_traps_while_probing != 0) printf("fninit caused %u bogus npx trap(s)\n", npx_traps_while_probing); #endif /* * Check for a status of mostly zero. */ status = 0x5a5a; fnstsw(&status); if ((status & 0xb8ff) == 0) { /* * Good, now check for a proper control word. */ control = 0x5a5a; fnstcw(&control); if ((control & 0x1f3f) == 0x033f) { /* * We have an npx, now divide by 0 to see if exception * 16 works. 
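* Clearing bit 2 (ZM, the zero-divide mask) in the control word * unmasks the divide-by-zero exception, so the fp_divide_by_0() * below should vector through probetrap and bump * npx_traps_while_probing if exception 16 is wired up.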
*/ control &= ~(1 << 2); /* enable divide by 0 trap */ fldcw(control); npx_traps_while_probing = 0; fp_divide_by_0(); if (npx_traps_while_probing != 0) { /* * Good, exception 16 works. */ hw_float = 1; goto cleanup; } printf( "FPU does not use exception 16 for error reporting\n"); goto cleanup; } } /* * Probe failed. Floating point simply won't work. * Notify user and disable FPU/MMX/SSE instruction execution. */ printf("WARNING: no FPU!\n"); __asm __volatile("smsw %%ax; orb %0,%%al; lmsw %%ax" : : "n" (CR0_EM | CR0_MP) : "ax"); cleanup: idt[IDT_MF] = save_idt_npxtrap; return (hw_float); } -#ifdef CPU_ENABLE_SSE /* * Enable XSAVE if supported and allowed by user. * Calculate the xsave_mask. */ static void npxinit_bsp1(void) { u_int cp[4]; uint64_t xsave_mask_user; if (cpu_fxsr && (cpu_feature2 & CPUID2_XSAVE) != 0) { use_xsave = 1; TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); } if (!use_xsave) return; cpuid_count(0xd, 0x0, cp); xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; if ((cp[0] & xsave_mask) != xsave_mask) panic("CPU0 does not support X87 or SSE: %x", cp[0]); xsave_mask = ((uint64_t)cp[3] << 32) | cp[0]; xsave_mask_user = xsave_mask; TUNABLE_QUAD_FETCH("hw.xsave_mask", &xsave_mask_user); xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; xsave_mask &= xsave_mask_user; if ((xsave_mask & XFEATURE_AVX512) != XFEATURE_AVX512) xsave_mask &= ~XFEATURE_AVX512; if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX) xsave_mask &= ~XFEATURE_MPX; cpuid_count(0xd, 0x1, cp); if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) use_xsaveopt = 1; } -#endif + /* * Calculate the fpu save area size. */ static void npxinit_bsp2(void) { -#ifdef CPU_ENABLE_SSE u_int cp[4]; if (use_xsave) { cpuid_count(0xd, 0x0, cp); cpu_max_ext_state_size = cp[1]; /* * Reload the cpu_feature2, since we enabled OSXSAVE. */ do_cpuid(1, cp); cpu_feature2 = cp[2]; } else -#endif cpu_max_ext_state_size = sizeof(union savefpu); } /* * Initialize floating point unit. */ void npxinit(bool bsp) { static union savefpu dummy; register_t saveintr; -#ifdef CPU_ENABLE_SSE u_int mxcsr; -#endif u_short control; if (bsp) { if (!npx_probe()) return; -#ifdef CPU_ENABLE_SSE npxinit_bsp1(); -#endif } -#ifdef CPU_ENABLE_SSE if (use_xsave) { load_cr4(rcr4() | CR4_XSAVE); load_xcr(XCR0, xsave_mask); } -#endif /* * XCR0 shall be set up before CPU can report the save area size. */ if (bsp) npxinit_bsp2(); /* * fninit has the same h/w bugs as fnsave. Use the detoxified * fnsave to throw away any junk in the fpu. fpusave() initializes * the fpu. * * It is too early for critical_enter() to work on AP. */ saveintr = intr_disable(); stop_emulating(); -#ifdef CPU_ENABLE_SSE if (cpu_fxsr) fninit(); else -#endif fnsave(&dummy); control = __INITIAL_NPXCW__; fldcw(control); -#ifdef CPU_ENABLE_SSE if (cpu_fxsr) { mxcsr = __INITIAL_MXCSR__; ldmxcsr(mxcsr); } -#endif start_emulating(); intr_restore(saveintr); } /* * On the boot CPU we generate a clean state that is used to * initialize the floating point unit when it is first used by a * process. 
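* The snapshot taken here, npx_initialstate, is what npxdna() and * fpu_save_area_reset() later copy into a thread's save area, so * every process starts from the same clean FPU image.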
*/ static void npxinitstate(void *arg __unused) { register_t saveintr; -#ifdef CPU_ENABLE_SSE int cp[4], i, max_ext_n; -#endif if (!hw_float) return; npx_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF, M_WAITOK | M_ZERO); saveintr = intr_disable(); stop_emulating(); fpusave(npx_initialstate); -#ifdef CPU_ENABLE_SSE if (cpu_fxsr) { if (npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask) cpu_mxcsr_mask = npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask; else cpu_mxcsr_mask = 0xFFBF; /* * The fninit instruction does not modify XMM * registers or x87 registers (MM/ST). The fpusave * call dumped the garbage contained in the registers * after reset to the initial state saved. Clear XMM * and x87 registers file image to make the startup * program state and signal handler XMM/x87 register * content predictable. */ bzero(npx_initialstate->sv_xmm.sv_fp, sizeof(npx_initialstate->sv_xmm.sv_fp)); bzero(npx_initialstate->sv_xmm.sv_xmm, sizeof(npx_initialstate->sv_xmm.sv_xmm)); } else -#endif bzero(npx_initialstate->sv_87.sv_ac, sizeof(npx_initialstate->sv_87.sv_ac)); -#ifdef CPU_ENABLE_SSE /* * Create a table describing the layout of the CPU Extended * Save Area. */ if (use_xsave) { if (xsave_mask >> 32 != 0) max_ext_n = fls(xsave_mask >> 32) + 32; else max_ext_n = fls(xsave_mask); xsave_area_desc = malloc(max_ext_n * sizeof(struct xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO); /* x87 state */ xsave_area_desc[0].offset = 0; xsave_area_desc[0].size = 160; /* XMM */ xsave_area_desc[1].offset = 160; xsave_area_desc[1].size = 288 - 160; for (i = 2; i < max_ext_n; i++) { cpuid_count(0xd, i, cp); xsave_area_desc[i].offset = cp[1]; xsave_area_desc[i].size = cp[0]; } } -#endif fpu_save_area_zone = uma_zcreate("FPU_save_area", cpu_max_ext_state_size, NULL, NULL, NULL, NULL, XSAVE_AREA_ALIGN - 1, 0); start_emulating(); intr_restore(saveintr); } SYSINIT(npxinitstate, SI_SUB_DRIVERS, SI_ORDER_ANY, npxinitstate, NULL); /* * Free coprocessor (if we have it). */ void npxexit(struct thread *td) { critical_enter(); if (curthread == PCPU_GET(fpcurthread)) { stop_emulating(); fpusave(curpcb->pcb_save); start_emulating(); PCPU_SET(fpcurthread, NULL); } critical_exit(); #ifdef NPX_DEBUG if (hw_float) { u_int masked_exceptions; masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f; /* * Log exceptions that would have trapped with the old * control word (overflow, divide by 0, and invalid operand). */ if (masked_exceptions & 0x0d) log(LOG_ERR, "pid %d (%s) exited with masked floating point exceptions 0x%02x\n", td->td_proc->p_pid, td->td_proc->p_comm, masked_exceptions); } #endif } int npxformat(void) { if (!hw_float) return (_MC_FPFMT_NODEV); -#ifdef CPU_ENABLE_SSE if (cpu_fxsr) return (_MC_FPFMT_XMM); -#endif return (_MC_FPFMT_387); } /* * The following mechanism is used to ensure that the FPE_... value * that is passed as a trapcode to the signal handler of the user * process does not have more than one bit set. * * Multiple bits may be set if the user process modifies the control * word while a status word bit is already set. While this is a sign * of bad coding, we have no choice but to narrow them down to one * bit, since we must not send a trapcode that is not exactly one of * the FPE_ macros. * * The mechanism has a static table with 128 entries. Each combination * of the 7 FPU status word exception bits directly translates to a * position in this table, where a single FPE_... value is stored. * This FPE_...
value stored there is considered the "most important" * of the exception bits and will be sent as the signal code. The * precedence of the bits is based upon Intel Document "Numerical * Applications", Chapter "Special Computational Situations". * * The macro to choose one of these values does these steps: 1) Throw * away status word bits that cannot be masked. 2) Throw away the bits * currently masked in the control word, assuming the user isn't * interested in them anymore. 3) Reinsert status word bit 7 (stack * fault) if it is set, which cannot be masked but must be preserved. * 4) Use the remaining bits to point into the trapcode table. * * The 6 maskable bits in order of their preference, as stated in the * above referenced Intel manual: * 1 Invalid operation (FP_X_INV) * 1a Stack underflow * 1b Stack overflow * 1c Operand of unsupported format * 1d SNaN operand. * 2 QNaN operand (not an exception, irrelevant here) * 3 Any other invalid-operation not mentioned above or zero divide * (FP_X_INV, FP_X_DZ) * 4 Denormal operand (FP_X_DNML) * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) * 6 Inexact result (FP_X_IMP) */ static char fpetable[128] = { 0, FPE_FLTINV, /* 1 - INV */ FPE_FLTUND, /* 2 - DNML */ FPE_FLTINV, /* 3 - INV | DNML */ FPE_FLTDIV, /* 4 - DZ */ FPE_FLTINV, /* 5 - INV | DZ */ FPE_FLTDIV, /* 6 - DNML | DZ */ FPE_FLTINV, /* 7 - INV | DNML | DZ */ FPE_FLTOVF, /* 8 - OFL */ FPE_FLTINV, /* 9 - INV | OFL */ FPE_FLTUND, /* A - DNML | OFL */ FPE_FLTINV, /* B - INV | DNML | OFL */ FPE_FLTDIV, /* C - DZ | OFL */ FPE_FLTINV, /* D - INV | DZ | OFL */ FPE_FLTDIV, /* E - DNML | DZ | OFL */ FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ FPE_FLTUND, /* 10 - UFL */ FPE_FLTINV, /* 11 - INV | UFL */ FPE_FLTUND, /* 12 - DNML | UFL */ FPE_FLTINV, /* 13 - INV | DNML | UFL */ FPE_FLTDIV, /* 14 - DZ | UFL */ FPE_FLTINV, /* 15 - INV | DZ | UFL */ FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ FPE_FLTOVF, /* 18 - OFL | UFL */ FPE_FLTINV, /* 19 - INV | OFL | UFL */ FPE_FLTUND, /* 1A - DNML | OFL | UFL */ FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ FPE_FLTRES, /* 20 - IMP */ FPE_FLTINV, /* 21 - INV | IMP */ FPE_FLTUND, /* 22 - DNML | IMP */ FPE_FLTINV, /* 23 - INV | DNML | IMP */ FPE_FLTDIV, /* 24 - DZ | IMP */ FPE_FLTINV, /* 25 - INV | DZ | IMP */ FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ FPE_FLTOVF, /* 28 - OFL | IMP */ FPE_FLTINV, /* 29 - INV | OFL | IMP */ FPE_FLTUND, /* 2A - DNML | OFL | IMP */ FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ FPE_FLTUND, /* 30 - UFL | IMP */ FPE_FLTINV, /* 31 - INV | UFL | IMP */ FPE_FLTUND, /* 32 - DNML | UFL | IMP */ FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ FPE_FLTDIV, /* 3E - DNML | DZ |
OFL | UFL | IMP */ FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ FPE_FLTSUB, /* 40 - STK */ FPE_FLTSUB, /* 41 - INV | STK */ FPE_FLTUND, /* 42 - DNML | STK */ FPE_FLTSUB, /* 43 - INV | DNML | STK */ FPE_FLTDIV, /* 44 - DZ | STK */ FPE_FLTSUB, /* 45 - INV | DZ | STK */ FPE_FLTDIV, /* 46 - DNML | DZ | STK */ FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ FPE_FLTOVF, /* 48 - OFL | STK */ FPE_FLTSUB, /* 49 - INV | OFL | STK */ FPE_FLTUND, /* 4A - DNML | OFL | STK */ FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ FPE_FLTDIV, /* 4C - DZ | OFL | STK */ FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ FPE_FLTUND, /* 50 - UFL | STK */ FPE_FLTSUB, /* 51 - INV | UFL | STK */ FPE_FLTUND, /* 52 - DNML | UFL | STK */ FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ FPE_FLTDIV, /* 54 - DZ | UFL | STK */ FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ FPE_FLTOVF, /* 58 - OFL | UFL | STK */ FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ FPE_FLTRES, /* 60 - IMP | STK */ FPE_FLTSUB, /* 61 - INV | IMP | STK */ FPE_FLTUND, /* 62 - DNML | IMP | STK */ FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ FPE_FLTDIV, /* 64 - DZ | IMP | STK */ FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ FPE_FLTOVF, /* 68 - OFL | IMP | STK */ FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ FPE_FLTUND, /* 70 - UFL | IMP | STK */ FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ }; /* * Read the FP status and control words, then generate si_code value * for SIGFPE. The error code chosen will be one of the * FPE_... macros. It will be sent as the second argument to old * BSD-style signal handlers and as "siginfo_t->si_code" (second * argument) to SA_SIGINFO signal handlers. * * Some time ago, we cleared the x87 exceptions with FNCLEX there. * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The * usermode code which understands the FPU hardware enough to enable * the exceptions, can also handle clearing the exception state in the * handler. 
The only consequence of not clearing the exception is the * rethrow of the SIGFPE on return from the signal handler and * reexecution of the corresponding instruction. * * For XMM traps, the exceptions were never cleared. */ int npxtrap_x87(void) { u_short control, status; if (!hw_float) { printf( "npxtrap_x87: fpcurthread = %p, curthread = %p, hw_float = %d\n", PCPU_GET(fpcurthread), curthread, hw_float); panic("npxtrap from nowhere"); } critical_enter(); /* * Interrupt handling (for another interrupt) may have pushed the * state to memory. Fetch the relevant parts of the state from * wherever they are. */ if (PCPU_GET(fpcurthread) != curthread) { control = GET_FPU_CW(curthread); status = GET_FPU_SW(curthread); } else { fnstcw(&control); fnstsw(&status); } critical_exit(); return (fpetable[status & ((~control & 0x3f) | 0x40)]); } -#ifdef CPU_ENABLE_SSE int npxtrap_sse(void) { u_int mxcsr; if (!hw_float) { printf( "npxtrap_sse: fpcurthread = %p, curthread = %p, hw_float = %d\n", PCPU_GET(fpcurthread), curthread, hw_float); panic("npxtrap from nowhere"); } critical_enter(); if (PCPU_GET(fpcurthread) != curthread) mxcsr = curthread->td_pcb->pcb_save->sv_xmm.sv_env.en_mxcsr; else stmxcsr(&mxcsr); critical_exit(); return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]); } -#endif /* * Implement device not available (DNA) exception * * It would be better to switch FP context here (if curthread != fpcurthread) * and not necessarily for every context switch, but it is too hard to * access foreign pcb's. */ static int err_count = 0; int npxdna(void) { if (!hw_float) return (0); critical_enter(); if (PCPU_GET(fpcurthread) == curthread) { printf("npxdna: fpcurthread == curthread %d times\n", ++err_count); stop_emulating(); critical_exit(); return (1); } if (PCPU_GET(fpcurthread) != NULL) { printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n", PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_proc->p_pid, curthread, curthread->td_proc->p_pid); panic("npxdna"); } stop_emulating(); /* * Record new context early in case frstor causes a trap. */ PCPU_SET(fpcurthread, curthread); -#ifdef CPU_ENABLE_SSE if (cpu_fxsr) fpu_clean_state(); -#endif if ((curpcb->pcb_flags & PCB_NPXINITDONE) == 0) { /* * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly * load an initial state. * * We prefer to restore the state from the actual save * area in PCB instead of directly loading from * npx_initialstate, to ignite the XSAVEOPT * tracking engine. */ bcopy(npx_initialstate, curpcb->pcb_save, cpu_max_ext_state_size); fpurstor(curpcb->pcb_save); if (curpcb->pcb_initial_npxcw != __INITIAL_NPXCW__) fldcw(curpcb->pcb_initial_npxcw); curpcb->pcb_flags |= PCB_NPXINITDONE; if (PCB_USER_FPU(curpcb)) curpcb->pcb_flags |= PCB_NPXUSERINITDONE; } else { fpurstor(curpcb->pcb_save); } critical_exit(); return (1); } /* * Wrapper for fpusave() called from context switch routines. * * npxsave() must be called with interrupts disabled, so that it clears * fpcurthread atomically with saving the state. We require callers to do the * disabling, since most callers need to disable interrupts anyway to call * npxsave() atomically with checking fpcurthread. */ void npxsave(addr) union savefpu *addr; { stop_emulating(); -#ifdef CPU_ENABLE_SSE if (use_xsaveopt) xsaveopt((char *)addr, xsave_mask); else -#endif fpusave(addr); start_emulating(); PCPU_SET(fpcurthread, NULL); } /* * Unconditionally save the current co-processor state across suspend and * resume. 
*/ void npxsuspend(union savefpu *addr) { register_t cr0; if (!hw_float) return; if (PCPU_GET(fpcurthread) == NULL) { bcopy(npx_initialstate, addr, cpu_max_ext_state_size); return; } cr0 = rcr0(); stop_emulating(); fpusave(addr); load_cr0(cr0); } void npxresume(union savefpu *addr) { register_t cr0; if (!hw_float) return; cr0 = rcr0(); npxinit(false); stop_emulating(); fpurstor(addr); load_cr0(cr0); } void npxdrop(void) { struct thread *td; /* * Discard pending exceptions in the !cpu_fxsr case so that unmasked * ones don't cause a panic on the next frstor. */ -#ifdef CPU_ENABLE_SSE if (!cpu_fxsr) -#endif fnclex(); td = PCPU_GET(fpcurthread); KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread")); CRITICAL_ASSERT(td); PCPU_SET(fpcurthread, NULL); td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; start_emulating(); } /* * Get the user state of the FPU into pcb->pcb_user_save without * dropping ownership (if possible). It returns the FPU ownership * status. */ int npxgetregs(struct thread *td) { struct pcb *pcb; -#ifdef CPU_ENABLE_SSE uint64_t *xstate_bv, bit; char *sa; int max_ext_n, i; -#endif int owned; if (!hw_float) return (_MC_FPOWNED_NONE); pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { bcopy(npx_initialstate, get_pcb_user_save_pcb(pcb), cpu_max_ext_state_size); SET_FPU_CW(get_pcb_user_save_pcb(pcb), pcb->pcb_initial_npxcw); npxuserinited(td); return (_MC_FPOWNED_PCB); } critical_enter(); if (td == PCPU_GET(fpcurthread)) { fpusave(get_pcb_user_save_pcb(pcb)); -#ifdef CPU_ENABLE_SSE if (!cpu_fxsr) -#endif /* * fnsave initializes the FPU and destroys whatever * context it contains. Make sure the FPU owner * starts with a clean state next time. */ npxdrop(); owned = _MC_FPOWNED_FPU; } else { owned = _MC_FPOWNED_PCB; } critical_exit(); -#ifdef CPU_ENABLE_SSE if (use_xsave) { /* * Handle partially saved state. */ sa = (char *)get_pcb_user_save_pcb(pcb); xstate_bv = (uint64_t *)(sa + sizeof(union savefpu) + offsetof(struct xstate_hdr, xstate_bv)); if (xsave_mask >> 32 != 0) max_ext_n = fls(xsave_mask >> 32) + 32; else max_ext_n = fls(xsave_mask); for (i = 0; i < max_ext_n; i++) { bit = 1ULL << i; if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0) continue; bcopy((char *)npx_initialstate + xsave_area_desc[i].offset, sa + xsave_area_desc[i].offset, xsave_area_desc[i].size); *xstate_bv |= bit; } } -#endif return (owned); } void npxuserinited(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; if (PCB_USER_FPU(pcb)) pcb->pcb_flags |= PCB_NPXINITDONE; pcb->pcb_flags |= PCB_NPXUSERINITDONE; } -#ifdef CPU_ENABLE_SSE int npxsetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size) { struct xstate_hdr *hdr, *ehdr; size_t len, max_len; uint64_t bv; /* XXXKIB should we clear all extended state in xstate_bv instead ? */ if (xfpustate == NULL) return (0); if (!use_xsave) return (EOPNOTSUPP); len = xfpustate_size; if (len < sizeof(struct xstate_hdr)) return (EINVAL); max_len = cpu_max_ext_state_size - sizeof(union savefpu); if (len > max_len) return (EINVAL); ehdr = (struct xstate_hdr *)xfpustate; bv = ehdr->xstate_bv; /* * Avoid #gp. 
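* xrstor raises a general-protection fault if xstate_bv requests a * state component that is not enabled in XCR0, so reject any bits * outside xsave_mask up front.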
*/ if (bv & ~xsave_mask) return (EINVAL); hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1); hdr->xstate_bv = bv; bcopy(xfpustate + sizeof(struct xstate_hdr), (char *)(hdr + 1), len - sizeof(struct xstate_hdr)); return (0); } -#endif int npxsetregs(struct thread *td, union savefpu *addr, char *xfpustate, size_t xfpustate_size) { struct pcb *pcb; -#ifdef CPU_ENABLE_SSE int error; -#endif if (!hw_float) return (ENXIO); pcb = td->td_pcb; critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { -#ifdef CPU_ENABLE_SSE error = npxsetxstate(td, xfpustate, xfpustate_size); if (error != 0) { critical_exit(); return (error); } if (!cpu_fxsr) -#endif fnclex(); /* As in npxdrop(). */ bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); fpurstor(get_pcb_user_save_td(td)); critical_exit(); pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE; } else { critical_exit(); -#ifdef CPU_ENABLE_SSE error = npxsetxstate(td, xfpustate, xfpustate_size); if (error != 0) return (error); -#endif bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); npxuserinited(td); } return (0); } static void fpusave(addr) union savefpu *addr; { -#ifdef CPU_ENABLE_SSE if (use_xsave) xsave((char *)addr, xsave_mask); else if (cpu_fxsr) fxsave(addr); else -#endif fnsave(addr); } -#ifdef CPU_ENABLE_SSE static void npx_fill_fpregs_xmm1(struct savexmm *sv_xmm, struct save87 *sv_87) { struct env87 *penv_87; struct envxmm *penv_xmm; int i; penv_87 = &sv_87->sv_env; penv_xmm = &sv_xmm->sv_env; /* FPU control/status */ penv_87->en_cw = penv_xmm->en_cw; penv_87->en_sw = penv_xmm->en_sw; penv_87->en_fip = penv_xmm->en_fip; penv_87->en_fcs = penv_xmm->en_fcs; penv_87->en_opcode = penv_xmm->en_opcode; penv_87->en_foo = penv_xmm->en_foo; penv_87->en_fos = penv_xmm->en_fos; /* FPU registers and tags */ penv_87->en_tw = 0xffff; for (i = 0; i < 8; ++i) { sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; if ((penv_xmm->en_tw & (1 << i)) != 0) /* zero and special are set as valid */ penv_87->en_tw &= ~(3 << i); } } void npx_fill_fpregs_xmm(struct savexmm *sv_xmm, struct save87 *sv_87) { bzero(sv_87, sizeof(*sv_87)); npx_fill_fpregs_xmm1(sv_xmm, sv_87); } void npx_set_fpregs_xmm(struct save87 *sv_87, struct savexmm *sv_xmm) { struct env87 *penv_87; struct envxmm *penv_xmm; int i; penv_87 = &sv_87->sv_env; penv_xmm = &sv_xmm->sv_env; /* FPU control/status */ penv_xmm->en_cw = penv_87->en_cw; penv_xmm->en_sw = penv_87->en_sw; penv_xmm->en_fip = penv_87->en_fip; penv_xmm->en_fcs = penv_87->en_fcs; penv_xmm->en_opcode = penv_87->en_opcode; penv_xmm->en_foo = penv_87->en_foo; penv_xmm->en_fos = penv_87->en_fos; /* FPU registers and tags */ penv_xmm->en_tw = 0; for (i = 0; i < 8; ++i) { sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; if ((penv_87->en_tw & (3 << i)) != (3 << i)) penv_xmm->en_tw |= 1 << i; } } -#endif /* CPU_ENABLE_SSE */ void npx_get_fsave(void *addr) { struct thread *td; union savefpu *sv; td = curthread; npxgetregs(td); sv = get_pcb_user_save_td(td); -#ifdef CPU_ENABLE_SSE if (cpu_fxsr) npx_fill_fpregs_xmm1(&sv->sv_xmm, addr); else -#endif bcopy(sv, addr, sizeof(struct env87) + sizeof(struct fpacc87[8])); } int npx_set_fsave(void *addr) { union savefpu sv; int error; bzero(&sv, sizeof(sv)); -#ifdef CPU_ENABLE_SSE if (cpu_fxsr) npx_set_fpregs_xmm(addr, &sv.sv_xmm); else -#endif bcopy(addr, &sv, sizeof(struct env87) + sizeof(struct fpacc87[8])); error = npxsetregs(curthread, &sv, NULL, 0); return (error); } -#ifdef CPU_ENABLE_SSE /* * On AuthenticAMD processors, the fxrstor instruction does not restore * the x87's stored
last instruction pointer, last data pointer, and last * opcode values, except in the rare case in which the exception summary * (ES) bit in the x87 status word is set to 1. * * In order to avoid leaking this information across processes, we clean * these values by performing a dummy load before executing fxrstor(). */ static void fpu_clean_state(void) { static float dummy_variable = 0.0; u_short status; /* * Clear the ES bit in the x87 status word if it is currently * set, in order to avoid causing a fault in the upcoming load. */ fnstsw(&status); if (status & 0x80) fnclex(); /* * Load the dummy variable into the x87 stack. This mangles * the x87 stack, but we don't care since we're about to call * fxrstor() anyway. */ __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable)); } -#endif /* CPU_ENABLE_SSE */ static void fpurstor(union savefpu *addr) { -#ifdef CPU_ENABLE_SSE if (use_xsave) xrstor((char *)addr, xsave_mask); else if (cpu_fxsr) fxrstor(addr); else -#endif frstor(addr); } #ifdef DEV_ISA /* * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. */ static struct isa_pnp_id npxisa_ids[] = { { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ { 0 } }; static int npxisa_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids)) <= 0) { device_quiet(dev); } return(result); } static int npxisa_attach(device_t dev) { return (0); } static device_method_t npxisa_methods[] = { /* Device interface */ DEVMETHOD(device_probe, npxisa_probe), DEVMETHOD(device_attach, npxisa_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t npxisa_driver = { "npxisa", npxisa_methods, 1, /* no softc */ }; static devclass_t npxisa_devclass; DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0); DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); #endif /* DEV_ISA */ static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", "Kernel contexts for FPU state"); #define FPU_KERN_CTX_NPXINITDONE 0x01 #define FPU_KERN_CTX_DUMMY 0x02 #define FPU_KERN_CTX_INUSE 0x04 struct fpu_kern_ctx { union savefpu *prev; uint32_t flags; char hwstate1[]; }; struct fpu_kern_ctx * fpu_kern_alloc_ctx(u_int flags) { struct fpu_kern_ctx *res; size_t sz; sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + cpu_max_ext_state_size; res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? M_NOWAIT : M_WAITOK) | M_ZERO); return (res); } void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) { KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx")); /* XXXKIB clear the memory ? 
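* (The save area can still hold register contents from the previous * user of the context, so an explicit bzero() before freeing would be * the conservative choice, at the cost of touching up to * cpu_max_ext_state_size bytes.)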
*/ free(ctx, M_FPUKERN_CTX); } static union savefpu * fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx) { vm_offset_t p; p = (vm_offset_t)&ctx->hwstate1; p = roundup2(p, XSAVE_AREA_ALIGN); return ((union savefpu *)p); } int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) { struct pcb *pcb; KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("using inuse ctx")); if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) { ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE; return (0); } pcb = td->td_pcb; KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == get_pcb_user_save_pcb(pcb), ("mangled pcb_save")); ctx->flags = FPU_KERN_CTX_INUSE; if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0) ctx->flags |= FPU_KERN_CTX_NPXINITDONE; npxexit(td); ctx->prev = pcb->pcb_save; pcb->pcb_save = fpu_kern_ctx_savefpu(ctx); pcb->pcb_flags |= PCB_KERNNPX; pcb->pcb_flags &= ~PCB_NPXINITDONE; return (0); } int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) { struct pcb *pcb; KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0, ("leaving not inuse ctx")); ctx->flags &= ~FPU_KERN_CTX_INUSE; if (is_fpu_kern_thread(0) && (ctx->flags & FPU_KERN_CTX_DUMMY) != 0) return (0); pcb = td->td_pcb; critical_enter(); if (curthread == PCPU_GET(fpcurthread)) npxdrop(); critical_exit(); pcb->pcb_save = ctx->prev; if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) { if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0) pcb->pcb_flags |= PCB_NPXINITDONE; else pcb->pcb_flags &= ~PCB_NPXINITDONE; pcb->pcb_flags &= ~PCB_KERNNPX; } else { if ((ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0) pcb->pcb_flags |= PCB_NPXINITDONE; else pcb->pcb_flags &= ~PCB_NPXINITDONE; KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); } return (0); } int fpu_kern_thread(u_int flags) { KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, ("Only kthread may use fpu_kern_thread")); KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb), ("mangled pcb_save")); KASSERT(PCB_USER_FPU(curpcb), ("recursive call")); curpcb->pcb_flags |= PCB_KERNNPX; return (0); } int is_fpu_kern_thread(u_int flags) { if ((curthread->td_pflags & TDP_KTHREAD) == 0) return (0); return ((curpcb->pcb_flags & PCB_KERNNPX) != 0); } /* * FPU save area alloc/free/init utility routines */ union savefpu * fpu_save_area_alloc(void) { return (uma_zalloc(fpu_save_area_zone, 0)); } void fpu_save_area_free(union savefpu *fsa) { uma_zfree(fpu_save_area_zone, fsa); } void fpu_save_area_reset(union savefpu *fsa) { bcopy(npx_initialstate, fsa, cpu_max_ext_state_size); } Index: projects/ipsec/sys/i386/linux/linux_ptrace.c =================================================================== --- projects/ipsec/sys/i386/linux/linux_ptrace.c (revision 313186) +++ projects/ipsec/sys/i386/linux/linux_ptrace.c (revision 313187) @@ -1,486 +1,474 @@ /*- * Copyright (c) 2001 Alexander Kabaev * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include #include #include #include #include -#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) -#define CPU_ENABLE_SSE -#endif - /* * Linux ptrace request numbers. Mostly identical to FreeBSD, * except for MD ones and PT_ATTACH/PT_DETACH. */ #define PTRACE_TRACEME 0 #define PTRACE_PEEKTEXT 1 #define PTRACE_PEEKDATA 2 #define PTRACE_PEEKUSR 3 #define PTRACE_POKETEXT 4 #define PTRACE_POKEDATA 5 #define PTRACE_POKEUSR 6 #define PTRACE_CONT 7 #define PTRACE_KILL 8 #define PTRACE_SINGLESTEP 9 #define PTRACE_ATTACH 16 #define PTRACE_DETACH 17 #define LINUX_PTRACE_SYSCALL 24 #define PTRACE_GETREGS 12 #define PTRACE_SETREGS 13 #define PTRACE_GETFPREGS 14 #define PTRACE_SETFPREGS 15 #define PTRACE_GETFPXREGS 18 #define PTRACE_SETFPXREGS 19 #define PTRACE_SETOPTIONS 21 /* * Linux keeps debug registers at the following * offset in the user struct */ #define LINUX_DBREG_OFFSET 252 #define LINUX_DBREG_SIZE (8*sizeof(l_int)) static __inline int map_signum(int signum) { signum = linux_to_bsd_signal(signum); return ((signum == SIGSTOP)? 0 : signum); } struct linux_pt_reg { l_long ebx; l_long ecx; l_long edx; l_long esi; l_long edi; l_long ebp; l_long eax; l_int xds; l_int xes; l_int xfs; l_int xgs; l_long orig_eax; l_long eip; l_int xcs; l_long eflags; l_long esp; l_int xss; }; /* * Translate i386 ptrace registers between Linux and FreeBSD formats.
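* (struct linux_pt_reg above mirrors the layout of Linux's i386 * struct pt_regs; orig_eax, the eax value saved at syscall entry, has * no direct FreeBSD counterpart and is filled from r_eax below.)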
* The translation is pretty straightforward for all registers, except for * orig_eax on the Linux side and r_trapno and r_err on the FreeBSD side. */ static void map_regs_to_linux(struct reg *bsd_r, struct linux_pt_reg *linux_r) { linux_r->ebx = bsd_r->r_ebx; linux_r->ecx = bsd_r->r_ecx; linux_r->edx = bsd_r->r_edx; linux_r->esi = bsd_r->r_esi; linux_r->edi = bsd_r->r_edi; linux_r->ebp = bsd_r->r_ebp; linux_r->eax = bsd_r->r_eax; linux_r->xds = bsd_r->r_ds; linux_r->xes = bsd_r->r_es; linux_r->xfs = bsd_r->r_fs; linux_r->xgs = bsd_r->r_gs; linux_r->orig_eax = bsd_r->r_eax; linux_r->eip = bsd_r->r_eip; linux_r->xcs = bsd_r->r_cs; linux_r->eflags = bsd_r->r_eflags; linux_r->esp = bsd_r->r_esp; linux_r->xss = bsd_r->r_ss; } static void map_regs_from_linux(struct reg *bsd_r, struct linux_pt_reg *linux_r) { bsd_r->r_ebx = linux_r->ebx; bsd_r->r_ecx = linux_r->ecx; bsd_r->r_edx = linux_r->edx; bsd_r->r_esi = linux_r->esi; bsd_r->r_edi = linux_r->edi; bsd_r->r_ebp = linux_r->ebp; bsd_r->r_eax = linux_r->eax; bsd_r->r_ds = linux_r->xds; bsd_r->r_es = linux_r->xes; bsd_r->r_fs = linux_r->xfs; bsd_r->r_gs = linux_r->xgs; bsd_r->r_eip = linux_r->eip; bsd_r->r_cs = linux_r->xcs; bsd_r->r_eflags = linux_r->eflags; bsd_r->r_esp = linux_r->esp; bsd_r->r_ss = linux_r->xss; } struct linux_pt_fpreg { l_long cwd; l_long swd; l_long twd; l_long fip; l_long fcs; l_long foo; l_long fos; l_long st_space[2*10]; }; static void map_fpregs_to_linux(struct fpreg *bsd_r, struct linux_pt_fpreg *linux_r) { linux_r->cwd = bsd_r->fpr_env[0]; linux_r->swd = bsd_r->fpr_env[1]; linux_r->twd = bsd_r->fpr_env[2]; linux_r->fip = bsd_r->fpr_env[3]; linux_r->fcs = bsd_r->fpr_env[4]; linux_r->foo = bsd_r->fpr_env[5]; linux_r->fos = bsd_r->fpr_env[6]; bcopy(bsd_r->fpr_acc, linux_r->st_space, sizeof(linux_r->st_space)); } static void map_fpregs_from_linux(struct fpreg *bsd_r, struct linux_pt_fpreg *linux_r) { bsd_r->fpr_env[0] = linux_r->cwd; bsd_r->fpr_env[1] = linux_r->swd; bsd_r->fpr_env[2] = linux_r->twd; bsd_r->fpr_env[3] = linux_r->fip; bsd_r->fpr_env[4] = linux_r->fcs; bsd_r->fpr_env[5] = linux_r->foo; bsd_r->fpr_env[6] = linux_r->fos; bcopy(linux_r->st_space, bsd_r->fpr_acc, sizeof(bsd_r->fpr_acc)); } struct linux_pt_fpxreg { l_ushort cwd; l_ushort swd; l_ushort twd; l_ushort fop; l_long fip; l_long fcs; l_long foo; l_long fos; l_long mxcsr; l_long reserved; l_long st_space[32]; l_long xmm_space[32]; l_long padding[56]; }; -#ifdef CPU_ENABLE_SSE static int linux_proc_read_fpxregs(struct thread *td, struct linux_pt_fpxreg *fpxregs) { PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); if (cpu_fxsr == 0 || (td->td_proc->p_flag & P_INMEM) == 0) return (EIO); bcopy(&get_pcb_user_save_td(td)->sv_xmm, fpxregs, sizeof(*fpxregs)); return (0); } static int linux_proc_write_fpxregs(struct thread *td, struct linux_pt_fpxreg *fpxregs) { PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); if (cpu_fxsr == 0 || (td->td_proc->p_flag & P_INMEM) == 0) return (EIO); bcopy(fpxregs, &get_pcb_user_save_td(td)->sv_xmm, sizeof(*fpxregs)); return (0); } -#endif int linux_ptrace(struct thread *td, struct linux_ptrace_args *uap) { union { struct linux_pt_reg reg; struct linux_pt_fpreg fpreg; struct linux_pt_fpxreg fpxreg; } r; union { struct reg bsd_reg; struct fpreg bsd_fpreg; struct dbreg bsd_dbreg; } u; void *addr; pid_t pid; int error, req; error = 0; /* by default, just copy data intact */ req = uap->req; pid = (pid_t)uap->pid; addr = (void *)uap->addr; switch (req) { case PTRACE_TRACEME: case PTRACE_POKETEXT: case PTRACE_POKEDATA: case PTRACE_KILL: error = kern_ptrace(td, req, pid, addr,
uap->data); break; case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: { /* need to preserve return value */ int rval = td->td_retval[0]; error = kern_ptrace(td, req, pid, addr, 0); if (error == 0) error = copyout(td->td_retval, (void *)uap->data, sizeof(l_int)); td->td_retval[0] = rval; break; } case PTRACE_DETACH: error = kern_ptrace(td, PT_DETACH, pid, (void *)1, map_signum(uap->data)); break; case PTRACE_SINGLESTEP: case PTRACE_CONT: error = kern_ptrace(td, req, pid, (void *)1, map_signum(uap->data)); break; case PTRACE_ATTACH: error = kern_ptrace(td, PT_ATTACH, pid, addr, uap->data); break; case PTRACE_GETREGS: /* Linux is using data where FreeBSD is using addr */ error = kern_ptrace(td, PT_GETREGS, pid, &u.bsd_reg, 0); if (error == 0) { map_regs_to_linux(&u.bsd_reg, &r.reg); error = copyout(&r.reg, (void *)uap->data, sizeof(r.reg)); } break; case PTRACE_SETREGS: /* Linux is using data where FreeBSD is using addr */ error = copyin((void *)uap->data, &r.reg, sizeof(r.reg)); if (error == 0) { map_regs_from_linux(&u.bsd_reg, &r.reg); error = kern_ptrace(td, PT_SETREGS, pid, &u.bsd_reg, 0); } break; case PTRACE_GETFPREGS: /* Linux is using data where FreeBSD is using addr */ error = kern_ptrace(td, PT_GETFPREGS, pid, &u.bsd_fpreg, 0); if (error == 0) { map_fpregs_to_linux(&u.bsd_fpreg, &r.fpreg); error = copyout(&r.fpreg, (void *)uap->data, sizeof(r.fpreg)); } break; case PTRACE_SETFPREGS: /* Linux is using data where FreeBSD is using addr */ error = copyin((void *)uap->data, &r.fpreg, sizeof(r.fpreg)); if (error == 0) { map_fpregs_from_linux(&u.bsd_fpreg, &r.fpreg); error = kern_ptrace(td, PT_SETFPREGS, pid, &u.bsd_fpreg, 0); } break; case PTRACE_SETFPXREGS: -#ifdef CPU_ENABLE_SSE error = copyin((void *)uap->data, &r.fpxreg, sizeof(r.fpxreg)); if (error) break; -#endif /* FALL THROUGH */ case PTRACE_GETFPXREGS: { -#ifdef CPU_ENABLE_SSE struct proc *p; struct thread *td2; if (sizeof(struct linux_pt_fpxreg) != sizeof(struct savexmm)) { static int once = 0; if (!once) { printf("linux: savexmm != linux_pt_fpxreg\n"); once = 1; } error = EIO; break; } if ((p = pfind(uap->pid)) == NULL) { error = ESRCH; break; } /* Exiting processes can't be debugged. */ if ((p->p_flag & P_WEXIT) != 0) { error = ESRCH; goto fail; } if ((error = p_candebug(td, p)) != 0) goto fail; /* System processes can't be debugged. */ if ((p->p_flag & P_SYSTEM) != 0) { error = EINVAL; goto fail; } /* not being traced... */ if ((p->p_flag & P_TRACED) == 0) { error = EPERM; goto fail; } /* not being traced by YOU */ if (p->p_pptr != td->td_proc) { error = EBUSY; goto fail; } /* not currently stopped */ if (!P_SHOULDSTOP(p) || (p->p_flag & P_WAITED) == 0) { error = EBUSY; goto fail; } if (req == PTRACE_GETFPXREGS) { _PHOLD(p); /* may block */ td2 = FIRST_THREAD_IN_PROC(p); error = linux_proc_read_fpxregs(td2, &r.fpxreg); _PRELE(p); PROC_UNLOCK(p); if (error == 0) error = copyout(&r.fpxreg, (void *)uap->data, sizeof(r.fpxreg)); } else { /* clear dangerous bits exactly as Linux does*/ r.fpxreg.mxcsr &= 0xffbf; _PHOLD(p); /* may block */ td2 = FIRST_THREAD_IN_PROC(p); error = linux_proc_write_fpxregs(td2, &r.fpxreg); _PRELE(p); PROC_UNLOCK(p); } break; fail: PROC_UNLOCK(p); -#else - error = EIO; -#endif break; } case PTRACE_PEEKUSR: case PTRACE_POKEUSR: { error = EIO; /* check addr for alignment */ if (uap->addr < 0 || uap->addr & (sizeof(l_int) - 1)) break; /* * Allow linux programs to access register values in * user struct. We simulate this through PT_GET/SETREGS * as necessary. 
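* Offsets below sizeof(struct linux_pt_reg) index the register dump * laid out above (e.g. addr 48 reaches eip), while offsets from * LINUX_DBREG_OFFSET map onto the debug registers via * PT_GETDBREGS/PT_SETDBREGS.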
*/ if (uap->addr < sizeof(struct linux_pt_reg)) { error = kern_ptrace(td, PT_GETREGS, pid, &u.bsd_reg, 0); if (error != 0) break; map_regs_to_linux(&u.bsd_reg, &r.reg); if (req == PTRACE_PEEKUSR) { error = copyout((char *)&r.reg + uap->addr, (void *)uap->data, sizeof(l_int)); break; } *(l_int *)((char *)&r.reg + uap->addr) = (l_int)uap->data; map_regs_from_linux(&u.bsd_reg, &r.reg); error = kern_ptrace(td, PT_SETREGS, pid, &u.bsd_reg, 0); } /* * Simulate debug registers access */ if (uap->addr >= LINUX_DBREG_OFFSET && uap->addr <= LINUX_DBREG_OFFSET + LINUX_DBREG_SIZE) { error = kern_ptrace(td, PT_GETDBREGS, pid, &u.bsd_dbreg, 0); if (error != 0) break; uap->addr -= LINUX_DBREG_OFFSET; if (req == PTRACE_PEEKUSR) { error = copyout((char *)&u.bsd_dbreg + uap->addr, (void *)uap->data, sizeof(l_int)); break; } *(l_int *)((char *)&u.bsd_dbreg + uap->addr) = uap->data; error = kern_ptrace(td, PT_SETDBREGS, pid, &u.bsd_dbreg, 0); } break; } case LINUX_PTRACE_SYSCALL: /* fall through */ default: printf("linux: ptrace(%u, ...) not implemented\n", (unsigned int)uap->req); error = EINVAL; break; } return (error); } Index: projects/ipsec/sys/modules/dtrace/dtrace/Makefile =================================================================== --- projects/ipsec/sys/modules/dtrace/dtrace/Makefile (revision 313186) +++ projects/ipsec/sys/modules/dtrace/dtrace/Makefile (revision 313187) @@ -1,77 +1,79 @@ # $FreeBSD$ SYSDIR?= ${.CURDIR}/../../.. ARCHDIR= ${MACHINE_CPUARCH} .PATH: ${SYSDIR}/cddl/contrib/opensolaris/uts/common/dtrace .PATH: ${SYSDIR}/cddl/compat/opensolaris/kern .PATH: ${SYSDIR}/cddl/kern .PATH: ${SYSDIR}/cddl/dev/dtrace .PATH: ${SYSDIR}/cddl/dev/dtrace/${ARCHDIR} KMOD= dtrace SRCS= dtrace.c \ + dtrace_xoroshiro128_plus.c \ dtrace_asm.S \ dtrace_subr.c .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" .PATH: ${SYSDIR}/cddl/dev/dtrace/x86 SRCS+= dis_tables.c \ instr_size.c CFLAGS+= -I${SYSDIR}/cddl/contrib/opensolaris/uts/intel \ -I${SYSDIR}/cddl/dev/dtrace/x86 .endif SRCS+= bus_if.h device_if.h vnode_if.h # Needed for dtrace_asm.S SRCS+= assym.s # These are needed for assym.s SRCS+= opt_compat.h opt_kstack_pages.h opt_nfs.h opt_hwpmc_hooks.h #This is needed for dtrace.c SRCS += opensolaris_taskq.c .if ${MACHINE_CPUARCH} == "i386" SRCS+= opt_apic.h .endif CFLAGS+= -I${SYSDIR}/cddl/compat/opensolaris \ -I${SYSDIR}/cddl/dev/dtrace \ -I${SYSDIR}/cddl/dev/dtrace/${ARCHDIR} \ -I${SYSDIR}/cddl/contrib/opensolaris/uts/common \ + -I${SYSDIR}/cddl/contrib/opensolaris/uts/common/dtrace \ -I${SYSDIR}/cddl/contrib/opensolaris/common/util \ -I${SYSDIR} -DDIS_MEM CFLAGS+= -DSMP EXPORT_SYMS= dtrace_register \ dtrace_unregister \ dtrace_probe_lookup dtrace_asm.o: assym.s .if ${MACHINE_CPUARCH} == "arm" assym.o: assym.s ${AS} -meabi=5 -o assym.o assym.s .endif .if ${MACHINE_CPUARCH} == "riscv" assym.o: assym.s .if ${TARGET_ARCH:Mriscv*sf} ${AS} -mfloat-abi=soft -o assym.o assym.s .else ${AS} -mfloat-abi=double -o assym.o assym.s .endif .endif .include CFLAGS+= -include ${SYSDIR}/cddl/compat/opensolaris/sys/debug_compat.h CWARNFLAGS+= -Wno-parentheses CWARNFLAGS+= -Wno-uninitialized CWARNFLAGS+= -Wno-cast-qual CWARNFLAGS+= -Wno-unused Index: projects/ipsec/sys/modules/dtrace/fasttrap/Makefile =================================================================== --- projects/ipsec/sys/modules/dtrace/fasttrap/Makefile (revision 313186) +++ projects/ipsec/sys/modules/dtrace/fasttrap/Makefile (revision 313187) @@ -1,33 +1,34 @@ # $FreeBSD$ SYSDIR?= ${.CURDIR}/../../.. 
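# Headers shared with the main dtrace module live under the import's # uts/common/dtrace directory, added to .PATH and CFLAGS below.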
.PATH: ${SYSDIR}/cddl/contrib/opensolaris/uts/common/dtrace KMOD= fasttrap SRCS= fasttrap.c fasttrap_isa.c opt_compat.h SRCS+= vnode_if.h CFLAGS+= -I${SYSDIR}/cddl/compat/opensolaris \ -I${SYSDIR}/cddl/contrib/opensolaris/uts/common \ + -I${SYSDIR}/cddl/contrib/opensolaris/uts/common/dtrace \ -I${SYSDIR} .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" CFLAGS+= -I${SYSDIR}/cddl/contrib/opensolaris/uts/intel .PATH: ${SYSDIR}/cddl/contrib/opensolaris/uts/intel/dtrace .elif ${MACHINE_CPUARCH} == "powerpc" CFLAGS+= -I${SYSDIR}/cddl/contrib/opensolaris/uts/powerpc .PATH: ${SYSDIR}/cddl/contrib/opensolaris/uts/powerpc/dtrace .endif .PATH: ${SYSDIR}/cddl/contrib/opensolaris/common/unicode SRCS+= u8_textprep.c CFLAGS+= -DSMP .include CFLAGS+= -include ${SYSDIR}/cddl/compat/opensolaris/sys/debug_compat.h CWARNFLAGS+= -Wno-cast-qual CWARNFLAGS+= -Wno-unused Index: projects/ipsec/sys/modules/dtrace/systrace/Makefile =================================================================== --- projects/ipsec/sys/modules/dtrace/systrace/Makefile (revision 313186) +++ projects/ipsec/sys/modules/dtrace/systrace/Makefile (revision 313187) @@ -1,17 +1,18 @@ # $FreeBSD$ SYSDIR?= ${.CURDIR}/../../.. .PATH: ${SYSDIR}/cddl/dev/systrace KMOD= systrace SRCS= systrace.c SRCS+= vnode_if.h CFLAGS+= -I${SYSDIR}/cddl/compat/opensolaris \ -I${SYSDIR}/cddl/contrib/opensolaris/uts/common \ + -I${SYSDIR}/cddl/contrib/opensolaris/uts/common/dtrace \ -I${SYSDIR} .include CFLAGS+= -include ${SYSDIR}/cddl/compat/opensolaris/sys/debug_compat.h Index: projects/ipsec/sys/net80211/ieee80211_output.c =================================================================== --- projects/ipsec/sys/net80211/ieee80211_output.c (revision 313186) +++ projects/ipsec/sys/net80211/ieee80211_output.c (revision 313187) @@ -1,3719 +1,3726 @@ /*- * Copyright (c) 2001 Atsushi Onoe * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IEEE80211_SUPPORT_SUPERG #include #endif #ifdef IEEE80211_SUPPORT_TDMA #include #endif #include #include #include #if defined(INET) || defined(INET6) #include #endif #ifdef INET #include #include #include #endif #ifdef INET6 #include #endif #include #define ETHER_HEADER_COPY(dst, src) \ memcpy(dst, src, sizeof(struct ether_header)) static int ieee80211_fragment(struct ieee80211vap *, struct mbuf *, u_int hdrsize, u_int ciphdrsize, u_int mtu); static void ieee80211_tx_mgt_cb(struct ieee80211_node *, void *, int); #ifdef IEEE80211_DEBUG /* * Decide if an outbound management frame should be * printed when debugging is enabled. This filters some * of the less interesting frames that come frequently * (e.g. beacons). */ static __inline int doprint(struct ieee80211vap *vap, int subtype) { switch (subtype) { case IEEE80211_FC0_SUBTYPE_PROBE_RESP: return (vap->iv_opmode == IEEE80211_M_IBSS); } return 1; } #endif /* * Transmit a frame to the given destination on the given VAP. * * It's up to the caller to figure out the details of who this * is going to and resolving the node. * * This routine takes care of queuing it for power save, * A-MPDU state stuff, fast-frames state stuff, encapsulation * if required, then passing it up to the driver layer. * * This routine (for now) consumes the mbuf and frees the node * reference; it ideally will return a TX status which reflects * whether the mbuf was consumed or not, so the caller can * free the mbuf (if appropriate) and the node reference (again, * if appropriate.) */ int ieee80211_vap_pkt_send_dest(struct ieee80211vap *vap, struct mbuf *m, struct ieee80211_node *ni) { struct ieee80211com *ic = vap->iv_ic; struct ifnet *ifp = vap->iv_ifp; int mcast; if ((ni->ni_flags & IEEE80211_NODE_PWR_MGT) && (m->m_flags & M_PWR_SAV) == 0) { /* * Station in power save mode; pass the frame * to the 802.11 layer and continue. We'll get * the frame back when the time is right. * XXX lose WDS vap linkage? */ if (ieee80211_pwrsave(ni, m) != 0) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); ieee80211_free_node(ni); /* * We queued it fine, so tell the upper layer * that we consumed it. */ return (0); } /* calculate priority so drivers can find the tx queue */ if (ieee80211_classify(ni, m)) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_OUTPUT, ni->ni_macaddr, NULL, "%s", "classification failure"); vap->iv_stats.is_tx_classify++; if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); m_freem(m); ieee80211_free_node(ni); /* XXX better status? */ return (0); } /* * Stash the node pointer. Note that we do this after * any call to ieee80211_dwds_mcast because that code * uses any existing value for rcvif to identify the * interface it (might have been) received on. */ m->m_pkthdr.rcvif = (void *)ni; mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1: 0; BPF_MTAP(ifp, m); /* 802.3 tx */ /* * Check if A-MPDU tx aggregation is setup or if we * should try to enable it. The sta must be associated * with HT and A-MPDU enabled for use. When the policy * routine decides we should enable A-MPDU we issue an * ADDBA request and wait for a reply. The frame being * encapsulated will go out w/o using A-MPDU, or possibly * it might be collected by the driver and held/retransmit. 
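* Frames tagged M_AMPDU_MPDU below are the ones the driver may then * aggregate into an A-MPDU once the block-ack agreement is in place.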
* The default ic_ampdu_enable routine handles staggering * ADDBA requests in case the receiver NAK's us or we are * otherwise unable to establish a BA stream. * * Don't treat group-addressed frames as candidates for aggregation; * net80211 doesn't support 802.11aa-2012 and so group addressed * frames will always have sequence numbers allocated from the NON_QOS * TID. */ if ((ni->ni_flags & IEEE80211_NODE_AMPDU_TX) && (vap->iv_flags_ht & IEEE80211_FHT_AMPDU_TX)) { if ((m->m_flags & M_EAPOL) == 0 && (! mcast)) { int tid = WME_AC_TO_TID(M_WME_GETAC(m)); struct ieee80211_tx_ampdu *tap = &ni->ni_tx_ampdu[tid]; ieee80211_txampdu_count_packet(tap); if (IEEE80211_AMPDU_RUNNING(tap)) { /* * Operational, mark frame for aggregation. * * XXX do tx aggregation here */ m->m_flags |= M_AMPDU_MPDU; } else if (!IEEE80211_AMPDU_REQUESTED(tap) && ic->ic_ampdu_enable(ni, tap)) { /* * Not negotiated yet, request service. */ ieee80211_ampdu_request(ni, tap); /* XXX hold frame for reply? */ } } } #ifdef IEEE80211_SUPPORT_SUPERG /* * Check for AMSDU/FF; queue for aggregation * * Note: we don't bother trying to do fast frames or * A-MSDU encapsulation for 802.3 drivers. Now, we * likely could do it for FF (because it's a magic * atheros tunnel LLC type) but I don't think we're going * to really need to. For A-MSDU we'd have to set the * A-MSDU QoS bit in the wifi header, so we just plain * can't do it. * * Strictly speaking, we could actually /do/ A-MSDU / FF * with A-MPDU together which for certain circumstances * is beneficial (eg A-MSDU of TCP ACKs.) However, * I'll ignore that for now so existing behaviour is maintained. * Later on it would be good to make "amsdu + ampdu" configurable. */ else if (__predict_true((vap->iv_caps & IEEE80211_C_8023ENCAP) == 0)) { if ((! mcast) && ieee80211_amsdu_tx_ok(ni)) { m = ieee80211_amsdu_check(ni, m); if (m == NULL) { /* NB: any ni ref held on stageq */ IEEE80211_DPRINTF(vap, IEEE80211_MSG_SUPERG, "%s: amsdu_check queued frame\n", __func__); return (0); } } else if ((! mcast) && IEEE80211_ATH_CAP(vap, ni, IEEE80211_NODE_FF)) { m = ieee80211_ff_check(ni, m); if (m == NULL) { /* NB: any ni ref held on stageq */ IEEE80211_DPRINTF(vap, IEEE80211_MSG_SUPERG, "%s: ff_check queued frame\n", __func__); return (0); } } } #endif /* IEEE80211_SUPPORT_SUPERG */ /* * Grab the TX lock - serialise the TX process from this * point (where TX state is being checked/modified) * through to driver queue. */ IEEE80211_TX_LOCK(ic); /* * XXX make the encap and transmit code a separate function * so things like the FF (and later A-MSDU) path can just call * it for flushed frames. */ if (__predict_true((vap->iv_caps & IEEE80211_C_8023ENCAP) == 0)) { /* * Encapsulate the packet in prep for transmission. */ m = ieee80211_encap(vap, ni, m); if (m == NULL) { /* NB: stat+msg handled in ieee80211_encap */ IEEE80211_TX_UNLOCK(ic); ieee80211_free_node(ni); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENOBUFS); } } (void) ieee80211_parent_xmitpkt(ic, m); /* * Unlock at this point - no need to hold it across * ieee80211_free_node() (ie, the comlock) */ IEEE80211_TX_UNLOCK(ic); ic->ic_lastdata = ticks; return (0); } /* * Send the given mbuf through the given vap. * * This consumes the mbuf regardless of whether the transmit * was successful or not. * * This does none of the initial checks that ieee80211_start() * does (eg CAC timeout, interface wakeup) - the caller must * do this first.
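* (ieee80211_vap_transmit() below is the normal entry point and * performs the running-state check before dispatching here.)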
*/ static int ieee80211_start_pkt(struct ieee80211vap *vap, struct mbuf *m) { #define IS_DWDS(vap) \ (vap->iv_opmode == IEEE80211_M_WDS && \ (vap->iv_flags_ext & IEEE80211_FEXT_WDSLEGACY) == 0) struct ieee80211com *ic = vap->iv_ic; struct ifnet *ifp = vap->iv_ifp; struct ieee80211_node *ni; struct ether_header *eh; /* * Cancel any background scan. */ if (ic->ic_flags & IEEE80211_F_SCAN) ieee80211_cancel_anyscan(vap); /* * Find the node for the destination so we can do * things like power save and fast frames aggregation. * * NB: past this point various code assumes the first * mbuf has the 802.3 header present (and contiguous). */ ni = NULL; if (m->m_len < sizeof(struct ether_header) && (m = m_pullup(m, sizeof(struct ether_header))) == NULL) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_OUTPUT, "discard frame, %s\n", "m_pullup failed"); vap->iv_stats.is_tx_nobuf++; /* XXX */ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENOBUFS); } eh = mtod(m, struct ether_header *); if (ETHER_IS_MULTICAST(eh->ether_dhost)) { if (IS_DWDS(vap)) { /* * Only unicast frames from the above go out * DWDS vaps; multicast frames are handled by * dispatching the frame as it comes through * the AP vap (see below). */ IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_WDS, eh->ether_dhost, "mcast", "%s", "on DWDS"); vap->iv_stats.is_dwds_mcast++; m_freem(m); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); /* XXX better status? */ return (ENOBUFS); } if (vap->iv_opmode == IEEE80211_M_HOSTAP) { /* * Spam DWDS vap's w/ multicast traffic. */ /* XXX only if dwds in use? */ ieee80211_dwds_mcast(vap, m); } } #ifdef IEEE80211_SUPPORT_MESH if (vap->iv_opmode != IEEE80211_M_MBSS) { #endif ni = ieee80211_find_txnode(vap, eh->ether_dhost); if (ni == NULL) { /* NB: ieee80211_find_txnode does stat+msg */ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); m_freem(m); /* XXX better status? */ return (ENOBUFS); } if (ni->ni_associd == 0 && (ni->ni_flags & IEEE80211_NODE_ASSOCID)) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_OUTPUT, eh->ether_dhost, NULL, "sta not associated (type 0x%04x)", htons(eh->ether_type)); vap->iv_stats.is_tx_notassoc++; if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); m_freem(m); ieee80211_free_node(ni); /* XXX better status? */ return (ENOBUFS); } #ifdef IEEE80211_SUPPORT_MESH } else { if (!IEEE80211_ADDR_EQ(eh->ether_shost, vap->iv_myaddr)) { /* * Proxy station only if configured. */ if (!ieee80211_mesh_isproxyena(vap)) { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_OUTPUT | IEEE80211_MSG_MESH, eh->ether_dhost, NULL, "%s", "proxy not enabled"); vap->iv_stats.is_mesh_notproxy++; if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); m_freem(m); /* XXX better status? */ return (ENOBUFS); } IEEE80211_DPRINTF(vap, IEEE80211_MSG_OUTPUT, "forward frame from DS SA(%6D), DA(%6D)\n", eh->ether_shost, ":", eh->ether_dhost, ":"); ieee80211_mesh_proxy_check(vap, eh->ether_shost); } ni = ieee80211_mesh_discover(vap, eh->ether_dhost, m); if (ni == NULL) { /* * NB: ieee80211_mesh_discover holds/disposes * frame (e.g. queueing on path discovery). */ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); /* XXX better status? */ return (ENOBUFS); } } #endif /* * We've resolved the sender, so attempt to transmit it. */ if (vap->iv_state == IEEE80211_S_SLEEP) { /* * In power save; queue frame and then wakeup device * for transmit. 
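* The frame is parked via ieee80211_pwrsave() and handed back for * transmission once the state machine returns to RUN.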
*/ ic->ic_lastdata = ticks; if (ieee80211_pwrsave(ni, m) != 0) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); ieee80211_free_node(ni); ieee80211_new_state(vap, IEEE80211_S_RUN, 0); return (0); } if (ieee80211_vap_pkt_send_dest(vap, m, ni) != 0) return (ENOBUFS); return (0); #undef IS_DWDS } /* * Start method for vap's. All packets from the stack come * through here. We handle common processing of the packets * before dispatching them to the underlying device. * * if_transmit() requires that the mbuf be consumed by this call * regardless of the return condition. */ int ieee80211_vap_transmit(struct ifnet *ifp, struct mbuf *m) { struct ieee80211vap *vap = ifp->if_softc; struct ieee80211com *ic = vap->iv_ic; /* * No data frames go out unless we're running. * Note in particular this covers CAC and CSA * states (though maybe we should check muting * for CSA). */ if (vap->iv_state != IEEE80211_S_RUN && vap->iv_state != IEEE80211_S_SLEEP) { IEEE80211_LOCK(ic); /* re-check under the com lock to avoid races */ if (vap->iv_state != IEEE80211_S_RUN && vap->iv_state != IEEE80211_S_SLEEP) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_OUTPUT, "%s: ignore queue, in %s state\n", __func__, ieee80211_state_name[vap->iv_state]); vap->iv_stats.is_tx_badstate++; IEEE80211_UNLOCK(ic); ifp->if_drv_flags |= IFF_DRV_OACTIVE; m_freem(m); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENETDOWN); } IEEE80211_UNLOCK(ic); } /* * Sanitize mbuf flags for net80211 use. We cannot * clear M_PWR_SAV or M_MORE_DATA because these may * be set for frames that are re-submitted from the * power save queue. * * NB: This must be done before ieee80211_classify as * it marks EAPOL in frames with M_EAPOL. */ m->m_flags &= ~(M_80211_TX - M_PWR_SAV - M_MORE_DATA); /* * Bump to the packet transmission path. * The mbuf will be consumed here. */ return (ieee80211_start_pkt(vap, m)); } void ieee80211_vap_qflush(struct ifnet *ifp) { /* Empty for now */ } /* * 802.11 raw output routine. * * XXX TODO: this (and other send routines) should correctly * XXX keep the pwr mgmt bit set if it decides to call into the * XXX driver to send a frame whilst the state is SLEEP. * * Otherwise the peer may decide that we're awake and flood us * with traffic we are still too asleep to receive! */ int ieee80211_raw_output(struct ieee80211vap *vap, struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) { struct ieee80211com *ic = vap->iv_ic; int error; /* * Set node - the caller has taken a reference, so ensure * that the mbuf has the same node value that * it would if it were going via the normal path. */ m->m_pkthdr.rcvif = (void *)ni; /* * Attempt to add bpf transmit parameters. * * For now it's ok to fail; the raw_xmit api still takes * them as an option. * * Later on when ic_raw_xmit() has params removed, * they'll have to be added - so fail the transmit if * they can't be. */ if (params) (void) ieee80211_add_xmit_params(m, params); error = ic->ic_raw_xmit(ni, m, params); if (error) { if_inc_counter(vap->iv_ifp, IFCOUNTER_OERRORS, 1); ieee80211_free_node(ni); } return (error); } /* * 802.11 output routine. This is (currently) used only to * connect bpf write calls to the 802.11 layer for injecting * raw 802.11 frames. 
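 * Frames arrive here via if_output() with an AF_IEEE80211
 * destination sockaddr; anything else is handed back to the vap's
 * normal Ethernet output routine below.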
*/ int ieee80211_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { #define senderr(e) do { error = (e); goto bad;} while (0) struct ieee80211_node *ni = NULL; struct ieee80211vap *vap; struct ieee80211_frame *wh; struct ieee80211com *ic = NULL; int error; int ret; if (ifp->if_drv_flags & IFF_DRV_OACTIVE) { /* * Short-circuit requests if the vap is marked OACTIVE * as this can happen because a packet came down through * ieee80211_start before the vap entered RUN state in * which case it's ok to just drop the frame. This * should not be necessary but callers of if_output don't * check OACTIVE. */ senderr(ENETDOWN); } vap = ifp->if_softc; ic = vap->iv_ic; /* * Hand to the 802.3 code if not tagged as * a raw 802.11 frame. */ if (dst->sa_family != AF_IEEE80211) return vap->iv_output(ifp, m, dst, ro); #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) senderr(error); #endif if (ifp->if_flags & IFF_MONITOR) senderr(ENETDOWN); if (!IFNET_IS_UP_RUNNING(ifp)) senderr(ENETDOWN); if (vap->iv_state == IEEE80211_S_CAC) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_OUTPUT | IEEE80211_MSG_DOTH, "block %s frame in CAC state\n", "raw data"); vap->iv_stats.is_tx_badstate++; senderr(EIO); /* XXX */ } else if (vap->iv_state == IEEE80211_S_SCAN) senderr(EIO); /* XXX bypass bridge, pfil, carp, etc. */ if (m->m_pkthdr.len < sizeof(struct ieee80211_frame_ack)) senderr(EIO); /* XXX */ wh = mtod(m, struct ieee80211_frame *); if ((wh->i_fc[0] & IEEE80211_FC0_VERSION_MASK) != IEEE80211_FC0_VERSION_0) senderr(EIO); /* XXX */ if (m->m_pkthdr.len < ieee80211_anyhdrsize(wh)) senderr(EIO); /* XXX */ /* locate destination node */ switch (wh->i_fc[1] & IEEE80211_FC1_DIR_MASK) { case IEEE80211_FC1_DIR_NODS: case IEEE80211_FC1_DIR_FROMDS: ni = ieee80211_find_txnode(vap, wh->i_addr1); break; case IEEE80211_FC1_DIR_TODS: case IEEE80211_FC1_DIR_DSTODS: ni = ieee80211_find_txnode(vap, wh->i_addr3); break; default: senderr(EIO); /* XXX */ } if (ni == NULL) { /* * Permit packets w/ bpf params through regardless * (see below about sa_len). */ if (dst->sa_len == 0) senderr(EHOSTUNREACH); ni = ieee80211_ref_node(vap->iv_bss); } /* * Sanitize mbuf for net80211 flags leaked from above. * * NB: This must be done before ieee80211_classify as * it marks EAPOL in frames with M_EAPOL. */ m->m_flags &= ~M_80211_TX; /* calculate priority so drivers can find the tx queue */ /* XXX assumes an 802.3 frame */ if (ieee80211_classify(ni, m)) senderr(EIO); /* XXX */ IEEE80211_NODE_STAT(ni, tx_data); if (IEEE80211_IS_MULTICAST(wh->i_addr1)) { IEEE80211_NODE_STAT(ni, tx_mcast); m->m_flags |= M_MCAST; } else IEEE80211_NODE_STAT(ni, tx_ucast); /* NB: ieee80211_encap does not include 802.11 header */ IEEE80211_NODE_STAT_ADD(ni, tx_bytes, m->m_pkthdr.len); IEEE80211_TX_LOCK(ic); /* * NB: DLT_IEEE802_11_RADIO identifies the parameters are * present by setting the sa_len field of the sockaddr (yes, * this is a hack). * NB: we assume sa_data is suitably aligned to cast. */ ret = ieee80211_raw_output(vap, ni, m, (const struct ieee80211_bpf_params *)(dst->sa_len ? dst->sa_data : NULL)); IEEE80211_TX_UNLOCK(ic); return (ret); bad: if (m != NULL) m_freem(m); if (ni != NULL) ieee80211_free_node(ni); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return error; #undef senderr } /* * Set the direction field and address fields of an outgoing * frame. Note this should be called early on in constructing * a frame as it sets i_fc[1]; other bits can then be or'd in. 
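 *
 * The addresses follow the usual ToDS/FromDS conventions, e.g.
 * STA (ToDS): addr1=bssid, addr2=sa, addr3=da; hostap (FromDS):
 * addr1=da, addr2=bssid, addr3=sa; WDS uses the 4-address format
 * with the originating sa carried in addr4.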
 */
void
ieee80211_send_setup(
	struct ieee80211_node *ni,
	struct mbuf *m,
	int type, int tid,
	const uint8_t sa[IEEE80211_ADDR_LEN],
	const uint8_t da[IEEE80211_ADDR_LEN],
	const uint8_t bssid[IEEE80211_ADDR_LEN])
{
#define	WH4(wh)	((struct ieee80211_frame_addr4 *)wh)
	struct ieee80211vap *vap = ni->ni_vap;
	struct ieee80211_tx_ampdu *tap;
	struct ieee80211_frame *wh = mtod(m, struct ieee80211_frame *);
	ieee80211_seq seqno;

	IEEE80211_TX_LOCK_ASSERT(ni->ni_ic);

	wh->i_fc[0] = IEEE80211_FC0_VERSION_0 | type;
	if ((type & IEEE80211_FC0_TYPE_MASK) == IEEE80211_FC0_TYPE_DATA) {
		switch (vap->iv_opmode) {
		case IEEE80211_M_STA:
			wh->i_fc[1] = IEEE80211_FC1_DIR_TODS;
			IEEE80211_ADDR_COPY(wh->i_addr1, bssid);
			IEEE80211_ADDR_COPY(wh->i_addr2, sa);
			IEEE80211_ADDR_COPY(wh->i_addr3, da);
			break;
		case IEEE80211_M_IBSS:
		case IEEE80211_M_AHDEMO:
			wh->i_fc[1] = IEEE80211_FC1_DIR_NODS;
			IEEE80211_ADDR_COPY(wh->i_addr1, da);
			IEEE80211_ADDR_COPY(wh->i_addr2, sa);
			IEEE80211_ADDR_COPY(wh->i_addr3, bssid);
			break;
		case IEEE80211_M_HOSTAP:
			wh->i_fc[1] = IEEE80211_FC1_DIR_FROMDS;
			IEEE80211_ADDR_COPY(wh->i_addr1, da);
			IEEE80211_ADDR_COPY(wh->i_addr2, bssid);
			IEEE80211_ADDR_COPY(wh->i_addr3, sa);
			break;
		case IEEE80211_M_WDS:
			wh->i_fc[1] = IEEE80211_FC1_DIR_DSTODS;
			IEEE80211_ADDR_COPY(wh->i_addr1, da);
			IEEE80211_ADDR_COPY(wh->i_addr2, vap->iv_myaddr);
			IEEE80211_ADDR_COPY(wh->i_addr3, da);
			IEEE80211_ADDR_COPY(WH4(wh)->i_addr4, sa);
			break;
		case IEEE80211_M_MBSS:
#ifdef IEEE80211_SUPPORT_MESH
			if (IEEE80211_IS_MULTICAST(da)) {
				wh->i_fc[1] = IEEE80211_FC1_DIR_FROMDS;
				/* XXX next hop */
				IEEE80211_ADDR_COPY(wh->i_addr1, da);
				IEEE80211_ADDR_COPY(wh->i_addr2,
				    vap->iv_myaddr);
			} else {
				wh->i_fc[1] = IEEE80211_FC1_DIR_DSTODS;
				IEEE80211_ADDR_COPY(wh->i_addr1, da);
				IEEE80211_ADDR_COPY(wh->i_addr2,
				    vap->iv_myaddr);
				IEEE80211_ADDR_COPY(wh->i_addr3, da);
				IEEE80211_ADDR_COPY(WH4(wh)->i_addr4, sa);
			}
#endif
			break;
		case IEEE80211_M_MONITOR:	/* NB: to quiet compiler */
			break;
		}
	} else {
		wh->i_fc[1] = IEEE80211_FC1_DIR_NODS;
		IEEE80211_ADDR_COPY(wh->i_addr1, da);
		IEEE80211_ADDR_COPY(wh->i_addr2, sa);
#ifdef IEEE80211_SUPPORT_MESH
		if (vap->iv_opmode == IEEE80211_M_MBSS)
			IEEE80211_ADDR_COPY(wh->i_addr3, sa);
		else
#endif
			IEEE80211_ADDR_COPY(wh->i_addr3, bssid);
	}
	*(uint16_t *)&wh->i_dur[0] = 0;

	/*
	 * XXX TODO: this is what the TX lock is for.
	 * Here we're incrementing sequence numbers, and they
	 * need to be in lock-step with what the driver is doing
	 * both in TX ordering and crypto encap (IV increment.)
	 *
	 * If the driver does seqno itself, then we can skip
	 * assigning sequence numbers here, and we can avoid
	 * requiring the TX lock.
	 */
	tap = &ni->ni_tx_ampdu[tid];
	if (tid != IEEE80211_NONQOS_TID && IEEE80211_AMPDU_RUNNING(tap)) {
		m->m_flags |= M_AMPDU_MPDU;
	} else {
		if (IEEE80211_HAS_SEQ(type & IEEE80211_FC0_TYPE_MASK,
		    type & IEEE80211_FC0_SUBTYPE_MASK)) {
			/*
			 * 802.11-2012 9.3.2.10 - QoS multicast frames
			 * come out of a different seqno space.
			 */
			if (IEEE80211_IS_MULTICAST(wh->i_addr1)) {
				seqno = ni->ni_txseqs[IEEE80211_NONQOS_TID]++;
			} else {
				seqno = ni->ni_txseqs[tid]++;
			}
		} else
			seqno = 0;

		*(uint16_t *)&wh->i_seq[0] =
		    htole16(seqno << IEEE80211_SEQ_SEQ_SHIFT);
		M_SEQNO_SET(m, seqno);
	}

	if (IEEE80211_IS_MULTICAST(wh->i_addr1))
		m->m_flags |= M_MCAST;
#undef WH4
}

/*
 * Send a management frame to the specified node.  The node pointer
 * must have a reference as the pointer will be passed to the driver
 * and potentially held for a long time.
If the frame is successfully * dispatched to the driver, then it is responsible for freeing the * reference (and potentially free'ing up any associated storage); * otherwise deal with reclaiming any reference (on error). */ int ieee80211_mgmt_output(struct ieee80211_node *ni, struct mbuf *m, int type, struct ieee80211_bpf_params *params) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; struct ieee80211_frame *wh; int ret; KASSERT(ni != NULL, ("null node")); if (vap->iv_state == IEEE80211_S_CAC) { IEEE80211_NOTE(vap, IEEE80211_MSG_OUTPUT | IEEE80211_MSG_DOTH, ni, "block %s frame in CAC state", ieee80211_mgt_subtype_name(type)); vap->iv_stats.is_tx_badstate++; ieee80211_free_node(ni); m_freem(m); return EIO; /* XXX */ } M_PREPEND(m, sizeof(struct ieee80211_frame), M_NOWAIT); if (m == NULL) { ieee80211_free_node(ni); return ENOMEM; } IEEE80211_TX_LOCK(ic); wh = mtod(m, struct ieee80211_frame *); ieee80211_send_setup(ni, m, IEEE80211_FC0_TYPE_MGT | type, IEEE80211_NONQOS_TID, vap->iv_myaddr, ni->ni_macaddr, ni->ni_bssid); if (params->ibp_flags & IEEE80211_BPF_CRYPTO) { IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_AUTH, wh->i_addr1, "encrypting frame (%s)", __func__); wh->i_fc[1] |= IEEE80211_FC1_PROTECTED; } m->m_flags |= M_ENCAP; /* mark encapsulated */ KASSERT(type != IEEE80211_FC0_SUBTYPE_PROBE_RESP, ("probe response?")); M_WME_SETAC(m, params->ibp_pri); #ifdef IEEE80211_DEBUG /* avoid printing too many frames */ if ((ieee80211_msg_debug(vap) && doprint(vap, type)) || ieee80211_msg_dumppkts(vap)) { printf("[%s] send %s on channel %u\n", ether_sprintf(wh->i_addr1), ieee80211_mgt_subtype_name(type), ieee80211_chan2ieee(ic, ic->ic_curchan)); } #endif IEEE80211_NODE_STAT(ni, tx_mgmt); ret = ieee80211_raw_output(vap, ni, m, params); IEEE80211_TX_UNLOCK(ic); return (ret); } static void ieee80211_nulldata_transmitted(struct ieee80211_node *ni, void *arg, int status) { struct ieee80211vap *vap = ni->ni_vap; wakeup(vap); } /* * Send a null data frame to the specified node. If the station * is setup for QoS then a QoS Null Data frame is constructed. * If this is a WDS station then a 4-address frame is constructed. * * NB: the caller is assumed to have setup a node reference * for use; this is necessary to deal with a race condition * when probing for inactive stations. Like ieee80211_mgmt_output * we must cleanup any node reference on error; however we * can safely just unref it as we know it will never be the * last reference to the node. 
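 *
 * (Null data frames are mostly used as keepalives and to probe
 * seemingly-inactive stations; the PWR_MGT bit set below is also
 * how a station signals power-save transitions to its AP.)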
*/ int ieee80211_send_nulldata(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; struct mbuf *m; struct ieee80211_frame *wh; int hdrlen; uint8_t *frm; int ret; if (vap->iv_state == IEEE80211_S_CAC) { IEEE80211_NOTE(vap, IEEE80211_MSG_OUTPUT | IEEE80211_MSG_DOTH, ni, "block %s frame in CAC state", "null data"); ieee80211_unref_node(&ni); vap->iv_stats.is_tx_badstate++; return EIO; /* XXX */ } if (ni->ni_flags & (IEEE80211_NODE_QOS|IEEE80211_NODE_HT)) hdrlen = sizeof(struct ieee80211_qosframe); else hdrlen = sizeof(struct ieee80211_frame); /* NB: only WDS vap's get 4-address frames */ if (vap->iv_opmode == IEEE80211_M_WDS) hdrlen += IEEE80211_ADDR_LEN; if (ic->ic_flags & IEEE80211_F_DATAPAD) hdrlen = roundup(hdrlen, sizeof(uint32_t)); m = ieee80211_getmgtframe(&frm, ic->ic_headroom + hdrlen, 0); if (m == NULL) { /* XXX debug msg */ ieee80211_unref_node(&ni); vap->iv_stats.is_tx_nobuf++; return ENOMEM; } KASSERT(M_LEADINGSPACE(m) >= hdrlen, ("leading space %zd", M_LEADINGSPACE(m))); M_PREPEND(m, hdrlen, M_NOWAIT); if (m == NULL) { /* NB: cannot happen */ ieee80211_free_node(ni); return ENOMEM; } IEEE80211_TX_LOCK(ic); wh = mtod(m, struct ieee80211_frame *); /* NB: a little lie */ if (ni->ni_flags & IEEE80211_NODE_QOS) { const int tid = WME_AC_TO_TID(WME_AC_BE); uint8_t *qos; ieee80211_send_setup(ni, m, IEEE80211_FC0_TYPE_DATA | IEEE80211_FC0_SUBTYPE_QOS_NULL, tid, vap->iv_myaddr, ni->ni_macaddr, ni->ni_bssid); if (vap->iv_opmode == IEEE80211_M_WDS) qos = ((struct ieee80211_qosframe_addr4 *) wh)->i_qos; else qos = ((struct ieee80211_qosframe *) wh)->i_qos; qos[0] = tid & IEEE80211_QOS_TID; if (ic->ic_wme.wme_wmeChanParams.cap_wmeParams[WME_AC_BE].wmep_noackPolicy) qos[0] |= IEEE80211_QOS_ACKPOLICY_NOACK; qos[1] = 0; } else { ieee80211_send_setup(ni, m, IEEE80211_FC0_TYPE_DATA | IEEE80211_FC0_SUBTYPE_NODATA, IEEE80211_NONQOS_TID, vap->iv_myaddr, ni->ni_macaddr, ni->ni_bssid); } if (vap->iv_opmode != IEEE80211_M_WDS) { /* NB: power management bit is never sent by an AP */ if ((ni->ni_flags & IEEE80211_NODE_PWR_MGT) && vap->iv_opmode != IEEE80211_M_HOSTAP) wh->i_fc[1] |= IEEE80211_FC1_PWR_MGT; } if ((ic->ic_flags & IEEE80211_F_SCAN) && (ni->ni_flags & IEEE80211_NODE_PWR_MGT)) { ieee80211_add_callback(m, ieee80211_nulldata_transmitted, NULL); } m->m_len = m->m_pkthdr.len = hdrlen; m->m_flags |= M_ENCAP; /* mark encapsulated */ M_WME_SETAC(m, WME_AC_BE); IEEE80211_NODE_STAT(ni, tx_data); IEEE80211_NOTE(vap, IEEE80211_MSG_DEBUG | IEEE80211_MSG_DUMPPKTS, ni, "send %snull data frame on channel %u, pwr mgt %s", ni->ni_flags & IEEE80211_NODE_QOS ? "QoS " : "", ieee80211_chan2ieee(ic, ic->ic_curchan), wh->i_fc[1] & IEEE80211_FC1_PWR_MGT ? "ena" : "dis"); ret = ieee80211_raw_output(vap, ni, m, NULL); IEEE80211_TX_UNLOCK(ic); return (ret); } /* * Assign priority to a frame based on any vlan tag assigned * to the station and/or any Diffserv setting in an IP header. * Finally, if an ACM policy is setup (in station mode) it's * applied. */ int ieee80211_classify(struct ieee80211_node *ni, struct mbuf *m) { const struct ether_header *eh = mtod(m, struct ether_header *); int v_wme_ac, d_wme_ac, ac; /* * Always promote PAE/EAPOL frames to high priority. */ if (eh->ether_type == htons(ETHERTYPE_PAE)) { /* NB: mark so others don't need to check header */ m->m_flags |= M_EAPOL; ac = WME_AC_VO; goto done; } /* * Non-qos traffic goes to BE. 
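	 * (i.e. WME_AC_BE; the VLAN- and DSCP-derived mappings below
	 * only apply once the station has negotiated QoS.)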
*/ if ((ni->ni_flags & IEEE80211_NODE_QOS) == 0) { ac = WME_AC_BE; goto done; } /* * If node has a vlan tag then all traffic * to it must have a matching tag. */ v_wme_ac = 0; if (ni->ni_vlan != 0) { if ((m->m_flags & M_VLANTAG) == 0) { IEEE80211_NODE_STAT(ni, tx_novlantag); return 1; } if (EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != EVL_VLANOFTAG(ni->ni_vlan)) { IEEE80211_NODE_STAT(ni, tx_vlanmismatch); return 1; } /* map vlan priority to AC */ v_wme_ac = TID_TO_WME_AC(EVL_PRIOFTAG(ni->ni_vlan)); } /* XXX m_copydata may be too slow for fast path */ #ifdef INET if (eh->ether_type == htons(ETHERTYPE_IP)) { uint8_t tos; /* * IP frame, map the DSCP bits from the TOS field. */ /* NB: ip header may not be in first mbuf */ m_copydata(m, sizeof(struct ether_header) + offsetof(struct ip, ip_tos), sizeof(tos), &tos); tos >>= 5; /* NB: ECN + low 3 bits of DSCP */ d_wme_ac = TID_TO_WME_AC(tos); } else { #endif /* INET */ #ifdef INET6 if (eh->ether_type == htons(ETHERTYPE_IPV6)) { uint32_t flow; uint8_t tos; /* * IPv6 frame, map the DSCP bits from the traffic class field. */ m_copydata(m, sizeof(struct ether_header) + offsetof(struct ip6_hdr, ip6_flow), sizeof(flow), (caddr_t) &flow); tos = (uint8_t)(ntohl(flow) >> 20); tos >>= 5; /* NB: ECN + low 3 bits of DSCP */ d_wme_ac = TID_TO_WME_AC(tos); } else { #endif /* INET6 */ d_wme_ac = WME_AC_BE; #ifdef INET6 } #endif #ifdef INET } #endif /* * Use highest priority AC. */ if (v_wme_ac > d_wme_ac) ac = v_wme_ac; else ac = d_wme_ac; /* * Apply ACM policy. */ if (ni->ni_vap->iv_opmode == IEEE80211_M_STA) { static const int acmap[4] = { WME_AC_BK, /* WME_AC_BE */ WME_AC_BK, /* WME_AC_BK */ WME_AC_BE, /* WME_AC_VI */ WME_AC_VI, /* WME_AC_VO */ }; struct ieee80211com *ic = ni->ni_ic; while (ac != WME_AC_BK && ic->ic_wme.wme_wmeBssChanParams.cap_wmeParams[ac].wmep_acm) ac = acmap[ac]; } done: M_WME_SETAC(m, ac); return 0; } /* * Insure there is sufficient contiguous space to encapsulate the * 802.11 data frame. If room isn't already there, arrange for it. * Drivers and cipher modules assume we have done the necessary work * and fail rudely if they don't find the space they need. */ struct mbuf * ieee80211_mbuf_adjust(struct ieee80211vap *vap, int hdrsize, struct ieee80211_key *key, struct mbuf *m) { #define TO_BE_RECLAIMED (sizeof(struct ether_header) - sizeof(struct llc)) int needed_space = vap->iv_ic->ic_headroom + hdrsize; if (key != NULL) { /* XXX belongs in crypto code? */ needed_space += key->wk_cipher->ic_header; /* XXX frags */ /* * When crypto is being done in the host we must insure * the data are writable for the cipher routines; clone * a writable mbuf chain. * XXX handle SWMIC specially */ if (key->wk_flags & (IEEE80211_KEY_SWENCRYPT|IEEE80211_KEY_SWENMIC)) { m = m_unshare(m, M_NOWAIT); if (m == NULL) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_OUTPUT, "%s: cannot get writable mbuf\n", __func__); vap->iv_stats.is_tx_nobuf++; /* XXX new stat */ return NULL; } } } /* * We know we are called just before stripping an Ethernet * header and prepending an LLC header. This means we know * there will be * sizeof(struct ether_header) - sizeof(struct llc) * bytes recovered to which we need additional space for the * 802.11 header and any crypto header. */ /* XXX check trailing space and copy instead? 
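	 * As a worked example: with a 14-byte Ethernet header and an
	 * 8-byte LLC/SNAP header, TO_BE_RECLAIMED is 6, so the frame
	 * needs (ic_headroom + hdrsize + cipher header) - 6 bytes of
	 * leading space before we must prepend a fresh mbuf below.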
*/ if (M_LEADINGSPACE(m) < needed_space - TO_BE_RECLAIMED) { struct mbuf *n = m_gethdr(M_NOWAIT, m->m_type); if (n == NULL) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_OUTPUT, "%s: cannot expand storage\n", __func__); vap->iv_stats.is_tx_nobuf++; m_freem(m); return NULL; } KASSERT(needed_space <= MHLEN, ("not enough room, need %u got %d\n", needed_space, MHLEN)); /* * Setup new mbuf to have leading space to prepend the * 802.11 header and any crypto header bits that are * required (the latter are added when the driver calls * back to ieee80211_crypto_encap to do crypto encapsulation). */ /* NB: must be first 'cuz it clobbers m_data */ m_move_pkthdr(n, m); n->m_len = 0; /* NB: m_gethdr does not set */ n->m_data += needed_space; /* * Pull up Ethernet header to create the expected layout. * We could use m_pullup but that's overkill (i.e. we don't * need the actual data) and it cannot fail so do it inline * for speed. */ /* NB: struct ether_header is known to be contiguous */ n->m_len += sizeof(struct ether_header); m->m_len -= sizeof(struct ether_header); m->m_data += sizeof(struct ether_header); /* * Replace the head of the chain. */ n->m_next = m; m = n; } return m; #undef TO_BE_RECLAIMED } /* * Return the transmit key to use in sending a unicast frame. * If a unicast key is set we use that. When no unicast key is set * we fall back to the default transmit key. */ static __inline struct ieee80211_key * ieee80211_crypto_getucastkey(struct ieee80211vap *vap, struct ieee80211_node *ni) { if (IEEE80211_KEY_UNDEFINED(&ni->ni_ucastkey)) { if (vap->iv_def_txkey == IEEE80211_KEYIX_NONE || IEEE80211_KEY_UNDEFINED(&vap->iv_nw_keys[vap->iv_def_txkey])) return NULL; return &vap->iv_nw_keys[vap->iv_def_txkey]; } else { return &ni->ni_ucastkey; } } /* * Return the transmit key to use in sending a multicast frame. * Multicast traffic always uses the group key which is installed as * the default tx key. */ static __inline struct ieee80211_key * ieee80211_crypto_getmcastkey(struct ieee80211vap *vap, struct ieee80211_node *ni) { if (vap->iv_def_txkey == IEEE80211_KEYIX_NONE || IEEE80211_KEY_UNDEFINED(&vap->iv_nw_keys[vap->iv_def_txkey])) return NULL; return &vap->iv_nw_keys[vap->iv_def_txkey]; } /* * Encapsulate an outbound data frame. The mbuf chain is updated. * If an error is encountered NULL is returned. The caller is required * to provide a node reference and pullup the ethernet header in the * first mbuf. * * NB: Packet is assumed to be processed by ieee80211_classify which * marked EAPOL frames w/ M_EAPOL. */ struct mbuf * ieee80211_encap(struct ieee80211vap *vap, struct ieee80211_node *ni, struct mbuf *m) { #define WH4(wh) ((struct ieee80211_frame_addr4 *)(wh)) #define MC01(mc) ((struct ieee80211_meshcntl_ae01 *)mc) struct ieee80211com *ic = ni->ni_ic; #ifdef IEEE80211_SUPPORT_MESH struct ieee80211_mesh_state *ms = vap->iv_mesh; struct ieee80211_meshcntl_ae10 *mc; struct ieee80211_mesh_route *rt = NULL; int dir = -1; #endif struct ether_header eh; struct ieee80211_frame *wh; struct ieee80211_key *key; struct llc *llc; int hdrsize, hdrspace, datalen, addqos, txfrag, is4addr, is_mcast; ieee80211_seq seqno; int meshhdrsize, meshae; uint8_t *qos; int is_amsdu = 0; IEEE80211_TX_LOCK_ASSERT(ic); is_mcast = !! (m->m_flags & (M_MCAST | M_BCAST)); /* * Copy existing Ethernet header to a safe place. The * rest of the code assumes it's ok to strip it when * reorganizing state for the final encapsulation. 
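	 * (The Ethernet header is partly trimmed via m_adj() and
	 * partly overwritten by the LLC/SNAP header below, so the
	 * stack copy in 'eh' is what the 802.11 header construction
	 * reads the addresses from.)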
*/ KASSERT(m->m_len >= sizeof(eh), ("no ethernet header!")); ETHER_HEADER_COPY(&eh, mtod(m, caddr_t)); /* * Insure space for additional headers. First identify * transmit key to use in calculating any buffer adjustments * required. This is also used below to do privacy * encapsulation work. Then calculate the 802.11 header * size and any padding required by the driver. * * Note key may be NULL if we fall back to the default * transmit key and that is not set. In that case the * buffer may not be expanded as needed by the cipher * routines, but they will/should discard it. */ if (vap->iv_flags & IEEE80211_F_PRIVACY) { if (vap->iv_opmode == IEEE80211_M_STA || !IEEE80211_IS_MULTICAST(eh.ether_dhost) || (vap->iv_opmode == IEEE80211_M_WDS && (vap->iv_flags_ext & IEEE80211_FEXT_WDSLEGACY))) key = ieee80211_crypto_getucastkey(vap, ni); else key = ieee80211_crypto_getmcastkey(vap, ni); if (key == NULL && (m->m_flags & M_EAPOL) == 0) { IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_CRYPTO, eh.ether_dhost, "no default transmit key (%s) deftxkey %u", __func__, vap->iv_def_txkey); vap->iv_stats.is_tx_nodefkey++; goto bad; } } else key = NULL; /* * XXX Some ap's don't handle QoS-encapsulated EAPOL * frames so suppress use. This may be an issue if other * ap's require all data frames to be QoS-encapsulated * once negotiated in which case we'll need to make this * configurable. * * Don't send multicast QoS frames. * Technically multicast frames can be QoS if all stations in the * BSS are also QoS. * * NB: mesh data frames are QoS, including multicast frames. */ addqos = (((is_mcast == 0) && (ni->ni_flags & (IEEE80211_NODE_QOS|IEEE80211_NODE_HT))) || (vap->iv_opmode == IEEE80211_M_MBSS)) && (m->m_flags & M_EAPOL) == 0; if (addqos) hdrsize = sizeof(struct ieee80211_qosframe); else hdrsize = sizeof(struct ieee80211_frame); #ifdef IEEE80211_SUPPORT_MESH if (vap->iv_opmode == IEEE80211_M_MBSS) { /* * Mesh data frames are encapsulated according to the * rules of Section 11B.8.5 (p.139 of D3.0 spec). 
* o Group Addressed data (aka multicast) originating * at the local sta are sent w/ 3-address format and * address extension mode 00 * o Individually Addressed data (aka unicast) originating * at the local sta are sent w/ 4-address format and * address extension mode 00 * o Group Addressed data forwarded from a non-mesh sta are * sent w/ 3-address format and address extension mode 01 * o Individually Address data from another sta are sent * w/ 4-address format and address extension mode 10 */ is4addr = 0; /* NB: don't use, disable */ if (!IEEE80211_IS_MULTICAST(eh.ether_dhost)) { rt = ieee80211_mesh_rt_find(vap, eh.ether_dhost); KASSERT(rt != NULL, ("route is NULL")); dir = IEEE80211_FC1_DIR_DSTODS; hdrsize += IEEE80211_ADDR_LEN; if (rt->rt_flags & IEEE80211_MESHRT_FLAGS_PROXY) { if (IEEE80211_ADDR_EQ(rt->rt_mesh_gate, vap->iv_myaddr)) { IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_MESH, eh.ether_dhost, "%s", "trying to send to ourself"); goto bad; } meshae = IEEE80211_MESH_AE_10; meshhdrsize = sizeof(struct ieee80211_meshcntl_ae10); } else { meshae = IEEE80211_MESH_AE_00; meshhdrsize = sizeof(struct ieee80211_meshcntl); } } else { dir = IEEE80211_FC1_DIR_FROMDS; if (!IEEE80211_ADDR_EQ(eh.ether_shost, vap->iv_myaddr)) { /* proxy group */ meshae = IEEE80211_MESH_AE_01; meshhdrsize = sizeof(struct ieee80211_meshcntl_ae01); } else { /* group */ meshae = IEEE80211_MESH_AE_00; meshhdrsize = sizeof(struct ieee80211_meshcntl); } } } else { #endif /* * 4-address frames need to be generated for: * o packets sent through a WDS vap (IEEE80211_M_WDS) * o packets sent through a vap marked for relaying * (e.g. a station operating with dynamic WDS) */ is4addr = vap->iv_opmode == IEEE80211_M_WDS || ((vap->iv_flags_ext & IEEE80211_FEXT_4ADDR) && !IEEE80211_ADDR_EQ(eh.ether_shost, vap->iv_myaddr)); if (is4addr) hdrsize += IEEE80211_ADDR_LEN; meshhdrsize = meshae = 0; #ifdef IEEE80211_SUPPORT_MESH } #endif /* * Honor driver DATAPAD requirement. */ if (ic->ic_flags & IEEE80211_F_DATAPAD) hdrspace = roundup(hdrsize, sizeof(uint32_t)); else hdrspace = hdrsize; if (__predict_true((m->m_flags & M_FF) == 0)) { /* * Normal frame. */ m = ieee80211_mbuf_adjust(vap, hdrspace + meshhdrsize, key, m); if (m == NULL) { /* NB: ieee80211_mbuf_adjust handles msgs+statistics */ goto bad; } /* NB: this could be optimized 'cuz of ieee80211_mbuf_adjust */ m_adj(m, sizeof(struct ether_header) - sizeof(struct llc)); llc = mtod(m, struct llc *); llc->llc_dsap = llc->llc_ssap = LLC_SNAP_LSAP; llc->llc_control = LLC_UI; llc->llc_snap.org_code[0] = 0; llc->llc_snap.org_code[1] = 0; llc->llc_snap.org_code[2] = 0; llc->llc_snap.ether_type = eh.ether_type; } else { #ifdef IEEE80211_SUPPORT_SUPERG /* * Aggregated frame. Check if it's for AMSDU or FF. * * XXX TODO: IEEE80211_NODE_AMSDU* isn't implemented * anywhere for some reason. But, since 11n requires * AMSDU RX, we can just assume "11n" == "AMSDU". 
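		 * (In practice: if the peer negotiated A-MSDU transmit
		 * we take the A-MSDU encap path below, otherwise fall
		 * back to the Atheros fast-frames encap.)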
*/ IEEE80211_DPRINTF(vap, IEEE80211_MSG_SUPERG, "%s: called; M_FF\n", __func__); if (ieee80211_amsdu_tx_ok(ni)) { m = ieee80211_amsdu_encap(vap, m, hdrspace + meshhdrsize, key); is_amsdu = 1; } else { m = ieee80211_ff_encap(vap, m, hdrspace + meshhdrsize, key); } if (m == NULL) #endif goto bad; } datalen = m->m_pkthdr.len; /* NB: w/o 802.11 header */ M_PREPEND(m, hdrspace + meshhdrsize, M_NOWAIT); if (m == NULL) { vap->iv_stats.is_tx_nobuf++; goto bad; } wh = mtod(m, struct ieee80211_frame *); wh->i_fc[0] = IEEE80211_FC0_VERSION_0 | IEEE80211_FC0_TYPE_DATA; *(uint16_t *)wh->i_dur = 0; qos = NULL; /* NB: quiet compiler */ if (is4addr) { wh->i_fc[1] = IEEE80211_FC1_DIR_DSTODS; IEEE80211_ADDR_COPY(wh->i_addr1, ni->ni_macaddr); IEEE80211_ADDR_COPY(wh->i_addr2, vap->iv_myaddr); IEEE80211_ADDR_COPY(wh->i_addr3, eh.ether_dhost); IEEE80211_ADDR_COPY(WH4(wh)->i_addr4, eh.ether_shost); } else switch (vap->iv_opmode) { case IEEE80211_M_STA: wh->i_fc[1] = IEEE80211_FC1_DIR_TODS; IEEE80211_ADDR_COPY(wh->i_addr1, ni->ni_bssid); IEEE80211_ADDR_COPY(wh->i_addr2, eh.ether_shost); IEEE80211_ADDR_COPY(wh->i_addr3, eh.ether_dhost); break; case IEEE80211_M_IBSS: case IEEE80211_M_AHDEMO: wh->i_fc[1] = IEEE80211_FC1_DIR_NODS; IEEE80211_ADDR_COPY(wh->i_addr1, eh.ether_dhost); IEEE80211_ADDR_COPY(wh->i_addr2, eh.ether_shost); /* * NB: always use the bssid from iv_bss as the * neighbor's may be stale after an ibss merge */ IEEE80211_ADDR_COPY(wh->i_addr3, vap->iv_bss->ni_bssid); break; case IEEE80211_M_HOSTAP: wh->i_fc[1] = IEEE80211_FC1_DIR_FROMDS; IEEE80211_ADDR_COPY(wh->i_addr1, eh.ether_dhost); IEEE80211_ADDR_COPY(wh->i_addr2, ni->ni_bssid); IEEE80211_ADDR_COPY(wh->i_addr3, eh.ether_shost); break; #ifdef IEEE80211_SUPPORT_MESH case IEEE80211_M_MBSS: /* NB: offset by hdrspace to deal with DATAPAD */ mc = (struct ieee80211_meshcntl_ae10 *) (mtod(m, uint8_t *) + hdrspace); wh->i_fc[1] = dir; switch (meshae) { case IEEE80211_MESH_AE_00: /* no proxy */ mc->mc_flags = 0; if (dir == IEEE80211_FC1_DIR_DSTODS) { /* ucast */ IEEE80211_ADDR_COPY(wh->i_addr1, ni->ni_macaddr); IEEE80211_ADDR_COPY(wh->i_addr2, vap->iv_myaddr); IEEE80211_ADDR_COPY(wh->i_addr3, eh.ether_dhost); IEEE80211_ADDR_COPY(WH4(wh)->i_addr4, eh.ether_shost); qos =((struct ieee80211_qosframe_addr4 *) wh)->i_qos; } else if (dir == IEEE80211_FC1_DIR_FROMDS) { /* mcast */ IEEE80211_ADDR_COPY(wh->i_addr1, eh.ether_dhost); IEEE80211_ADDR_COPY(wh->i_addr2, vap->iv_myaddr); IEEE80211_ADDR_COPY(wh->i_addr3, eh.ether_shost); qos = ((struct ieee80211_qosframe *) wh)->i_qos; } break; case IEEE80211_MESH_AE_01: /* mcast, proxy */ wh->i_fc[1] = IEEE80211_FC1_DIR_FROMDS; IEEE80211_ADDR_COPY(wh->i_addr1, eh.ether_dhost); IEEE80211_ADDR_COPY(wh->i_addr2, vap->iv_myaddr); IEEE80211_ADDR_COPY(wh->i_addr3, vap->iv_myaddr); mc->mc_flags = 1; IEEE80211_ADDR_COPY(MC01(mc)->mc_addr4, eh.ether_shost); qos = ((struct ieee80211_qosframe *) wh)->i_qos; break; case IEEE80211_MESH_AE_10: /* ucast, proxy */ KASSERT(rt != NULL, ("route is NULL")); IEEE80211_ADDR_COPY(wh->i_addr1, rt->rt_nexthop); IEEE80211_ADDR_COPY(wh->i_addr2, vap->iv_myaddr); IEEE80211_ADDR_COPY(wh->i_addr3, rt->rt_mesh_gate); IEEE80211_ADDR_COPY(WH4(wh)->i_addr4, vap->iv_myaddr); mc->mc_flags = IEEE80211_MESH_AE_10; IEEE80211_ADDR_COPY(mc->mc_addr5, eh.ether_dhost); IEEE80211_ADDR_COPY(mc->mc_addr6, eh.ether_shost); qos = ((struct ieee80211_qosframe_addr4 *) wh)->i_qos; break; default: KASSERT(0, ("meshae %d", meshae)); break; } mc->mc_ttl = ms->ms_ttl; ms->ms_seq++; le32enc(mc->mc_seq, ms->ms_seq); break; #endif 
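	/*
	 * (WDS frames were already covered by the is4addr path above,
	 * so reaching these labels is an error.)
	 */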
	case IEEE80211_M_WDS:		/* NB: is4addr should always be true */
	default:
		goto bad;
	}
	if (m->m_flags & M_MORE_DATA)
		wh->i_fc[1] |= IEEE80211_FC1_MORE_DATA;
	if (addqos) {
		int ac, tid;

		if (is4addr) {
			qos = ((struct ieee80211_qosframe_addr4 *) wh)->i_qos;
		/* NB: mesh case handled earlier */
		} else if (vap->iv_opmode != IEEE80211_M_MBSS)
			qos = ((struct ieee80211_qosframe *) wh)->i_qos;

		ac = M_WME_GETAC(m);
		/* map from access class/queue to 11e header priority value */
		tid = WME_AC_TO_TID(ac);
		qos[0] = tid & IEEE80211_QOS_TID;
		if (ic->ic_wme.wme_wmeChanParams.cap_wmeParams[ac].wmep_noackPolicy)
			qos[0] |= IEEE80211_QOS_ACKPOLICY_NOACK;
#ifdef IEEE80211_SUPPORT_MESH
		if (vap->iv_opmode == IEEE80211_M_MBSS)
			qos[1] = IEEE80211_QOS_MC;
		else
#endif
			qos[1] = 0;
		wh->i_fc[0] |= IEEE80211_FC0_SUBTYPE_QOS;

		/*
		 * If this is an A-MSDU then ensure we set the
		 * relevant field.
		 */
		if (is_amsdu)
			qos[0] |= IEEE80211_QOS_AMSDU;

		/*
		 * XXX TODO TX lock is needed for atomic updates of sequence
		 * numbers.  If the driver does it, then don't do it here;
		 * and we don't need the TX lock held.
		 */
		if ((m->m_flags & M_AMPDU_MPDU) == 0) {
			/*
			 * 802.11-2012 9.3.2.10 -
			 *
			 * If this is a multicast frame then we need
			 * to ensure that the sequence number comes from
			 * a separate seqno space and not the TID space.
			 *
			 * Otherwise multicast frames may actually cause
			 * holes in the TX blockack window space and
			 * upset various things.
			 */
			if (IEEE80211_IS_MULTICAST(wh->i_addr1))
				seqno = ni->ni_txseqs[IEEE80211_NONQOS_TID]++;
			else
				seqno = ni->ni_txseqs[tid]++;

			/*
			 * NB: don't assign a sequence # to potential
			 * aggregates; we expect this happens at the
			 * point the frame comes off any aggregation q
			 * as otherwise we may introduce holes in the
			 * BA sequence space and/or make window accounting
			 * more difficult.
			 *
			 * XXX may want to control this with a driver
			 * capability; this may also change when we pull
			 * aggregation up into net80211
			 */
			*(uint16_t *)wh->i_seq =
			    htole16(seqno << IEEE80211_SEQ_SEQ_SHIFT);
			M_SEQNO_SET(m, seqno);
		}
	} else {
		/*
		 * XXX TODO TX lock is needed for atomic updates of sequence
		 * numbers.  If the driver does it, then don't do it here;
		 * and we don't need the TX lock held.
		 */
		seqno = ni->ni_txseqs[IEEE80211_NONQOS_TID]++;
		*(uint16_t *)wh->i_seq =
		    htole16(seqno << IEEE80211_SEQ_SEQ_SHIFT);
		M_SEQNO_SET(m, seqno);

		/*
		 * XXX TODO: we shouldn't allow EAPOL, etc that would
		 * be forced to be non-QoS traffic to be A-MSDU encapsulated.
		 */
		if (is_amsdu)
			printf("%s: XXX ERROR: is_amsdu set; not QoS!\n",
			    __func__);
	}

	/* check if xmit fragmentation is required */
	txfrag = (m->m_pkthdr.len > vap->iv_fragthreshold &&
	    !IEEE80211_IS_MULTICAST(wh->i_addr1) &&
	    (vap->iv_caps & IEEE80211_C_TXFRAG) &&
	    (m->m_flags & (M_FF | M_AMPDU_MPDU)) == 0);
	if (key != NULL) {
		/*
		 * IEEE 802.1X: send EAPOL frames always in the clear.
		 * WPA/WPA2: encrypt EAPOL keys when pairwise keys are set.
		 */
		if ((m->m_flags & M_EAPOL) == 0 ||
		    ((vap->iv_flags & IEEE80211_F_WPA) &&
		     (vap->iv_opmode == IEEE80211_M_STA ?
			!IEEE80211_KEY_UNDEFINED(key) :
			!IEEE80211_KEY_UNDEFINED(&ni->ni_ucastkey)))) {
			wh->i_fc[1] |= IEEE80211_FC1_PROTECTED;
			if (!ieee80211_crypto_enmic(vap, key, m, txfrag)) {
				IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_OUTPUT,
				    eh.ether_dhost,
				    "%s", "enmic failed, discard frame");
				vap->iv_stats.is_crypto_enmicfail++;
				goto bad;
			}
		}
	}
	if (txfrag && !ieee80211_fragment(vap, m, hdrsize, key != NULL ?
key->wk_cipher->ic_header : 0, vap->iv_fragthreshold)) goto bad; m->m_flags |= M_ENCAP; /* mark encapsulated */ IEEE80211_NODE_STAT(ni, tx_data); if (IEEE80211_IS_MULTICAST(wh->i_addr1)) { IEEE80211_NODE_STAT(ni, tx_mcast); m->m_flags |= M_MCAST; } else IEEE80211_NODE_STAT(ni, tx_ucast); IEEE80211_NODE_STAT_ADD(ni, tx_bytes, datalen); return m; bad: if (m != NULL) m_freem(m); return NULL; #undef WH4 #undef MC01 } void ieee80211_free_mbuf(struct mbuf *m) { struct mbuf *next; if (m == NULL) return; do { next = m->m_nextpkt; m->m_nextpkt = NULL; m_freem(m); } while ((m = next) != NULL); } /* * Fragment the frame according to the specified mtu. * The size of the 802.11 header (w/o padding) is provided * so we don't need to recalculate it. We create a new * mbuf for each fragment and chain it through m_nextpkt; * we might be able to optimize this by reusing the original * packet's mbufs but that is significantly more complicated. */ static int ieee80211_fragment(struct ieee80211vap *vap, struct mbuf *m0, u_int hdrsize, u_int ciphdrsize, u_int mtu) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_frame *wh, *whf; struct mbuf *m, *prev; u_int totalhdrsize, fragno, fragsize, off, remainder, payload; u_int hdrspace; KASSERT(m0->m_nextpkt == NULL, ("mbuf already chained?")); KASSERT(m0->m_pkthdr.len > mtu, ("pktlen %u mtu %u", m0->m_pkthdr.len, mtu)); /* * Honor driver DATAPAD requirement. */ if (ic->ic_flags & IEEE80211_F_DATAPAD) hdrspace = roundup(hdrsize, sizeof(uint32_t)); else hdrspace = hdrsize; wh = mtod(m0, struct ieee80211_frame *); /* NB: mark the first frag; it will be propagated below */ wh->i_fc[1] |= IEEE80211_FC1_MORE_FRAG; totalhdrsize = hdrspace + ciphdrsize; fragno = 1; off = mtu - ciphdrsize; remainder = m0->m_pkthdr.len - off; prev = m0; do { fragsize = MIN(totalhdrsize + remainder, mtu); m = m_get2(fragsize, M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) goto bad; /* leave room to prepend any cipher header */ m_align(m, fragsize - ciphdrsize); /* * Form the header in the fragment. Note that since * we mark the first fragment with the MORE_FRAG bit * it automatically is propagated to each fragment; we * need only clear it on the last fragment (done below). * NB: frag 1+ dont have Mesh Control field present. 
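	 * (Each continuation fragment carries at most
	 * mtu - (hdrspace + ciphdrsize) bytes of payload; the first
	 * fragment is trimmed in place below so that it totals
	 * mtu - ciphdrsize bytes.)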
*/ whf = mtod(m, struct ieee80211_frame *); memcpy(whf, wh, hdrsize); #ifdef IEEE80211_SUPPORT_MESH if (vap->iv_opmode == IEEE80211_M_MBSS) { if (IEEE80211_IS_DSTODS(wh)) ((struct ieee80211_qosframe_addr4 *) whf)->i_qos[1] &= ~IEEE80211_QOS_MC; else ((struct ieee80211_qosframe *) whf)->i_qos[1] &= ~IEEE80211_QOS_MC; } #endif *(uint16_t *)&whf->i_seq[0] |= htole16( (fragno & IEEE80211_SEQ_FRAG_MASK) << IEEE80211_SEQ_FRAG_SHIFT); fragno++; payload = fragsize - totalhdrsize; /* NB: destination is known to be contiguous */ m_copydata(m0, off, payload, mtod(m, uint8_t *) + hdrspace); m->m_len = hdrspace + payload; m->m_pkthdr.len = hdrspace + payload; m->m_flags |= M_FRAG; /* chain up the fragment */ prev->m_nextpkt = m; prev = m; /* deduct fragment just formed */ remainder -= payload; off += payload; } while (remainder != 0); /* set the last fragment */ m->m_flags |= M_LASTFRAG; whf->i_fc[1] &= ~IEEE80211_FC1_MORE_FRAG; /* strip first mbuf now that everything has been copied */ m_adj(m0, -(m0->m_pkthdr.len - (mtu - ciphdrsize))); m0->m_flags |= M_FIRSTFRAG | M_FRAG; vap->iv_stats.is_tx_fragframes++; vap->iv_stats.is_tx_frags += fragno-1; return 1; bad: /* reclaim fragments but leave original frame for caller to free */ ieee80211_free_mbuf(m0->m_nextpkt); m0->m_nextpkt = NULL; return 0; } /* * Add a supported rates element id to a frame. */ uint8_t * ieee80211_add_rates(uint8_t *frm, const struct ieee80211_rateset *rs) { int nrates; *frm++ = IEEE80211_ELEMID_RATES; nrates = rs->rs_nrates; if (nrates > IEEE80211_RATE_SIZE) nrates = IEEE80211_RATE_SIZE; *frm++ = nrates; memcpy(frm, rs->rs_rates, nrates); return frm + nrates; } /* * Add an extended supported rates element id to a frame. */ uint8_t * ieee80211_add_xrates(uint8_t *frm, const struct ieee80211_rateset *rs) { /* * Add an extended supported rates element if operating in 11g mode. */ if (rs->rs_nrates > IEEE80211_RATE_SIZE) { int nrates = rs->rs_nrates - IEEE80211_RATE_SIZE; *frm++ = IEEE80211_ELEMID_XRATES; *frm++ = nrates; memcpy(frm, rs->rs_rates + IEEE80211_RATE_SIZE, nrates); frm += nrates; } return frm; } /* * Add an ssid element to a frame. */ uint8_t * ieee80211_add_ssid(uint8_t *frm, const uint8_t *ssid, u_int len) { *frm++ = IEEE80211_ELEMID_SSID; *frm++ = len; memcpy(frm, ssid, len); return frm + len; } /* * Add an erp element to a frame. */ static uint8_t * ieee80211_add_erp(uint8_t *frm, struct ieee80211com *ic) { uint8_t erp; *frm++ = IEEE80211_ELEMID_ERP; *frm++ = 1; erp = 0; if (ic->ic_nonerpsta != 0) erp |= IEEE80211_ERP_NON_ERP_PRESENT; if (ic->ic_flags & IEEE80211_F_USEPROT) erp |= IEEE80211_ERP_USE_PROTECTION; if (ic->ic_flags & IEEE80211_F_USEBARKER) erp |= IEEE80211_ERP_LONG_PREAMBLE; *frm++ = erp; return frm; } /* * Add a CFParams element to a frame. */ static uint8_t * ieee80211_add_cfparms(uint8_t *frm, struct ieee80211com *ic) { #define ADDSHORT(frm, v) do { \ le16enc(frm, v); \ frm += 2; \ } while (0) *frm++ = IEEE80211_ELEMID_CFPARMS; *frm++ = 6; *frm++ = 0; /* CFP count */ *frm++ = 2; /* CFP period */ ADDSHORT(frm, 0); /* CFP MaxDuration (TU) */ ADDSHORT(frm, 0); /* CFP CurRemaining (TU) */ return frm; #undef ADDSHORT } static __inline uint8_t * add_appie(uint8_t *frm, const struct ieee80211_appie *ie) { memcpy(frm, ie->ie_data, ie->ie_len); return frm + ie->ie_len; } static __inline uint8_t * add_ie(uint8_t *frm, const uint8_t *ie) { memcpy(frm, ie, 2 + ie[1]); return frm + 2 + ie[1]; } #define WME_OUI_BYTES 0x00, 0x50, 0xf2 /* * Add a WME information element to a frame. 
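 * (This is the vendor-specific WME/WMM IE: OUI 00:50:f2 with the
 * type, subtype and version fields taken from the static template
 * below; the QoS info byte is left at zero here.)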
*/ uint8_t * ieee80211_add_wme_info(uint8_t *frm, struct ieee80211_wme_state *wme) { static const struct ieee80211_wme_info info = { .wme_id = IEEE80211_ELEMID_VENDOR, .wme_len = sizeof(struct ieee80211_wme_info) - 2, .wme_oui = { WME_OUI_BYTES }, .wme_type = WME_OUI_TYPE, .wme_subtype = WME_INFO_OUI_SUBTYPE, .wme_version = WME_VERSION, .wme_info = 0, }; memcpy(frm, &info, sizeof(info)); return frm + sizeof(info); } /* * Add a WME parameters element to a frame. */ static uint8_t * ieee80211_add_wme_param(uint8_t *frm, struct ieee80211_wme_state *wme) { #define SM(_v, _f) (((_v) << _f##_S) & _f) #define ADDSHORT(frm, v) do { \ le16enc(frm, v); \ frm += 2; \ } while (0) /* NB: this works 'cuz a param has an info at the front */ static const struct ieee80211_wme_info param = { .wme_id = IEEE80211_ELEMID_VENDOR, .wme_len = sizeof(struct ieee80211_wme_param) - 2, .wme_oui = { WME_OUI_BYTES }, .wme_type = WME_OUI_TYPE, .wme_subtype = WME_PARAM_OUI_SUBTYPE, .wme_version = WME_VERSION, }; int i; memcpy(frm, ¶m, sizeof(param)); frm += __offsetof(struct ieee80211_wme_info, wme_info); *frm++ = wme->wme_bssChanParams.cap_info; /* AC info */ *frm++ = 0; /* reserved field */ for (i = 0; i < WME_NUM_AC; i++) { const struct wmeParams *ac = &wme->wme_bssChanParams.cap_wmeParams[i]; *frm++ = SM(i, WME_PARAM_ACI) | SM(ac->wmep_acm, WME_PARAM_ACM) | SM(ac->wmep_aifsn, WME_PARAM_AIFSN) ; *frm++ = SM(ac->wmep_logcwmax, WME_PARAM_LOGCWMAX) | SM(ac->wmep_logcwmin, WME_PARAM_LOGCWMIN) ; ADDSHORT(frm, ac->wmep_txopLimit); } return frm; #undef SM #undef ADDSHORT } #undef WME_OUI_BYTES /* * Add an 11h Power Constraint element to a frame. */ static uint8_t * ieee80211_add_powerconstraint(uint8_t *frm, struct ieee80211vap *vap) { const struct ieee80211_channel *c = vap->iv_bss->ni_chan; /* XXX per-vap tx power limit? */ int8_t limit = vap->iv_ic->ic_txpowlimit / 2; frm[0] = IEEE80211_ELEMID_PWRCNSTR; frm[1] = 1; frm[2] = c->ic_maxregpower > limit ? c->ic_maxregpower - limit : 0; return frm + 3; } /* * Add an 11h Power Capability element to a frame. */ static uint8_t * ieee80211_add_powercapability(uint8_t *frm, const struct ieee80211_channel *c) { frm[0] = IEEE80211_ELEMID_PWRCAP; frm[1] = 2; frm[2] = c->ic_minpower; frm[3] = c->ic_maxpower; return frm + 4; } /* * Add an 11h Supported Channels element to a frame. */ static uint8_t * ieee80211_add_supportedchannels(uint8_t *frm, struct ieee80211com *ic) { static const int ielen = 26; frm[0] = IEEE80211_ELEMID_SUPPCHAN; frm[1] = ielen; /* XXX not correct */ memcpy(frm+2, ic->ic_chan_avail, ielen); return frm + 2 + ielen; } /* * Add an 11h Quiet time element to a frame. */ static uint8_t * -ieee80211_add_quiet(uint8_t *frm, struct ieee80211vap *vap) +ieee80211_add_quiet(uint8_t *frm, struct ieee80211vap *vap, int update) { struct ieee80211_quiet_ie *quiet = (struct ieee80211_quiet_ie *) frm; quiet->quiet_ie = IEEE80211_ELEMID_QUIET; quiet->len = 6; - if (vap->iv_quiet_count_value == 1) - vap->iv_quiet_count_value = vap->iv_quiet_count; - else if (vap->iv_quiet_count_value > 1) - vap->iv_quiet_count_value--; + /* + * Only update every beacon interval - otherwise probe responses + * would update the quiet count value. 
+	 */
+	if (update) {
+		if (vap->iv_quiet_count_value == 1)
+			vap->iv_quiet_count_value = vap->iv_quiet_count;
+		else if (vap->iv_quiet_count_value > 1)
+			vap->iv_quiet_count_value--;
+	}
+
	if (vap->iv_quiet_count_value == 0) {
		/* value 0 is reserved as per the 802.11h standard */
		vap->iv_quiet_count_value = 1;
	}

	quiet->tbttcount = vap->iv_quiet_count_value;
	quiet->period = vap->iv_quiet_period;
	quiet->duration = htole16(vap->iv_quiet_duration);
	quiet->offset = htole16(vap->iv_quiet_offset);
	return frm + sizeof(*quiet);
}

/*
 * Add an 11h Channel Switch Announcement element to a frame.
 * Note that we use the per-vap CSA count to adjust the global
 * counter so we can use this routine to form probe response
 * frames and get the current count.
 */
static uint8_t *
ieee80211_add_csa(uint8_t *frm, struct ieee80211vap *vap)
{
	struct ieee80211com *ic = vap->iv_ic;
	struct ieee80211_csa_ie *csa = (struct ieee80211_csa_ie *) frm;

	csa->csa_ie = IEEE80211_ELEMID_CSA;
	csa->csa_len = 3;
	csa->csa_mode = 1;		/* XXX force quiet on channel */
	csa->csa_newchan = ieee80211_chan2ieee(ic, ic->ic_csa_newchan);
	csa->csa_count = ic->ic_csa_count - vap->iv_csa_count;
	return frm + sizeof(*csa);
}

/*
 * Add an 11h country information element to a frame.
 */
static uint8_t *
ieee80211_add_countryie(uint8_t *frm, struct ieee80211com *ic)
{
	if (ic->ic_countryie == NULL ||
	    ic->ic_countryie_chan != ic->ic_bsschan) {
		/*
		 * Handle lazy construction of ie.  This is done on
		 * first use and after a channel change that requires
		 * re-calculation.
		 */
		if (ic->ic_countryie != NULL)
			IEEE80211_FREE(ic->ic_countryie, M_80211_NODE_IE);
		ic->ic_countryie = ieee80211_alloc_countryie(ic);
		if (ic->ic_countryie == NULL)
			return frm;
		ic->ic_countryie_chan = ic->ic_bsschan;
	}
	return add_appie(frm, ic->ic_countryie);
}

uint8_t *
ieee80211_add_wpa(uint8_t *frm, const struct ieee80211vap *vap)
{
	if (vap->iv_flags & IEEE80211_F_WPA1 && vap->iv_wpa_ie != NULL)
		return (add_ie(frm, vap->iv_wpa_ie));
	else {
		/* XXX else complain? */
		return (frm);
	}
}

uint8_t *
ieee80211_add_rsn(uint8_t *frm, const struct ieee80211vap *vap)
{
	if (vap->iv_flags & IEEE80211_F_WPA2 && vap->iv_rsn_ie != NULL)
		return (add_ie(frm, vap->iv_rsn_ie));
	else {
		/* XXX else complain? */
		return (frm);
	}
}

uint8_t *
ieee80211_add_qos(uint8_t *frm, const struct ieee80211_node *ni)
{
	if (ni->ni_flags & IEEE80211_NODE_QOS) {
		*frm++ = IEEE80211_ELEMID_QOS;
		*frm++ = 1;
		*frm++ = 0;
	}

	return (frm);
}

/*
 * Send a probe request frame with the specified ssid
 * and any optional information element data.
 */
/* XXX VHT? */
int
ieee80211_send_probereq(struct ieee80211_node *ni,
	const uint8_t sa[IEEE80211_ADDR_LEN],
	const uint8_t da[IEEE80211_ADDR_LEN],
	const uint8_t bssid[IEEE80211_ADDR_LEN],
	const uint8_t *ssid, size_t ssidlen)
{
	struct ieee80211vap *vap = ni->ni_vap;
	struct ieee80211com *ic = ni->ni_ic;
	struct ieee80211_node *bss;
	const struct ieee80211_txparam *tp;
	struct ieee80211_bpf_params params;
	const struct ieee80211_rateset *rs;
	struct mbuf *m;
	uint8_t *frm;
	int ret;

	bss = ieee80211_ref_node(vap->iv_bss);

	if (vap->iv_state == IEEE80211_S_CAC) {
		IEEE80211_NOTE(vap, IEEE80211_MSG_OUTPUT, ni,
		    "block %s frame in CAC state", "probe request");
		vap->iv_stats.is_tx_badstate++;
		ieee80211_free_node(bss);
		return EIO;		/* XXX */
	}

	/*
	 * Hold a reference on the node so it doesn't go away until after
	 * the xmit is complete all the way in the driver.  On error we
	 * will remove our reference.
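	 * (The bss reference taken above is always dropped before
	 * returning; the ni reference is inherited by
	 * ieee80211_raw_output() on success and freed explicitly on
	 * the error paths.)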
*/ IEEE80211_DPRINTF(vap, IEEE80211_MSG_NODE, "ieee80211_ref_node (%s:%u) %p<%s> refcnt %d\n", __func__, __LINE__, ni, ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni)+1); ieee80211_ref_node(ni); /* * prreq frame format * [tlv] ssid * [tlv] supported rates * [tlv] RSN (optional) * [tlv] extended supported rates * [tlv] HT cap (optional) * [tlv] VHT cap (optional) * [tlv] WPA (optional) * [tlv] user-specified ie's */ m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), 2 + IEEE80211_NWID_LEN + 2 + IEEE80211_RATE_SIZE + sizeof(struct ieee80211_ie_htcap) + sizeof(struct ieee80211_ie_vhtcap) + sizeof(struct ieee80211_ie_htinfo) /* XXX not needed? */ + sizeof(struct ieee80211_ie_wpa) + 2 + (IEEE80211_RATE_MAXSIZE - IEEE80211_RATE_SIZE) + sizeof(struct ieee80211_ie_wpa) + (vap->iv_appie_probereq != NULL ? vap->iv_appie_probereq->ie_len : 0) ); if (m == NULL) { vap->iv_stats.is_tx_nobuf++; ieee80211_free_node(ni); ieee80211_free_node(bss); return ENOMEM; } frm = ieee80211_add_ssid(frm, ssid, ssidlen); rs = ieee80211_get_suprates(ic, ic->ic_curchan); frm = ieee80211_add_rates(frm, rs); frm = ieee80211_add_rsn(frm, vap); frm = ieee80211_add_xrates(frm, rs); /* * Note: we can't use bss; we don't have one yet. * * So, we should announce our capabilities * in this channel mode (2g/5g), not the * channel details itself. */ if ((vap->iv_opmode == IEEE80211_M_IBSS) && (vap->iv_flags_ht & IEEE80211_FHT_HT)) { struct ieee80211_channel *c; /* * Get the HT channel that we should try upgrading to. * If we can do 40MHz then this'll upgrade it appropriately. */ c = ieee80211_ht_adjust_channel(ic, ic->ic_curchan, vap->iv_flags_ht); frm = ieee80211_add_htcap_ch(frm, vap, c); } /* * XXX TODO: need to figure out what/how to update the * VHT channel. */ #if 0 (vap->iv_flags_vht & IEEE80211_FVHT_VHT) { struct ieee80211_channel *c; c = ieee80211_ht_adjust_channel(ic, ic->ic_curchan, vap->iv_flags_ht); c = ieee80211_vht_adjust_channel(ic, c, vap->iv_flags_vht); frm = ieee80211_add_vhtcap_ch(frm, vap, c); } #endif frm = ieee80211_add_wpa(frm, vap); if (vap->iv_appie_probereq != NULL) frm = add_appie(frm, vap->iv_appie_probereq); m->m_pkthdr.len = m->m_len = frm - mtod(m, uint8_t *); KASSERT(M_LEADINGSPACE(m) >= sizeof(struct ieee80211_frame), ("leading space %zd", M_LEADINGSPACE(m))); M_PREPEND(m, sizeof(struct ieee80211_frame), M_NOWAIT); if (m == NULL) { /* NB: cannot happen */ ieee80211_free_node(ni); ieee80211_free_node(bss); return ENOMEM; } IEEE80211_TX_LOCK(ic); ieee80211_send_setup(ni, m, IEEE80211_FC0_TYPE_MGT | IEEE80211_FC0_SUBTYPE_PROBE_REQ, IEEE80211_NONQOS_TID, sa, da, bssid); /* XXX power management? */ m->m_flags |= M_ENCAP; /* mark encapsulated */ M_WME_SETAC(m, WME_AC_BE); IEEE80211_NODE_STAT(ni, tx_probereq); IEEE80211_NODE_STAT(ni, tx_mgmt); IEEE80211_DPRINTF(vap, IEEE80211_MSG_DEBUG | IEEE80211_MSG_DUMPPKTS, "send probe req on channel %u bssid %s sa %6D da %6D ssid \"%.*s\"\n", ieee80211_chan2ieee(ic, ic->ic_curchan), ether_sprintf(bssid), sa, ":", da, ":", ssidlen, ssid); memset(¶ms, 0, sizeof(params)); params.ibp_pri = M_WME_GETAC(m); tp = &vap->iv_txparms[ieee80211_chan2mode(ic->ic_curchan)]; params.ibp_rate0 = tp->mgmtrate; if (IEEE80211_IS_MULTICAST(da)) { params.ibp_flags |= IEEE80211_BPF_NOACK; params.ibp_try0 = 1; } else params.ibp_try0 = tp->maxretry; params.ibp_power = ni->ni_txpower; ret = ieee80211_raw_output(vap, ni, m, ¶ms); IEEE80211_TX_UNLOCK(ic); ieee80211_free_node(bss); return (ret); } /* * Calculate capability information for mgt frames. 
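 * (Sets ESS or IBSS according to the opmode, plus the PRIVACY,
 * SHORT_PREAMBLE, SHORT_SLOTTIME and SPECTRUM_MGMT bits as
 * configuration and channel permit.)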
*/ uint16_t ieee80211_getcapinfo(struct ieee80211vap *vap, struct ieee80211_channel *chan) { struct ieee80211com *ic = vap->iv_ic; uint16_t capinfo; KASSERT(vap->iv_opmode != IEEE80211_M_STA, ("station mode")); if (vap->iv_opmode == IEEE80211_M_HOSTAP) capinfo = IEEE80211_CAPINFO_ESS; else if (vap->iv_opmode == IEEE80211_M_IBSS) capinfo = IEEE80211_CAPINFO_IBSS; else capinfo = 0; if (vap->iv_flags & IEEE80211_F_PRIVACY) capinfo |= IEEE80211_CAPINFO_PRIVACY; if ((ic->ic_flags & IEEE80211_F_SHPREAMBLE) && IEEE80211_IS_CHAN_2GHZ(chan)) capinfo |= IEEE80211_CAPINFO_SHORT_PREAMBLE; if (ic->ic_flags & IEEE80211_F_SHSLOT) capinfo |= IEEE80211_CAPINFO_SHORT_SLOTTIME; if (IEEE80211_IS_CHAN_5GHZ(chan) && (vap->iv_flags & IEEE80211_F_DOTH)) capinfo |= IEEE80211_CAPINFO_SPECTRUM_MGMT; return capinfo; } /* * Send a management frame. The node is for the destination (or ic_bss * when in station mode). Nodes other than ic_bss have their reference * count bumped to reflect our use for an indeterminant time. */ int ieee80211_send_mgmt(struct ieee80211_node *ni, int type, int arg) { #define HTFLAGS (IEEE80211_NODE_HT | IEEE80211_NODE_HTCOMPAT) #define senderr(_x, _v) do { vap->iv_stats._v++; ret = _x; goto bad; } while (0) struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; struct ieee80211_node *bss = vap->iv_bss; struct ieee80211_bpf_params params; struct mbuf *m; uint8_t *frm; uint16_t capinfo; int has_challenge, is_shared_key, ret, status; KASSERT(ni != NULL, ("null node")); /* * Hold a reference on the node so it doesn't go away until after * the xmit is complete all the way in the driver. On error we * will remove our reference. */ IEEE80211_DPRINTF(vap, IEEE80211_MSG_NODE, "ieee80211_ref_node (%s:%u) %p<%s> refcnt %d\n", __func__, __LINE__, ni, ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni)+1); ieee80211_ref_node(ni); memset(¶ms, 0, sizeof(params)); switch (type) { case IEEE80211_FC0_SUBTYPE_AUTH: status = arg >> 16; arg &= 0xffff; has_challenge = ((arg == IEEE80211_AUTH_SHARED_CHALLENGE || arg == IEEE80211_AUTH_SHARED_RESPONSE) && ni->ni_challenge != NULL); /* * Deduce whether we're doing open authentication or * shared key authentication. We do the latter if * we're in the middle of a shared key authentication * handshake or if we're initiating an authentication * request and configured to use shared key. */ is_shared_key = has_challenge || arg >= IEEE80211_AUTH_SHARED_RESPONSE || (arg == IEEE80211_AUTH_SHARED_REQUEST && bss->ni_authmode == IEEE80211_AUTH_SHARED); m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), 3 * sizeof(uint16_t) + (has_challenge && status == IEEE80211_STATUS_SUCCESS ? sizeof(uint16_t)+IEEE80211_CHALLENGE_LEN : 0) ); if (m == NULL) senderr(ENOMEM, is_tx_nobuf); ((uint16_t *)frm)[0] = (is_shared_key) ? 
htole16(IEEE80211_AUTH_ALG_SHARED) : htole16(IEEE80211_AUTH_ALG_OPEN); ((uint16_t *)frm)[1] = htole16(arg); /* sequence number */ ((uint16_t *)frm)[2] = htole16(status);/* status */ if (has_challenge && status == IEEE80211_STATUS_SUCCESS) { ((uint16_t *)frm)[3] = htole16((IEEE80211_CHALLENGE_LEN << 8) | IEEE80211_ELEMID_CHALLENGE); memcpy(&((uint16_t *)frm)[4], ni->ni_challenge, IEEE80211_CHALLENGE_LEN); m->m_pkthdr.len = m->m_len = 4 * sizeof(uint16_t) + IEEE80211_CHALLENGE_LEN; if (arg == IEEE80211_AUTH_SHARED_RESPONSE) { IEEE80211_NOTE(vap, IEEE80211_MSG_AUTH, ni, "request encrypt frame (%s)", __func__); /* mark frame for encryption */ params.ibp_flags |= IEEE80211_BPF_CRYPTO; } } else m->m_pkthdr.len = m->m_len = 3 * sizeof(uint16_t); /* XXX not right for shared key */ if (status == IEEE80211_STATUS_SUCCESS) IEEE80211_NODE_STAT(ni, tx_auth); else IEEE80211_NODE_STAT(ni, tx_auth_fail); if (vap->iv_opmode == IEEE80211_M_STA) ieee80211_add_callback(m, ieee80211_tx_mgt_cb, (void *) vap->iv_state); break; case IEEE80211_FC0_SUBTYPE_DEAUTH: IEEE80211_NOTE(vap, IEEE80211_MSG_AUTH, ni, "send station deauthenticate (reason: %d (%s))", arg, ieee80211_reason_to_string(arg)); m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), sizeof(uint16_t)); if (m == NULL) senderr(ENOMEM, is_tx_nobuf); *(uint16_t *)frm = htole16(arg); /* reason */ m->m_pkthdr.len = m->m_len = sizeof(uint16_t); IEEE80211_NODE_STAT(ni, tx_deauth); IEEE80211_NODE_STAT_SET(ni, tx_deauth_code, arg); ieee80211_node_unauthorize(ni); /* port closed */ break; case IEEE80211_FC0_SUBTYPE_ASSOC_REQ: case IEEE80211_FC0_SUBTYPE_REASSOC_REQ: /* XXX VHT? */ /* * asreq frame format * [2] capability information * [2] listen interval * [6*] current AP address (reassoc only) * [tlv] ssid * [tlv] supported rates * [tlv] extended supported rates * [4] power capability (optional) * [28] supported channels (optional) * [tlv] HT capabilities * [tlv] VHT capabilities * [tlv] WME (optional) * [tlv] Vendor OUI HT capabilities (optional) * [tlv] Atheros capabilities (if negotiated) * [tlv] AppIE's (optional) */ m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), sizeof(uint16_t) + sizeof(uint16_t) + IEEE80211_ADDR_LEN + 2 + IEEE80211_NWID_LEN + 2 + IEEE80211_RATE_SIZE + 2 + (IEEE80211_RATE_MAXSIZE - IEEE80211_RATE_SIZE) + 4 + 2 + 26 + sizeof(struct ieee80211_wme_info) + sizeof(struct ieee80211_ie_htcap) + sizeof(struct ieee80211_ie_vhtcap) + 4 + sizeof(struct ieee80211_ie_htcap) #ifdef IEEE80211_SUPPORT_SUPERG + sizeof(struct ieee80211_ath_ie) #endif + (vap->iv_appie_wpa != NULL ? vap->iv_appie_wpa->ie_len : 0) + (vap->iv_appie_assocreq != NULL ? vap->iv_appie_assocreq->ie_len : 0) ); if (m == NULL) senderr(ENOMEM, is_tx_nobuf); KASSERT(vap->iv_opmode == IEEE80211_M_STA, ("wrong mode %u", vap->iv_opmode)); capinfo = IEEE80211_CAPINFO_ESS; if (vap->iv_flags & IEEE80211_F_PRIVACY) capinfo |= IEEE80211_CAPINFO_PRIVACY; /* * NB: Some 11a AP's reject the request when * short premable is set. 
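		 * (This is why SHORT_PREAMBLE is only advertised below
		 * when associating on a 2GHz channel.)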
*/ if ((ic->ic_flags & IEEE80211_F_SHPREAMBLE) && IEEE80211_IS_CHAN_2GHZ(ic->ic_curchan)) capinfo |= IEEE80211_CAPINFO_SHORT_PREAMBLE; if (IEEE80211_IS_CHAN_ANYG(ic->ic_curchan) && (ic->ic_caps & IEEE80211_C_SHSLOT)) capinfo |= IEEE80211_CAPINFO_SHORT_SLOTTIME; if ((ni->ni_capinfo & IEEE80211_CAPINFO_SPECTRUM_MGMT) && (vap->iv_flags & IEEE80211_F_DOTH)) capinfo |= IEEE80211_CAPINFO_SPECTRUM_MGMT; *(uint16_t *)frm = htole16(capinfo); frm += 2; KASSERT(bss->ni_intval != 0, ("beacon interval is zero!")); *(uint16_t *)frm = htole16(howmany(ic->ic_lintval, bss->ni_intval)); frm += 2; if (type == IEEE80211_FC0_SUBTYPE_REASSOC_REQ) { IEEE80211_ADDR_COPY(frm, bss->ni_bssid); frm += IEEE80211_ADDR_LEN; } frm = ieee80211_add_ssid(frm, ni->ni_essid, ni->ni_esslen); frm = ieee80211_add_rates(frm, &ni->ni_rates); frm = ieee80211_add_rsn(frm, vap); frm = ieee80211_add_xrates(frm, &ni->ni_rates); if (capinfo & IEEE80211_CAPINFO_SPECTRUM_MGMT) { frm = ieee80211_add_powercapability(frm, ic->ic_curchan); frm = ieee80211_add_supportedchannels(frm, ic); } /* * Check the channel - we may be using an 11n NIC with an * 11n capable station, but we're configured to be an 11b * channel. */ if ((vap->iv_flags_ht & IEEE80211_FHT_HT) && IEEE80211_IS_CHAN_HT(ni->ni_chan) && ni->ni_ies.htcap_ie != NULL && ni->ni_ies.htcap_ie[0] == IEEE80211_ELEMID_HTCAP) { frm = ieee80211_add_htcap(frm, ni); } if ((vap->iv_flags_vht & IEEE80211_FVHT_VHT) && IEEE80211_IS_CHAN_VHT(ni->ni_chan) && ni->ni_ies.vhtcap_ie != NULL && ni->ni_ies.vhtcap_ie[0] == IEEE80211_ELEMID_VHT_CAP) { frm = ieee80211_add_vhtcap(frm, ni); } frm = ieee80211_add_wpa(frm, vap); if ((ic->ic_flags & IEEE80211_F_WME) && ni->ni_ies.wme_ie != NULL) frm = ieee80211_add_wme_info(frm, &ic->ic_wme); /* * Same deal - only send HT info if we're on an 11n * capable channel. */ if ((vap->iv_flags_ht & IEEE80211_FHT_HT) && IEEE80211_IS_CHAN_HT(ni->ni_chan) && ni->ni_ies.htcap_ie != NULL && ni->ni_ies.htcap_ie[0] == IEEE80211_ELEMID_VENDOR) { frm = ieee80211_add_htcap_vendor(frm, ni); } #ifdef IEEE80211_SUPPORT_SUPERG if (IEEE80211_ATH_CAP(vap, ni, IEEE80211_F_ATHEROS)) { frm = ieee80211_add_ath(frm, IEEE80211_ATH_CAP(vap, ni, IEEE80211_F_ATHEROS), ((vap->iv_flags & IEEE80211_F_WPA) == 0 && ni->ni_authmode != IEEE80211_AUTH_8021X) ? 
vap->iv_def_txkey : IEEE80211_KEYIX_NONE); } #endif /* IEEE80211_SUPPORT_SUPERG */ if (vap->iv_appie_assocreq != NULL) frm = add_appie(frm, vap->iv_appie_assocreq); m->m_pkthdr.len = m->m_len = frm - mtod(m, uint8_t *); ieee80211_add_callback(m, ieee80211_tx_mgt_cb, (void *) vap->iv_state); break; case IEEE80211_FC0_SUBTYPE_ASSOC_RESP: case IEEE80211_FC0_SUBTYPE_REASSOC_RESP: /* * asresp frame format * [2] capability information * [2] status * [2] association ID * [tlv] supported rates * [tlv] extended supported rates * [tlv] HT capabilities (standard, if STA enabled) * [tlv] HT information (standard, if STA enabled) * [tlv] VHT capabilities (standard, if STA enabled) * [tlv] VHT information (standard, if STA enabled) * [tlv] WME (if configured and STA enabled) * [tlv] HT capabilities (vendor OUI, if STA enabled) * [tlv] HT information (vendor OUI, if STA enabled) * [tlv] Atheros capabilities (if STA enabled) * [tlv] AppIE's (optional) */ m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), sizeof(uint16_t) + sizeof(uint16_t) + sizeof(uint16_t) + 2 + IEEE80211_RATE_SIZE + 2 + (IEEE80211_RATE_MAXSIZE - IEEE80211_RATE_SIZE) + sizeof(struct ieee80211_ie_htcap) + 4 + sizeof(struct ieee80211_ie_htinfo) + 4 + sizeof(struct ieee80211_ie_vhtcap) + sizeof(struct ieee80211_ie_vht_operation) + sizeof(struct ieee80211_wme_param) #ifdef IEEE80211_SUPPORT_SUPERG + sizeof(struct ieee80211_ath_ie) #endif + (vap->iv_appie_assocresp != NULL ? vap->iv_appie_assocresp->ie_len : 0) ); if (m == NULL) senderr(ENOMEM, is_tx_nobuf); capinfo = ieee80211_getcapinfo(vap, bss->ni_chan); *(uint16_t *)frm = htole16(capinfo); frm += 2; *(uint16_t *)frm = htole16(arg); /* status */ frm += 2; if (arg == IEEE80211_STATUS_SUCCESS) { *(uint16_t *)frm = htole16(ni->ni_associd); IEEE80211_NODE_STAT(ni, tx_assoc); } else IEEE80211_NODE_STAT(ni, tx_assoc_fail); frm += 2; frm = ieee80211_add_rates(frm, &ni->ni_rates); frm = ieee80211_add_xrates(frm, &ni->ni_rates); /* NB: respond according to what we received */ if ((ni->ni_flags & HTFLAGS) == IEEE80211_NODE_HT) { frm = ieee80211_add_htcap(frm, ni); frm = ieee80211_add_htinfo(frm, ni); } if ((vap->iv_flags & IEEE80211_F_WME) && ni->ni_ies.wme_ie != NULL) frm = ieee80211_add_wme_param(frm, &ic->ic_wme); if ((ni->ni_flags & HTFLAGS) == HTFLAGS) { frm = ieee80211_add_htcap_vendor(frm, ni); frm = ieee80211_add_htinfo_vendor(frm, ni); } if (ni->ni_flags & IEEE80211_NODE_VHT) { frm = ieee80211_add_vhtcap(frm, ni); frm = ieee80211_add_vhtinfo(frm, ni); } #ifdef IEEE80211_SUPPORT_SUPERG if (IEEE80211_ATH_CAP(vap, ni, IEEE80211_F_ATHEROS)) frm = ieee80211_add_ath(frm, IEEE80211_ATH_CAP(vap, ni, IEEE80211_F_ATHEROS), ((vap->iv_flags & IEEE80211_F_WPA) == 0 && ni->ni_authmode != IEEE80211_AUTH_8021X) ? 
vap->iv_def_txkey : IEEE80211_KEYIX_NONE); #endif /* IEEE80211_SUPPORT_SUPERG */ if (vap->iv_appie_assocresp != NULL) frm = add_appie(frm, vap->iv_appie_assocresp); m->m_pkthdr.len = m->m_len = frm - mtod(m, uint8_t *); break; case IEEE80211_FC0_SUBTYPE_DISASSOC: IEEE80211_NOTE(vap, IEEE80211_MSG_ASSOC, ni, "send station disassociate (reason: %d (%s))", arg, ieee80211_reason_to_string(arg)); m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), sizeof(uint16_t)); if (m == NULL) senderr(ENOMEM, is_tx_nobuf); *(uint16_t *)frm = htole16(arg); /* reason */ m->m_pkthdr.len = m->m_len = sizeof(uint16_t); IEEE80211_NODE_STAT(ni, tx_disassoc); IEEE80211_NODE_STAT_SET(ni, tx_disassoc_code, arg); break; default: IEEE80211_NOTE(vap, IEEE80211_MSG_ANY, ni, "invalid mgmt frame type %u", type); senderr(EINVAL, is_tx_unknownmgt); /* NOTREACHED */ } /* NB: force non-ProbeResp frames to the highest queue */ params.ibp_pri = WME_AC_VO; params.ibp_rate0 = bss->ni_txparms->mgmtrate; /* NB: we know all frames are unicast */ params.ibp_try0 = bss->ni_txparms->maxretry; params.ibp_power = bss->ni_txpower; return ieee80211_mgmt_output(ni, m, type, ¶ms); bad: ieee80211_free_node(ni); return ret; #undef senderr #undef HTFLAGS } /* * Return an mbuf with a probe response frame in it. * Space is left to prepend and 802.11 header at the * front but it's left to the caller to fill in. */ /* XXX VHT? */ struct mbuf * ieee80211_alloc_proberesp(struct ieee80211_node *bss, int legacy) { struct ieee80211vap *vap = bss->ni_vap; struct ieee80211com *ic = bss->ni_ic; const struct ieee80211_rateset *rs; struct mbuf *m; uint16_t capinfo; uint8_t *frm; /* * probe response frame format * [8] time stamp * [2] beacon interval * [2] cabability information * [tlv] ssid * [tlv] supported rates * [tlv] parameter set (FH/DS) * [tlv] parameter set (IBSS) * [tlv] country (optional) * [3] power control (optional) * [5] channel switch announcement (CSA) (optional) * [tlv] extended rate phy (ERP) * [tlv] extended supported rates * [tlv] RSN (optional) * [tlv] HT capabilities * [tlv] HT information * [tlv] WPA (optional) * [tlv] WME (optional) * [tlv] Vendor OUI HT capabilities (optional) * [tlv] Vendor OUI HT information (optional) * [tlv] Atheros capabilities * [tlv] AppIE's (optional) * [tlv] Mesh ID (MBSS) * [tlv] Mesh Conf (MBSS) */ m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), 8 + sizeof(uint16_t) + sizeof(uint16_t) + 2 + IEEE80211_NWID_LEN + 2 + IEEE80211_RATE_SIZE + 7 /* max(7,3) */ + IEEE80211_COUNTRY_MAX_SIZE + 3 + sizeof(struct ieee80211_csa_ie) + sizeof(struct ieee80211_quiet_ie) + 3 + 2 + (IEEE80211_RATE_MAXSIZE - IEEE80211_RATE_SIZE) + sizeof(struct ieee80211_ie_wpa) + sizeof(struct ieee80211_ie_htcap) + sizeof(struct ieee80211_ie_htinfo) + sizeof(struct ieee80211_ie_wpa) + sizeof(struct ieee80211_wme_param) + 4 + sizeof(struct ieee80211_ie_htcap) + 4 + sizeof(struct ieee80211_ie_htinfo) #ifdef IEEE80211_SUPPORT_SUPERG + sizeof(struct ieee80211_ath_ie) #endif #ifdef IEEE80211_SUPPORT_MESH + 2 + IEEE80211_MESHID_LEN + sizeof(struct ieee80211_meshconf_ie) #endif + (vap->iv_appie_proberesp != NULL ? 
vap->iv_appie_proberesp->ie_len : 0)
	);
	if (m == NULL) {
		vap->iv_stats.is_tx_nobuf++;
		return NULL;
	}

	memset(frm, 0, 8);	/* timestamp should be filled later */
	frm += 8;
	*(uint16_t *)frm = htole16(bss->ni_intval);
	frm += 2;
	capinfo = ieee80211_getcapinfo(vap, bss->ni_chan);
	*(uint16_t *)frm = htole16(capinfo);
	frm += 2;

	frm = ieee80211_add_ssid(frm, bss->ni_essid, bss->ni_esslen);
	rs = ieee80211_get_suprates(ic, bss->ni_chan);
	frm = ieee80211_add_rates(frm, rs);

	if (IEEE80211_IS_CHAN_FHSS(bss->ni_chan)) {
		*frm++ = IEEE80211_ELEMID_FHPARMS;
		*frm++ = 5;
		*frm++ = bss->ni_fhdwell & 0x00ff;
		*frm++ = (bss->ni_fhdwell >> 8) & 0x00ff;
		*frm++ = IEEE80211_FH_CHANSET(
		    ieee80211_chan2ieee(ic, bss->ni_chan));
		*frm++ = IEEE80211_FH_CHANPAT(
		    ieee80211_chan2ieee(ic, bss->ni_chan));
		*frm++ = bss->ni_fhindex;
	} else {
		*frm++ = IEEE80211_ELEMID_DSPARMS;
		*frm++ = 1;
		*frm++ = ieee80211_chan2ieee(ic, bss->ni_chan);
	}

	if (vap->iv_opmode == IEEE80211_M_IBSS) {
		*frm++ = IEEE80211_ELEMID_IBSSPARMS;
		*frm++ = 2;
		*frm++ = 0; *frm++ = 0;		/* TODO: ATIM window */
	}
	if ((vap->iv_flags & IEEE80211_F_DOTH) ||
	    (vap->iv_flags_ext & IEEE80211_FEXT_DOTD))
		frm = ieee80211_add_countryie(frm, ic);
	if (vap->iv_flags & IEEE80211_F_DOTH) {
		if (IEEE80211_IS_CHAN_5GHZ(bss->ni_chan))
			frm = ieee80211_add_powerconstraint(frm, vap);
		if (ic->ic_flags & IEEE80211_F_CSAPENDING)
			frm = ieee80211_add_csa(frm, vap);
	}
	if (vap->iv_flags & IEEE80211_F_DOTH) {
		if (IEEE80211_IS_CHAN_DFS(ic->ic_bsschan) &&
		    (vap->iv_flags_ext & IEEE80211_FEXT_DFS)) {
			if (vap->iv_quiet)
-				frm = ieee80211_add_quiet(frm, vap);
+				frm = ieee80211_add_quiet(frm, vap, 0);
		}
	}
	if (IEEE80211_IS_CHAN_ANYG(bss->ni_chan))
		frm = ieee80211_add_erp(frm, ic);
	frm = ieee80211_add_xrates(frm, rs);
	frm = ieee80211_add_rsn(frm, vap);
	/*
	 * NB: legacy 11b clients do not get certain ie's.
	 * The caller identifies such clients by passing
	 * a token in legacy to us.  Could expand this to be
	 * any legacy client for stuff like HT ie's.
	 */
	if (IEEE80211_IS_CHAN_HT(bss->ni_chan) &&
	    legacy != IEEE80211_SEND_LEGACY_11B) {
		frm = ieee80211_add_htcap(frm, bss);
		frm = ieee80211_add_htinfo(frm, bss);
	}
	frm = ieee80211_add_wpa(frm, vap);
	if (vap->iv_flags & IEEE80211_F_WME)
		frm = ieee80211_add_wme_param(frm, &ic->ic_wme);
	if (IEEE80211_IS_CHAN_HT(bss->ni_chan) &&
	    (vap->iv_flags_ht & IEEE80211_FHT_HTCOMPAT) &&
	    legacy != IEEE80211_SEND_LEGACY_11B) {
		frm = ieee80211_add_htcap_vendor(frm, bss);
		frm = ieee80211_add_htinfo_vendor(frm, bss);
	}
#ifdef IEEE80211_SUPPORT_SUPERG
	if ((vap->iv_flags & IEEE80211_F_ATHEROS) &&
	    legacy != IEEE80211_SEND_LEGACY_11B)
		frm = ieee80211_add_athcaps(frm, bss);
#endif
	if (vap->iv_appie_proberesp != NULL)
		frm = add_appie(frm, vap->iv_appie_proberesp);
#ifdef IEEE80211_SUPPORT_MESH
	if (vap->iv_opmode == IEEE80211_M_MBSS) {
		frm = ieee80211_add_meshid(frm, vap);
		frm = ieee80211_add_meshconf(frm, vap);
	}
#endif
	m->m_pkthdr.len = m->m_len = frm - mtod(m, uint8_t *);

	return m;
}

/*
 * Send a probe response frame to the specified mac address.
 * This does not go through the normal mgt frame api so we
 * can specify the destination address and re-use the bss node
 * for the sta reference.
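 *
 * A typical call site (sketch; the hostap input path is the usual
 * caller) answers a probe request from wh->i_addr2 with:
 *
 *	(void) ieee80211_send_proberesp(vap, wh->i_addr2,
 *	    legacy_11b_client ? IEEE80211_SEND_LEGACY_11B : 0);
 *
 * where legacy_11b_client is an illustrative predicate for pre-ERP
 * stations.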
*/ int ieee80211_send_proberesp(struct ieee80211vap *vap, const uint8_t da[IEEE80211_ADDR_LEN], int legacy) { struct ieee80211_node *bss = vap->iv_bss; struct ieee80211com *ic = vap->iv_ic; struct mbuf *m; int ret; if (vap->iv_state == IEEE80211_S_CAC) { IEEE80211_NOTE(vap, IEEE80211_MSG_OUTPUT, bss, "block %s frame in CAC state", "probe response"); vap->iv_stats.is_tx_badstate++; return EIO; /* XXX */ } /* * Hold a reference on the node so it doesn't go away until after * the xmit is complete all the way in the driver. On error we * will remove our reference. */ IEEE80211_DPRINTF(vap, IEEE80211_MSG_NODE, "ieee80211_ref_node (%s:%u) %p<%s> refcnt %d\n", __func__, __LINE__, bss, ether_sprintf(bss->ni_macaddr), ieee80211_node_refcnt(bss)+1); ieee80211_ref_node(bss); m = ieee80211_alloc_proberesp(bss, legacy); if (m == NULL) { ieee80211_free_node(bss); return ENOMEM; } M_PREPEND(m, sizeof(struct ieee80211_frame), M_NOWAIT); KASSERT(m != NULL, ("no room for header")); IEEE80211_TX_LOCK(ic); ieee80211_send_setup(bss, m, IEEE80211_FC0_TYPE_MGT | IEEE80211_FC0_SUBTYPE_PROBE_RESP, IEEE80211_NONQOS_TID, vap->iv_myaddr, da, bss->ni_bssid); /* XXX power management? */ m->m_flags |= M_ENCAP; /* mark encapsulated */ M_WME_SETAC(m, WME_AC_BE); IEEE80211_DPRINTF(vap, IEEE80211_MSG_DEBUG | IEEE80211_MSG_DUMPPKTS, "send probe resp on channel %u to %s%s\n", ieee80211_chan2ieee(ic, ic->ic_curchan), ether_sprintf(da), legacy ? " " : ""); IEEE80211_NODE_STAT(bss, tx_mgmt); ret = ieee80211_raw_output(vap, bss, m, NULL); IEEE80211_TX_UNLOCK(ic); return (ret); } /* * Allocate and build a RTS (Request To Send) control frame. */ struct mbuf * ieee80211_alloc_rts(struct ieee80211com *ic, const uint8_t ra[IEEE80211_ADDR_LEN], const uint8_t ta[IEEE80211_ADDR_LEN], uint16_t dur) { struct ieee80211_frame_rts *rts; struct mbuf *m; /* XXX honor ic_headroom */ m = m_gethdr(M_NOWAIT, MT_DATA); if (m != NULL) { rts = mtod(m, struct ieee80211_frame_rts *); rts->i_fc[0] = IEEE80211_FC0_VERSION_0 | IEEE80211_FC0_TYPE_CTL | IEEE80211_FC0_SUBTYPE_RTS; rts->i_fc[1] = IEEE80211_FC1_DIR_NODS; *(u_int16_t *)rts->i_dur = htole16(dur); IEEE80211_ADDR_COPY(rts->i_ra, ra); IEEE80211_ADDR_COPY(rts->i_ta, ta); m->m_pkthdr.len = m->m_len = sizeof(struct ieee80211_frame_rts); } return m; } /* * Allocate and build a CTS (Clear To Send) control frame. */ struct mbuf * ieee80211_alloc_cts(struct ieee80211com *ic, const uint8_t ra[IEEE80211_ADDR_LEN], uint16_t dur) { struct ieee80211_frame_cts *cts; struct mbuf *m; /* XXX honor ic_headroom */ m = m_gethdr(M_NOWAIT, MT_DATA); if (m != NULL) { cts = mtod(m, struct ieee80211_frame_cts *); cts->i_fc[0] = IEEE80211_FC0_VERSION_0 | IEEE80211_FC0_TYPE_CTL | IEEE80211_FC0_SUBTYPE_CTS; cts->i_fc[1] = IEEE80211_FC1_DIR_NODS; *(u_int16_t *)cts->i_dur = htole16(dur); IEEE80211_ADDR_COPY(cts->i_ra, ra); m->m_pkthdr.len = m->m_len = sizeof(struct ieee80211_frame_cts); } return m; } static void ieee80211_tx_mgt_timeout(void *arg) { struct ieee80211vap *vap = arg; IEEE80211_LOCK(vap->iv_ic); if (vap->iv_state != IEEE80211_S_INIT && (vap->iv_ic->ic_flags & IEEE80211_F_SCAN) == 0) { /* * NB: it's safe to specify a timeout as the reason here; * it'll only be used in the right state. */ ieee80211_new_state_locked(vap, IEEE80211_S_SCAN, IEEE80211_SCAN_FAIL_TIMEOUT); } IEEE80211_UNLOCK(vap->iv_ic); } /* * This is the callback set on net80211-sourced transmitted * authentication request frames. 
* * This does a couple of things: * * + If the frame transmitted was a success, it schedules a future * event which will transition the interface to scan. * If a state transition _then_ occurs before that event occurs, * said state transition will cancel this callout. * * + If the frame transmit was a failure, it immediately schedules * the transition back to scan. */ static void ieee80211_tx_mgt_cb(struct ieee80211_node *ni, void *arg, int status) { struct ieee80211vap *vap = ni->ni_vap; enum ieee80211_state ostate = (enum ieee80211_state) arg; /* * Frame transmit completed; arrange timer callback. If * transmit was successfully we wait for response. Otherwise * we arrange an immediate callback instead of doing the * callback directly since we don't know what state the driver * is in (e.g. what locks it is holding). This work should * not be too time-critical and not happen too often so the * added overhead is acceptable. * * XXX what happens if !acked but response shows up before callback? */ if (vap->iv_state == ostate) { callout_reset(&vap->iv_mgtsend, status == 0 ? IEEE80211_TRANS_WAIT*hz : 0, ieee80211_tx_mgt_timeout, vap); } } /* XXX VHT? */ static void ieee80211_beacon_construct(struct mbuf *m, uint8_t *frm, struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_beacon_offsets *bo = &vap->iv_bcn_off; struct ieee80211com *ic = ni->ni_ic; struct ieee80211_rateset *rs = &ni->ni_rates; uint16_t capinfo; /* * beacon frame format * * TODO: update to 802.11-2012; a lot of stuff has changed; * vendor extensions should be at the end, etc. * * [8] time stamp * [2] beacon interval * [2] cabability information * [tlv] ssid * [tlv] supported rates * [3] parameter set (DS) * [8] CF parameter set (optional) * [tlv] parameter set (IBSS/TIM) * [tlv] country (optional) * [3] power control (optional) * [5] channel switch announcement (CSA) (optional) * XXX TODO: Quiet * XXX TODO: IBSS DFS * XXX TODO: TPC report * [tlv] extended rate phy (ERP) * [tlv] extended supported rates * [tlv] RSN parameters * XXX TODO: BSSLOAD * (XXX EDCA parameter set, QoS capability?) * XXX TODO: AP channel report * * [tlv] HT capabilities * [tlv] HT information * XXX TODO: 20/40 BSS coexistence * Mesh: * XXX TODO: Meshid * XXX TODO: mesh config * XXX TODO: mesh awake window * XXX TODO: beacon timing (mesh, etc) * XXX TODO: MCCAOP Advertisement Overview * XXX TODO: MCCAOP Advertisement * XXX TODO: Mesh channel switch parameters * VHT: * XXX TODO: VHT capabilities * XXX TODO: VHT operation * XXX TODO: VHT transmit power envelope * XXX TODO: channel switch wrapper element * XXX TODO: extended BSS load element * * XXX Vendor-specific OIDs (e.g. 
Atheros) * [tlv] WPA parameters * [tlv] WME parameters * [tlv] Vendor OUI HT capabilities (optional) * [tlv] Vendor OUI HT information (optional) * [tlv] Atheros capabilities (optional) * [tlv] TDMA parameters (optional) * [tlv] Mesh ID (MBSS) * [tlv] Mesh Conf (MBSS) * [tlv] application data (optional) */ memset(bo, 0, sizeof(*bo)); memset(frm, 0, 8); /* XXX timestamp is set by hardware/driver */ frm += 8; *(uint16_t *)frm = htole16(ni->ni_intval); frm += 2; capinfo = ieee80211_getcapinfo(vap, ni->ni_chan); bo->bo_caps = (uint16_t *)frm; *(uint16_t *)frm = htole16(capinfo); frm += 2; *frm++ = IEEE80211_ELEMID_SSID; if ((vap->iv_flags & IEEE80211_F_HIDESSID) == 0) { *frm++ = ni->ni_esslen; memcpy(frm, ni->ni_essid, ni->ni_esslen); frm += ni->ni_esslen; } else *frm++ = 0; frm = ieee80211_add_rates(frm, rs); if (!IEEE80211_IS_CHAN_FHSS(ni->ni_chan)) { *frm++ = IEEE80211_ELEMID_DSPARMS; *frm++ = 1; *frm++ = ieee80211_chan2ieee(ic, ni->ni_chan); } if (ic->ic_flags & IEEE80211_F_PCF) { bo->bo_cfp = frm; frm = ieee80211_add_cfparms(frm, ic); } bo->bo_tim = frm; if (vap->iv_opmode == IEEE80211_M_IBSS) { *frm++ = IEEE80211_ELEMID_IBSSPARMS; *frm++ = 2; *frm++ = 0; *frm++ = 0; /* TODO: ATIM window */ bo->bo_tim_len = 0; } else if (vap->iv_opmode == IEEE80211_M_HOSTAP || vap->iv_opmode == IEEE80211_M_MBSS) { /* TIM IE is the same for Mesh and Hostap */ struct ieee80211_tim_ie *tie = (struct ieee80211_tim_ie *) frm; tie->tim_ie = IEEE80211_ELEMID_TIM; tie->tim_len = 4; /* length */ tie->tim_count = 0; /* DTIM count */ tie->tim_period = vap->iv_dtim_period; /* DTIM period */ tie->tim_bitctl = 0; /* bitmap control */ tie->tim_bitmap[0] = 0; /* Partial Virtual Bitmap */ frm += sizeof(struct ieee80211_tim_ie); bo->bo_tim_len = 1; } bo->bo_tim_trailer = frm; if ((vap->iv_flags & IEEE80211_F_DOTH) || (vap->iv_flags_ext & IEEE80211_FEXT_DOTD)) frm = ieee80211_add_countryie(frm, ic); if (vap->iv_flags & IEEE80211_F_DOTH) { if (IEEE80211_IS_CHAN_5GHZ(ni->ni_chan)) frm = ieee80211_add_powerconstraint(frm, vap); bo->bo_csa = frm; if (ic->ic_flags & IEEE80211_F_CSAPENDING) frm = ieee80211_add_csa(frm, vap); } else bo->bo_csa = frm; if (vap->iv_flags & IEEE80211_F_DOTH) { bo->bo_quiet = frm; if (IEEE80211_IS_CHAN_DFS(ic->ic_bsschan) && (vap->iv_flags_ext & IEEE80211_FEXT_DFS)) { if (vap->iv_quiet) - frm = ieee80211_add_quiet(frm,vap); + frm = ieee80211_add_quiet(frm,vap, 0); } } else bo->bo_quiet = frm; if (IEEE80211_IS_CHAN_ANYG(ni->ni_chan)) { bo->bo_erp = frm; frm = ieee80211_add_erp(frm, ic); } frm = ieee80211_add_xrates(frm, rs); frm = ieee80211_add_rsn(frm, vap); if (IEEE80211_IS_CHAN_HT(ni->ni_chan)) { frm = ieee80211_add_htcap(frm, ni); bo->bo_htinfo = frm; frm = ieee80211_add_htinfo(frm, ni); } if (IEEE80211_IS_CHAN_VHT(ni->ni_chan)) { frm = ieee80211_add_vhtcap(frm, ni); bo->bo_vhtinfo = frm; frm = ieee80211_add_vhtinfo(frm, ni); /* Transmit power envelope */ /* Channel switch wrapper element */ /* Extended bss load element */ } frm = ieee80211_add_wpa(frm, vap); if (vap->iv_flags & IEEE80211_F_WME) { bo->bo_wme = frm; frm = ieee80211_add_wme_param(frm, &ic->ic_wme); } if (IEEE80211_IS_CHAN_HT(ni->ni_chan) && (vap->iv_flags_ht & IEEE80211_FHT_HTCOMPAT)) { frm = ieee80211_add_htcap_vendor(frm, ni); frm = ieee80211_add_htinfo_vendor(frm, ni); } #ifdef IEEE80211_SUPPORT_SUPERG if (vap->iv_flags & IEEE80211_F_ATHEROS) { bo->bo_ath = frm; frm = ieee80211_add_athcaps(frm, ni); } #endif #ifdef IEEE80211_SUPPORT_TDMA if (vap->iv_caps & IEEE80211_C_TDMA) { bo->bo_tdma = frm; frm = ieee80211_add_tdma(frm, vap); } 
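	/*
	 * NB: each bo_* offset recorded above exists so that
	 * ieee80211_beacon_update() below can rewrite a single IE
	 * in place, e.g. (sketch):
	 *
	 *	(void) ieee80211_add_wme_param(bo->bo_wme, wme);
	 *
	 * When an IE changes size, every offset recorded after it
	 * must be shifted by the delta, as the TIM and CSA handling
	 * in ieee80211_beacon_update() does.
	 */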
#endif if (vap->iv_appie_beacon != NULL) { bo->bo_appie = frm; bo->bo_appie_len = vap->iv_appie_beacon->ie_len; frm = add_appie(frm, vap->iv_appie_beacon); } /* XXX TODO: move meshid/meshconf up to before vendor extensions? */ #ifdef IEEE80211_SUPPORT_MESH if (vap->iv_opmode == IEEE80211_M_MBSS) { frm = ieee80211_add_meshid(frm, vap); bo->bo_meshconf = frm; frm = ieee80211_add_meshconf(frm, vap); } #endif bo->bo_tim_trailer_len = frm - bo->bo_tim_trailer; bo->bo_csa_trailer_len = frm - bo->bo_csa; m->m_pkthdr.len = m->m_len = frm - mtod(m, uint8_t *); } /* * Allocate a beacon frame and fillin the appropriate bits. */ /* XXX VHT? */ struct mbuf * ieee80211_beacon_alloc(struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; struct ifnet *ifp = vap->iv_ifp; struct ieee80211_frame *wh; struct mbuf *m; int pktlen; uint8_t *frm; /* * beacon frame format * * Note: This needs updating for 802.11-2012. * * [8] time stamp * [2] beacon interval * [2] cabability information * [tlv] ssid * [tlv] supported rates * [3] parameter set (DS) * [8] CF parameter set (optional) * [tlv] parameter set (IBSS/TIM) * [tlv] country (optional) * [3] power control (optional) * [5] channel switch announcement (CSA) (optional) * [tlv] extended rate phy (ERP) * [tlv] extended supported rates * [tlv] RSN parameters * [tlv] HT capabilities * [tlv] HT information * [tlv] VHT capabilities * [tlv] VHT operation * [tlv] Vendor OUI HT capabilities (optional) * [tlv] Vendor OUI HT information (optional) * XXX Vendor-specific OIDs (e.g. Atheros) * [tlv] WPA parameters * [tlv] WME parameters * [tlv] TDMA parameters (optional) * [tlv] Mesh ID (MBSS) * [tlv] Mesh Conf (MBSS) * [tlv] application data (optional) * NB: we allocate the max space required for the TIM bitmap. * XXX how big is this? */ /* XXX VHT? */ pktlen = 8 /* time stamp */ + sizeof(uint16_t) /* beacon interval */ + sizeof(uint16_t) /* capabilities */ + 2 + ni->ni_esslen /* ssid */ + 2 + IEEE80211_RATE_SIZE /* supported rates */ + 2 + 1 /* DS parameters */ + 2 + 6 /* CF parameters */ + 2 + 4 + vap->iv_tim_len /* DTIM/IBSSPARMS */ + IEEE80211_COUNTRY_MAX_SIZE /* country */ + 2 + 1 /* power control */ + sizeof(struct ieee80211_csa_ie) /* CSA */ + sizeof(struct ieee80211_quiet_ie) /* Quiet */ + 2 + 1 /* ERP */ + 2 + (IEEE80211_RATE_MAXSIZE - IEEE80211_RATE_SIZE) + (vap->iv_caps & IEEE80211_C_WPA ? /* WPA 1+2 */ 2*sizeof(struct ieee80211_ie_wpa) : 0) /* XXX conditional? */ + 4+2*sizeof(struct ieee80211_ie_htcap)/* HT caps */ + 4+2*sizeof(struct ieee80211_ie_htinfo)/* HT info */ + sizeof(struct ieee80211_ie_vhtcap)/* VHT caps */ + sizeof(struct ieee80211_ie_vht_operation)/* VHT info */ + (vap->iv_caps & IEEE80211_C_WME ? /* WME */ sizeof(struct ieee80211_wme_param) : 0) #ifdef IEEE80211_SUPPORT_SUPERG + sizeof(struct ieee80211_ath_ie) /* ATH */ #endif #ifdef IEEE80211_SUPPORT_TDMA + (vap->iv_caps & IEEE80211_C_TDMA ? 
/* TDMA */ sizeof(struct ieee80211_tdma_param) : 0) #endif #ifdef IEEE80211_SUPPORT_MESH + 2 + ni->ni_meshidlen + sizeof(struct ieee80211_meshconf_ie) #endif + IEEE80211_MAX_APPIE ; m = ieee80211_getmgtframe(&frm, ic->ic_headroom + sizeof(struct ieee80211_frame), pktlen); if (m == NULL) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_ANY, "%s: cannot get buf; size %u\n", __func__, pktlen); vap->iv_stats.is_tx_nobuf++; return NULL; } ieee80211_beacon_construct(m, frm, ni); M_PREPEND(m, sizeof(struct ieee80211_frame), M_NOWAIT); KASSERT(m != NULL, ("no space for 802.11 header?")); wh = mtod(m, struct ieee80211_frame *); wh->i_fc[0] = IEEE80211_FC0_VERSION_0 | IEEE80211_FC0_TYPE_MGT | IEEE80211_FC0_SUBTYPE_BEACON; wh->i_fc[1] = IEEE80211_FC1_DIR_NODS; *(uint16_t *)wh->i_dur = 0; IEEE80211_ADDR_COPY(wh->i_addr1, ifp->if_broadcastaddr); IEEE80211_ADDR_COPY(wh->i_addr2, vap->iv_myaddr); IEEE80211_ADDR_COPY(wh->i_addr3, ni->ni_bssid); *(uint16_t *)wh->i_seq = 0; return m; } /* * Update the dynamic parts of a beacon frame based on the current state. */ int ieee80211_beacon_update(struct ieee80211_node *ni, struct mbuf *m, int mcast) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211_beacon_offsets *bo = &vap->iv_bcn_off; struct ieee80211com *ic = ni->ni_ic; int len_changed = 0; uint16_t capinfo; struct ieee80211_frame *wh; ieee80211_seq seqno; IEEE80211_LOCK(ic); /* * Handle 11h channel change when we've reached the count. * We must recalculate the beacon frame contents to account * for the new channel. Note we do this only for the first * vap that reaches this point; subsequent vaps just update * their beacon state to reflect the recalculated channel. */ if (isset(bo->bo_flags, IEEE80211_BEACON_CSA) && vap->iv_csa_count == ic->ic_csa_count) { vap->iv_csa_count = 0; /* * Effect channel change before reconstructing the beacon * frame contents as many places reference ni_chan. */ if (ic->ic_csa_newchan != NULL) ieee80211_csa_completeswitch(ic); /* * NB: ieee80211_beacon_construct clears all pending * updates in bo_flags so we don't need to explicitly * clear IEEE80211_BEACON_CSA. */ ieee80211_beacon_construct(m, mtod(m, uint8_t*) + sizeof(struct ieee80211_frame), ni); /* XXX do WME aggressive mode processing? */ IEEE80211_UNLOCK(ic); return 1; /* just assume length changed */ } wh = mtod(m, struct ieee80211_frame *); /* * XXX TODO Strictly speaking this should be incremented with the TX * lock held so as to serialise access to the non-qos TID sequence * number space. * * If the driver identifies it does its own TX seqno management then * we can skip this (and still not do the TX seqno.) */ seqno = ni->ni_txseqs[IEEE80211_NONQOS_TID]++; *(uint16_t *)&wh->i_seq[0] = htole16(seqno << IEEE80211_SEQ_SEQ_SHIFT); M_SEQNO_SET(m, seqno); /* XXX faster to recalculate entirely or just changes? */ capinfo = ieee80211_getcapinfo(vap, ni->ni_chan); *bo->bo_caps = htole16(capinfo); if (vap->iv_flags & IEEE80211_F_WME) { struct ieee80211_wme_state *wme = &ic->ic_wme; /* * Check for aggressive mode change. When there is * significant high priority traffic in the BSS * throttle back BE traffic by using conservative * parameters. Otherwise BE uses aggressive params * to optimize performance of legacy/non-QoS traffic. 
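		 *
		 * Concretely: dropping out of aggressive mode seeds
		 * wme_hipri_traffic with wme_hipri_switch_hysteresis
		 * rather than zero, so a load that hovers right at
		 * wme_hipri_switch_thresh does not flip the mode back
		 * on the very next beacon; the opposite transition is
		 * seeded the same way below.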
*/ if (wme->wme_flags & WME_F_AGGRMODE) { if (wme->wme_hipri_traffic > wme->wme_hipri_switch_thresh) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_WME, "%s: traffic %u, disable aggressive mode\n", __func__, wme->wme_hipri_traffic); wme->wme_flags &= ~WME_F_AGGRMODE; ieee80211_wme_updateparams_locked(vap); wme->wme_hipri_traffic = wme->wme_hipri_switch_hysteresis; } else wme->wme_hipri_traffic = 0; } else { if (wme->wme_hipri_traffic <= wme->wme_hipri_switch_thresh) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_WME, "%s: traffic %u, enable aggressive mode\n", __func__, wme->wme_hipri_traffic); wme->wme_flags |= WME_F_AGGRMODE; ieee80211_wme_updateparams_locked(vap); wme->wme_hipri_traffic = 0; } else wme->wme_hipri_traffic = wme->wme_hipri_switch_hysteresis; } if (isset(bo->bo_flags, IEEE80211_BEACON_WME)) { (void) ieee80211_add_wme_param(bo->bo_wme, wme); clrbit(bo->bo_flags, IEEE80211_BEACON_WME); } } if (isset(bo->bo_flags, IEEE80211_BEACON_HTINFO)) { ieee80211_ht_update_beacon(vap, bo); clrbit(bo->bo_flags, IEEE80211_BEACON_HTINFO); } #ifdef IEEE80211_SUPPORT_TDMA if (vap->iv_caps & IEEE80211_C_TDMA) { /* * NB: the beacon is potentially updated every TBTT. */ ieee80211_tdma_update_beacon(vap, bo); } #endif #ifdef IEEE80211_SUPPORT_MESH if (vap->iv_opmode == IEEE80211_M_MBSS) ieee80211_mesh_update_beacon(vap, bo); #endif if (vap->iv_opmode == IEEE80211_M_HOSTAP || vap->iv_opmode == IEEE80211_M_MBSS) { /* NB: no IBSS support*/ struct ieee80211_tim_ie *tie = (struct ieee80211_tim_ie *) bo->bo_tim; if (isset(bo->bo_flags, IEEE80211_BEACON_TIM)) { u_int timlen, timoff, i; /* * ATIM/DTIM needs updating. If it fits in the * current space allocated then just copy in the * new bits. Otherwise we need to move any trailing * data to make room. Note that we know there is * contiguous space because ieee80211_beacon_allocate * insures there is space in the mbuf to write a * maximal-size virtual bitmap (based on iv_max_aid). */ /* * Calculate the bitmap size and offset, copy any * trailer out of the way, and then copy in the * new bitmap and update the information element. * Note that the tim bitmap must contain at least * one byte and any offset must be even. */ if (vap->iv_ps_pending != 0) { timoff = 128; /* impossibly large */ for (i = 0; i < vap->iv_tim_len; i++) if (vap->iv_tim_bitmap[i]) { timoff = i &~ 1; break; } KASSERT(timoff != 128, ("tim bitmap empty!")); for (i = vap->iv_tim_len-1; i >= timoff; i--) if (vap->iv_tim_bitmap[i]) break; timlen = 1 + (i - timoff); } else { timoff = 0; timlen = 1; } /* * TODO: validate this! 
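			 *
			 * Worked example (assuming iv_ps_pending != 0,
			 * iv_tim_len == 8 and only iv_tim_bitmap[5]
			 * non-zero): the first scan yields timoff =
			 * 5 & ~1 = 4, the reverse scan stops at i = 5,
			 * so timlen = 1 + (5 - 4) = 2 and the IE is
			 * rewritten with tim_len = 3 + 2 = 5 and
			 * tim_bitctl = 4.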
*/ if (timlen != bo->bo_tim_len) { /* copy up/down trailer */ int adjust = tie->tim_bitmap+timlen - bo->bo_tim_trailer; ovbcopy(bo->bo_tim_trailer, bo->bo_tim_trailer+adjust, bo->bo_tim_trailer_len); bo->bo_tim_trailer += adjust; bo->bo_erp += adjust; bo->bo_htinfo += adjust; bo->bo_vhtinfo += adjust; #ifdef IEEE80211_SUPPORT_SUPERG bo->bo_ath += adjust; #endif #ifdef IEEE80211_SUPPORT_TDMA bo->bo_tdma += adjust; #endif #ifdef IEEE80211_SUPPORT_MESH bo->bo_meshconf += adjust; #endif bo->bo_appie += adjust; bo->bo_wme += adjust; bo->bo_csa += adjust; bo->bo_quiet += adjust; bo->bo_tim_len = timlen; /* update information element */ tie->tim_len = 3 + timlen; tie->tim_bitctl = timoff; len_changed = 1; } memcpy(tie->tim_bitmap, vap->iv_tim_bitmap + timoff, bo->bo_tim_len); clrbit(bo->bo_flags, IEEE80211_BEACON_TIM); IEEE80211_DPRINTF(vap, IEEE80211_MSG_POWER, "%s: TIM updated, pending %u, off %u, len %u\n", __func__, vap->iv_ps_pending, timoff, timlen); } /* count down DTIM period */ if (tie->tim_count == 0) tie->tim_count = tie->tim_period - 1; else tie->tim_count--; /* update state for buffered multicast frames on DTIM */ if (mcast && tie->tim_count == 0) tie->tim_bitctl |= 1; else tie->tim_bitctl &= ~1; if (isset(bo->bo_flags, IEEE80211_BEACON_CSA)) { struct ieee80211_csa_ie *csa = (struct ieee80211_csa_ie *) bo->bo_csa; /* * Insert or update CSA ie. If we're just starting * to count down to the channel switch then we need * to insert the CSA ie. Otherwise we just need to * drop the count. The actual change happens above * when the vap's count reaches the target count. */ if (vap->iv_csa_count == 0) { memmove(&csa[1], csa, bo->bo_csa_trailer_len); bo->bo_erp += sizeof(*csa); bo->bo_htinfo += sizeof(*csa); bo->bo_vhtinfo += sizeof(*csa); bo->bo_wme += sizeof(*csa); #ifdef IEEE80211_SUPPORT_SUPERG bo->bo_ath += sizeof(*csa); #endif #ifdef IEEE80211_SUPPORT_TDMA bo->bo_tdma += sizeof(*csa); #endif #ifdef IEEE80211_SUPPORT_MESH bo->bo_meshconf += sizeof(*csa); #endif bo->bo_appie += sizeof(*csa); bo->bo_csa_trailer_len += sizeof(*csa); bo->bo_quiet += sizeof(*csa); bo->bo_tim_trailer_len += sizeof(*csa); m->m_len += sizeof(*csa); m->m_pkthdr.len += sizeof(*csa); ieee80211_add_csa(bo->bo_csa, vap); } else csa->csa_count--; vap->iv_csa_count++; /* NB: don't clear IEEE80211_BEACON_CSA */ } if (IEEE80211_IS_CHAN_DFS(ic->ic_bsschan) && (vap->iv_flags_ext & IEEE80211_FEXT_DFS) ){ if (vap->iv_quiet) - ieee80211_add_quiet(bo->bo_quiet, vap); + ieee80211_add_quiet(bo->bo_quiet, vap, 1); } if (isset(bo->bo_flags, IEEE80211_BEACON_ERP)) { /* * ERP element needs updating. 
*/ (void) ieee80211_add_erp(bo->bo_erp, ic); clrbit(bo->bo_flags, IEEE80211_BEACON_ERP); } #ifdef IEEE80211_SUPPORT_SUPERG if (isset(bo->bo_flags, IEEE80211_BEACON_ATH)) { ieee80211_add_athcaps(bo->bo_ath, ni); clrbit(bo->bo_flags, IEEE80211_BEACON_ATH); } #endif } if (isset(bo->bo_flags, IEEE80211_BEACON_APPIE)) { const struct ieee80211_appie *aie = vap->iv_appie_beacon; int aielen; uint8_t *frm; aielen = 0; if (aie != NULL) aielen += aie->ie_len; if (aielen != bo->bo_appie_len) { /* copy up/down trailer */ int adjust = aielen - bo->bo_appie_len; ovbcopy(bo->bo_tim_trailer, bo->bo_tim_trailer+adjust, bo->bo_tim_trailer_len); bo->bo_tim_trailer += adjust; bo->bo_appie += adjust; bo->bo_appie_len = aielen; len_changed = 1; } frm = bo->bo_appie; if (aie != NULL) frm = add_appie(frm, aie); clrbit(bo->bo_flags, IEEE80211_BEACON_APPIE); } IEEE80211_UNLOCK(ic); return len_changed; } /* * Do Ethernet-LLC encapsulation for each payload in a fast frame * tunnel encapsulation. The frame is assumed to have an Ethernet * header at the front that must be stripped before prepending the * LLC followed by the Ethernet header passed in (with an Ethernet * type that specifies the payload size). */ struct mbuf * ieee80211_ff_encap1(struct ieee80211vap *vap, struct mbuf *m, const struct ether_header *eh) { struct llc *llc; uint16_t payload; /* XXX optimize by combining m_adj+M_PREPEND */ m_adj(m, sizeof(struct ether_header) - sizeof(struct llc)); llc = mtod(m, struct llc *); llc->llc_dsap = llc->llc_ssap = LLC_SNAP_LSAP; llc->llc_control = LLC_UI; llc->llc_snap.org_code[0] = 0; llc->llc_snap.org_code[1] = 0; llc->llc_snap.org_code[2] = 0; llc->llc_snap.ether_type = eh->ether_type; payload = m->m_pkthdr.len; /* NB: w/o Ethernet header */ M_PREPEND(m, sizeof(struct ether_header), M_NOWAIT); if (m == NULL) { /* XXX cannot happen */ IEEE80211_DPRINTF(vap, IEEE80211_MSG_SUPERG, "%s: no space for ether_header\n", __func__); vap->iv_stats.is_tx_nobuf++; return NULL; } ETHER_HEADER_COPY(mtod(m, void *), eh); mtod(m, struct ether_header *)->ether_type = htons(payload); return m; } /* * Complete an mbuf transmission. * * For now, this simply processes a completed frame after the * driver has completed it's transmission and/or retransmission. * It assumes the frame is an 802.11 encapsulated frame. * * Later on it will grow to become the exit path for a given frame * from the driver and, depending upon how it's been encapsulated * and already transmitted, it may end up doing A-MPDU retransmission, * power save requeuing, etc. * * In order for the above to work, the driver entry point to this * must not hold any driver locks. Thus, the driver needs to delay * any actual mbuf completion until it can release said locks. * * This frees the mbuf and if the mbuf has a node reference, * the node reference will be freed. 
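 *
 * A minimal driver completion path (sketch; bf is an illustrative
 * per-frame descriptor) defers this call until after its own TX
 * lock has been dropped:
 *
 *	ni = bf->bf_node;
 *	bf->bf_node = NULL;
 *	ieee80211_tx_complete(ni, bf->bf_m, status);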
*/ void ieee80211_tx_complete(struct ieee80211_node *ni, struct mbuf *m, int status) { if (ni != NULL) { struct ifnet *ifp = ni->ni_vap->iv_ifp; if (status == 0) { if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (m->m_flags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); } else if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if (m->m_flags & M_TXCB) ieee80211_process_callback(ni, m, status); ieee80211_free_node(ni); } m_freem(m); } Index: projects/ipsec/sys/netinet/tcp_fastopen.c =================================================================== --- projects/ipsec/sys/netinet/tcp_fastopen.c (revision 313186) +++ projects/ipsec/sys/netinet/tcp_fastopen.c (revision 313187) @@ -1,443 +1,444 @@ /*- * Copyright (c) 2015 Patrick Kelsey * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This is a server-side implementation of TCP Fast Open (TFO) [RFC7413]. * * This implementation is currently considered to be experimental and is not * included in kernel builds by default. To include this code, add the * following line to your kernel config: * * options TCP_RFC7413 * * The generated TFO cookies are the 64-bit output of * SipHash24(<16-byte-key>). Multiple concurrent valid keys are * supported so that time-based rolling cookie invalidation policies can be * implemented in the system. The default number of concurrent keys is 2. * This can be adjusted in the kernel config as follows: * * options TCP_RFC7413_MAX_KEYS= * * * The following TFO-specific sysctls are defined: * * net.inet.tcp.fastopen.acceptany (RW, default 0) * When non-zero, all client-supplied TFO cookies will be considered to * be valid. * * net.inet.tcp.fastopen.autokey (RW, default 120) * When this and net.inet.tcp.fastopen.enabled are non-zero, a new key * will be automatically generated after this many seconds. * * net.inet.tcp.fastopen.enabled (RW, default 0) * When zero, no new TFO connections can be created. On the transition * from enabled to disabled, all installed keys are removed. On the * transition from disabled to enabled, if net.inet.tcp.fastopen.autokey * is non-zero and there are no keys installed, a new key will be * generated immediately. 
The transition from enabled to disabled does * not affect any TFO connections in progress; it only prevents new ones * from being made. * * net.inet.tcp.fastopen.keylen (RO) * The key length in bytes. * * net.inet.tcp.fastopen.maxkeys (RO) * The maximum number of keys supported. * * net.inet.tcp.fastopen.numkeys (RO) * The current number of keys installed. * * net.inet.tcp.fastopen.setkey (WO) * Install a new key by writing net.inet.tcp.fastopen.keylen bytes to this * sysctl. * * * In order for TFO connections to be created via a listen socket, that * socket must have the TCP_FASTOPEN socket option set on it. This option * can be set on the socket either before or after the listen() is invoked. * Clearing this option on a listen socket after it has been set has no * effect on existing TFO connections or TFO connections in progress; it * only prevents new TFO connections from being made. * * For passively-created sockets, the TCP_FASTOPEN socket option can be * queried to determine whether the connection was established using TFO. * Note that connections that are established via a TFO SYN, but that fall * back to using a non-TFO SYN|ACK will have the TCP_FASTOPEN socket option * set. * * Per the RFC, this implementation limits the number of TFO connections * that can be in the SYN_RECEIVED state on a per listen-socket basis. * Whenever this limit is exceeded, requests for new TFO connections are * serviced as non-TFO requests. Without such a limit, given a valid TFO * cookie, an attacker could keep the listen queue in an overflow condition * using a TFO SYN flood. This implementation sets the limit at half the * configured listen backlog. * */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCP_FASTOPEN_KEY_LEN SIPHASH_KEY_LENGTH #if !defined(TCP_RFC7413_MAX_KEYS) || (TCP_RFC7413_MAX_KEYS < 1) #define TCP_FASTOPEN_MAX_KEYS 2 #else #define TCP_FASTOPEN_MAX_KEYS TCP_RFC7413_MAX_KEYS #endif struct tcp_fastopen_keylist { unsigned int newest; uint8_t key[TCP_FASTOPEN_MAX_KEYS][TCP_FASTOPEN_KEY_LEN]; }; struct tcp_fastopen_callout { struct callout c; struct vnet *v; }; SYSCTL_NODE(_net_inet_tcp, OID_AUTO, fastopen, CTLFLAG_RW, 0, "TCP Fast Open"); static VNET_DEFINE(int, tcp_fastopen_acceptany) = 0; #define V_tcp_fastopen_acceptany VNET(tcp_fastopen_acceptany) SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, acceptany, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_acceptany), 0, "Accept any non-empty cookie"); static VNET_DEFINE(unsigned int, tcp_fastopen_autokey) = 120; #define V_tcp_fastopen_autokey VNET(tcp_fastopen_autokey) static int sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, autokey, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0, &sysctl_net_inet_tcp_fastopen_autokey, "IU", "Number of seconds between auto-generation of a new key; zero disables"); VNET_DEFINE(unsigned int, tcp_fastopen_enabled) = 0; static int sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, enabled, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0, &sysctl_net_inet_tcp_fastopen_enabled, "IU", "Enable/disable TCP Fast Open processing"); SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, keylen, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_KEY_LEN, "Key length in bytes"); SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxkeys, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, 
TCP_FASTOPEN_MAX_KEYS, "Maximum number of keys supported"); static VNET_DEFINE(unsigned int, tcp_fastopen_numkeys) = 0; #define V_tcp_fastopen_numkeys VNET(tcp_fastopen_numkeys) SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numkeys, CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numkeys), 0, "Number of keys installed"); static int sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setkey, CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR, NULL, 0, &sysctl_net_inet_tcp_fastopen_setkey, "", "Install a new key"); static VNET_DEFINE(struct rmlock, tcp_fastopen_keylock); #define V_tcp_fastopen_keylock VNET(tcp_fastopen_keylock) #define TCP_FASTOPEN_KEYS_RLOCK(t) rm_rlock(&V_tcp_fastopen_keylock, (t)) #define TCP_FASTOPEN_KEYS_RUNLOCK(t) rm_runlock(&V_tcp_fastopen_keylock, (t)) #define TCP_FASTOPEN_KEYS_WLOCK() rm_wlock(&V_tcp_fastopen_keylock) #define TCP_FASTOPEN_KEYS_WUNLOCK() rm_wunlock(&V_tcp_fastopen_keylock) static VNET_DEFINE(struct tcp_fastopen_keylist, tcp_fastopen_keys); #define V_tcp_fastopen_keys VNET(tcp_fastopen_keys) static VNET_DEFINE(struct tcp_fastopen_callout, tcp_fastopen_autokey_ctx); #define V_tcp_fastopen_autokey_ctx VNET(tcp_fastopen_autokey_ctx) static VNET_DEFINE(uma_zone_t, counter_zone); #define V_counter_zone VNET(counter_zone) void tcp_fastopen_init(void) { V_counter_zone = uma_zcreate("tfo", sizeof(unsigned int), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); rm_init(&V_tcp_fastopen_keylock, "tfo_keylock"); callout_init_rm(&V_tcp_fastopen_autokey_ctx.c, &V_tcp_fastopen_keylock, 0); + V_tcp_fastopen_autokey_ctx.v = curvnet; V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1; } void tcp_fastopen_destroy(void) { callout_drain(&V_tcp_fastopen_autokey_ctx.c); rm_destroy(&V_tcp_fastopen_keylock); uma_zdestroy(V_counter_zone); } unsigned int * tcp_fastopen_alloc_counter(void) { unsigned int *counter; counter = uma_zalloc(V_counter_zone, M_NOWAIT); if (counter) *counter = 1; return (counter); } void tcp_fastopen_decrement_counter(unsigned int *counter) { if (*counter == 1) uma_zfree(V_counter_zone, counter); else atomic_subtract_int(counter, 1); } static void tcp_fastopen_addkey_locked(uint8_t *key) { V_tcp_fastopen_keys.newest++; if (V_tcp_fastopen_keys.newest == TCP_FASTOPEN_MAX_KEYS) V_tcp_fastopen_keys.newest = 0; memcpy(V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest], key, TCP_FASTOPEN_KEY_LEN); if (V_tcp_fastopen_numkeys < TCP_FASTOPEN_MAX_KEYS) V_tcp_fastopen_numkeys++; } static void tcp_fastopen_autokey_locked(void) { uint8_t newkey[TCP_FASTOPEN_KEY_LEN]; arc4rand(newkey, TCP_FASTOPEN_KEY_LEN, 0); tcp_fastopen_addkey_locked(newkey); } static void tcp_fastopen_autokey_callout(void *arg) { struct tcp_fastopen_callout *ctx = arg; CURVNET_SET(ctx->v); tcp_fastopen_autokey_locked(); callout_reset(&ctx->c, V_tcp_fastopen_autokey * hz, tcp_fastopen_autokey_callout, ctx); CURVNET_RESTORE(); } static uint64_t tcp_fastopen_make_cookie(uint8_t key[SIPHASH_KEY_LENGTH], struct in_conninfo *inc) { SIPHASH_CTX ctx; uint64_t siphash; SipHash24_Init(&ctx); SipHash_SetKey(&ctx, key); switch (inc->inc_flags & INC_ISIPV6) { #ifdef INET case 0: SipHash_Update(&ctx, &inc->inc_faddr, sizeof(inc->inc_faddr)); break; #endif #ifdef INET6 case INC_ISIPV6: SipHash_Update(&ctx, &inc->inc6_faddr, sizeof(inc->inc6_faddr)); break; #endif } SipHash_Final((u_int8_t *)&siphash, &ctx); return (siphash); } /* * Return values: * -1 the cookie is invalid and no valid cookie is available * 0 the cookie is invalid and the latest cookie has been 
returned * 1 the cookie is valid and the latest cookie has been returned */ int tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie, unsigned int len, uint64_t *latest_cookie) { struct rm_priotracker tracker; unsigned int i, key_index; uint64_t cur_cookie; if (V_tcp_fastopen_acceptany) { *latest_cookie = 0; return (1); } if (len != TCP_FASTOPEN_COOKIE_LEN) { if (V_tcp_fastopen_numkeys > 0) { *latest_cookie = tcp_fastopen_make_cookie( V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest], inc); return (0); } return (-1); } /* * Check against each available key, from newest to oldest. */ TCP_FASTOPEN_KEYS_RLOCK(&tracker); key_index = V_tcp_fastopen_keys.newest; for (i = 0; i < V_tcp_fastopen_numkeys; i++) { cur_cookie = tcp_fastopen_make_cookie(V_tcp_fastopen_keys.key[key_index], inc); if (i == 0) *latest_cookie = cur_cookie; if (memcmp(cookie, &cur_cookie, TCP_FASTOPEN_COOKIE_LEN) == 0) { TCP_FASTOPEN_KEYS_RUNLOCK(&tracker); return (1); } if (key_index == 0) key_index = TCP_FASTOPEN_MAX_KEYS - 1; else key_index--; } TCP_FASTOPEN_KEYS_RUNLOCK(&tracker); return (0); } static int sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS) { int error; unsigned int new; new = V_tcp_fastopen_autokey; error = sysctl_handle_int(oidp, &new, 0, req); if (error == 0 && req->newptr) { if (new > (INT_MAX / hz)) return (EINVAL); TCP_FASTOPEN_KEYS_WLOCK(); if (V_tcp_fastopen_enabled) { if (V_tcp_fastopen_autokey && !new) callout_stop(&V_tcp_fastopen_autokey_ctx.c); else if (new) callout_reset(&V_tcp_fastopen_autokey_ctx.c, new * hz, tcp_fastopen_autokey_callout, &V_tcp_fastopen_autokey_ctx); } V_tcp_fastopen_autokey = new; TCP_FASTOPEN_KEYS_WUNLOCK(); } return (error); } static int sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS) { int error; unsigned int new; new = V_tcp_fastopen_enabled; error = sysctl_handle_int(oidp, &new, 0, req); if (error == 0 && req->newptr) { if (V_tcp_fastopen_enabled && !new) { /* enabled -> disabled */ TCP_FASTOPEN_KEYS_WLOCK(); V_tcp_fastopen_numkeys = 0; V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1; if (V_tcp_fastopen_autokey) callout_stop(&V_tcp_fastopen_autokey_ctx.c); V_tcp_fastopen_enabled = 0; TCP_FASTOPEN_KEYS_WUNLOCK(); } else if (!V_tcp_fastopen_enabled && new) { /* disabled -> enabled */ TCP_FASTOPEN_KEYS_WLOCK(); if (V_tcp_fastopen_autokey && (V_tcp_fastopen_numkeys == 0)) { tcp_fastopen_autokey_locked(); callout_reset(&V_tcp_fastopen_autokey_ctx.c, V_tcp_fastopen_autokey * hz, tcp_fastopen_autokey_callout, &V_tcp_fastopen_autokey_ctx); } V_tcp_fastopen_enabled = 1; TCP_FASTOPEN_KEYS_WUNLOCK(); } } return (error); } static int sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS) { int error; uint8_t newkey[TCP_FASTOPEN_KEY_LEN]; if (req->oldptr != NULL || req->oldlen != 0) return (EINVAL); if (req->newptr == NULL) return (EPERM); if (req->newlen != sizeof(newkey)) return (EINVAL); error = SYSCTL_IN(req, newkey, sizeof(newkey)); if (error) return (error); TCP_FASTOPEN_KEYS_WLOCK(); tcp_fastopen_addkey_locked(newkey); TCP_FASTOPEN_KEYS_WUNLOCK(); return (0); } Index: projects/ipsec/sys/netinet/tcp_subr.c =================================================================== --- projects/ipsec/sys/netinet/tcp_subr.c (revision 313186) +++ projects/ipsec/sys/netinet/tcp_subr.c (revision 313187) @@ -1,2767 +1,2767 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_tcpdebug.h" #include #include #include #include #ifdef TCP_HHOOK #include #endif #include #ifdef TCP_HHOOK #include #endif #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #include #include #include #include #endif #ifdef TCP_RFC7413 #include #endif #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #ifdef TCPPCAP #include #endif #ifdef TCPDEBUG #include #endif #ifdef INET6 #include #endif #ifdef TCP_OFFLOAD #include #endif #include #include #include #include VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS; #ifdef INET6 VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS; #endif struct rwlock tcp_function_lock; static int sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS) { int error, new; new = V_tcp_mssdflt; error = sysctl_handle_int(oidp, &new, 0, req); if (error == 0 && req->newptr) { if (new < TCP_MINMSS) error = EINVAL; else V_tcp_mssdflt = new; } return (error); } SYSCTL_PROC(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(tcp_mssdflt), 0, &sysctl_net_inet_tcp_mss_check, "I", "Default TCP Maximum Segment Size"); #ifdef INET6 static int sysctl_net_inet_tcp_mss_v6_check(SYSCTL_HANDLER_ARGS) { int error, new; new = V_tcp_v6mssdflt; error = sysctl_handle_int(oidp, &new, 0, req); if (error == 0 && req->newptr) { if (new < TCP_MINMSS) error = EINVAL; else V_tcp_v6mssdflt = new; } return (error); } SYSCTL_PROC(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(tcp_v6mssdflt), 0, &sysctl_net_inet_tcp_mss_v6_check, "I", "Default TCP Maximum Segment Size for IPv6"); #endif /* INET6 */ /* * Minimum MSS we accept and use. 
This prevents DoS attacks where * we are forced to a ridiculous low MSS like 20 and send hundreds * of packets instead of one. The effect scales with the available * bandwidth and quickly saturates the CPU and network interface * with packet generation and sending. Set to zero to disable MINMSS * checking. This setting prevents us from sending too small packets. */ VNET_DEFINE(int, tcp_minmss) = TCP_MINMSS; SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_minmss), 0, "Minimum TCP Maximum Segment Size"); VNET_DEFINE(int, tcp_do_rfc1323) = 1; SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_rfc1323), 0, "Enable rfc1323 (high performance TCP) extensions"); static int tcp_log_debug = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_debug, CTLFLAG_RW, &tcp_log_debug, 0, "Log errors caused by incoming TCP segments"); static int tcp_tcbhashsize; SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable"); static int do_tcpdrain = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0, "Enable tcp_drain routine for extra help when low on mbufs"); SYSCTL_UINT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcbinfo.ipi_count), 0, "Number of active PCBs"); static VNET_DEFINE(int, icmp_may_rst) = 1; #define V_icmp_may_rst VNET(icmp_may_rst) SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp_may_rst), 0, "Certain ICMP unreachable messages may abort connections in SYN_SENT"); static VNET_DEFINE(int, tcp_isn_reseed_interval) = 0; #define V_tcp_isn_reseed_interval VNET(tcp_isn_reseed_interval) SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_isn_reseed_interval), 0, "Seconds between reseeding of ISN secret"); static int tcp_soreceive_stream; SYSCTL_INT(_net_inet_tcp, OID_AUTO, soreceive_stream, CTLFLAG_RDTUN, &tcp_soreceive_stream, 0, "Using soreceive_stream for TCP sockets"); VNET_DEFINE(uma_zone_t, sack_hole_zone); #define V_sack_hole_zone VNET(sack_hole_zone) #ifdef TCP_HHOOK VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]); #endif static struct inpcb *tcp_notify(struct inpcb *, int); static struct inpcb *tcp_mtudisc_notify(struct inpcb *, int); static void tcp_mtudisc(struct inpcb *, int); static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr); static struct tcp_function_block tcp_def_funcblk = { "default", tcp_output, tcp_do_segment, tcp_default_ctloutput, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0 }; int t_functions_inited = 0; struct tcp_funchead t_functions; static struct tcp_function_block *tcp_func_set_ptr = &tcp_def_funcblk; static void init_tcp_functions(void) { if (t_functions_inited == 0) { TAILQ_INIT(&t_functions); rw_init_flags(&tcp_function_lock, "tcp_func_lock" , 0); t_functions_inited = 1; } } static struct tcp_function_block * find_tcp_functions_locked(struct tcp_function_set *fs) { struct tcp_function *f; struct tcp_function_block *blk=NULL; TAILQ_FOREACH(f, &t_functions, tf_next) { if (strcmp(f->tf_fb->tfb_tcp_block_name, fs->function_set_name) == 0) { blk = f->tf_fb; break; } } return(blk); } static struct tcp_function_block * find_tcp_fb_locked(struct tcp_function_block *blk, struct tcp_function **s) { struct tcp_function_block *rblk=NULL; struct tcp_function *f; TAILQ_FOREACH(f, &t_functions, tf_next) { if (f->tf_fb == blk) 
{ rblk = blk; if (s) { *s = f; } break; } } return (rblk); } struct tcp_function_block * find_and_ref_tcp_functions(struct tcp_function_set *fs) { struct tcp_function_block *blk; rw_rlock(&tcp_function_lock); blk = find_tcp_functions_locked(fs); if (blk) refcount_acquire(&blk->tfb_refcnt); rw_runlock(&tcp_function_lock); return(blk); } struct tcp_function_block * find_and_ref_tcp_fb(struct tcp_function_block *blk) { struct tcp_function_block *rblk; rw_rlock(&tcp_function_lock); rblk = find_tcp_fb_locked(blk, NULL); if (rblk) refcount_acquire(&rblk->tfb_refcnt); rw_runlock(&tcp_function_lock); return(rblk); } static int sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS) { int error=ENOENT; struct tcp_function_set fs; struct tcp_function_block *blk; memset(&fs, 0, sizeof(fs)); rw_rlock(&tcp_function_lock); blk = find_tcp_fb_locked(tcp_func_set_ptr, NULL); if (blk) { /* Found him */ strcpy(fs.function_set_name, blk->tfb_tcp_block_name); fs.pcbcnt = blk->tfb_refcnt; } rw_runlock(&tcp_function_lock); error = sysctl_handle_string(oidp, fs.function_set_name, sizeof(fs.function_set_name), req); /* Check for error or no change */ if (error != 0 || req->newptr == NULL) return(error); rw_wlock(&tcp_function_lock); blk = find_tcp_functions_locked(&fs); if ((blk == NULL) || (blk->tfb_flags & TCP_FUNC_BEING_REMOVED)) { error = ENOENT; goto done; } tcp_func_set_ptr = blk; done: rw_wunlock(&tcp_function_lock); return (error); } SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_default, CTLTYPE_STRING | CTLFLAG_RW, NULL, 0, sysctl_net_inet_default_tcp_functions, "A", "Set/get the default TCP functions"); static int sysctl_net_inet_list_available(SYSCTL_HANDLER_ARGS) { int error, cnt, linesz; struct tcp_function *f; char *buffer, *cp; size_t bufsz, outsz; cnt = 0; rw_rlock(&tcp_function_lock); TAILQ_FOREACH(f, &t_functions, tf_next) { cnt++; } rw_runlock(&tcp_function_lock); bufsz = (cnt+2) * (TCP_FUNCTION_NAME_LEN_MAX + 12) + 1; buffer = malloc(bufsz, M_TEMP, M_WAITOK); error = 0; cp = buffer; linesz = snprintf(cp, bufsz, "\n%-32s%c %s\n", "Stack", 'D', "PCB count"); cp += linesz; bufsz -= linesz; outsz = linesz; rw_rlock(&tcp_function_lock); TAILQ_FOREACH(f, &t_functions, tf_next) { linesz = snprintf(cp, bufsz, "%-32s%c %u\n", f->tf_fb->tfb_tcp_block_name, (f->tf_fb == tcp_func_set_ptr) ? '*' : ' ', f->tf_fb->tfb_refcnt); if (linesz >= bufsz) { error = EOVERFLOW; break; } cp += linesz; bufsz -= linesz; outsz += linesz; } rw_runlock(&tcp_function_lock); if (error == 0) error = sysctl_handle_string(oidp, buffer, outsz + 1, req); free(buffer, M_TEMP); return (error); } SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_available, CTLTYPE_STRING|CTLFLAG_RD, NULL, 0, sysctl_net_inet_list_available, "A", "list available TCP Function sets"); /* * Target size of TCP PCB hash tables. Must be a power of two. * * Note that this can be overridden by the kernel environment * variable net.inet.tcp.tcbhashsize */ #ifndef TCBHASHSIZE #define TCBHASHSIZE 0 #endif /* * XXX * Callouts should be moved into struct tcp directly. They are currently * separate because the tcpcb structure is exported to userland for sysctl * parsing purposes, which do not know about callouts. 
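 *
 * Consumers are expected to allocate one tcpcb_mem from the zone and
 * hand out only the embedded tcpcb (sketch, assuming the conventional
 * tcp_newtcpcb() usage):
 *
 *	struct tcpcb_mem *tm;
 *
 *	tm = uma_zalloc(V_tcpcb_zone, M_NOWAIT | M_ZERO);
 *	if (tm == NULL)
 *		return (NULL);
 *	tp = &tm->tcb;
 */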
*/ struct tcpcb_mem { struct tcpcb tcb; struct tcp_timer tt; struct cc_var ccv; #ifdef TCP_HHOOK struct osd osd; #endif }; static VNET_DEFINE(uma_zone_t, tcpcb_zone); #define V_tcpcb_zone VNET(tcpcb_zone) MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers"); MALLOC_DEFINE(M_TCPFUNCTIONS, "tcpfunc", "TCP function set memory"); static struct mtx isn_mtx; #define ISN_LOCK_INIT() mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF) #define ISN_LOCK() mtx_lock(&isn_mtx) #define ISN_UNLOCK() mtx_unlock(&isn_mtx) /* * TCP initialization. */ static void tcp_zone_change(void *tag) { uma_zone_set_max(V_tcbinfo.ipi_zone, maxsockets); uma_zone_set_max(V_tcpcb_zone, maxsockets); tcp_tw_zone_change(); } static int tcp_inpcb_init(void *mem, int size, int flags) { struct inpcb *inp = mem; INP_LOCK_INIT(inp, "inp", "tcpinp"); return (0); } /* * Take a value and get the next power of 2 that doesn't overflow. * Used to size the tcp_inpcb hash buckets. */ static int maketcp_hashsize(int size) { int hashsize; /* * auto tune. * get the next power of 2 higher than maxsockets. */ hashsize = 1 << fls(size); /* catch overflow, and just go one power of 2 smaller */ if (hashsize < size) { hashsize = 1 << (fls(size) - 1); } return (hashsize); } int register_tcp_functions(struct tcp_function_block *blk, int wait) { struct tcp_function_block *lblk; struct tcp_function *n; struct tcp_function_set fs; if (t_functions_inited == 0) { init_tcp_functions(); } if ((blk->tfb_tcp_output == NULL) || (blk->tfb_tcp_do_segment == NULL) || (blk->tfb_tcp_ctloutput == NULL) || (strlen(blk->tfb_tcp_block_name) == 0)) { /* * These functions are required and you * need a name. */ return (EINVAL); } if (blk->tfb_tcp_timer_stop_all || blk->tfb_tcp_timer_activate || blk->tfb_tcp_timer_active || blk->tfb_tcp_timer_stop) { /* * If you define one timer function you * must have them all. */ if ((blk->tfb_tcp_timer_stop_all == NULL) || (blk->tfb_tcp_timer_activate == NULL) || (blk->tfb_tcp_timer_active == NULL) || (blk->tfb_tcp_timer_stop == NULL)) { return (EINVAL); } } n = malloc(sizeof(struct tcp_function), M_TCPFUNCTIONS, wait); if (n == NULL) { return (ENOMEM); } n->tf_fb = blk; strcpy(fs.function_set_name, blk->tfb_tcp_block_name); rw_wlock(&tcp_function_lock); lblk = find_tcp_functions_locked(&fs); if (lblk) { /* Duplicate name space not allowed */ rw_wunlock(&tcp_function_lock); free(n, M_TCPFUNCTIONS); return (EALREADY); } refcount_init(&blk->tfb_refcnt, 0); blk->tfb_flags = 0; TAILQ_INSERT_TAIL(&t_functions, n, tf_next); rw_wunlock(&tcp_function_lock); return(0); } int deregister_tcp_functions(struct tcp_function_block *blk) { struct tcp_function_block *lblk; struct tcp_function *f; int error=ENOENT; if (strcmp(blk->tfb_tcp_block_name, "default") == 0) { /* You can't un-register the default */ return (EPERM); } rw_wlock(&tcp_function_lock); if (blk == tcp_func_set_ptr) { /* You can't free the current default */ rw_wunlock(&tcp_function_lock); return (EBUSY); } if (blk->tfb_refcnt) { /* Still tcb attached, mark it. 
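 * The sysctl handler above refuses to select a block once
 * TCP_FUNC_BEING_REMOVED is set, so it can no longer become the
 * default; the block then drains as its remaining connections close,
 * and a later deregister_tcp_functions() call succeeds.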
*/ blk->tfb_flags |= TCP_FUNC_BEING_REMOVED; rw_wunlock(&tcp_function_lock); return (EBUSY); } lblk = find_tcp_fb_locked(blk, &f); if (lblk) { /* Found */ TAILQ_REMOVE(&t_functions, f, tf_next); f->tf_fb = NULL; free(f, M_TCPFUNCTIONS); error = 0; } rw_wunlock(&tcp_function_lock); return (error); } void tcp_init(void) { const char *tcbhash_tuneable; int hashsize; tcbhash_tuneable = "net.inet.tcp.tcbhashsize"; #ifdef TCP_HHOOK if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN, &V_tcp_hhh[HHOOK_TCP_EST_IN], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register helper hook\n", __func__); if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT, &V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register helper hook\n", __func__); #endif hashsize = TCBHASHSIZE; TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize); if (hashsize == 0) { /* * Auto tune the hash size based on maxsockets. * A perfect hash would have a 1:1 mapping * (hashsize = maxsockets) however it's been * suggested that O(2) average is better. */ hashsize = maketcp_hashsize(maxsockets / 4); /* * Our historical default is 512, * do not autotune lower than this. */ if (hashsize < 512) hashsize = 512; if (bootverbose && IS_DEFAULT_VNET(curvnet)) printf("%s: %s auto tuned to %d\n", __func__, tcbhash_tuneable, hashsize); } /* * We require a hashsize to be a power of two. * Previously if it was not a power of two we would just reset it * back to 512, which could be a nasty surprise if you did not notice * the error message. * Instead what we do is clip it to the closest power of two lower * than the specified hash value. */ if (!powerof2(hashsize)) { int oldhashsize = hashsize; hashsize = maketcp_hashsize(hashsize); /* prevent absurdly low value */ if (hashsize < 16) hashsize = 16; printf("%s: WARNING: TCB hash size not a power of 2, " "clipped from %d to %d.\n", __func__, oldhashsize, hashsize); } in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize, "tcp_inpcb", tcp_inpcb_init, NULL, 0, IPI_HASHFIELDS_4TUPLE); /* * These have to be type stable for the benefit of the timers. */ V_tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_zone_set_max(V_tcpcb_zone, maxsockets); uma_zone_set_warning(V_tcpcb_zone, "kern.ipc.maxsockets limit reached"); tcp_tw_init(); syncache_init(); tcp_hc_init(); TUNABLE_INT_FETCH("net.inet.tcp.sack.enable", &V_tcp_do_sack); V_sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); +#ifdef TCP_RFC7413 + tcp_fastopen_init(); +#endif + /* Skip initialization of globals for non-default instances. */ if (!IS_DEFAULT_VNET(curvnet)) return; tcp_reass_global_init(); /* XXX virtualize those below?
*/ tcp_delacktime = TCPTV_DELACK; tcp_keepinit = TCPTV_KEEP_INIT; tcp_keepidle = TCPTV_KEEP_IDLE; tcp_keepintvl = TCPTV_KEEPINTVL; tcp_maxpersistidle = TCPTV_KEEP_IDLE; tcp_msl = TCPTV_MSL; tcp_rexmit_min = TCPTV_MIN; if (tcp_rexmit_min < 1) tcp_rexmit_min = 1; tcp_persmin = TCPTV_PERSMIN; tcp_persmax = TCPTV_PERSMAX; tcp_rexmit_slop = TCPTV_CPU_VAR; tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT; tcp_tcbhashsize = hashsize; /* Setup the tcp function block list */ init_tcp_functions(); register_tcp_functions(&tcp_def_funcblk, M_WAITOK); if (tcp_soreceive_stream) { #ifdef INET tcp_usrreqs.pru_soreceive = soreceive_stream; #endif #ifdef INET6 tcp6_usrreqs.pru_soreceive = soreceive_stream; #endif /* INET6 */ } #ifdef INET6 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) #else /* INET6 */ #define TCP_MINPROTOHDR (sizeof(struct tcpiphdr)) #endif /* INET6 */ if (max_protohdr < TCP_MINPROTOHDR) max_protohdr = TCP_MINPROTOHDR; if (max_linkhdr + TCP_MINPROTOHDR > MHLEN) panic("tcp_init"); #undef TCP_MINPROTOHDR ISN_LOCK_INIT(); EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL, SHUTDOWN_PRI_DEFAULT); EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL, EVENTHANDLER_PRI_ANY); #ifdef TCPPCAP tcp_pcap_init(); -#endif - -#ifdef TCP_RFC7413 - tcp_fastopen_init(); #endif } #ifdef VIMAGE static void tcp_destroy(void *unused __unused) { int n; #ifdef TCP_HHOOK int error; #endif /* * All our processes are gone, all our sockets should be cleaned * up, which means, we should be past the tcp_discardcb() calls. * Sleep to let all tcpcb timers really disappear and cleanup. */ for (;;) { INP_LIST_RLOCK(&V_tcbinfo); n = V_tcbinfo.ipi_count; INP_LIST_RUNLOCK(&V_tcbinfo); if (n == 0) break; pause("tcpdes", hz / 10); } tcp_hc_destroy(); syncache_destroy(); tcp_tw_destroy(); in_pcbinfo_destroy(&V_tcbinfo); /* tcp_discardcb() clears the sack_holes up. */ uma_zdestroy(V_sack_hole_zone); uma_zdestroy(V_tcpcb_zone); #ifdef TCP_RFC7413 /* * Cannot free the zone until all tcpcbs are released as we attach * the allocations to them. */ tcp_fastopen_destroy(); #endif #ifdef TCP_HHOOK error = hhook_head_deregister(V_tcp_hhh[HHOOK_TCP_EST_IN]); if (error != 0) { printf("%s: WARNING: unable to deregister helper hook " "type=%d, id=%d: error %d returned\n", __func__, HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN, error); } error = hhook_head_deregister(V_tcp_hhh[HHOOK_TCP_EST_OUT]); if (error != 0) { printf("%s: WARNING: unable to deregister helper hook " "type=%d, id=%d: error %d returned\n", __func__, HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT, error); } #endif } VNET_SYSUNINIT(tcp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, tcp_destroy, NULL); #endif void tcp_fini(void *xtp) { } /* * Fill in the IP and TCP headers for an outgoing packet, given the tcpcb. * tcp_template used to store this data in mbufs, but we now recopy it out * of the tcpcb each time to conserve mbufs. 
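 *
 * (Editorial note: only a skeletal header is produced -- the length,
 * sequence-number and checksum fields are zeroed or set to minimal
 * values here, and for IPv4 the pseudo-header sum is folded in later
 * via in_pseudo().)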
*/ void tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr) { struct tcphdr *th = (struct tcphdr *)tcp_ptr; INP_WLOCK_ASSERT(inp); #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) { struct ip6_hdr *ip6; ip6 = (struct ip6_hdr *)ip_ptr; ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | (inp->inp_flow & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | (IPV6_VERSION & IPV6_VERSION_MASK); ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_plen = htons(sizeof(struct tcphdr)); ip6->ip6_src = inp->in6p_laddr; ip6->ip6_dst = inp->in6p_faddr; } #endif /* INET6 */ #if defined(INET6) && defined(INET) else #endif #ifdef INET { struct ip *ip; ip = (struct ip *)ip_ptr; ip->ip_v = IPVERSION; ip->ip_hl = 5; ip->ip_tos = inp->inp_ip_tos; ip->ip_len = 0; ip->ip_id = 0; ip->ip_off = 0; ip->ip_ttl = inp->inp_ip_ttl; ip->ip_sum = 0; ip->ip_p = IPPROTO_TCP; ip->ip_src = inp->inp_laddr; ip->ip_dst = inp->inp_faddr; } #endif /* INET */ th->th_sport = inp->inp_lport; th->th_dport = inp->inp_fport; th->th_seq = 0; th->th_ack = 0; th->th_x2 = 0; th->th_off = 5; th->th_flags = 0; th->th_win = 0; th->th_urp = 0; th->th_sum = 0; /* in_pseudo() is called later for ipv4 */ } /* * Create template to be used to send tcp packets on a connection. * Allocates an mbuf and fills in a skeletal tcp/ip header. The only * use for this function is in keepalives, which use tcp_respond. */ struct tcptemp * tcpip_maketemplate(struct inpcb *inp) { struct tcptemp *t; t = malloc(sizeof(*t), M_TEMP, M_NOWAIT); if (t == NULL) return (NULL); tcpip_fillheaders(inp, (void *)&t->tt_ipgen, (void *)&t->tt_t); return (t); } /* * Send a single message to the TCP at address specified by * the given TCP/IP header. If m == NULL, then we make a copy * of the tcpiphdr at th and send directly to the addressed host. * This is used to force keep alive messages out using the TCP * template for a connection. If flags are given then we send * a message back to the TCP which originated the segment th, * and discard the mbuf containing it and any other attached mbufs. * * In any case the ack and sequence number of the transmitted * segment are as specified by the parameters. * * NOTE: If m != NULL, then th must point to *inside* the mbuf. 
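 *
 * Illustrative caller (editorial sketch modeled on the keepalive
 * timer; see tcpip_maketemplate() above -- not a verbatim quote of
 * that code):
 *
 *	t = tcpip_maketemplate(inp);
 *	if (t != NULL) {
 *		tcp_respond(tp, t->tt_ipgen, &t->tt_t, NULL,
 *		    tp->rcv_nxt, tp->snd_una - 1, 0);
 *		free(t, M_TEMP);
 *	}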
*/ void tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, tcp_seq ack, tcp_seq seq, int flags) { struct tcpopt to; struct inpcb *inp; struct ip *ip; struct mbuf *optm; struct tcphdr *nth; u_char *optp; #ifdef INET6 struct ip6_hdr *ip6; int isipv6; #endif /* INET6 */ int optlen, tlen, win; bool incl_opts; KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL")); #ifdef INET6 isipv6 = ((struct ip *)ipgen)->ip_v == (IPV6_VERSION >> 4); ip6 = ipgen; #endif /* INET6 */ ip = ipgen; if (tp != NULL) { inp = tp->t_inpcb; KASSERT(inp != NULL, ("tcp control block w/o inpcb")); INP_WLOCK_ASSERT(inp); } else inp = NULL; incl_opts = false; win = 0; if (tp != NULL) { if (!(flags & TH_RST)) { win = sbspace(&inp->inp_socket->so_rcv); if (win > TCP_MAXWIN << tp->rcv_scale) win = TCP_MAXWIN << tp->rcv_scale; } if ((tp->t_flags & TF_NOOPT) == 0) incl_opts = true; } if (m == NULL) { m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return; m->m_data += max_linkhdr; #ifdef INET6 if (isipv6) { bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(struct ip6_hdr)); ip6 = mtod(m, struct ip6_hdr *); nth = (struct tcphdr *)(ip6 + 1); } else #endif /* INET6 */ { bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); ip = mtod(m, struct ip *); nth = (struct tcphdr *)(ip + 1); } bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr)); flags = TH_ACK; } else if (!M_WRITABLE(m)) { struct mbuf *n; /* Can't reuse 'm', allocate a new mbuf. */ n = m_gethdr(M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return; } if (!m_dup_pkthdr(n, m, M_NOWAIT)) { m_freem(m); m_freem(n); return; } n->m_data += max_linkhdr; /* m_len is set later */ #define xchg(a,b,type) { type t; t=a; a=b; b=t; } #ifdef INET6 if (isipv6) { bcopy((caddr_t)ip6, mtod(n, caddr_t), sizeof(struct ip6_hdr)); ip6 = mtod(n, struct ip6_hdr *); xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr); nth = (struct tcphdr *)(ip6 + 1); } else #endif /* INET6 */ { bcopy((caddr_t)ip, mtod(n, caddr_t), sizeof(struct ip)); ip = mtod(n, struct ip *); xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t); nth = (struct tcphdr *)(ip + 1); } bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr)); xchg(nth->th_dport, nth->th_sport, uint16_t); th = nth; m_freem(m); m = n; } else { /* * reuse the mbuf. * XXX MRT We inherit the FIB, which is lucky. */ m_freem(m->m_next); m->m_next = NULL; m->m_data = (caddr_t)ipgen; /* m_len is set later */ #ifdef INET6 if (isipv6) { xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr); nth = (struct tcphdr *)(ip6 + 1); } else #endif /* INET6 */ { xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t); nth = (struct tcphdr *)(ip + 1); } if (th != nth) { /* * this is usually a case when an extension header * exists between the IPv6 header and the * TCP header. */ nth->th_sport = th->th_sport; nth->th_dport = th->th_dport; } xchg(nth->th_dport, nth->th_sport, uint16_t); #undef xchg } tlen = 0; #ifdef INET6 if (isipv6) tlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr); #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET tlen = sizeof (struct tcpiphdr); #endif #ifdef INVARIANTS m->m_len = 0; KASSERT(M_TRAILINGSPACE(m) >= tlen, ("Not enough trailing space for message (m=%p, need=%d, have=%ld)", m, tlen, (long)M_TRAILINGSPACE(m))); #endif m->m_len = tlen; to.to_flags = 0; if (incl_opts) { /* Make sure we have room. 
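 * for the worst-case option block (TCP_MAXOLEN bytes): if the current
 * mbuf cannot hold it, chain a separate mbuf for the options, and fall
 * back to sending without options if even that allocation fails.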
*/ if (M_TRAILINGSPACE(m) < TCP_MAXOLEN) { m->m_next = m_get(M_NOWAIT, MT_DATA); if (m->m_next) { optp = mtod(m->m_next, u_char *); optm = m->m_next; } else incl_opts = false; } else { optp = (u_char *) (nth + 1); optm = m; } } if (incl_opts) { /* Timestamps. */ if (tp->t_flags & TF_RCVD_TSTMP) { to.to_tsval = tcp_ts_getticks() + tp->ts_offset; to.to_tsecr = tp->ts_recent; to.to_flags |= TOF_TS; } #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) /* TCP-MD5 (RFC2385). */ if (tp->t_flags & TF_SIGNATURE) to.to_flags |= TOF_SIGNATURE; #endif /* Add the options. */ tlen += optlen = tcp_addoptions(&to, optp); /* Update m_len in the correct mbuf. */ optm->m_len += optlen; } else optlen = 0; #ifdef INET6 if (isipv6) { ip6->ip6_flow = 0; ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_plen = htons(tlen - sizeof(*ip6)); } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET { ip->ip_len = htons(tlen); ip->ip_ttl = V_ip_defttl; if (V_path_mtu_discovery) ip->ip_off |= htons(IP_DF); } #endif m->m_pkthdr.len = tlen; m->m_pkthdr.rcvif = NULL; #ifdef MAC if (inp != NULL) { /* * Packet is associated with a socket, so allow the * label of the response to reflect the socket label. */ INP_WLOCK_ASSERT(inp); mac_inpcb_create_mbuf(inp, m); } else { /* * Packet is not associated with a socket, so possibly * update the label in place. */ mac_netinet_tcp_reply(m); } #endif nth->th_seq = htonl(seq); nth->th_ack = htonl(ack); nth->th_x2 = 0; nth->th_off = (sizeof (struct tcphdr) + optlen) >> 2; nth->th_flags = flags; if (tp != NULL) nth->th_win = htons((u_short) (win >> tp->rcv_scale)); else nth->th_win = htons((u_short)win); nth->th_urp = 0; #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (to.to_flags & TOF_SIGNATURE) { if (!TCPMD5_ENABLED() || TCPMD5_OUTPUT(m, nth, to.to_signature) != 0) { m_freem(m); return; } } #endif m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); #ifdef INET6 if (isipv6) { m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; nth->th_sum = in6_cksum_pseudo(ip6, tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0); ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->t_inpcb : NULL, NULL); } #endif /* INET6 */ #if defined(INET6) && defined(INET) else #endif #ifdef INET { m->m_pkthdr.csum_flags = CSUM_TCP; nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p))); } #endif /* INET */ #ifdef TCPDEBUG if (tp == NULL || (inp->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0); #endif TCP_PROBE3(debug__output, tp, th, m); if (flags & TH_RST) TCP_PROBE5(accept__refused, NULL, NULL, m, tp, nth); TCP_PROBE5(send, NULL, tp, m, tp, nth); #ifdef INET6 if (isipv6) (void) ip6_output(m, NULL, NULL, 0, NULL, NULL, inp); #endif /* INET6 */ #if defined(INET) && defined(INET6) else #endif #ifdef INET (void) ip_output(m, NULL, NULL, 0, NULL, inp); #endif } /* * Create a new TCP control block, making an * empty reassembly queue and hooking it to the argument * protocol control block. The `inp' parameter must have * come from the zone allocator set up in tcp_init(). */ struct tcpcb * tcp_newtcpcb(struct inpcb *inp) { struct tcpcb_mem *tm; struct tcpcb *tp; #ifdef INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ tm = uma_zalloc(V_tcpcb_zone, M_NOWAIT | M_ZERO); if (tm == NULL) return (NULL); tp = &tm->tcb; /* Initialise cc_var struct for this tcpcb. 
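 * Both the cc_var here and the timers assigned further down point back
 * into the same tcpcb_mem allocation, so they need no separate
 * lifetime management.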
*/ tp->ccv = &tm->ccv; tp->ccv->type = IPPROTO_TCP; tp->ccv->ccvc.tcp = tp; rw_rlock(&tcp_function_lock); tp->t_fb = tcp_func_set_ptr; refcount_acquire(&tp->t_fb->tfb_refcnt); rw_runlock(&tcp_function_lock); /* * Use the current system default CC algorithm. */ CC_LIST_RLOCK(); KASSERT(!STAILQ_EMPTY(&cc_list), ("cc_list is empty!")); CC_ALGO(tp) = CC_DEFAULT(); CC_LIST_RUNLOCK(); if (CC_ALGO(tp)->cb_init != NULL) if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) { if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp, 1); refcount_release(&tp->t_fb->tfb_refcnt); uma_zfree(V_tcpcb_zone, tm); return (NULL); } #ifdef TCP_HHOOK tp->osd = &tm->osd; if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) { if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp, 1); refcount_release(&tp->t_fb->tfb_refcnt); uma_zfree(V_tcpcb_zone, tm); return (NULL); } #endif #ifdef VIMAGE tp->t_vnet = inp->inp_vnet; #endif tp->t_timers = &tm->tt; /* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */ tp->t_maxseg = #ifdef INET6 isipv6 ? V_tcp_v6mssdflt : #endif /* INET6 */ V_tcp_mssdflt; /* Set up our timeouts. */ callout_init(&tp->t_timers->tt_rexmt, 1); callout_init(&tp->t_timers->tt_persist, 1); callout_init(&tp->t_timers->tt_keep, 1); callout_init(&tp->t_timers->tt_2msl, 1); callout_init(&tp->t_timers->tt_delack, 1); if (V_tcp_do_rfc1323) tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); if (V_tcp_do_sack) tp->t_flags |= TF_SACK_PERMIT; TAILQ_INIT(&tp->snd_holes); /* * The tcpcb will hold a reference on its inpcb until tcp_discardcb() * is called. */ in_pcbref(inp); /* Reference for tcpcb */ tp->t_inpcb = inp; /* * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no * rtt estimate. Set rttvar so that srtt + 4 * rttvar gives * reasonable initial retransmit time. */ tp->t_srtt = TCPTV_SRTTBASE; tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4; tp->t_rttmin = tcp_rexmit_min; tp->t_rxtcur = TCPTV_RTOBASE; tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->t_rcvtime = ticks; /* * IPv4 TTL initialization is necessary for an IPv6 socket as well, * because the socket may be bound to an IPv6 wildcard address, * which may match an IPv4-mapped IPv6 address. */ inp->inp_ip_ttl = V_ip_defttl; inp->inp_ppcb = tp; #ifdef TCPPCAP /* * Init the TCP PCAP queues. */ tcp_pcap_tcpcb_init(tp); #endif if (tp->t_fb->tfb_tcp_fb_init) { (*tp->t_fb->tfb_tcp_fb_init)(tp); } return (tp); /* XXX */ } /* * Switch the congestion control algorithm back to NewReno for any active * control blocks using an algorithm which is about to go away. * This ensures the CC framework can allow the unload to proceed without leaving * any dangling pointers which would trigger a panic. * Returning non-zero would inform the CC framework that something went wrong * and it would be unsafe to allow the unload to proceed. However, there is no * way for this to occur with this implementation so we always return zero. */ int tcp_ccalgounload(struct cc_algo *unload_algo) { struct cc_algo *tmpalgo; struct inpcb *inp; struct tcpcb *tp; VNET_ITERATOR_DECL(vnet_iter); /* * Check all active control blocks across all network stacks and change * any that are using "unload_algo" back to NewReno. If "unload_algo" * requires cleanup code to be run, call it. 
*/ VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); INP_INFO_WLOCK(&V_tcbinfo); /* * New connections already part way through being initialised * with the CC algo we're removing will not race with this code * because the INP_INFO_WLOCK is held during initialisation. We * therefore don't enter the loop below until the connection * list has stabilised. */ LIST_FOREACH(inp, &V_tcb, inp_list) { INP_WLOCK(inp); /* Important to skip tcptw structs. */ if (!(inp->inp_flags & INP_TIMEWAIT) && (tp = intotcpcb(inp)) != NULL) { /* * By holding INP_WLOCK here, we are assured * that the connection is not currently * executing inside the CC module's functions * i.e. it is safe to make the switch back to * NewReno. */ if (CC_ALGO(tp) == unload_algo) { tmpalgo = CC_ALGO(tp); /* NewReno does not require any init. */ CC_ALGO(tp) = &newreno_cc_algo; if (tmpalgo->cb_destroy != NULL) tmpalgo->cb_destroy(tp->ccv); } } INP_WUNLOCK(inp); } INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK(); return (0); } /* * Drop a TCP connection, reporting * the specified error. If connection is synchronized, * then send a RST to peer. */ struct tcpcb * tcp_drop(struct tcpcb *tp, int errno) { struct socket *so = tp->t_inpcb->inp_socket; INP_INFO_LOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); if (TCPS_HAVERCVDSYN(tp->t_state)) { tcp_state_change(tp, TCPS_CLOSED); (void) tp->t_fb->tfb_tcp_output(tp); TCPSTAT_INC(tcps_drops); } else TCPSTAT_INC(tcps_conndrops); if (errno == ETIMEDOUT && tp->t_softerror) errno = tp->t_softerror; so->so_error = errno; return (tcp_close(tp)); } void tcp_discardcb(struct tcpcb *tp) { struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; #ifdef INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ int released; INP_WLOCK_ASSERT(inp); /* * Make sure that all of our timers are stopped before we delete the * PCB. * * If stopping a timer fails, we schedule a discard function in same * callout, and the last discard function called will take care of * deleting the tcpcb. */ tp->t_timers->tt_draincnt = 0; tcp_timer_stop(tp, TT_REXMT); tcp_timer_stop(tp, TT_PERSIST); tcp_timer_stop(tp, TT_KEEP); tcp_timer_stop(tp, TT_2MSL); tcp_timer_stop(tp, TT_DELACK); if (tp->t_fb->tfb_tcp_timer_stop_all) { /* * Call the stop-all function of the methods, * this function should call the tcp_timer_stop() * method with each of the function specific timeouts. * That stop will be called via the tfb_tcp_timer_stop() * which should use the async drain function of the * callout system (see tcp_var.h). */ tp->t_fb->tfb_tcp_timer_stop_all(tp); } /* * If we got enough samples through the srtt filter, * save the rtt and rttvar in the routing entry. * 'Enough' is arbitrarily defined as 4 rtt samples. * 4 samples is enough for the srtt filter to converge * to within enough % of the correct value; fewer samples * and we could save a bogus rtt. The danger is not high * as tcp quickly recovers from everything. * XXX: Works very well but needs some more statistics! */ if (tp->t_rttupdated >= 4) { struct hc_metrics_lite metrics; uint32_t ssthresh; bzero(&metrics, sizeof(metrics)); /* * Update the ssthresh always when the conditions below * are satisfied. This gives us better new start value * for the congestion avoidance for new connections. * ssthresh is only set if packet loss occurred on a session. * * XXXRW: 'so' may be NULL here, and/or socket buffer may be * being torn down. Ideally this code would not use 'so'. 
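 *
 * Worked example of the conversion below (editorial): with
 * ssthresh = 32768 bytes and t_maxseg = 1448, the rounding division
 * yields 23 segments, which for IPv4 is scaled back up by t_maxseg
 * plus the 40-byte tcpiphdr to 23 * 1488 = 34224 bytes as stored in
 * the hostcache.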
*/ ssthresh = tp->snd_ssthresh; if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) { /* * convert the limit from user data bytes to * packets then to packet data bytes. */ ssthresh = (ssthresh + tp->t_maxseg / 2) / tp->t_maxseg; if (ssthresh < 2) ssthresh = 2; ssthresh *= (tp->t_maxseg + #ifdef INET6 (isipv6 ? sizeof (struct ip6_hdr) + sizeof (struct tcphdr) : #endif sizeof (struct tcpiphdr) #ifdef INET6 ) #endif ); } else ssthresh = 0; metrics.rmx_ssthresh = ssthresh; metrics.rmx_rtt = tp->t_srtt; metrics.rmx_rttvar = tp->t_rttvar; metrics.rmx_cwnd = tp->snd_cwnd; metrics.rmx_sendpipe = 0; metrics.rmx_recvpipe = 0; tcp_hc_update(&inp->inp_inc, &metrics); } /* free the reassembly queue, if any */ tcp_reass_flush(tp); #ifdef TCP_OFFLOAD /* Disconnect offload device, if any. */ if (tp->t_flags & TF_TOE) tcp_offload_detach(tp); #endif tcp_free_sackholes(tp); #ifdef TCPPCAP /* Free the TCP PCAP queues. */ tcp_pcap_drain(&(tp->t_inpkts)); tcp_pcap_drain(&(tp->t_outpkts)); #endif /* Allow the CC algorithm to clean up after itself. */ if (CC_ALGO(tp)->cb_destroy != NULL) CC_ALGO(tp)->cb_destroy(tp->ccv); #ifdef TCP_HHOOK khelp_destroy_osd(tp->osd); #endif CC_ALGO(tp) = NULL; inp->inp_ppcb = NULL; if (tp->t_timers->tt_draincnt == 0) { /* We own the last reference on tcpcb, let's free it. */ TCPSTATES_DEC(tp->t_state); if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp, 1); refcount_release(&tp->t_fb->tfb_refcnt); tp->t_inpcb = NULL; uma_zfree(V_tcpcb_zone, tp); released = in_pcbrele_wlocked(inp); KASSERT(!released, ("%s: inp %p should not have been released " "here", __func__, inp)); } } void tcp_timer_discard(void *ptp) { struct inpcb *inp; struct tcpcb *tp; tp = (struct tcpcb *)ptp; CURVNET_SET(tp->t_vnet); INP_INFO_RLOCK(&V_tcbinfo); inp = tp->t_inpcb; KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); INP_WLOCK(inp); KASSERT((tp->t_timers->tt_flags & TT_STOPPED) != 0, ("%s: tcpcb has to be stopped here", __func__)); tp->t_timers->tt_draincnt--; if (tp->t_timers->tt_draincnt == 0) { /* We own the last reference on this tcpcb, let's free it. */ TCPSTATES_DEC(tp->t_state); if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp, 1); refcount_release(&tp->t_fb->tfb_refcnt); tp->t_inpcb = NULL; uma_zfree(V_tcpcb_zone, tp); if (in_pcbrele_wlocked(inp)) { INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } } INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } /* * Attempt to close a TCP control block, marking it as dropped, and freeing * the socket if we hold the only reference. */ struct tcpcb * tcp_close(struct tcpcb *tp) { struct inpcb *inp = tp->t_inpcb; struct socket *so; INP_INFO_LOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); #ifdef TCP_OFFLOAD if (tp->t_state == TCPS_LISTEN) tcp_offload_listen_stop(tp); #endif #ifdef TCP_RFC7413 /* * This releases the TFO pending counter resource for TFO listen * sockets as well as passively-created TFO sockets that transition * from SYN_RECEIVED to CLOSED. 
*/ if (tp->t_tfo_pending) { tcp_fastopen_decrement_counter(tp->t_tfo_pending); tp->t_tfo_pending = NULL; } #endif in_pcbdrop(inp); TCPSTAT_INC(tcps_closed); if (tp->t_state != TCPS_CLOSED) tcp_state_change(tp, TCPS_CLOSED); KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL")); so = inp->inp_socket; soisdisconnected(so); if (inp->inp_flags & INP_SOCKREF) { KASSERT(so->so_state & SS_PROTOREF, ("tcp_close: !SS_PROTOREF")); inp->inp_flags &= ~INP_SOCKREF; INP_WUNLOCK(inp); ACCEPT_LOCK(); SOCK_LOCK(so); so->so_state &= ~SS_PROTOREF; sofree(so); return (NULL); } return (tp); } void tcp_drain(void) { VNET_ITERATOR_DECL(vnet_iter); if (!do_tcpdrain) return; VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); struct inpcb *inpb; struct tcpcb *tcpb; /* * Walk the tcpbs, if existing, and flush the reassembly queue, * if there is one... * XXX: The "Net/3" implementation doesn't imply that the TCP * reassembly queue should be flushed, but in a situation * where we're really low on mbufs, this is potentially * useful. */ INP_INFO_WLOCK(&V_tcbinfo); LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) { if (inpb->inp_flags & INP_TIMEWAIT) continue; INP_WLOCK(inpb); if ((tcpb = intotcpcb(inpb)) != NULL) { tcp_reass_flush(tcpb); tcp_clean_sackreport(tcpb); #ifdef TCPPCAP if (tcp_pcap_aggressive_free) { /* Free the TCP PCAP queues. */ tcp_pcap_drain(&(tcpb->t_inpkts)); tcp_pcap_drain(&(tcpb->t_outpkts)); } #endif } INP_WUNLOCK(inpb); } INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } /* * Notify a tcp user of an asynchronous error; * store error as soft error, but wake up user * (for now, won't do anything until can select for soft error). * * Do not wake up user since there currently is no mechanism for * reporting soft errors (yet - a kqueue filter may be added). */ static struct inpcb * tcp_notify(struct inpcb *inp, int error) { struct tcpcb *tp; INP_INFO_LOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) || (inp->inp_flags & INP_DROPPED)) return (inp); tp = intotcpcb(inp); KASSERT(tp != NULL, ("tcp_notify: tp == NULL")); /* * Ignore some errors if we are hooked up. * If connection hasn't completed, has retransmitted several times, * and receives a second error, give up now. This is better * than waiting a long time to establish a connection that * can never complete. */ if (tp->t_state == TCPS_ESTABLISHED && (error == EHOSTUNREACH || error == ENETUNREACH || error == EHOSTDOWN)) { if (inp->inp_route.ro_rt) { RTFREE(inp->inp_route.ro_rt); inp->inp_route.ro_rt = (struct rtentry *)NULL; } return (inp); } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 && tp->t_softerror) { tp = tcp_drop(tp, error); if (tp != NULL) return (inp); else return (NULL); } else { tp->t_softerror = error; return (inp); } #if 0 wakeup( &so->so_timeo); sorwakeup(so); sowwakeup(so); #endif } static int tcp_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, m, n, pcb_count; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == NULL) { n = V_tcbinfo.ipi_count + counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]); n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb); return (0); } if (req->newptr != NULL) return (EPERM); /* * OK, now we're committed to doing something. 
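 * The pattern below is: snapshot the generation and pcb counts under
 * the list lock and emit the header (SYN_RECEIVED entries come
 * separately from syncache_pcblist()), walk the list once taking a
 * reference on each visible inpcb, copy each one out with only its own
 * lock held, and drop all the references in a final pass.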
*/ INP_LIST_RLOCK(&V_tcbinfo); gencnt = V_tcbinfo.ipi_gencnt; n = V_tcbinfo.ipi_count; INP_LIST_RUNLOCK(&V_tcbinfo); m = counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]); error = sysctl_wire_old_buffer(req, 2 * (sizeof xig) + (n + m) * sizeof(struct xtcpcb)); if (error != 0) return (error); xig.xig_len = sizeof xig; xig.xig_count = n + m; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return (error); error = syncache_pcblist(req, m, &pcb_count); if (error) return (error); inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); INP_INFO_WLOCK(&V_tcbinfo); for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0; inp != NULL && i < n; inp = LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); if (inp->inp_gencnt <= gencnt) { /* * XXX: This use of cr_cansee(), introduced with * TCP state changes, is not quite right, but for * now, better than nothing. */ if (inp->inp_flags & INP_TIMEWAIT) { if (intotw(inp) != NULL) error = cr_cansee(req->td->td_ucred, intotw(inp)->tw_cred); else error = EINVAL; /* Skip this inp. */ } else error = cr_canseeinpcb(req->td->td_ucred, inp); if (error == 0) { in_pcbref(inp); inp_list[i++] = inp; } } INP_WUNLOCK(inp); } INP_INFO_WUNLOCK(&V_tcbinfo); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (inp->inp_gencnt <= gencnt) { struct xtcpcb xt; void *inp_ppcb; bzero(&xt, sizeof(xt)); xt.xt_len = sizeof xt; /* XXX should avoid extra copy */ bcopy(inp, &xt.xt_inp, sizeof *inp); inp_ppcb = inp->inp_ppcb; if (inp_ppcb == NULL) bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); else if (inp->inp_flags & INP_TIMEWAIT) { bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); xt.xt_tp.t_state = TCPS_TIME_WAIT; } else { bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); if (xt.xt_tp.t_timers) tcp_timer_to_xtimer(&xt.xt_tp, xt.xt_tp.t_timers, &xt.xt_timer); } if (inp->inp_socket != NULL) sotoxsocket(inp->inp_socket, &xt.xt_socket); else { bzero(&xt.xt_socket, sizeof xt.xt_socket); xt.xt_socket.xso_protocol = IPPROTO_TCP; } xt.xt_inp.inp_gencnt = inp->inp_gencnt; INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xt, sizeof xt); } else INP_RUNLOCK(inp); } INP_INFO_RLOCK(&V_tcbinfo); for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (!in_pcbrele_rlocked(inp)) INP_RUNLOCK(inp); } INP_INFO_RUNLOCK(&V_tcbinfo); if (!error) { /* * Give the user an updated idea of our state. * If the generation differs from what we told * her before, she knows that something happened * while we were processing this request, and it * might be necessary to retry. 
*/ INP_LIST_RLOCK(&V_tcbinfo); xig.xig_gen = V_tcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_tcbinfo.ipi_count + pcb_count; INP_LIST_RUNLOCK(&V_tcbinfo); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return (error); } SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, tcp_pcblist, "S,xtcpcb", "List of active TCP connections"); #ifdef INET static int tcp_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in addrs[2]; struct inpcb *inp; int error; error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); inp = in_pcblookup(&V_tcbinfo, addrs[1].sin_addr, addrs[1].sin_port, addrs[0].sin_addr, addrs[0].sin_port, INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) error = cr_canseeinpcb(req->td->td_ucred, inp); if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); } else error = ENOENT; if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, tcp_getcred, "S,xucred", "Get the xucred of a TCP connection"); #endif /* INET */ #ifdef INET6 static int tcp6_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in6 addrs[2]; struct inpcb *inp; int error; #ifdef INET int mapped = 0; #endif error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 || (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) { return (error); } if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) { #ifdef INET if (IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr)) mapped = 1; else #endif return (EINVAL); } #ifdef INET if (mapped == 1) inp = in_pcblookup(&V_tcbinfo, *(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12], addrs[1].sin6_port, *(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12], addrs[0].sin6_port, INPLOOKUP_RLOCKPCB, NULL); else #endif inp = in6_pcblookup(&V_tcbinfo, &addrs[1].sin6_addr, addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) error = cr_canseeinpcb(req->td->td_ucred, inp); if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); } else error = ENOENT; if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, tcp6_getcred, "S,xucred", "Get the xucred of a TCP6 connection"); #endif /* INET6 */ #ifdef INET void tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) { struct ip *ip = vip; struct tcphdr *th; struct in_addr faddr; struct inpcb *inp; struct tcpcb *tp; struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify; struct icmp *icp; struct in_conninfo inc; tcp_seq icmp_tcp_seq; int mtu; faddr = ((struct sockaddr_in *)sa)->sin_addr; if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) return; if (cmd == PRC_MSGSIZE) notify = tcp_mtudisc_notify; else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB || cmd == PRC_UNREACH_PORT || cmd == PRC_UNREACH_PROTOCOL || cmd == PRC_TIMXCEED_INTRANS) && ip) notify = tcp_drop_syn_sent; /* * Hostdead is ugly because it goes linearly through all PCBs. 
* XXX: We never get this from ICMP, otherwise it makes an * excellent DoS attack on machines with many connections. */ else if (cmd == PRC_HOSTDEAD) ip = NULL; else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) return; if (ip == NULL) { in_pcbnotifyall(&V_tcbinfo, faddr, inetctlerrmap[cmd], notify); return; } icp = (struct icmp *)((caddr_t)ip - offsetof(struct icmp, icmp_ip)); th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); INP_INFO_RLOCK(&V_tcbinfo); inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, ip->ip_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL); if (inp != NULL && PRC_IS_REDIRECT(cmd)) { /* signal EHOSTDOWN, as it flushes the cached route */ inp = (*notify)(inp, EHOSTDOWN); if (inp != NULL) INP_WUNLOCK(inp); } else if (inp != NULL) { if (!(inp->inp_flags & INP_TIMEWAIT) && !(inp->inp_flags & INP_DROPPED) && !(inp->inp_socket == NULL)) { icmp_tcp_seq = ntohl(th->th_seq); tp = intotcpcb(inp); if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) && SEQ_LT(icmp_tcp_seq, tp->snd_max)) { if (cmd == PRC_MSGSIZE) { /* * MTU discovery: * If we got a needfrag set the MTU * in the route to the suggested new * value (if given) and then notify. */ mtu = ntohs(icp->icmp_nextmtu); /* * If no alternative MTU was * proposed, try the next smaller * one. */ if (!mtu) mtu = ip_next_mtu( ntohs(ip->ip_len), 1); if (mtu < V_tcp_minmss + sizeof(struct tcpiphdr)) mtu = V_tcp_minmss + sizeof(struct tcpiphdr); /* * Only process the offered MTU if it * is smaller than the current one. */ if (mtu < tp->t_maxseg + sizeof(struct tcpiphdr)) { bzero(&inc, sizeof(inc)); inc.inc_faddr = faddr; inc.inc_fibnum = inp->inp_inc.inc_fibnum; tcp_hc_updatemtu(&inc, mtu); tcp_mtudisc(inp, mtu); } } else inp = (*notify)(inp, inetctlerrmap[cmd]); } } if (inp != NULL) INP_WUNLOCK(inp); } else { bzero(&inc, sizeof(inc)); inc.inc_fport = th->th_dport; inc.inc_lport = th->th_sport; inc.inc_faddr = faddr; inc.inc_laddr = ip->ip_src; syncache_unreach(&inc, th); } INP_INFO_RUNLOCK(&V_tcbinfo); } #endif /* INET */ #ifdef INET6 void tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d) { struct in6_addr *dst; struct tcphdr *th; struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify; struct ip6_hdr *ip6; struct mbuf *m; struct inpcb *inp; struct tcpcb *tp; struct icmp6_hdr *icmp6; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; struct in_conninfo inc; tcp_seq icmp_tcp_seq; unsigned int mtu; unsigned int off; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; /* if the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; icmp6 = ip6cp->ip6c_icmp6; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; sa6_src = ip6cp->ip6c_src; dst = ip6cp->ip6c_finaldst; } else { m = NULL; ip6 = NULL; off = 0; /* fool gcc */ sa6_src = &sa6_any; dst = NULL; } if (cmd == PRC_MSGSIZE) notify = tcp_mtudisc_notify; else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB || cmd == PRC_UNREACH_PORT || cmd == PRC_UNREACH_PROTOCOL || cmd == PRC_TIMXCEED_INTRANS) && ip6 != NULL) notify = tcp_drop_syn_sent; /* * Hostdead is ugly because it goes linearly through all PCBs. * XXX: We never get this from ICMP, otherwise it makes an * excellent DoS attack on machines with many connections. 
*/ else if (cmd == PRC_HOSTDEAD) ip6 = NULL; else if ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0) return; if (ip6 == NULL) { in6_pcbnotify(&V_tcbinfo, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, NULL, notify); return; } /* Check if we can safely get the ports from the tcp hdr */ if (m == NULL || (m->m_pkthdr.len < (int32_t) (off + offsetof(struct tcphdr, th_seq)))) { return; } th = (struct tcphdr *) mtodo(ip6cp->ip6c_m, ip6cp->ip6c_off); INP_INFO_RLOCK(&V_tcbinfo); inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_dst, th->th_dport, &ip6->ip6_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL); if (inp != NULL && PRC_IS_REDIRECT(cmd)) { /* signal EHOSTDOWN, as it flushes the cached route */ inp = (*notify)(inp, EHOSTDOWN); if (inp != NULL) INP_WUNLOCK(inp); } else if (inp != NULL) { if (!(inp->inp_flags & INP_TIMEWAIT) && !(inp->inp_flags & INP_DROPPED) && !(inp->inp_socket == NULL)) { icmp_tcp_seq = ntohl(th->th_seq); tp = intotcpcb(inp); if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) && SEQ_LT(icmp_tcp_seq, tp->snd_max)) { if (cmd == PRC_MSGSIZE) { /* * MTU discovery: * If we got a needfrag set the MTU * in the route to the suggested new * value (if given) and then notify. */ mtu = ntohl(icmp6->icmp6_mtu); /* * If no alternative MTU was * proposed, or the proposed * MTU was too small, set to * the min. */ if (mtu < IPV6_MMTU) mtu = IPV6_MMTU - 8; bzero(&inc, sizeof(inc)); inc.inc_fibnum = M_GETFIB(m); inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = *dst; if (in6_setscope(&inc.inc6_faddr, m->m_pkthdr.rcvif, NULL)) goto unlock_inp; /* * Only process the offered MTU if it * is smaller than the current one. */ if (mtu < tp->t_maxseg + (sizeof (*th) + sizeof (*ip6))) { tcp_hc_updatemtu(&inc, mtu); tcp_mtudisc(inp, mtu); ICMP6STAT_INC(icp6s_pmtuchg); } } else inp = (*notify)(inp, inet6ctlerrmap[cmd]); } } unlock_inp: if (inp != NULL) INP_WUNLOCK(inp); } else { bzero(&inc, sizeof(inc)); inc.inc_fibnum = M_GETFIB(m); inc.inc_flags |= INC_ISIPV6; inc.inc_fport = th->th_dport; inc.inc_lport = th->th_sport; inc.inc6_faddr = *dst; inc.inc6_laddr = ip6->ip6_src; syncache_unreach(&inc, th); } INP_INFO_RUNLOCK(&V_tcbinfo); } #endif /* INET6 */ /* * Following is where TCP initial sequence number generation occurs. * * There are two places where we must use initial sequence numbers: * 1. In SYN-ACK packets. * 2. In SYN packets. * * All ISNs for SYN-ACK packets are generated by the syncache. See * tcp_syncache.c for details. * * The ISNs in SYN packets must be monotonic; TIME_WAIT recycling * depends on this property. In addition, these ISNs should be * unguessable so as to prevent connection hijacking. To satisfy * the requirements of this situation, the algorithm outlined in * RFC 1948 is used, with only small modifications. * * Implementation details: * * Time is based off the system timer, and is corrected so that it * increases by one megabyte per second. This allows for proper * recycling on high speed LANs while still leaving over an hour * before rollover. * * As reading the *exact* system time is too expensive to be done * whenever setting up a TCP connection, we increment the time * offset in two ways. First, a small random positive increment * is added to isn_offset for each connection that is set up. * Second, the function tcp_isn_tick fires once per clock tick * and increments isn_offset as necessary so that sequence numbers * are incremented at approximately ISN_BYTES_PER_SECOND. 
The * random positive increments serve only to ensure that the same * exact sequence number is never sent out twice (as could otherwise * happen when a port is recycled in less than the system tick * interval.) * * net.inet.tcp.isn_reseed_interval controls the number of seconds * between seeding of isn_secret. This is normally set to zero, * as reseeding should not be necessary. * * Locking of the global variables isn_secret, isn_last_reseed, isn_offset, * isn_offset_old, and isn_ctx is performed using the TCP pcbinfo lock. In * general, this means holding an exclusive (write) lock. */ #define ISN_BYTES_PER_SECOND 1048576 #define ISN_STATIC_INCREMENT 4096 #define ISN_RANDOM_INCREMENT (4096 - 1) static VNET_DEFINE(u_char, isn_secret[32]); static VNET_DEFINE(int, isn_last); static VNET_DEFINE(int, isn_last_reseed); static VNET_DEFINE(u_int32_t, isn_offset); static VNET_DEFINE(u_int32_t, isn_offset_old); #define V_isn_secret VNET(isn_secret) #define V_isn_last VNET(isn_last) #define V_isn_last_reseed VNET(isn_last_reseed) #define V_isn_offset VNET(isn_offset) #define V_isn_offset_old VNET(isn_offset_old) tcp_seq tcp_new_isn(struct tcpcb *tp) { MD5_CTX isn_ctx; u_int32_t md5_buffer[4]; tcp_seq new_isn; u_int32_t projected_offset; INP_WLOCK_ASSERT(tp->t_inpcb); ISN_LOCK(); /* Seed if this is the first use, reseed if requested. */ if ((V_isn_last_reseed == 0) || ((V_tcp_isn_reseed_interval > 0) && (((u_int)V_isn_last_reseed + (u_int)V_tcp_isn_reseed_interval*hz) < (u_int)ticks))) { read_random(&V_isn_secret, sizeof(V_isn_secret)); V_isn_last_reseed = ticks; } /* Compute the md5 hash and return the ISN. */ MD5Init(&isn_ctx); MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_fport, sizeof(u_short)); MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_lport, sizeof(u_short)); #ifdef INET6 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) { MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_faddr, sizeof(struct in6_addr)); MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_laddr, sizeof(struct in6_addr)); } else #endif { MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_faddr, sizeof(struct in_addr)); MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_laddr, sizeof(struct in_addr)); } MD5Update(&isn_ctx, (u_char *) &V_isn_secret, sizeof(V_isn_secret)); MD5Final((u_char *) &md5_buffer, &isn_ctx); new_isn = (tcp_seq) md5_buffer[0]; V_isn_offset += ISN_STATIC_INCREMENT + (arc4random() & ISN_RANDOM_INCREMENT); if (ticks != V_isn_last) { projected_offset = V_isn_offset_old + ISN_BYTES_PER_SECOND / hz * (ticks - V_isn_last); if (SEQ_GT(projected_offset, V_isn_offset)) V_isn_offset = projected_offset; V_isn_offset_old = V_isn_offset; V_isn_last = ticks; } new_isn += V_isn_offset; ISN_UNLOCK(); return (new_isn); } /* * When a specific ICMP unreachable message is received and the * connection state is SYN-SENT, drop the connection. This behavior * is controlled by the icmp_may_rst sysctl. */ struct inpcb * tcp_drop_syn_sent(struct inpcb *inp, int errno) { struct tcpcb *tp; INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) || (inp->inp_flags & INP_DROPPED)) return (inp); tp = intotcpcb(inp); if (tp->t_state != TCPS_SYN_SENT) return (inp); tp = tcp_drop(tp, errno); if (tp != NULL) return (inp); else return (NULL); } /* * When `need fragmentation' ICMP is received, update our idea of the MSS * based on the new value. Also nudge TCP to send something, since we * know the packet we just sent was dropped. * This duplicates some code in the tcp_mss() function in tcp_input.c. 
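 *
 * tcp_mtudisc_notify() below funnels into tcp_mtudisc() with an
 * mtuoffer of -1, i.e. no explicit offer, so tcp_mss_update() falls
 * back to the hostcache entry that the ICMP handlers above have
 * already refreshed via tcp_hc_updatemtu().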
*/ static struct inpcb * tcp_mtudisc_notify(struct inpcb *inp, int error) { tcp_mtudisc(inp, -1); return (inp); } static void tcp_mtudisc(struct inpcb *inp, int mtuoffer) { struct tcpcb *tp; struct socket *so; INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) || (inp->inp_flags & INP_DROPPED)) return; tp = intotcpcb(inp); KASSERT(tp != NULL, ("tcp_mtudisc: tp == NULL")); tcp_mss_update(tp, -1, mtuoffer, NULL, NULL); so = inp->inp_socket; SOCKBUF_LOCK(&so->so_snd); /* If the mss is larger than the socket buffer, decrease the mss. */ if (so->so_snd.sb_hiwat < tp->t_maxseg) tp->t_maxseg = so->so_snd.sb_hiwat; SOCKBUF_UNLOCK(&so->so_snd); TCPSTAT_INC(tcps_mturesent); tp->t_rtttime = 0; tp->snd_nxt = tp->snd_una; tcp_free_sackholes(tp); tp->snd_recover = tp->snd_max; if (tp->t_flags & TF_SACK_PERMIT) EXIT_FASTRECOVERY(tp->t_flags); tp->t_fb->tfb_tcp_output(tp); } #ifdef INET /* * Look-up the routing entry to the peer of this inpcb. If no route * is found and it cannot be allocated, then return 0. This routine * is called by TCP routines that access the rmx structure and by * tcp_mss_update to get the peer/interface MTU. */ uint32_t tcp_maxmtu(struct in_conninfo *inc, struct tcp_ifcap *cap) { struct nhop4_extended nh4; struct ifnet *ifp; uint32_t maxmtu = 0; KASSERT(inc != NULL, ("tcp_maxmtu with NULL in_conninfo pointer")); if (inc->inc_faddr.s_addr != INADDR_ANY) { if (fib4_lookup_nh_ext(inc->inc_fibnum, inc->inc_faddr, NHR_REF, 0, &nh4) != 0) return (0); ifp = nh4.nh_ifp; maxmtu = nh4.nh_mtu; /* Report additional interface capabilities. */ if (cap != NULL) { if (ifp->if_capenable & IFCAP_TSO4 && ifp->if_hwassist & CSUM_TSO) { cap->ifcap |= CSUM_TSO; cap->tsomax = ifp->if_hw_tsomax; cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; } } fib4_free_nh_ext(inc->inc_fibnum, &nh4); } return (maxmtu); } #endif /* INET */ #ifdef INET6 uint32_t tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap) { struct nhop6_extended nh6; struct in6_addr dst6; uint32_t scopeid; struct ifnet *ifp; uint32_t maxmtu = 0; KASSERT(inc != NULL, ("tcp_maxmtu6 with NULL in_conninfo pointer")); if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) { in6_splitscope(&inc->inc6_faddr, &dst6, &scopeid); if (fib6_lookup_nh_ext(inc->inc_fibnum, &dst6, scopeid, 0, 0, &nh6) != 0) return (0); ifp = nh6.nh_ifp; maxmtu = nh6.nh_mtu; /* Report additional interface capabilities. */ if (cap != NULL) { if (ifp->if_capenable & IFCAP_TSO6 && ifp->if_hwassist & CSUM_TSO) { cap->ifcap |= CSUM_TSO; cap->tsomax = ifp->if_hw_tsomax; cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; } } fib6_free_nh_ext(inc->inc_fibnum, &nh6); } return (maxmtu); } #endif /* INET6 */ /* * Calculate effective SMSS per RFC5681 definition for a given TCP * connection at its current state, taking into account SACK and etc. */ u_int tcp_maxseg(const struct tcpcb *tp) { u_int optlen; if (tp->t_flags & TF_NOOPT) return (tp->t_maxseg); /* * Here we have a simplified code from tcp_addoptions(), * without a proper loop, and having most of paddings hardcoded. * We might make mistakes with padding here in some edge cases, * but this is harmless, since result of tcp_maxseg() is used * only in cwnd and ssthresh estimations. 
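 *
 * Worked example (editorial): an established connection that received
 * timestamps but currently reports no SACK blocks subtracts only
 * TCPOLEN_TSTAMP_APPA (12) bytes, so with t_maxseg = 1460 the
 * effective SMSS comes out as 1448.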
*/ #define PAD(len) ((((len) / 4) + !!((len) % 4)) * 4) if (TCPS_HAVEESTABLISHED(tp->t_state)) { if (tp->t_flags & TF_RCVD_TSTMP) optlen = TCPOLEN_TSTAMP_APPA; else optlen = 0; #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (tp->t_flags & TF_SIGNATURE) optlen += PAD(TCPOLEN_SIGNATURE); #endif if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) { optlen += TCPOLEN_SACKHDR; optlen += tp->rcv_numsacks * TCPOLEN_SACK; optlen = PAD(optlen); } } else { if (tp->t_flags & TF_REQ_TSTMP) optlen = TCPOLEN_TSTAMP_APPA; else optlen = PAD(TCPOLEN_MAXSEG); if (tp->t_flags & TF_REQ_SCALE) optlen += PAD(TCPOLEN_WINDOW); #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (tp->t_flags & TF_SIGNATURE) optlen += PAD(TCPOLEN_SIGNATURE); #endif if (tp->t_flags & TF_SACK_PERMIT) optlen += PAD(TCPOLEN_SACK_PERMITTED); } #undef PAD optlen = min(optlen, TCP_MAXOLEN); return (tp->t_maxseg - optlen); } static int sysctl_drop(SYSCTL_HANDLER_ARGS) { /* addrs[0] is a foreign socket, addrs[1] is a local one. */ struct sockaddr_storage addrs[2]; struct inpcb *inp; struct tcpcb *tp; struct tcptw *tw; struct sockaddr_in *fin, *lin; #ifdef INET6 struct sockaddr_in6 *fin6, *lin6; #endif int error; inp = NULL; fin = lin = NULL; #ifdef INET6 fin6 = lin6 = NULL; #endif error = 0; if (req->oldptr != NULL || req->oldlen != 0) return (EINVAL); if (req->newptr == NULL) return (EPERM); if (req->newlen < sizeof(addrs)) return (ENOMEM); error = SYSCTL_IN(req, &addrs, sizeof(addrs)); if (error) return (error); switch (addrs[0].ss_family) { #ifdef INET6 case AF_INET6: fin6 = (struct sockaddr_in6 *)&addrs[0]; lin6 = (struct sockaddr_in6 *)&addrs[1]; if (fin6->sin6_len != sizeof(struct sockaddr_in6) || lin6->sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); if (IN6_IS_ADDR_V4MAPPED(&fin6->sin6_addr)) { if (!IN6_IS_ADDR_V4MAPPED(&lin6->sin6_addr)) return (EINVAL); in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[0]); in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[1]); fin = (struct sockaddr_in *)&addrs[0]; lin = (struct sockaddr_in *)&addrs[1]; break; } error = sa6_embedscope(fin6, V_ip6_use_defzone); if (error) return (error); error = sa6_embedscope(lin6, V_ip6_use_defzone); if (error) return (error); break; #endif #ifdef INET case AF_INET: fin = (struct sockaddr_in *)&addrs[0]; lin = (struct sockaddr_in *)&addrs[1]; if (fin->sin_len != sizeof(struct sockaddr_in) || lin->sin_len != sizeof(struct sockaddr_in)) return (EINVAL); break; #endif default: return (EINVAL); } INP_INFO_RLOCK(&V_tcbinfo); switch (addrs[0].ss_family) { #ifdef INET6 case AF_INET6: inp = in6_pcblookup(&V_tcbinfo, &fin6->sin6_addr, fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port, INPLOOKUP_WLOCKPCB, NULL); break; #endif #ifdef INET case AF_INET: inp = in_pcblookup(&V_tcbinfo, fin->sin_addr, fin->sin_port, lin->sin_addr, lin->sin_port, INPLOOKUP_WLOCKPCB, NULL); break; #endif } if (inp != NULL) { if (inp->inp_flags & INP_TIMEWAIT) { /* * XXXRW: There currently exists a state where an * inpcb is present, but its timewait state has been * discarded. For now, don't allow dropping of this * type of inpcb. 
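 *
 * (This handler backs tcpdrop(8): userland writes the foreign/local
 * sockaddr pair into net.inet.tcp.drop and the matching connection,
 * if any, is aborted with ECONNABORTED.)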
*/ tw = intotw(inp); if (tw != NULL) tcp_twclose(tw, 0); else INP_WUNLOCK(inp); } else if (!(inp->inp_flags & INP_DROPPED) && !(inp->inp_socket->so_options & SO_ACCEPTCONN)) { tp = intotcpcb(inp); tp = tcp_drop(tp, ECONNABORTED); if (tp != NULL) INP_WUNLOCK(inp); } else INP_WUNLOCK(inp); } else error = ESRCH; INP_INFO_RUNLOCK(&V_tcbinfo); return (error); } SYSCTL_PROC(_net_inet_tcp, TCPCTL_DROP, drop, CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP, NULL, 0, sysctl_drop, "", "Drop TCP connection"); /* * Generate a standardized TCP log line for use throughout the * tcp subsystem. Memory allocation is done with M_NOWAIT to * allow use in the interrupt context. * * NB: The caller MUST free(s, M_TCPLOG) the returned string. * NB: The function may return NULL if memory allocation failed. * * Due to header inclusion and ordering limitations the struct ip * and ip6_hdr pointers have to be passed as void pointers. */ char * tcp_log_vain(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr) { /* Is logging enabled? */ if (tcp_log_in_vain == 0) return (NULL); return (tcp_log_addr(inc, th, ip4hdr, ip6hdr)); } char * tcp_log_addrs(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr) { /* Is logging enabled? */ if (tcp_log_debug == 0) return (NULL); return (tcp_log_addr(inc, th, ip4hdr, ip6hdr)); } static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr) { char *s, *sp; size_t size; struct ip *ip; #ifdef INET6 const struct ip6_hdr *ip6; ip6 = (const struct ip6_hdr *)ip6hdr; #endif /* INET6 */ ip = (struct ip *)ip4hdr; /* * The log line looks like this: * "TCP: [1.2.3.4]:50332 to [1.2.3.4]:80 tcpflags 0x2" */ size = sizeof("TCP: []:12345 to []:12345 tcpflags 0x2<>") + sizeof(PRINT_TH_FLAGS) + 1 + #ifdef INET6 2 * INET6_ADDRSTRLEN; #else 2 * INET_ADDRSTRLEN; #endif /* INET6 */ s = malloc(size, M_TCPLOG, M_ZERO|M_NOWAIT); if (s == NULL) return (NULL); strcat(s, "TCP: ["); sp = s + strlen(s); if (inc && ((inc->inc_flags & INC_ISIPV6) == 0)) { inet_ntoa_r(inc->inc_faddr, sp); sp = s + strlen(s); sprintf(sp, "]:%i to [", ntohs(inc->inc_fport)); sp = s + strlen(s); inet_ntoa_r(inc->inc_laddr, sp); sp = s + strlen(s); sprintf(sp, "]:%i", ntohs(inc->inc_lport)); #ifdef INET6 } else if (inc) { ip6_sprintf(sp, &inc->inc6_faddr); sp = s + strlen(s); sprintf(sp, "]:%i to [", ntohs(inc->inc_fport)); sp = s + strlen(s); ip6_sprintf(sp, &inc->inc6_laddr); sp = s + strlen(s); sprintf(sp, "]:%i", ntohs(inc->inc_lport)); } else if (ip6 && th) { ip6_sprintf(sp, &ip6->ip6_src); sp = s + strlen(s); sprintf(sp, "]:%i to [", ntohs(th->th_sport)); sp = s + strlen(s); ip6_sprintf(sp, &ip6->ip6_dst); sp = s + strlen(s); sprintf(sp, "]:%i", ntohs(th->th_dport)); #endif /* INET6 */ #ifdef INET } else if (ip && th) { inet_ntoa_r(ip->ip_src, sp); sp = s + strlen(s); sprintf(sp, "]:%i to [", ntohs(th->th_sport)); sp = s + strlen(s); inet_ntoa_r(ip->ip_dst, sp); sp = s + strlen(s); sprintf(sp, "]:%i", ntohs(th->th_dport)); #endif /* INET */ } else { free(s, M_TCPLOG); return (NULL); } sp = s + strlen(s); if (th) sprintf(sp, " tcpflags 0x%b", th->th_flags, PRINT_TH_FLAGS); if (*(s + size - 1) != '\0') panic("%s: string too long", __func__); return (s); } /* * A subroutine which makes it easy to track TCP state changes with DTrace. * This function shouldn't be called for t_state initializations that don't * correspond to actual TCP state transitions. 
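 *
 * The TCP_PROBE6() below feeds the DTrace tcp provider's state-change
 * probe, so transitions can be observed from userland without any
 * additional instrumentation.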
*/ void tcp_state_change(struct tcpcb *tp, int newstate) { #if defined(KDTRACE_HOOKS) int pstate = tp->t_state; #endif TCPSTATES_DEC(tp->t_state); TCPSTATES_INC(newstate); tp->t_state = newstate; TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, pstate); } Index: projects/ipsec/sys/vm/vm_page.c =================================================================== --- projects/ipsec/sys/vm/vm_page.c (revision 313186) +++ projects/ipsec/sys/vm/vm_page.c (revision 313187) @@ -1,3603 +1,3624 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1998 Matthew Dillon. All Rights Reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 */ /*- * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * GENERAL RULES ON VM_PAGE MANIPULATION * * - A page queue lock is required when adding or removing a page from a * page queue regardless of other locks or the busy state of a page. 
* * * In general, no thread besides the page daemon can acquire or * hold more than one page queue lock at a time. * * * The page daemon can acquire and hold any pair of page queue * locks in any order. * * - The object lock is required when inserting or removing * pages from an object (vm_page_insert() or vm_page_remove()). * */ /* * Resident memory management module. */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/lock.h> #include <sys/kernel.h> #include <sys/limits.h> #include <sys/linker.h> #include <sys/malloc.h> #include <sys/mman.h> #include <sys/msgbuf.h> #include <sys/mutex.h> #include <sys/proc.h> #include <sys/rwlock.h> #include <sys/sbuf.h> #include <sys/smp.h> #include <sys/sysctl.h> #include <sys/vmmeter.h> #include <sys/vnode.h> #include <vm/vm.h> #include <vm/pmap.h> #include <vm/vm_param.h> #include <vm/vm_kern.h> #include <vm/vm_object.h> #include <vm/vm_page.h> #include <vm/vm_pageout.h> #include <vm/vm_pager.h> #include <vm/vm_phys.h> #include <vm/vm_radix.h> #include <vm/vm_reserv.h> #include <vm/vm_extern.h> #include <vm/uma.h> #include <vm/uma_int.h> #include <machine/md_var.h> /* * Associated with each page of user-allocatable memory is a * page structure. */ struct vm_domain vm_dom[MAXMEMDOM]; struct mtx_padalign vm_page_queue_free_mtx; struct mtx_padalign pa_lock[PA_LOCK_COUNT]; /* * bogus page -- for I/O to/from partially complete buffers, * or for paging into sparsely invalid regions. */ vm_page_t bogus_page; vm_page_t vm_page_array; long vm_page_array_size; long first_page; static int boot_pages = UMA_BOOT_PAGES; SYSCTL_INT(_vm, OID_AUTO, boot_pages, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &boot_pages, 0, "number of pages allocated for bootstrapping the VM system"); static int pa_tryrelock_restart; SYSCTL_INT(_vm, OID_AUTO, tryrelock_restart, CTLFLAG_RD, &pa_tryrelock_restart, 0, "Number of tryrelock restarts"); static TAILQ_HEAD(, vm_page) blacklist_head; static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_page_blacklist, "A", "Blacklist pages"); /* Is the page daemon waiting for free pages? */ static int vm_pageout_pages_needed; static uma_zone_t fakepg_zone; static void vm_page_alloc_check(vm_page_t m); static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); static void vm_page_enqueue(uint8_t queue, vm_page_t m); static void vm_page_free_wakeup(void); static void vm_page_init(void *dummy); static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred); static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred); static int vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run, vm_paddr_t high); SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init, NULL); static void vm_page_init(void *dummy) { fakepg_zone = uma_zcreate("fakepg", sizeof(struct vm_page), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM); bogus_page = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED); } /* Make sure that u_long is at least 64 bits when PAGE_SIZE is 32K. */ #if PAGE_SIZE == 32768 #ifdef CTASSERT CTASSERT(sizeof(u_long) >= 8); #endif #endif /* * Try to acquire a physical address lock while a pmap is locked. If we * fail to trylock we unlock and lock the pmap directly and cache the * locked pa in *locked. The caller should then restart their loop in case * the virtual to physical mapping has changed.
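 *
 * A caller's loop might be structured as follows (sketch only; the
 * pmap_extract() lookup and the "locked_pa" local are hypothetical):
 *
 *	locked_pa = 0;
 *	retry:
 *		pa = pmap_extract(pmap, va);
 *		if (vm_page_pa_tryrelock(pmap, pa, &locked_pa))
 *			goto retry;
 *		... use PHYS_TO_VM_PAGE(pa) while the PA lock is held ...
 *	PA_UNLOCK_COND(locked_pa);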
*/ int vm_page_pa_tryrelock(pmap_t pmap, vm_paddr_t pa, vm_paddr_t *locked) { vm_paddr_t lockpa; lockpa = *locked; *locked = pa; if (lockpa) { PA_LOCK_ASSERT(lockpa, MA_OWNED); if (PA_LOCKPTR(pa) == PA_LOCKPTR(lockpa)) return (0); PA_UNLOCK(lockpa); } if (PA_TRYLOCK(pa)) return (0); PMAP_UNLOCK(pmap); atomic_add_int(&pa_tryrelock_restart, 1); PA_LOCK(pa); PMAP_LOCK(pmap); return (EAGAIN); } /* * vm_set_page_size: * * Sets the page size, perhaps based upon the memory * size. Must be called before any use of page-size * dependent functions. */ void vm_set_page_size(void) { if (vm_cnt.v_page_size == 0) vm_cnt.v_page_size = PAGE_SIZE; if (((vm_cnt.v_page_size - 1) & vm_cnt.v_page_size) != 0) panic("vm_set_page_size: page size not a power of two"); } /* * vm_page_blacklist_next: * * Find the next entry in the provided string of blacklist * addresses. Entries are separated by space, comma, or newline. * If an invalid integer is encountered then the rest of the * string is skipped. Updates the list pointer to the next * character, or NULL if the string is exhausted or invalid. */ static vm_paddr_t vm_page_blacklist_next(char **list, char *end) { vm_paddr_t bad; char *cp, *pos; if (list == NULL || *list == NULL) return (0); if (**list =='\0') { *list = NULL; return (0); } /* * If there's no end pointer then the buffer is coming from * the kenv and we know it's null-terminated. */ if (end == NULL) end = *list + strlen(*list); /* Ensure that strtoq() won't walk off the end */ if (*end != '\0') { if (*end == '\n' || *end == ' ' || *end == ',') *end = '\0'; else { printf("Blacklist not terminated, skipping\n"); *list = NULL; return (0); } } for (pos = *list; *pos != '\0'; pos = cp) { bad = strtoq(pos, &cp, 0); if (*cp == '\0' || *cp == ' ' || *cp == ',' || *cp == '\n') { if (bad == 0) { if (++cp < end) continue; else break; } } else break; if (*cp == '\0' || ++cp >= end) *list = NULL; else *list = cp; return (trunc_page(bad)); } printf("Garbage in RAM blacklist, skipping\n"); *list = NULL; return (0); } /* * vm_page_blacklist_check: * * Iterate through the provided string of blacklist addresses, pulling * each entry out of the physical allocator free list and putting it * onto a list for reporting via the vm.page_blacklist sysctl. */ static void vm_page_blacklist_check(char *list, char *end) { vm_paddr_t pa; vm_page_t m; char *next; int ret; next = list; while (next != NULL) { if ((pa = vm_page_blacklist_next(&next, end)) == 0) continue; m = vm_phys_paddr_to_vm_page(pa); if (m == NULL) continue; mtx_lock(&vm_page_queue_free_mtx); ret = vm_phys_unfree_page(m); mtx_unlock(&vm_page_queue_free_mtx); if (ret == TRUE) { TAILQ_INSERT_TAIL(&blacklist_head, m, listq); if (bootverbose) printf("Skipping page with pa 0x%jx\n", (uintmax_t)pa); } } } /* * vm_page_blacklist_load: * * Search for a special module named "ram_blacklist". It'll be a * plain text file provided by the user via the loader directive * of the same name. 
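 *
 * For example (illustrative), /boot/loader.conf could provide the file
 * through the loader's generic module-loading knobs:
 *
 *	ram_blacklist_load="YES"
 *	ram_blacklist_name="/boot/blacklist.txt"
 *	ram_blacklist_type="ram_blacklist"
 *
 * where the file holds physical addresses separated by spaces, commas,
 * or newlines, in any base that strtoq() accepts, e.g.
 * "0x20000000,0x20200000".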
*/ static void vm_page_blacklist_load(char **list, char **end) { void *mod; u_char *ptr; u_int len; mod = NULL; ptr = NULL; mod = preload_search_by_type("ram_blacklist"); if (mod != NULL) { ptr = preload_fetch_addr(mod); len = preload_fetch_size(mod); } *list = ptr; if (ptr != NULL) *end = ptr + len; else *end = NULL; return; } static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS) { vm_page_t m; struct sbuf sbuf; int error, first; first = 1; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf_new_for_sysctl(&sbuf, NULL, 128, req); TAILQ_FOREACH(m, &blacklist_head, listq) { sbuf_printf(&sbuf, "%s%#jx", first ? "" : ",", (uintmax_t)m->phys_addr); first = 0; } error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); } static void vm_page_domain_init(struct vm_domain *vmd) { struct vm_pagequeue *pq; int i; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_name) = "vm inactive pagequeue"; *__DECONST(u_int **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_vcnt) = &vm_cnt.v_inactive_count; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_name) = "vm active pagequeue"; *__DECONST(u_int **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_vcnt) = &vm_cnt.v_active_count; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_name) = "vm laundry pagequeue"; *__DECONST(int **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_vcnt) = &vm_cnt.v_laundry_count; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_name) = "vm unswappable pagequeue"; /* Unswappable dirty pages are counted as being in the laundry. */ *__DECONST(int **, &vmd->vmd_pagequeues[PQ_UNSWAPPABLE].pq_vcnt) = &vm_cnt.v_laundry_count; vmd->vmd_page_count = 0; vmd->vmd_free_count = 0; vmd->vmd_segs = 0; vmd->vmd_oom = FALSE; for (i = 0; i < PQ_COUNT; i++) { pq = &vmd->vmd_pagequeues[i]; TAILQ_INIT(&pq->pq_pl); mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue", MTX_DEF | MTX_DUPOK); } } /* * vm_page_startup: * - * Initializes the resident memory module. - * - * Allocates memory for the page cells, and - * for the object/offset-to-page hash table headers. - * Each page cell is initialized and placed on the free list. + * Initializes the resident memory module. Allocates physical memory for + * bootstrapping UMA and some data structures that are used to manage + * physical pages. Initializes these structures, and populates the free + * page queues. 
*/ vm_offset_t vm_page_startup(vm_offset_t vaddr) { vm_offset_t mapped; - vm_paddr_t page_range; + vm_paddr_t high_avail, low_avail, page_range, size; vm_paddr_t new_end; int i; vm_paddr_t pa; vm_paddr_t last_pa; char *list, *listend; vm_paddr_t end; vm_paddr_t biggestsize; - vm_paddr_t low_water, high_water; int biggestone; int pages_per_zone; biggestsize = 0; biggestone = 0; vaddr = round_page(vaddr); for (i = 0; phys_avail[i + 1]; i += 2) { phys_avail[i] = round_page(phys_avail[i]); phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); } - - low_water = phys_avail[0]; - high_water = phys_avail[1]; - - for (i = 0; i < vm_phys_nsegs; i++) { - if (vm_phys_segs[i].start < low_water) - low_water = vm_phys_segs[i].start; - if (vm_phys_segs[i].end > high_water) - high_water = vm_phys_segs[i].end; - } for (i = 0; phys_avail[i + 1]; i += 2) { - vm_paddr_t size = phys_avail[i + 1] - phys_avail[i]; - + size = phys_avail[i + 1] - phys_avail[i]; if (size > biggestsize) { biggestone = i; biggestsize = size; } - if (phys_avail[i] < low_water) - low_water = phys_avail[i]; - if (phys_avail[i + 1] > high_water) - high_water = phys_avail[i + 1]; } end = phys_avail[biggestone+1]; /* * Initialize the page and queue locks. */ mtx_init(&vm_page_queue_free_mtx, "vm page free queue", NULL, MTX_DEF); for (i = 0; i < PA_LOCK_COUNT; i++) mtx_init(&pa_lock[i], "vm page", NULL, MTX_DEF); for (i = 0; i < vm_ndomains; i++) vm_page_domain_init(&vm_dom[i]); /* - * Almost all of the pages needed for boot strapping UMA are used + * Almost all of the pages needed for bootstrapping UMA are used * for zone structures, so if the number of CPUs results in those * structures taking more than one page each, we set aside more pages * in proportion to the zone structure size. */ pages_per_zone = howmany(sizeof(struct uma_zone) + sizeof(struct uma_cache) * (mp_maxid + 1), UMA_SLAB_SIZE); if (pages_per_zone > 1) { /* Reserve more pages so that we don't run out. */ boot_pages = UMA_BOOT_PAGES_ZONES * pages_per_zone; } /* * Allocate memory for use when boot strapping the kernel memory * allocator. * * CTFLAG_RDTUN doesn't work during the early boot process, so we must * manually fetch the value. */ TUNABLE_INT_FETCH("vm.boot_pages", &boot_pages); new_end = end - (boot_pages * UMA_SLAB_SIZE); new_end = trunc_page(new_end); mapped = pmap_map(&vaddr, new_end, end, VM_PROT_READ | VM_PROT_WRITE); bzero((void *)mapped, end - new_end); uma_startup((void *)mapped, boot_pages); #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \ defined(__i386__) || defined(__mips__) /* * Allocate a bitmap to indicate that a random physical page * needs to be included in a minidump. * * The amd64 port needs this to indicate which direct map pages * need to be dumped, via calls to dump_add_page()/dump_drop_page(). * * However, i386 still needs this workspace internally within the * minidump code. In theory, they are not needed on i386, but are * included should the sf_buf code decide to use them. 
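 *
 * Sizing example (illustrative): with 4 KiB pages and a highest
 * dump_avail address of 4 GiB, page_range below is 2^20 pages, so the
 * bitmap occupies roundup2(2^20, NBBY) / NBBY bytes, i.e. 128 KiB after
 * round_page().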
*/ last_pa = 0; for (i = 0; dump_avail[i + 1] != 0; i += 2) if (dump_avail[i + 1] > last_pa) last_pa = dump_avail[i + 1]; page_range = last_pa / PAGE_SIZE; vm_page_dump_size = round_page(roundup2(page_range, NBBY) / NBBY); new_end -= vm_page_dump_size; vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end, new_end + vm_page_dump_size, VM_PROT_READ | VM_PROT_WRITE); bzero((void *)vm_page_dump, vm_page_dump_size); #endif +#if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) + /* + * Include the UMA bootstrap pages and vm_page_dump in a crash dump. + * When pmap_map() uses the direct map, they are not automatically + * included. + */ + for (pa = new_end; pa < end; pa += PAGE_SIZE) + dump_add_page(pa); +#endif + phys_avail[biggestone + 1] = new_end; #ifdef __amd64__ /* * Request that the physical pages underlying the message buffer be * included in a crash dump. Since the message buffer is accessed * through the direct map, they are not automatically included. */ pa = DMAP_TO_PHYS((vm_offset_t)msgbufp->msg_ptr); last_pa = pa + round_page(msgbufsize); while (pa < last_pa) { dump_add_page(pa); pa += PAGE_SIZE; } #endif /* * Compute the number of pages of memory that will be available for - * use (taking into account the overhead of a page structure per - * page). + * use, taking into account the overhead of a page structure per page. + * In other words, solve + * "available physical memory" - round_page(page_range * + * sizeof(struct vm_page)) = page_range * PAGE_SIZE + * for page_range. */ - first_page = low_water / PAGE_SIZE; -#ifdef VM_PHYSSEG_SPARSE - page_range = 0; + low_avail = phys_avail[0]; + high_avail = phys_avail[1]; for (i = 0; i < vm_phys_nsegs; i++) { - page_range += atop(vm_phys_segs[i].end - - vm_phys_segs[i].start); + if (vm_phys_segs[i].start < low_avail) + low_avail = vm_phys_segs[i].start; + if (vm_phys_segs[i].end > high_avail) + high_avail = vm_phys_segs[i].end; } + /* Skip the first chunk. It is already accounted for. */ + for (i = 2; phys_avail[i + 1] != 0; i += 2) { + if (phys_avail[i] < low_avail) + low_avail = phys_avail[i]; + if (phys_avail[i + 1] > high_avail) + high_avail = phys_avail[i + 1]; + } + first_page = low_avail / PAGE_SIZE; +#ifdef VM_PHYSSEG_SPARSE + size = 0; + for (i = 0; i < vm_phys_nsegs; i++) + size += vm_phys_segs[i].end - vm_phys_segs[i].start; for (i = 0; phys_avail[i + 1] != 0; i += 2) - page_range += atop(phys_avail[i + 1] - phys_avail[i]); + size += phys_avail[i + 1] - phys_avail[i]; + page_range = size / (PAGE_SIZE + sizeof(struct vm_page)); #elif defined(VM_PHYSSEG_DENSE) - page_range = high_water / PAGE_SIZE - first_page; + /* + * In the VM_PHYSSEG_DENSE case, the number of pages can account for + * the overhead of a page structure per page only if vm_page_array is + * allocated from the last physical memory chunk. Otherwise, we must + * allocate page structures representing the physical memory + * underlying vm_page_array, even though they will not be used. + */ + if (new_end == high_avail) + page_range = (high_avail - low_avail) / (PAGE_SIZE + + sizeof(struct vm_page)); + else + page_range = high_avail / PAGE_SIZE - first_page; #else #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined." #endif end = new_end; /* * Reserve an unmapped guard page to trap access to vm_page_array[-1]. + * However, because this page is allocated from KVM, out-of-bounds + * accesses using the direct map will not be trapped. 
*/ vaddr += PAGE_SIZE; /* - * Initialize the mem entry structures now, and put them in the free - * queue. + * Allocate physical memory for the page structures, and map it. */ new_end = trunc_page(end - page_range * sizeof(struct vm_page)); mapped = pmap_map(&vaddr, new_end, end, VM_PROT_READ | VM_PROT_WRITE); vm_page_array = (vm_page_t) mapped; #if VM_NRESERVLEVEL > 0 /* - * Allocate memory for the reservation management system's data - * structures. + * Allocate physical memory for the reservation management system's + * data structures, and map it. */ - new_end = vm_reserv_startup(&vaddr, new_end, high_water); + if (high_avail == end) + high_avail = new_end; + new_end = vm_reserv_startup(&vaddr, new_end, high_avail); #endif #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) /* - * pmap_map on arm64, amd64, and mips can come out of the direct-map, - * not kvm like i386, so the pages must be tracked for a crashdump to - * include this data. This includes the vm_page_array and the early - * UMA bootstrap pages. + * Include vm_page_array and vm_reserv_array in a crash dump. */ - for (pa = new_end; pa < phys_avail[biggestone + 1]; pa += PAGE_SIZE) + for (pa = new_end; pa < end; pa += PAGE_SIZE) dump_add_page(pa); #endif phys_avail[biggestone + 1] = new_end; /* * Add physical memory segments corresponding to the available * physical pages. */ for (i = 0; phys_avail[i + 1] != 0; i += 2) vm_phys_add_seg(phys_avail[i], phys_avail[i + 1]); /* * Clear all of the page structures */ bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page)); for (i = 0; i < page_range; i++) vm_page_array[i].order = VM_NFREEORDER; vm_page_array_size = page_range; /* * Initialize the physical memory allocator. */ vm_phys_init(); /* * Add every available physical page that is not blacklisted to * the free lists. */ vm_cnt.v_page_count = 0; vm_cnt.v_free_count = 0; for (i = 0; phys_avail[i + 1] != 0; i += 2) { pa = phys_avail[i]; last_pa = phys_avail[i + 1]; while (pa < last_pa) { vm_phys_add_page(pa); pa += PAGE_SIZE; } } TAILQ_INIT(&blacklist_head); vm_page_blacklist_load(&list, &listend); vm_page_blacklist_check(list, listend); list = kern_getenv("vm.blacklist"); vm_page_blacklist_check(list, NULL); freeenv(list); #if VM_NRESERVLEVEL > 0 /* * Initialize the reservation management system. */ vm_reserv_init(); #endif return (vaddr); } void vm_page_reference(vm_page_t m) { vm_page_aflag_set(m, PGA_REFERENCED); } /* * vm_page_busy_downgrade: * * Downgrade an exclusive busy page into a single shared busy page. */ void vm_page_busy_downgrade(vm_page_t m) { u_int x; bool locked; vm_page_assert_xbusied(m); locked = mtx_owned(vm_page_lockptr(m)); for (;;) { x = m->busy_lock; x &= VPB_BIT_WAITERS; if (x != 0 && !locked) vm_page_lock(m); if (atomic_cmpset_rel_int(&m->busy_lock, VPB_SINGLE_EXCLUSIVER | x, VPB_SHARERS_WORD(1))) break; if (x != 0 && !locked) vm_page_unlock(m); } if (x != 0) { wakeup(m); if (!locked) vm_page_unlock(m); } } /* * vm_page_sbusied: * * Return a positive value if the page is shared busied, 0 otherwise. */ int vm_page_sbusied(vm_page_t m) { u_int x; x = m->busy_lock; return ((x & VPB_BIT_SHARED) != 0 && x != VPB_UNBUSIED); } /* * vm_page_sunbusy: * * Shared unbusy a page. 
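 *
 * A shared-busy section is typically bracketed as follows (sketch):
 *
 *	if (vm_page_trysbusy(m)) {
 *		... read the page's contents ...
 *		vm_page_sunbusy(m);
 *	}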
*/ void vm_page_sunbusy(vm_page_t m) { u_int x; vm_page_assert_sbusied(m); for (;;) { x = m->busy_lock; if (VPB_SHARERS(x) > 1) { if (atomic_cmpset_int(&m->busy_lock, x, x - VPB_ONE_SHARER)) break; continue; } if ((x & VPB_BIT_WAITERS) == 0) { KASSERT(x == VPB_SHARERS_WORD(1), ("vm_page_sunbusy: invalid lock state")); if (atomic_cmpset_int(&m->busy_lock, VPB_SHARERS_WORD(1), VPB_UNBUSIED)) break; continue; } KASSERT(x == (VPB_SHARERS_WORD(1) | VPB_BIT_WAITERS), ("vm_page_sunbusy: invalid lock state for waiters")); vm_page_lock(m); if (!atomic_cmpset_int(&m->busy_lock, x, VPB_UNBUSIED)) { vm_page_unlock(m); continue; } wakeup(m); vm_page_unlock(m); break; } } /* * vm_page_busy_sleep: * * Sleep and release the page lock, using the page pointer as wchan. * This is used to implement the hard-path of busying mechanism. * * The given page must be locked. * * If nonshared is true, sleep only if the page is xbusy. */ void vm_page_busy_sleep(vm_page_t m, const char *wmesg, bool nonshared) { u_int x; vm_page_assert_locked(m); x = m->busy_lock; if (x == VPB_UNBUSIED || (nonshared && (x & VPB_BIT_SHARED) != 0) || ((x & VPB_BIT_WAITERS) == 0 && !atomic_cmpset_int(&m->busy_lock, x, x | VPB_BIT_WAITERS))) { vm_page_unlock(m); return; } msleep(m, vm_page_lockptr(m), PVM | PDROP, wmesg, 0); } /* * vm_page_trysbusy: * * Try to shared busy a page. * If the operation succeeds 1 is returned otherwise 0. * The operation never sleeps. */ int vm_page_trysbusy(vm_page_t m) { u_int x; for (;;) { x = m->busy_lock; if ((x & VPB_BIT_SHARED) == 0) return (0); if (atomic_cmpset_acq_int(&m->busy_lock, x, x + VPB_ONE_SHARER)) return (1); } } static void vm_page_xunbusy_locked(vm_page_t m) { vm_page_assert_xbusied(m); vm_page_assert_locked(m); atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED); /* There is a waiter, do wakeup() instead of vm_page_flash(). */ wakeup(m); } void vm_page_xunbusy_maybelocked(vm_page_t m) { bool lockacq; vm_page_assert_xbusied(m); /* * Fast path for unbusy. If it succeeds, we know that there * are no waiters, so we do not need a wakeup. */ if (atomic_cmpset_rel_int(&m->busy_lock, VPB_SINGLE_EXCLUSIVER, VPB_UNBUSIED)) return; lockacq = !mtx_owned(vm_page_lockptr(m)); if (lockacq) vm_page_lock(m); vm_page_xunbusy_locked(m); if (lockacq) vm_page_unlock(m); } /* * vm_page_xunbusy_hard: * * Called after the first try the exclusive unbusy of a page failed. * It is assumed that the waiters bit is on. */ void vm_page_xunbusy_hard(vm_page_t m) { vm_page_assert_xbusied(m); vm_page_lock(m); vm_page_xunbusy_locked(m); vm_page_unlock(m); } /* * vm_page_flash: * * Wakeup anyone waiting for the page. * The ownership bits do not change. * * The given page must be locked. */ void vm_page_flash(vm_page_t m) { u_int x; vm_page_lock_assert(m, MA_OWNED); for (;;) { x = m->busy_lock; if ((x & VPB_BIT_WAITERS) == 0) return; if (atomic_cmpset_int(&m->busy_lock, x, x & (~VPB_BIT_WAITERS))) break; } wakeup(m); } /* * Keep page from being freed by the page daemon * much of the same effect as wiring, except much lower * overhead and should be used only for *very* temporary * holding ("wiring"). 
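 *
 * A typical bracketed use looks like this (sketch; both calls require
 * the page lock):
 *
 *	vm_page_lock(m);
 *	vm_page_hold(m);
 *	vm_page_unlock(m);
 *	... short-lived access to the page ...
 *	vm_page_lock(m);
 *	vm_page_unhold(m);
 *	vm_page_unlock(m);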
*/ void vm_page_hold(vm_page_t mem) { vm_page_lock_assert(mem, MA_OWNED); mem->hold_count++; } void vm_page_unhold(vm_page_t mem) { vm_page_lock_assert(mem, MA_OWNED); KASSERT(mem->hold_count >= 1, ("vm_page_unhold: hold count < 0!!!")); --mem->hold_count; if (mem->hold_count == 0 && (mem->flags & PG_UNHOLDFREE) != 0) vm_page_free_toq(mem); } /* * vm_page_unhold_pages: * * Unhold each of the pages that is referenced by the given array. */ void vm_page_unhold_pages(vm_page_t *ma, int count) { struct mtx *mtx, *new_mtx; mtx = NULL; for (; count != 0; count--) { /* * Avoid releasing and reacquiring the same page lock. */ new_mtx = vm_page_lockptr(*ma); if (mtx != new_mtx) { if (mtx != NULL) mtx_unlock(mtx); mtx = new_mtx; mtx_lock(mtx); } vm_page_unhold(*ma); ma++; } if (mtx != NULL) mtx_unlock(mtx); } vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa) { vm_page_t m; #ifdef VM_PHYSSEG_SPARSE m = vm_phys_paddr_to_vm_page(pa); if (m == NULL) m = vm_phys_fictitious_to_vm_page(pa); return (m); #elif defined(VM_PHYSSEG_DENSE) long pi; pi = atop(pa); if (pi >= first_page && (pi - first_page) < vm_page_array_size) { m = &vm_page_array[pi - first_page]; return (m); } return (vm_phys_fictitious_to_vm_page(pa)); #else #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined." #endif } /* * vm_page_getfake: * * Create a fictitious page with the specified physical address and * memory attribute. The memory attribute is the only machine- * dependent aspect of a fictitious page that must be initialized. */ vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr) { vm_page_t m; m = uma_zalloc(fakepg_zone, M_WAITOK | M_ZERO); vm_page_initfake(m, paddr, memattr); return (m); } void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr) { if ((m->flags & PG_FICTITIOUS) != 0) { /* * The page's memattr might have changed since the * previous initialization. Update the pmap to the * new memattr. */ goto memattr; } m->phys_addr = paddr; m->queue = PQ_NONE; /* Fictitious pages don't use "segind". */ m->flags = PG_FICTITIOUS; /* Fictitious pages don't use "order" or "pool". */ m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_SINGLE_EXCLUSIVER; m->wire_count = 1; pmap_page_init(m); memattr: pmap_page_set_memattr(m, memattr); } /* * vm_page_putfake: * * Release a fictitious page. */ void vm_page_putfake(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) != 0, ("managed %p", m)); KASSERT((m->flags & PG_FICTITIOUS) != 0, ("vm_page_putfake: bad page %p", m)); uma_zfree(fakepg_zone, m); } /* * vm_page_updatefake: * * Update the given fictitious page to the specified physical address and * memory attribute. */ void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr) { KASSERT((m->flags & PG_FICTITIOUS) != 0, ("vm_page_updatefake: bad page %p", m)); m->phys_addr = paddr; pmap_page_set_memattr(m, memattr); } /* * vm_page_free: * * Free a page. */ void vm_page_free(vm_page_t m) { m->flags &= ~PG_ZERO; vm_page_free_toq(m); } /* * vm_page_free_zero: * * Free a page to the zeroed-pages queue. */ void vm_page_free_zero(vm_page_t m) { m->flags |= PG_ZERO; vm_page_free_toq(m); } /* * Unbusy and handle the page queueing for a page from a getpages request that * was optionally read ahead or behind. */ void vm_page_readahead_finish(vm_page_t m) { /* We shouldn't put invalid pages on queues. */ KASSERT(m->valid != 0, ("%s: %p is invalid", __func__, m)); /* * Since the page is not the one actually needed, whether it should * be activated or deactivated is not obvious.
Empirical results * have shown that deactivating the page is usually the best choice, * unless the page is wanted by another thread. */ vm_page_lock(m); if ((m->busy_lock & VPB_BIT_WAITERS) != 0) vm_page_activate(m); else vm_page_deactivate(m); vm_page_unlock(m); vm_page_xunbusy(m); } /* * vm_page_sleep_if_busy: * * Sleep and release the page queues lock if the page is busied. * Returns TRUE if the thread slept. * * The given page must be unlocked and object containing it must * be locked. */ int vm_page_sleep_if_busy(vm_page_t m, const char *msg) { vm_object_t obj; vm_page_lock_assert(m, MA_NOTOWNED); VM_OBJECT_ASSERT_WLOCKED(m->object); if (vm_page_busied(m)) { /* * The page-specific object must be cached because page * identity can change during the sleep, causing the * re-lock of a different object. * It is assumed that a reference to the object is already * held by the callers. */ obj = m->object; vm_page_lock(m); VM_OBJECT_WUNLOCK(obj); vm_page_busy_sleep(m, msg, false); VM_OBJECT_WLOCK(obj); return (TRUE); } return (FALSE); } /* * vm_page_dirty_KBI: [ internal use only ] * * Set all bits in the page's dirty field. * * The object containing the specified page must be locked if the * call is made from the machine-independent layer. * * See vm_page_clear_dirty_mask(). * * This function should only be called by vm_page_dirty(). */ void vm_page_dirty_KBI(vm_page_t m) { /* Refer to this operation by its public name. */ KASSERT(m->valid == VM_PAGE_BITS_ALL, ("vm_page_dirty: page is invalid!")); m->dirty = VM_PAGE_BITS_ALL; } /* * vm_page_insert: [ internal use only ] * * Inserts the given mem entry into the object and object list. * * The object must be locked. */ int vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) { vm_page_t mpred; VM_OBJECT_ASSERT_WLOCKED(object); mpred = vm_radix_lookup_le(&object->rtree, pindex); return (vm_page_insert_after(m, object, pindex, mpred)); } /* * vm_page_insert_after: * * Inserts the page "m" into the specified object at offset "pindex". * * The page "mpred" must immediately precede the offset "pindex" within * the specified object. * * The object must be locked. */ static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred) { vm_page_t msucc; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(m->object == NULL, ("vm_page_insert_after: page already inserted")); if (mpred != NULL) { KASSERT(mpred->object == object, ("vm_page_insert_after: object doesn't contain mpred")); KASSERT(mpred->pindex < pindex, ("vm_page_insert_after: mpred doesn't precede pindex")); msucc = TAILQ_NEXT(mpred, listq); } else msucc = TAILQ_FIRST(&object->memq); if (msucc != NULL) KASSERT(msucc->pindex > pindex, ("vm_page_insert_after: msucc doesn't succeed pindex")); /* * Record the object/offset pair in this page */ m->object = object; m->pindex = pindex; /* * Now link into the object's ordered list of backed pages. */ if (vm_radix_insert(&object->rtree, m)) { m->object = NULL; m->pindex = 0; return (1); } vm_page_insert_radixdone(m, object, mpred); return (0); } /* * vm_page_insert_radixdone: * * Complete page "m" insertion into the specified object after the * radix trie hooking. * * The page "mpred" must precede the offset "m->pindex" within the * specified object. * * The object must be locked. 
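 *
 * In other words, the expected calling sequence is (sketch):
 *
 *	if (vm_radix_insert(&object->rtree, m) == 0)
 *		vm_page_insert_radixdone(m, object, mpred);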
*/ static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object != NULL && m->object == object, ("vm_page_insert_radixdone: page %p has inconsistent object", m)); if (mpred != NULL) { KASSERT(mpred->object == object, ("vm_page_insert_after: object doesn't contain mpred")); KASSERT(mpred->pindex < m->pindex, ("vm_page_insert_after: mpred doesn't precede pindex")); } if (mpred != NULL) TAILQ_INSERT_AFTER(&object->memq, mpred, m, listq); else TAILQ_INSERT_HEAD(&object->memq, m, listq); /* * Show that the object has one more resident page. */ object->resident_page_count++; /* * Hold the vnode until the last page is released. */ if (object->resident_page_count == 1 && object->type == OBJT_VNODE) vhold(object->handle); /* * Since we are inserting a new and possibly dirty page, * update the object's OBJ_MIGHTBEDIRTY flag. */ if (pmap_page_is_write_mapped(m)) vm_object_set_writeable_dirty(object); } /* * vm_page_remove: * * Removes the specified page from its containing object, but does not * invalidate any backing storage. * * The object must be locked. The page must be locked if it is managed. */ void vm_page_remove(vm_page_t m) { vm_object_t object; vm_page_t mrem; if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_assert_locked(m); if ((object = m->object) == NULL) return; VM_OBJECT_ASSERT_WLOCKED(object); if (vm_page_xbusied(m)) vm_page_xunbusy_maybelocked(m); mrem = vm_radix_remove(&object->rtree, m->pindex); KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m)); /* * Now remove from the object's list of backed pages. */ TAILQ_REMOVE(&object->memq, m, listq); /* * And show that the object has one fewer resident page. */ object->resident_page_count--; /* * The vnode may now be recycled. */ if (object->resident_page_count == 0 && object->type == OBJT_VNODE) vdrop(object->handle); m->object = NULL; } /* * vm_page_lookup: * * Returns the page associated with the object/offset * pair specified; if none is found, NULL is returned. * * The object must be locked. */ vm_page_t vm_page_lookup(vm_object_t object, vm_pindex_t pindex) { VM_OBJECT_ASSERT_LOCKED(object); return (vm_radix_lookup(&object->rtree, pindex)); } /* * vm_page_find_least: * * Returns the page associated with the object with least pindex * greater than or equal to the parameter pindex, or NULL. * * The object must be locked. */ vm_page_t vm_page_find_least(vm_object_t object, vm_pindex_t pindex) { vm_page_t m; VM_OBJECT_ASSERT_LOCKED(object); if ((m = TAILQ_FIRST(&object->memq)) != NULL && m->pindex < pindex) m = vm_radix_lookup_ge(&object->rtree, pindex); return (m); } /* * Returns the given page's successor (by pindex) within the object if it is * resident; if none is found, NULL is returned. * * The object must be locked. */ vm_page_t vm_page_next(vm_page_t m) { vm_page_t next; VM_OBJECT_ASSERT_LOCKED(m->object); if ((next = TAILQ_NEXT(m, listq)) != NULL) { MPASS(next->object == m->object); if (next->pindex != m->pindex + 1) next = NULL; } return (next); } /* * Returns the given page's predecessor (by pindex) within the object if it is * resident; if none is found, NULL is returned. * * The object must be locked. 
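 *
 * Together with vm_page_next(), this allows short walks over runs of
 * consecutive resident pages, e.g. (sketch):
 *
 *	for (m = vm_page_lookup(object, pindex); m != NULL;
 *	    m = vm_page_next(m))
 *		... visit consecutive resident pages ...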
*/ vm_page_t vm_page_prev(vm_page_t m) { vm_page_t prev; VM_OBJECT_ASSERT_LOCKED(m->object); if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL) { MPASS(prev->object == m->object); if (prev->pindex != m->pindex - 1) prev = NULL; } return (prev); } /* * Uses the page mnew as a replacement for an existing page at index * pindex which must be already present in the object. * * The existing page must not be on a paging queue. */ vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex) { vm_page_t mold; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(mnew->object == NULL, ("vm_page_replace: page already in object")); /* * This function mostly follows vm_page_insert() and * vm_page_remove() without the radix, object count and vnode * dance. Refer to those functions for more detailed comments. */ mnew->object = object; mnew->pindex = pindex; mold = vm_radix_replace(&object->rtree, mnew); KASSERT(mold->queue == PQ_NONE, ("vm_page_replace: mold is on a paging queue")); /* Keep the resident page list in sorted order. */ TAILQ_INSERT_AFTER(&object->memq, mold, mnew, listq); TAILQ_REMOVE(&object->memq, mold, listq); mold->object = NULL; vm_page_xunbusy_maybelocked(mold); /* * The object's resident_page_count does not change because we have * swapped one page for another, but OBJ_MIGHTBEDIRTY may need to be * set for the new page. */ if (pmap_page_is_write_mapped(mnew)) vm_object_set_writeable_dirty(object); return (mold); } /* * vm_page_rename: * * Move the given memory entry from its * current object to the specified target object/offset. * * Note: swap associated with the page must be invalidated by the move. We * have to do this for several reasons: (1) we aren't freeing the * page, (2) we are dirtying the page, (3) the VM system is probably * moving the page from object A to B, and will then later move * the backing store from A to B and we can't have a conflict. * * Note: we *always* dirty the page. It is necessary both for the * fact that we moved it, and because we may be invalidating * swap. * * The objects must be locked. */ int vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) { vm_page_t mpred; vm_pindex_t opidx; VM_OBJECT_ASSERT_WLOCKED(new_object); mpred = vm_radix_lookup_le(&new_object->rtree, new_pindex); KASSERT(mpred == NULL || mpred->pindex != new_pindex, ("vm_page_rename: pindex already renamed")); /* * Create a custom version of vm_page_insert() which does not depend * on m_prev and can cheat on the implementation aspects of the * function. */ opidx = m->pindex; m->pindex = new_pindex; if (vm_radix_insert(&new_object->rtree, m)) { m->pindex = opidx; return (1); } /* * From this point on, the operation cannot fail. The removal must * happen before the listq iterator is tainted. */ m->pindex = opidx; vm_page_lock(m); vm_page_remove(m); /* Return to the new pindex to complete vm_page_insert(). */ m->pindex = new_pindex; m->object = new_object; vm_page_unlock(m); vm_page_insert_radixdone(m, new_object, mpred); vm_page_dirty(m); return (0); } /* * vm_page_alloc: * * Allocate and return a page that is associated with the specified * object and offset pair. By default, this page is exclusive busied. * * The caller must always specify an allocation class.
* * allocation classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs a page * VM_ALLOC_INTERRUPT interrupt time request * * optional allocation flags: * VM_ALLOC_COUNT(number) the number of additional pages that the caller * intends to allocate * VM_ALLOC_NOBUSY do not exclusive busy the page * VM_ALLOC_NODUMP do not include the page in a kernel core dump * VM_ALLOC_NOOBJ page is not associated with an object and * should not be exclusive busy * VM_ALLOC_SBUSY shared busy the allocated page * VM_ALLOC_WIRED wire the allocated page * VM_ALLOC_ZERO prefer a zeroed page * * This routine may not sleep. */ vm_page_t vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) { vm_page_t m, mpred; int flags, req_class; mpred = NULL; /* XXX: pacify gcc */ KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && (object != NULL || (req & VM_ALLOC_SBUSY) == 0) && ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) != (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)), ("vm_page_alloc: inconsistent object(%p)/req(%x)", object, req)); if (object != NULL) VM_OBJECT_ASSERT_WLOCKED(object); req_class = req & VM_ALLOC_CLASS_MASK; /* * The page daemon is allowed to dig deeper into the free page list. */ if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) req_class = VM_ALLOC_SYSTEM; if (object != NULL) { mpred = vm_radix_lookup_le(&object->rtree, pindex); KASSERT(mpred == NULL || mpred->pindex != pindex, ("vm_page_alloc: pindex already allocated")); } /* * Allocate a page if the number of free pages exceeds the minimum * for the request class. */ mtx_lock(&vm_page_queue_free_mtx); if (vm_cnt.v_free_count > vm_cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM && vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT && vm_cnt.v_free_count > 0)) { /* * Can we allocate the page from a reservation? */ #if VM_NRESERVLEVEL > 0 if (object == NULL || (object->flags & (OBJ_COLORED | OBJ_FICTITIOUS)) != OBJ_COLORED || (m = vm_reserv_alloc_page(object, pindex, mpred)) == NULL) #endif { /* * If not, allocate it from the free page queues. */ m = vm_phys_alloc_pages(object != NULL ? VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0); #if VM_NRESERVLEVEL > 0 if (m == NULL && vm_reserv_reclaim_inactive()) { m = vm_phys_alloc_pages(object != NULL ? VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0); } #endif } } else { /* * Not allocatable, give up. */ mtx_unlock(&vm_page_queue_free_mtx); atomic_add_int(&vm_pageout_deficit, max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1)); pagedaemon_wakeup(); return (NULL); } /* * At this point we had better have found a good page. */ KASSERT(m != NULL, ("vm_page_alloc: missing page")); vm_phys_freecnt_adj(m, -1); mtx_unlock(&vm_page_queue_free_mtx); vm_page_alloc_check(m); /* * Initialize the page. Only the PG_ZERO flag is inherited. */ flags = 0; if ((req & VM_ALLOC_ZERO) != 0) flags = PG_ZERO; flags &= m->flags; if ((req & VM_ALLOC_NODUMP) != 0) flags |= PG_NODUMP; m->flags = flags; m->aflags = 0; m->oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ? VPO_UNMANAGED : 0; m->busy_lock = VPB_UNBUSIED; if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0) m->busy_lock = VPB_SINGLE_EXCLUSIVER; if ((req & VM_ALLOC_SBUSY) != 0) m->busy_lock = VPB_SHARERS_WORD(1); if (req & VM_ALLOC_WIRED) { /* * The page lock is not required for wiring a page until that * page is inserted into the object. 
*/ atomic_add_int(&vm_cnt.v_wire_count, 1); m->wire_count = 1; } m->act_count = 0; if (object != NULL) { if (vm_page_insert_after(m, object, pindex, mpred)) { pagedaemon_wakeup(); if (req & VM_ALLOC_WIRED) { atomic_subtract_int(&vm_cnt.v_wire_count, 1); m->wire_count = 0; } KASSERT(m->object == NULL, ("page %p has object", m)); m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_UNBUSIED; /* Don't change PG_ZERO. */ vm_page_free_toq(m); return (NULL); } /* Ignore device objects; the pager sets "memattr" for them. */ if (object->memattr != VM_MEMATTR_DEFAULT && (object->flags & OBJ_FICTITIOUS) == 0) pmap_page_set_memattr(m, object->memattr); } else m->pindex = pindex; /* * Don't wakeup too often - wakeup the pageout daemon when * we would be nearly out of memory. */ if (vm_paging_needed()) pagedaemon_wakeup(); return (m); } /* * vm_page_alloc_contig: * * Allocate a contiguous set of physical pages of the given size "npages" * from the free lists. All of the physical pages must be at or above * the given physical address "low" and below the given physical address * "high". The given value "alignment" determines the alignment of the * first physical page in the set. If the given value "boundary" is * non-zero, then the set of physical pages cannot cross any physical * address boundary that is a multiple of that value. Both "alignment" * and "boundary" must be a power of two. * * If the specified memory attribute, "memattr", is VM_MEMATTR_DEFAULT, * then the memory attribute setting for the physical pages is configured * to the object's memory attribute setting. Otherwise, the memory * attribute setting for the physical pages is configured to "memattr", * overriding the object's memory attribute setting. However, if the * object's memory attribute setting is not VM_MEMATTR_DEFAULT, then the * memory attribute setting for the physical pages cannot be configured * to VM_MEMATTR_DEFAULT. * * The specified object may not contain fictitious pages. * * The caller must always specify an allocation class. * * allocation classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs a page * VM_ALLOC_INTERRUPT interrupt time request * * optional allocation flags: * VM_ALLOC_NOBUSY do not exclusive busy the page * VM_ALLOC_NODUMP do not include the page in a kernel core dump * VM_ALLOC_NOOBJ page is not associated with an object and * should not be exclusive busy * VM_ALLOC_SBUSY shared busy the allocated page * VM_ALLOC_WIRED wire the allocated page * VM_ALLOC_ZERO prefer a zeroed page * * This routine may not sleep. */ vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr) { vm_page_t m, m_ret, mpred; u_int busy_lock, flags, oflags; int req_class; mpred = NULL; /* XXX: pacify gcc */ KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && (object != NULL || (req & VM_ALLOC_SBUSY) == 0) && ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) != (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)), ("vm_page_alloc_contig: inconsistent object(%p)/req(%x)", object, req)); if (object != NULL) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_FICTITIOUS) == 0, ("vm_page_alloc_contig: object %p has fictitious pages", object)); } KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero")); req_class = req & VM_ALLOC_CLASS_MASK; /* * The page daemon is allowed to dig deeper into the free page list. 
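 *
 * Concretely, the checks below admit the allocation only when:
 *
 *	VM_ALLOC_NORMAL:    v_free_count >= npages + v_free_reserved
 *	VM_ALLOC_SYSTEM:    v_free_count >= npages + v_interrupt_free_min
 *	VM_ALLOC_INTERRUPT: v_free_count >= npages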
*/ if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) req_class = VM_ALLOC_SYSTEM; if (object != NULL) { mpred = vm_radix_lookup_le(&object->rtree, pindex); KASSERT(mpred == NULL || mpred->pindex != pindex, ("vm_page_alloc_contig: pindex already allocated")); } /* * Can we allocate the pages without the number of free pages falling * below the lower bound for the allocation class? */ mtx_lock(&vm_page_queue_free_mtx); if (vm_cnt.v_free_count >= npages + vm_cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM && vm_cnt.v_free_count >= npages + vm_cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT && vm_cnt.v_free_count >= npages)) { /* * Can we allocate the pages from a reservation? */ #if VM_NRESERVLEVEL > 0 retry: if (object == NULL || (object->flags & OBJ_COLORED) == 0 || (m_ret = vm_reserv_alloc_contig(object, pindex, npages, low, high, alignment, boundary, mpred)) == NULL) #endif /* * If not, allocate them from the free page queues. */ m_ret = vm_phys_alloc_contig(npages, low, high, alignment, boundary); } else { mtx_unlock(&vm_page_queue_free_mtx); atomic_add_int(&vm_pageout_deficit, npages); pagedaemon_wakeup(); return (NULL); } if (m_ret != NULL) vm_phys_freecnt_adj(m_ret, -npages); else { #if VM_NRESERVLEVEL > 0 if (vm_reserv_reclaim_contig(npages, low, high, alignment, boundary)) goto retry; #endif } mtx_unlock(&vm_page_queue_free_mtx); if (m_ret == NULL) return (NULL); for (m = m_ret; m < &m_ret[npages]; m++) vm_page_alloc_check(m); /* * Initialize the pages. Only the PG_ZERO flag is inherited. */ flags = 0; if ((req & VM_ALLOC_ZERO) != 0) flags = PG_ZERO; if ((req & VM_ALLOC_NODUMP) != 0) flags |= PG_NODUMP; oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ? VPO_UNMANAGED : 0; busy_lock = VPB_UNBUSIED; if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0) busy_lock = VPB_SINGLE_EXCLUSIVER; if ((req & VM_ALLOC_SBUSY) != 0) busy_lock = VPB_SHARERS_WORD(1); if ((req & VM_ALLOC_WIRED) != 0) atomic_add_int(&vm_cnt.v_wire_count, npages); if (object != NULL) { if (object->memattr != VM_MEMATTR_DEFAULT && memattr == VM_MEMATTR_DEFAULT) memattr = object->memattr; } for (m = m_ret; m < &m_ret[npages]; m++) { m->aflags = 0; m->flags = (m->flags | PG_NODUMP) & flags; m->busy_lock = busy_lock; if ((req & VM_ALLOC_WIRED) != 0) m->wire_count = 1; m->act_count = 0; m->oflags = oflags; if (object != NULL) { if (vm_page_insert_after(m, object, pindex, mpred)) { pagedaemon_wakeup(); if ((req & VM_ALLOC_WIRED) != 0) atomic_subtract_int( &vm_cnt.v_wire_count, npages); KASSERT(m->object == NULL, ("page %p has object", m)); mpred = m; for (m = m_ret; m < &m_ret[npages]; m++) { if (m <= mpred && (req & VM_ALLOC_WIRED) != 0) m->wire_count = 0; m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_UNBUSIED; /* Don't change PG_ZERO. */ vm_page_free_toq(m); } return (NULL); } mpred = m; } else m->pindex = pindex; if (memattr != VM_MEMATTR_DEFAULT) pmap_page_set_memattr(m, memattr); pindex++; } if (vm_paging_needed()) pagedaemon_wakeup(); return (m_ret); } /* * Check a page that has been freshly dequeued from a freelist. 
*/ static void vm_page_alloc_check(vm_page_t m) { KASSERT(m->object == NULL, ("page %p has object", m)); KASSERT(m->queue == PQ_NONE, ("page %p has unexpected queue %d", m, m->queue)); KASSERT(m->wire_count == 0, ("page %p is wired", m)); KASSERT(m->hold_count == 0, ("page %p is held", m)); KASSERT(!vm_page_busied(m), ("page %p is busy", m)); KASSERT(m->dirty == 0, ("page %p is dirty", m)); KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("page %p has unexpected memattr %d", m, pmap_page_get_memattr(m))); KASSERT(m->valid == 0, ("free page %p is valid", m)); } /* * vm_page_alloc_freelist: * * Allocate a physical page from the specified free page list. * * The caller must always specify an allocation class. * * allocation classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs a page * VM_ALLOC_INTERRUPT interrupt time request * * optional allocation flags: * VM_ALLOC_COUNT(number) the number of additional pages that the caller * intends to allocate * VM_ALLOC_WIRED wire the allocated page * VM_ALLOC_ZERO prefer a zeroed page * * This routine may not sleep. */ vm_page_t vm_page_alloc_freelist(int flind, int req) { vm_page_t m; u_int flags; int req_class; req_class = req & VM_ALLOC_CLASS_MASK; /* * The page daemon is allowed to dig deeper into the free page list. */ if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) req_class = VM_ALLOC_SYSTEM; /* * Do not allocate reserved pages unless the req has asked for it. */ mtx_lock(&vm_page_queue_free_mtx); if (vm_cnt.v_free_count > vm_cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM && vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT && vm_cnt.v_free_count > 0)) m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, 0); else { mtx_unlock(&vm_page_queue_free_mtx); atomic_add_int(&vm_pageout_deficit, max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1)); pagedaemon_wakeup(); return (NULL); } if (m == NULL) { mtx_unlock(&vm_page_queue_free_mtx); return (NULL); } vm_phys_freecnt_adj(m, -1); mtx_unlock(&vm_page_queue_free_mtx); vm_page_alloc_check(m); /* * Initialize the page. Only the PG_ZERO flag is inherited. */ m->aflags = 0; flags = 0; if ((req & VM_ALLOC_ZERO) != 0) flags = PG_ZERO; m->flags &= flags; if ((req & VM_ALLOC_WIRED) != 0) { /* * The page lock is not required for wiring a page that does * not belong to an object. */ atomic_add_int(&vm_cnt.v_wire_count, 1); m->wire_count = 1; } /* Unmanaged pages don't use "act_count". */ m->oflags = VPO_UNMANAGED; if (vm_paging_needed()) pagedaemon_wakeup(); return (m); } #define VPSC_ANY 0 /* No restrictions. */ #define VPSC_NORESERV 1 /* Skip reservations; implies VPSC_NOSUPER. */ #define VPSC_NOSUPER 2 /* Skip superpages. */ /* * vm_page_scan_contig: * * Scan vm_page_array[] between the specified entries "m_start" and * "m_end" for a run of contiguous physical pages that satisfy the * specified conditions, and return the lowest page in the run. The * specified "alignment" determines the alignment of the lowest physical * page in the run. If the specified "boundary" is non-zero, then the * run of physical pages cannot span a physical address that is a * multiple of "boundary". * * "m_end" is never dereferenced, so it need not point to a vm_page * structure within vm_page_array[]. * * "npages" must be greater than zero. "m_start" and "m_end" must not * span a hole (or discontiguity) in the physical address space. Both * "alignment" and "boundary" must be a power of two. 
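 *
 * Worked example (illustrative, 4 KiB pages): with npages = 8,
 * alignment = ptoa(4), and boundary = ptoa(16), a run starting at
 * physical address 0x3c000 passes the alignment test (0x3c000 is a
 * multiple of 0x4000) but fails the boundary test, because
 * [0x3c000, 0x44000) crosses the boundary multiple 0x40000.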
*/ vm_page_t vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end, u_long alignment, vm_paddr_t boundary, int options) { struct mtx *m_mtx, *new_mtx; vm_object_t object; vm_paddr_t pa; vm_page_t m, m_run; #if VM_NRESERVLEVEL > 0 int level; #endif int m_inc, order, run_ext, run_len; KASSERT(npages > 0, ("npages is 0")); KASSERT(powerof2(alignment), ("alignment is not a power of 2")); KASSERT(powerof2(boundary), ("boundary is not a power of 2")); m_run = NULL; run_len = 0; m_mtx = NULL; for (m = m_start; m < m_end && run_len < npages; m += m_inc) { KASSERT((m->flags & (PG_FICTITIOUS | PG_MARKER)) == 0, ("page %p is PG_FICTITIOUS or PG_MARKER", m)); /* * If the current page would be the start of a run, check its * physical address against the end, alignment, and boundary * conditions. If it doesn't satisfy these conditions, either * terminate the scan or advance to the next page that * satisfies the failed condition. */ if (run_len == 0) { KASSERT(m_run == NULL, ("m_run != NULL")); if (m + npages > m_end) break; pa = VM_PAGE_TO_PHYS(m); if ((pa & (alignment - 1)) != 0) { m_inc = atop(roundup2(pa, alignment) - pa); continue; } if (rounddown2(pa ^ (pa + ptoa(npages) - 1), boundary) != 0) { m_inc = atop(roundup2(pa, boundary) - pa); continue; } } else KASSERT(m_run != NULL, ("m_run == NULL")); /* * Avoid releasing and reacquiring the same page lock. */ new_mtx = vm_page_lockptr(m); if (m_mtx != new_mtx) { if (m_mtx != NULL) mtx_unlock(m_mtx); m_mtx = new_mtx; mtx_lock(m_mtx); } m_inc = 1; retry: if (m->wire_count != 0 || m->hold_count != 0) run_ext = 0; #if VM_NRESERVLEVEL > 0 else if ((level = vm_reserv_level(m)) >= 0 && (options & VPSC_NORESERV) != 0) { run_ext = 0; /* Advance to the end of the reservation. */ pa = VM_PAGE_TO_PHYS(m); m_inc = atop(roundup2(pa + 1, vm_reserv_size(level)) - pa); } #endif else if ((object = m->object) != NULL) { /* * The page is considered eligible for relocation if * and only if it could be laundered or reclaimed by * the page daemon. */ if (!VM_OBJECT_TRYRLOCK(object)) { mtx_unlock(m_mtx); VM_OBJECT_RLOCK(object); mtx_lock(m_mtx); if (m->object != object) { /* * The page may have been freed. */ VM_OBJECT_RUNLOCK(object); goto retry; } else if (m->wire_count != 0 || m->hold_count != 0) { run_ext = 0; goto unlock; } } KASSERT((m->flags & PG_UNHOLDFREE) == 0, ("page %p is PG_UNHOLDFREE", m)); /* Don't care: PG_NODUMP, PG_ZERO. */ if (object->type != OBJT_DEFAULT && object->type != OBJT_SWAP && object->type != OBJT_VNODE) { run_ext = 0; #if VM_NRESERVLEVEL > 0 } else if ((options & VPSC_NOSUPER) != 0 && (level = vm_reserv_level_iffullpop(m)) >= 0) { run_ext = 0; /* Advance to the end of the superpage. */ pa = VM_PAGE_TO_PHYS(m); m_inc = atop(roundup2(pa + 1, vm_reserv_size(level)) - pa); #endif } else if (object->memattr == VM_MEMATTR_DEFAULT && m->queue != PQ_NONE && !vm_page_busied(m)) { /* * The page is allocated but eligible for * relocation. Extend the current run by one * page. */ KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("page %p has an unexpected memattr", m)); KASSERT((m->oflags & (VPO_SWAPINPROG | VPO_SWAPSLEEP | VPO_UNMANAGED)) == 0, ("page %p has unexpected oflags", m)); /* Don't care: VPO_NOSYNC. */ run_ext = 1; } else run_ext = 0; unlock: VM_OBJECT_RUNLOCK(object); #if VM_NRESERVLEVEL > 0 } else if (level >= 0) { /* * The page is reserved but not yet allocated. In * other words, it is still free. Extend the current * run by one page. 
*/ run_ext = 1; #endif } else if ((order = m->order) < VM_NFREEORDER) { /* * The page is enqueued in the physical memory * allocator's free page queues. Moreover, it is the * first page in a power-of-two-sized run of * contiguous free pages. Add these pages to the end * of the current run, and jump ahead. */ run_ext = 1 << order; m_inc = 1 << order; } else { /* * Skip the page for one of the following reasons: (1) * It is enqueued in the physical memory allocator's * free page queues. However, it is not the first * page in a run of contiguous free pages. (This case * rarely occurs because the scan is performed in * ascending order.) (2) It is not reserved, and it is * transitioning from free to allocated. (Conversely, * the transition from allocated to free for managed * pages is blocked by the page lock.) (3) It is * allocated but not contained by an object and not * wired, e.g., allocated by Xen's balloon driver. */ run_ext = 0; } /* * Extend or reset the current run of pages. */ if (run_ext > 0) { if (run_len == 0) m_run = m; run_len += run_ext; } else { if (run_len > 0) { m_run = NULL; run_len = 0; } } } if (m_mtx != NULL) mtx_unlock(m_mtx); if (run_len >= npages) return (m_run); return (NULL); } /* * vm_page_reclaim_run: * * Try to relocate each of the allocated virtual pages within the * specified run of physical pages to a new physical address. Free the * physical pages underlying the relocated virtual pages. A virtual page * is relocatable if and only if it could be laundered or reclaimed by * the page daemon. Whenever possible, a virtual page is relocated to a * physical address above "high". * * Returns 0 if every physical page within the run was already free or * just freed by a successful relocation. Otherwise, returns a non-zero * value indicating why the last attempt to relocate a virtual page was * unsuccessful. * * "req_class" must be an allocation class. */ static int vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run, vm_paddr_t high) { struct mtx *m_mtx, *new_mtx; struct spglist free; vm_object_t object; vm_paddr_t pa; vm_page_t m, m_end, m_new; int error, order, req; KASSERT((req_class & VM_ALLOC_CLASS_MASK) == req_class, ("req_class is not an allocation class")); SLIST_INIT(&free); error = 0; m = m_run; m_end = m_run + npages; m_mtx = NULL; for (; error == 0 && m < m_end; m++) { KASSERT((m->flags & (PG_FICTITIOUS | PG_MARKER)) == 0, ("page %p is PG_FICTITIOUS or PG_MARKER", m)); /* * Avoid releasing and reacquiring the same page lock. */ new_mtx = vm_page_lockptr(m); if (m_mtx != new_mtx) { if (m_mtx != NULL) mtx_unlock(m_mtx); m_mtx = new_mtx; mtx_lock(m_mtx); } retry: if (m->wire_count != 0 || m->hold_count != 0) error = EBUSY; else if ((object = m->object) != NULL) { /* * The page is relocated if and only if it could be * laundered or reclaimed by the page daemon. */ if (!VM_OBJECT_TRYWLOCK(object)) { mtx_unlock(m_mtx); VM_OBJECT_WLOCK(object); mtx_lock(m_mtx); if (m->object != object) { /* * The page may have been freed. */ VM_OBJECT_WUNLOCK(object); goto retry; } else if (m->wire_count != 0 || m->hold_count != 0) { error = EBUSY; goto unlock; } } KASSERT((m->flags & PG_UNHOLDFREE) == 0, ("page %p is PG_UNHOLDFREE", m)); /* Don't care: PG_NODUMP, PG_ZERO. 
*/ if (object->type != OBJT_DEFAULT && object->type != OBJT_SWAP && object->type != OBJT_VNODE) error = EINVAL; else if (object->memattr != VM_MEMATTR_DEFAULT) error = EINVAL; else if (m->queue != PQ_NONE && !vm_page_busied(m)) { KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("page %p has an unexpected memattr", m)); KASSERT((m->oflags & (VPO_SWAPINPROG | VPO_SWAPSLEEP | VPO_UNMANAGED)) == 0, ("page %p has unexpected oflags", m)); /* Don't care: VPO_NOSYNC. */ if (m->valid != 0) { /* * First, try to allocate a new page * that is above "high". Failing * that, try to allocate a new page * that is below "m_run". Allocate * the new page between the end of * "m_run" and "high" only as a last * resort. */ req = req_class | VM_ALLOC_NOOBJ; if ((m->flags & PG_NODUMP) != 0) req |= VM_ALLOC_NODUMP; if (trunc_page(high) != ~(vm_paddr_t)PAGE_MASK) { m_new = vm_page_alloc_contig( NULL, 0, req, 1, round_page(high), ~(vm_paddr_t)0, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); } else m_new = NULL; if (m_new == NULL) { pa = VM_PAGE_TO_PHYS(m_run); m_new = vm_page_alloc_contig( NULL, 0, req, 1, 0, pa - 1, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); } if (m_new == NULL) { pa += ptoa(npages); m_new = vm_page_alloc_contig( NULL, 0, req, 1, pa, high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); } if (m_new == NULL) { error = ENOMEM; goto unlock; } KASSERT(m_new->wire_count == 0, ("page %p is wired", m_new)); /* * Replace "m" with the new page. For * vm_page_replace(), "m" must be busy * and dequeued. Finally, change "m" * as if vm_page_free() had been called. */ if (object->ref_count != 0) pmap_remove_all(m); m_new->aflags = m->aflags; KASSERT(m_new->oflags == VPO_UNMANAGED, ("page %p is managed", m_new)); m_new->oflags = m->oflags & VPO_NOSYNC; pmap_copy_page(m, m_new); m_new->valid = m->valid; m_new->dirty = m->dirty; m->flags &= ~PG_ZERO; vm_page_xbusy(m); vm_page_remque(m); vm_page_replace_checked(m_new, object, m->pindex, m); m->valid = 0; vm_page_undirty(m); /* * The new page must be deactivated * before the object is unlocked. */ new_mtx = vm_page_lockptr(m_new); if (m_mtx != new_mtx) { mtx_unlock(m_mtx); m_mtx = new_mtx; mtx_lock(m_mtx); } vm_page_deactivate(m_new); } else { m->flags &= ~PG_ZERO; vm_page_remque(m); vm_page_remove(m); KASSERT(m->dirty == 0, ("page %p is dirty", m)); } SLIST_INSERT_HEAD(&free, m, plinks.s.ss); } else error = EBUSY; unlock: VM_OBJECT_WUNLOCK(object); } else { mtx_lock(&vm_page_queue_free_mtx); order = m->order; if (order < VM_NFREEORDER) { /* * The page is enqueued in the physical memory * allocator's free page queues. Moreover, it * is the first page in a power-of-two-sized * run of contiguous free pages. Jump ahead * to the last page within that run, and * continue from there.
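 * For example, a page heading an order-3 run covers 8 pages, so m is
 * advanced by 7 here and the loop increment supplies the eighth.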
*/ m += (1 << order) - 1; } #if VM_NRESERVLEVEL > 0 else if (vm_reserv_is_page_free(m)) order = 0; #endif mtx_unlock(&vm_page_queue_free_mtx); if (order == VM_NFREEORDER) error = EINVAL; } } if (m_mtx != NULL) mtx_unlock(m_mtx); if ((m = SLIST_FIRST(&free)) != NULL) { mtx_lock(&vm_page_queue_free_mtx); do { SLIST_REMOVE_HEAD(&free, plinks.s.ss); vm_phys_freecnt_adj(m, 1); #if VM_NRESERVLEVEL > 0 if (!vm_reserv_free_page(m)) #else if (true) #endif vm_phys_free_pages(m, 0); } while ((m = SLIST_FIRST(&free)) != NULL); vm_page_free_wakeup(); mtx_unlock(&vm_page_queue_free_mtx); } return (error); } #define NRUNS 16 CTASSERT(powerof2(NRUNS)); #define RUN_INDEX(count) ((count) & (NRUNS - 1)) #define MIN_RECLAIM 8 /* * vm_page_reclaim_contig: * * Reclaim allocated, contiguous physical memory satisfying the specified * conditions by relocating the virtual pages using that physical memory. * Returns true if reclamation is successful and false otherwise. Since * relocation requires the allocation of physical pages, reclamation may * fail due to a shortage of free pages. When reclamation fails, callers * are expected to perform VM_WAIT before retrying a failed allocation * operation, e.g., vm_page_alloc_contig(). * * The caller must always specify an allocation class through "req". * * allocation classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs a page * VM_ALLOC_INTERRUPT interrupt time request * * The optional allocation flags are ignored. * * "npages" must be greater than zero. Both "alignment" and "boundary" * must be a power of two. */ bool vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary) { vm_paddr_t curr_low; vm_page_t m_run, m_runs[NRUNS]; u_long count, reclaimed; int error, i, options, req_class; KASSERT(npages > 0, ("npages is 0")); KASSERT(powerof2(alignment), ("alignment is not a power of 2")); KASSERT(powerof2(boundary), ("boundary is not a power of 2")); req_class = req & VM_ALLOC_CLASS_MASK; /* * The page daemon is allowed to dig deeper into the free page list. */ if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) req_class = VM_ALLOC_SYSTEM; /* * Return if the number of free pages cannot satisfy the requested * allocation. */ count = vm_cnt.v_free_count; if (count < npages + vm_cnt.v_free_reserved || (count < npages + vm_cnt.v_interrupt_free_min && req_class == VM_ALLOC_SYSTEM) || (count < npages && req_class == VM_ALLOC_INTERRUPT)) return (false); /* * Scan up to three times, relaxing the restrictions ("options") on * the reclamation of reservations and superpages each time. */ for (options = VPSC_NORESERV;;) { /* * Find the highest runs that satisfy the given constraints * and restrictions, and record them in "m_runs". */ curr_low = low; count = 0; for (;;) { m_run = vm_phys_scan_contig(npages, curr_low, high, alignment, boundary, options); if (m_run == NULL) break; curr_low = VM_PAGE_TO_PHYS(m_run) + ptoa(npages); m_runs[RUN_INDEX(count)] = m_run; count++; } /* * Reclaim the highest runs in LIFO (descending) order until * the number of reclaimed pages, "reclaimed", is at least * MIN_RECLAIM. Reset "reclaimed" each time because each * reclamation is idempotent, and runs will (likely) recur * from one scan to the next as restrictions are relaxed. 
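 * (m_runs[] is a ring buffer indexed by RUN_INDEX(); because the scan
 * proceeds from low to high addresses, wrapping overwrites the lowest
 * runs and keeps the NRUNS highest ones.)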
*/ reclaimed = 0; for (i = 0; count > 0 && i < NRUNS; i++) { count--; m_run = m_runs[RUN_INDEX(count)]; error = vm_page_reclaim_run(req_class, npages, m_run, high); if (error == 0) { reclaimed += npages; if (reclaimed >= MIN_RECLAIM) return (true); } } /* * Either relax the restrictions on the next scan or return if * the last scan had no restrictions. */ if (options == VPSC_NORESERV) options = VPSC_NOSUPER; else if (options == VPSC_NOSUPER) options = VPSC_ANY; else if (options == VPSC_ANY) return (reclaimed != 0); } } /* * vm_wait: (also see VM_WAIT macro) * * Sleep until free pages are available for allocation. * - Called in various places before memory allocations. */ void vm_wait(void) { mtx_lock(&vm_page_queue_free_mtx); if (curproc == pageproc) { vm_pageout_pages_needed = 1; msleep(&vm_pageout_pages_needed, &vm_page_queue_free_mtx, PDROP | PSWP, "VMWait", 0); } else { if (__predict_false(pageproc == NULL)) panic("vm_wait in early boot"); if (!vm_pageout_wanted) { vm_pageout_wanted = true; wakeup(&vm_pageout_wanted); } vm_pages_needed = true; msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PVM, "vmwait", 0); } } /* * vm_waitpfault: (also see VM_WAITPFAULT macro) * * Sleep until free pages are available for allocation. * - Called only in vm_fault so that processes page faulting * can be easily tracked. * - Sleeps at a lower priority than vm_wait() so that vm_wait()ing * processes will be able to grab memory first. Do not change * this balance without careful testing first. */ void vm_waitpfault(void) { mtx_lock(&vm_page_queue_free_mtx); if (!vm_pageout_wanted) { vm_pageout_wanted = true; wakeup(&vm_pageout_wanted); } vm_pages_needed = true; msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PUSER, "pfault", 0); } struct vm_pagequeue * vm_page_pagequeue(vm_page_t m) { if (vm_page_in_laundry(m)) return (&vm_dom[0].vmd_pagequeues[m->queue]); else return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]); } /* * vm_page_dequeue: * * Remove the given page from its current page queue. * * The page must be locked. */ void vm_page_dequeue(vm_page_t m) { struct vm_pagequeue *pq; vm_page_assert_locked(m); KASSERT(m->queue < PQ_COUNT, ("vm_page_dequeue: page %p is not queued", m)); pq = vm_page_pagequeue(m); vm_pagequeue_lock(pq); m->queue = PQ_NONE; TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_dec(pq); vm_pagequeue_unlock(pq); } /* * vm_page_dequeue_locked: * * Remove the given page from its current page queue. * * The page and page queue must be locked. */ void vm_page_dequeue_locked(vm_page_t m) { struct vm_pagequeue *pq; vm_page_lock_assert(m, MA_OWNED); pq = vm_page_pagequeue(m); vm_pagequeue_assert_locked(pq); m->queue = PQ_NONE; TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_dec(pq); } /* * vm_page_enqueue: * * Add the given page to the specified page queue. * * The page must be locked. */ static void vm_page_enqueue(uint8_t queue, vm_page_t m) { struct vm_pagequeue *pq; vm_page_lock_assert(m, MA_OWNED); KASSERT(queue < PQ_COUNT, ("vm_page_enqueue: invalid queue %u request for page %p", queue, m)); if (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE) pq = &vm_dom[0].vmd_pagequeues[queue]; else pq = &vm_phys_domain(m)->vmd_pagequeues[queue]; vm_pagequeue_lock(pq); m->queue = queue; TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_inc(pq); vm_pagequeue_unlock(pq); } /* * vm_page_requeue: * * Move the given page to the tail of its current page queue. * * The page must be locked. 
*/ void vm_page_requeue(vm_page_t m) { struct vm_pagequeue *pq; vm_page_lock_assert(m, MA_OWNED); KASSERT(m->queue != PQ_NONE, ("vm_page_requeue: page %p is not queued", m)); pq = vm_page_pagequeue(m); vm_pagequeue_lock(pq); TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); vm_pagequeue_unlock(pq); } /* * vm_page_requeue_locked: * * Move the given page to the tail of its current page queue. * * The page queue must be locked. */ void vm_page_requeue_locked(vm_page_t m) { struct vm_pagequeue *pq; KASSERT(m->queue != PQ_NONE, ("vm_page_requeue_locked: page %p is not queued", m)); pq = vm_page_pagequeue(m); vm_pagequeue_assert_locked(pq); TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); } /* * vm_page_activate: * * Put the specified page on the active list (if appropriate). * Ensure that act_count is at least ACT_INIT but do not otherwise * mess with it. * * The page must be locked. */ void vm_page_activate(vm_page_t m) { int queue; vm_page_lock_assert(m, MA_OWNED); if ((queue = m->queue) != PQ_ACTIVE) { if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) { if (m->act_count < ACT_INIT) m->act_count = ACT_INIT; if (queue != PQ_NONE) vm_page_dequeue(m); vm_page_enqueue(PQ_ACTIVE, m); } else KASSERT(queue == PQ_NONE, ("vm_page_activate: wired page %p is queued", m)); } else { if (m->act_count < ACT_INIT) m->act_count = ACT_INIT; } } /* * vm_page_free_wakeup: * * Helper routine for vm_page_free_toq(). This routine is called * when a page is added to the free queues. * * The page queues must be locked. */ static inline void vm_page_free_wakeup(void) { mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); /* * If the pageout daemon needs pages, then tell it that there are * some free. */ if (vm_pageout_pages_needed && vm_cnt.v_free_count >= vm_cnt.v_pageout_free_min) { wakeup(&vm_pageout_pages_needed); vm_pageout_pages_needed = 0; } /* * Wake up processes that are waiting on memory if we hit a * high water mark, and wake up the scheduler process if we have * lots of memory; that process will swap in other processes. */ if (vm_pages_needed && !vm_page_count_min()) { vm_pages_needed = false; wakeup(&vm_cnt.v_free_count); } } /* * vm_page_free_toq: * * Returns the given page to the free list, * disassociating it from any VM object. * * The object must be locked. The page must be locked if it is managed. */ void vm_page_free_toq(vm_page_t m) { if ((m->oflags & VPO_UNMANAGED) == 0) { vm_page_lock_assert(m, MA_OWNED); KASSERT(!pmap_page_is_mapped(m), ("vm_page_free_toq: freeing mapped page %p", m)); } else KASSERT(m->queue == PQ_NONE, ("vm_page_free_toq: unmanaged page %p is queued", m)); PCPU_INC(cnt.v_tfree); if (vm_page_sbusied(m)) panic("vm_page_free: freeing busy page %p", m); /* * Unqueue, then remove page. Note that we cannot destroy * the page here because we do not want to call the pager's * callback routine until after we've put the page on the * appropriate free queue. */ vm_page_remque(m); vm_page_remove(m); /* * If the page is fictitious, remove the object association and * return; otherwise, delay removal of the object association. */ if ((m->flags & PG_FICTITIOUS) != 0) { return; } m->valid = 0; vm_page_undirty(m); if (m->wire_count != 0) panic("vm_page_free: freeing wired page %p", m); if (m->hold_count != 0) { m->flags &= ~PG_ZERO; KASSERT((m->flags & PG_UNHOLDFREE) == 0, ("vm_page_free: freeing PG_UNHOLDFREE page %p", m)); m->flags |= PG_UNHOLDFREE; } else { /* * Restore the default memory attribute to the page.
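 * Otherwise a later consumer of this page could inherit, e.g., an
 * uncacheable or write-combining attribute left by a previous mapping.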
*/ if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT) pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT); /* * Insert the page into the physical memory allocator's free * page queues. */ mtx_lock(&vm_page_queue_free_mtx); vm_phys_freecnt_adj(m, 1); #if VM_NRESERVLEVEL > 0 if (!vm_reserv_free_page(m)) #else if (TRUE) #endif vm_phys_free_pages(m, 0); vm_page_free_wakeup(); mtx_unlock(&vm_page_queue_free_mtx); } } /* * vm_page_wire: * * Mark this page as wired down by yet * another map, removing it from paging queues * as necessary. * * If the page is fictitious, then its wire count must remain one. * * The page must be locked. */ void vm_page_wire(vm_page_t m) { /* * Only bump the wire statistics if the page is not already wired, * and only unqueue the page if it is on some queue (if it is unmanaged * it is already off the queues). */ vm_page_lock_assert(m, MA_OWNED); if ((m->flags & PG_FICTITIOUS) != 0) { KASSERT(m->wire_count == 1, ("vm_page_wire: fictitious page %p's wire count isn't one", m)); return; } if (m->wire_count == 0) { KASSERT((m->oflags & VPO_UNMANAGED) == 0 || m->queue == PQ_NONE, ("vm_page_wire: unmanaged page %p is queued", m)); vm_page_remque(m); atomic_add_int(&vm_cnt.v_wire_count, 1); } m->wire_count++; KASSERT(m->wire_count != 0, ("vm_page_wire: wire_count overflow m=%p", m)); } /* * vm_page_unwire: * * Release one wiring of the specified page, potentially allowing it to be * paged out. Returns TRUE if the number of wirings transitions to zero and * FALSE otherwise. * * Only managed pages belonging to an object can be paged out. If the number * of wirings transitions to zero and the page is eligible for page out, then * the page is added to the specified paging queue (unless PQ_NONE is * specified). * * If a page is fictitious, then its wire count must always be one. * * A managed page must be locked. */ boolean_t vm_page_unwire(vm_page_t m, uint8_t queue) { KASSERT(queue < PQ_COUNT || queue == PQ_NONE, ("vm_page_unwire: invalid queue %u request for page %p", queue, m)); if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_assert_locked(m); if ((m->flags & PG_FICTITIOUS) != 0) { KASSERT(m->wire_count == 1, ("vm_page_unwire: fictitious page %p's wire count isn't one", m)); return (FALSE); } if (m->wire_count > 0) { m->wire_count--; if (m->wire_count == 0) { atomic_subtract_int(&vm_cnt.v_wire_count, 1); if ((m->oflags & VPO_UNMANAGED) == 0 && m->object != NULL && queue != PQ_NONE) vm_page_enqueue(queue, m); return (TRUE); } else return (FALSE); } else panic("vm_page_unwire: page %p's wire count is zero", m); } /* * Move the specified page to the inactive queue. * * Normally, "noreuse" is FALSE, resulting in LRU ordering of the inactive * queue. However, setting "noreuse" to TRUE will accelerate the specified * page's reclamation, but it will not unmap the page from any address space. * This is implemented by inserting the page near the head of the inactive * queue, using a marker page to guide FIFO insertion ordering. * * The page must be locked. */ static inline void _vm_page_deactivate(vm_page_t m, boolean_t noreuse) { struct vm_pagequeue *pq; int queue; vm_page_assert_locked(m); /* * Ignore if the page is already inactive, unless it is unlikely to be * reactivated. */ if ((queue = m->queue) == PQ_INACTIVE && !noreuse) return; if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) { pq = &vm_phys_domain(m)->vmd_pagequeues[PQ_INACTIVE]; /* Avoid multiple acquisitions of the inactive queue lock. 
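 * If the page is already inactive, its queue lock is taken once and
 * reused for the dequeue; otherwise the page is dequeued from its old
 * queue before the inactive queue lock is acquired.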
*/ if (queue == PQ_INACTIVE) { vm_pagequeue_lock(pq); vm_page_dequeue_locked(m); } else { if (queue != PQ_NONE) vm_page_dequeue(m); vm_pagequeue_lock(pq); } m->queue = PQ_INACTIVE; if (noreuse) TAILQ_INSERT_BEFORE(&vm_phys_domain(m)->vmd_inacthead, m, plinks.q); else TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_inc(pq); vm_pagequeue_unlock(pq); } } /* * Move the specified page to the inactive queue. * * The page must be locked. */ void vm_page_deactivate(vm_page_t m) { _vm_page_deactivate(m, FALSE); } /* * Move the specified page to the inactive queue with the expectation * that it is unlikely to be reused. * * The page must be locked. */ void vm_page_deactivate_noreuse(vm_page_t m) { _vm_page_deactivate(m, TRUE); } /* * vm_page_launder * * Put a page in the laundry. */ void vm_page_launder(vm_page_t m) { int queue; vm_page_assert_locked(m); if ((queue = m->queue) != PQ_LAUNDRY) { if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) { if (queue != PQ_NONE) vm_page_dequeue(m); vm_page_enqueue(PQ_LAUNDRY, m); } else KASSERT(queue == PQ_NONE, ("wired page %p is queued", m)); } } /* * vm_page_unswappable * * Put a page in the PQ_UNSWAPPABLE holding queue. */ void vm_page_unswappable(vm_page_t m) { vm_page_assert_locked(m); KASSERT(m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0, ("page %p already unswappable", m)); if (m->queue != PQ_NONE) vm_page_dequeue(m); vm_page_enqueue(PQ_UNSWAPPABLE, m); } /* * vm_page_try_to_free() * * Attempt to free the page. If we cannot free it, we do nothing. * 1 is returned on success, 0 on failure. */ int vm_page_try_to_free(vm_page_t m) { vm_page_lock_assert(m, MA_OWNED); if (m->object != NULL) VM_OBJECT_ASSERT_WLOCKED(m->object); if (m->dirty || m->hold_count || m->wire_count || (m->oflags & VPO_UNMANAGED) != 0 || vm_page_busied(m)) return (0); pmap_remove_all(m); if (m->dirty) return (0); vm_page_free(m); return (1); } /* * vm_page_advise * * Apply the specified advice to the given page. * * The object and page must be locked. */ void vm_page_advise(vm_page_t m, int advice) { vm_page_assert_locked(m); VM_OBJECT_ASSERT_WLOCKED(m->object); if (advice == MADV_FREE) /* * Mark the page clean. This will allow the page to be freed * without first paging it out. MADV_FREE pages are often * quickly reused by malloc(3), so we do not do anything that * would result in a page fault on a later access. */ vm_page_undirty(m); else if (advice != MADV_DONTNEED) { if (advice == MADV_WILLNEED) vm_page_activate(m); return; } /* * Clear any references to the page. Otherwise, the page daemon will * immediately reactivate the page. */ vm_page_aflag_clear(m, PGA_REFERENCED); if (advice != MADV_FREE && m->dirty == 0 && pmap_is_modified(m)) vm_page_dirty(m); /* * Place clean pages near the head of the inactive queue rather than * the tail, thus defeating the queue's LRU operation and ensuring that * the page will be reused quickly. Dirty pages not already in the * laundry are moved there. */ if (m->dirty == 0) vm_page_deactivate_noreuse(m); else vm_page_launder(m); } /* * Grab a page, waiting until we are woken up due to the page * changing state. We keep waiting as long as the page remains * in the object. If the page doesn't exist, first allocate it * and then conditionally zero it. * * This routine may sleep. * * The object must be locked on entry. The lock will, however, be released * and reacquired if the routine sleeps.
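 * For example, a caller that wants a wired, exclusive-busied page and
 * can sleep might use, e.g.:
 *	m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_WIRED);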
*/ vm_page_t vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags) { vm_page_t m; int sleep; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 || (allocflags & VM_ALLOC_IGN_SBUSY) != 0, ("vm_page_grab: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch")); retrylookup: if ((m = vm_page_lookup(object, pindex)) != NULL) { sleep = (allocflags & VM_ALLOC_IGN_SBUSY) != 0 ? vm_page_xbusied(m) : vm_page_busied(m); if (sleep) { if ((allocflags & VM_ALLOC_NOWAIT) != 0) return (NULL); /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. */ vm_page_aflag_set(m, PGA_REFERENCED); vm_page_lock(m); VM_OBJECT_WUNLOCK(object); vm_page_busy_sleep(m, "pgrbwt", (allocflags & VM_ALLOC_IGN_SBUSY) != 0); VM_OBJECT_WLOCK(object); goto retrylookup; } else { if ((allocflags & VM_ALLOC_WIRED) != 0) { vm_page_lock(m); vm_page_wire(m); vm_page_unlock(m); } if ((allocflags & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0) vm_page_xbusy(m); if ((allocflags & VM_ALLOC_SBUSY) != 0) vm_page_sbusy(m); return (m); } } m = vm_page_alloc(object, pindex, allocflags); if (m == NULL) { if ((allocflags & VM_ALLOC_NOWAIT) != 0) return (NULL); VM_OBJECT_WUNLOCK(object); VM_WAIT; VM_OBJECT_WLOCK(object); goto retrylookup; } if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0) pmap_zero_page(m); return (m); } /* * Mapping function for valid or dirty bits in a page. * * Inputs are required to range within a page. */ vm_page_bits_t vm_page_bits(int base, int size) { int first_bit; int last_bit; KASSERT( base + size <= PAGE_SIZE, ("vm_page_bits: illegal base/size %d/%d", base, size) ); if (size == 0) /* handle degenerate case */ return (0); first_bit = base >> DEV_BSHIFT; last_bit = (base + size - 1) >> DEV_BSHIFT; return (((vm_page_bits_t)2 << last_bit) - ((vm_page_bits_t)1 << first_bit)); } /* * vm_page_set_valid_range: * * Sets portions of a page valid. The arguments are expected * to be DEV_BSIZE aligned, but if they aren't, the bitmap is inclusive * of any partial chunks touched by the range. The invalid portion of * such chunks will be zeroed. * * (base + size) must be less than or equal to PAGE_SIZE. */ void vm_page_set_valid_range(vm_page_t m, int base, int size) { int endoff, frag; VM_OBJECT_ASSERT_WLOCKED(m->object); if (size == 0) /* handle degenerate case */ return; /* * If the base is not DEV_BSIZE aligned and the valid * bit is clear, we have to zero out a portion of the * first block. */ if ((frag = rounddown2(base, DEV_BSIZE)) != base && (m->valid & (1 << (base >> DEV_BSHIFT))) == 0) pmap_zero_page_area(m, frag, base - frag); /* * If the ending offset is not DEV_BSIZE aligned and the * valid bit is clear, we have to zero out a portion of * the last block. */ endoff = base + size; if ((frag = rounddown2(endoff, DEV_BSIZE)) != endoff && (m->valid & (1 << (endoff >> DEV_BSHIFT))) == 0) pmap_zero_page_area(m, endoff, DEV_BSIZE - (endoff & (DEV_BSIZE - 1))); /* * Assert that no previously invalid block that is now being validated * is already dirty. */ KASSERT((~m->valid & vm_page_bits(base, size) & m->dirty) == 0, ("vm_page_set_valid_range: page %p is dirty", m)); /* * Set valid bits inclusive of any overlap. */ m->valid |= vm_page_bits(base, size); } /* * Clear the given bits from the specified page's dirty field.
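 * The mask is typically computed by vm_page_bits(); e.g., with a
 * 512-byte DEV_BSIZE, base = 512 and size = 1024 give first_bit = 1
 * and last_bit = 2, producing the mask (2 << 2) - (1 << 1) == 0x6.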
*/ static __inline void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits) { uintptr_t addr; #if PAGE_SIZE < 16384 int shift; #endif /* * If the object is locked and the page is neither exclusive busy nor * write mapped, then the page's dirty field cannot possibly be * set by a concurrent pmap operation. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && !pmap_page_is_write_mapped(m)) m->dirty &= ~pagebits; else { /* * The pmap layer can call vm_page_dirty() without * holding a distinguished lock. The combination of * the object's lock and an atomic operation suffice * to guarantee consistency of the page dirty field. * * For the PAGE_SIZE == 32768 case, the compiler already * properly aligns the dirty field, so no forcible * alignment is needed. We only require the existence of * atomic_clear_64 when the page size is 32768. */ addr = (uintptr_t)&m->dirty; #if PAGE_SIZE == 32768 atomic_clear_64((uint64_t *)addr, pagebits); #elif PAGE_SIZE == 16384 atomic_clear_32((uint32_t *)addr, pagebits); #else /* PAGE_SIZE <= 8192 */ /* * Use a trick to perform a 32-bit atomic on the * containing aligned word, so as not to depend on the existence * of atomic_clear_{8, 16}. */ shift = addr & (sizeof(uint32_t) - 1); #if BYTE_ORDER == BIG_ENDIAN shift = (sizeof(uint32_t) - sizeof(m->dirty) - shift) * NBBY; #else shift *= NBBY; #endif addr &= ~(sizeof(uint32_t) - 1); atomic_clear_32((uint32_t *)addr, pagebits << shift); #endif /* PAGE_SIZE */ } } /* * vm_page_set_validclean: * * Sets portions of a page valid and clean. The arguments are expected * to be DEV_BSIZE aligned, but if they aren't, the bitmap is inclusive * of any partial chunks touched by the range. The invalid portion of * such chunks will be zeroed. * * (base + size) must be less than or equal to PAGE_SIZE. */ void vm_page_set_validclean(vm_page_t m, int base, int size) { vm_page_bits_t oldvalid, pagebits; int endoff, frag; VM_OBJECT_ASSERT_WLOCKED(m->object); if (size == 0) /* handle degenerate case */ return; /* * If the base is not DEV_BSIZE aligned and the valid * bit is clear, we have to zero out a portion of the * first block. */ if ((frag = rounddown2(base, DEV_BSIZE)) != base && (m->valid & ((vm_page_bits_t)1 << (base >> DEV_BSHIFT))) == 0) pmap_zero_page_area(m, frag, base - frag); /* * If the ending offset is not DEV_BSIZE aligned and the * valid bit is clear, we have to zero out a portion of * the last block. */ endoff = base + size; if ((frag = rounddown2(endoff, DEV_BSIZE)) != endoff && (m->valid & ((vm_page_bits_t)1 << (endoff >> DEV_BSHIFT))) == 0) pmap_zero_page_area(m, endoff, DEV_BSIZE - (endoff & (DEV_BSIZE - 1))); /* * Set valid, clear dirty bits. If validating the entire * page we can safely clear the pmap modify bit. We also * use this opportunity to clear the VPO_NOSYNC flag. If a process * takes a write fault on a MAP_NOSYNC memory area the flag will * be set again. * * We set valid bits inclusive of any overlap, but we can only * clear dirty bits for DEV_BSIZE chunks that are fully within * the range. */ oldvalid = m->valid; pagebits = vm_page_bits(base, size); m->valid |= pagebits; #if 0 /* NOT YET */ if ((frag = base & (DEV_BSIZE - 1)) != 0) { frag = DEV_BSIZE - frag; base += frag; size -= frag; if (size < 0) size = 0; } pagebits = vm_page_bits(base, size & (DEV_BSIZE - 1)); #endif if (base == 0 && size == PAGE_SIZE) { /* * The page can only be modified within the pmap if it is * mapped, and it can only be mapped if it was previously * fully valid.
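 * Hence the pmap_clear_modify() below is only needed when oldvalid
 * covered the entire page.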
*/ if (oldvalid == VM_PAGE_BITS_ALL) /* * Perform the pmap_clear_modify() first. Otherwise, * a concurrent pmap operation, such as * pmap_protect(), could clear a modification in the * pmap and set the dirty field on the page before * pmap_clear_modify() had begun and after the dirty * field was cleared here. */ pmap_clear_modify(m); m->dirty = 0; m->oflags &= ~VPO_NOSYNC; } else if (oldvalid != VM_PAGE_BITS_ALL) m->dirty &= ~pagebits; else vm_page_clear_dirty_mask(m, pagebits); } void vm_page_clear_dirty(vm_page_t m, int base, int size) { vm_page_clear_dirty_mask(m, vm_page_bits(base, size)); } /* * vm_page_set_invalid: * * Invalidates DEV_BSIZE'd chunks within a page. Both the * valid and dirty bits for the affected areas are cleared. */ void vm_page_set_invalid(vm_page_t m, int base, int size) { vm_page_bits_t bits; vm_object_t object; object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); if (object->type == OBJT_VNODE && base == 0 && IDX_TO_OFF(m->pindex) + size >= object->un_pager.vnp.vnp_size) bits = VM_PAGE_BITS_ALL; else bits = vm_page_bits(base, size); if (object->ref_count != 0 && m->valid == VM_PAGE_BITS_ALL && bits != 0) pmap_remove_all(m); KASSERT((bits == 0 && m->valid == VM_PAGE_BITS_ALL) || !pmap_page_is_mapped(m), ("vm_page_set_invalid: page %p is mapped", m)); m->valid &= ~bits; m->dirty &= ~bits; } /* * vm_page_zero_invalid() * * The kernel assumes that the invalid portions of a page contain * garbage, but such pages can be mapped into memory by user code. * When this occurs, we must zero out the non-valid portions of the * page so user code sees what it expects. * * Pages are most often semi-valid when the end of a file is mapped * into memory and the file's size is not page aligned. */ void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid) { int b; int i; VM_OBJECT_ASSERT_WLOCKED(m->object); /* * Scan the valid bits looking for invalid sections that * must be zeroed. Invalid sub-DEV_BSIZE'd areas (where the * valid bit may be set) have already been zeroed by * vm_page_set_validclean(). */ for (b = i = 0; i <= PAGE_SIZE / DEV_BSIZE; ++i) { if (i == (PAGE_SIZE / DEV_BSIZE) || (m->valid & ((vm_page_bits_t)1 << i))) { if (i > b) { pmap_zero_page_area(m, b << DEV_BSHIFT, (i - b) << DEV_BSHIFT); } b = i + 1; } } /* * setvalid is TRUE when we can safely set the zeroed areas * as being valid. We can do this if there are no cache consistency * issues; e.g., it is ok to do with UFS, but not ok to do with NFS. */ if (setvalid) m->valid = VM_PAGE_BITS_ALL; } /* * vm_page_is_valid: * * Is (partial) page valid? Note that the case where size == 0 * will return FALSE in the degenerate case where the page is * entirely invalid, and TRUE otherwise. */ int vm_page_is_valid(vm_page_t m, int base, int size) { vm_page_bits_t bits; VM_OBJECT_ASSERT_LOCKED(m->object); bits = vm_page_bits(base, size); return (m->valid != 0 && (m->valid & bits) == bits); } /* * vm_page_ps_is_valid: * * Returns TRUE if the entire (super)page is valid and FALSE otherwise. */ boolean_t vm_page_ps_is_valid(vm_page_t m) { int i, npages; VM_OBJECT_ASSERT_LOCKED(m->object); npages = atop(pagesizes[m->psind]); /* * The physically contiguous pages that make up a superpage, i.e., a * page with a page size index ("psind") greater than zero, will * occupy adjacent entries in vm_page_array[]. */ for (i = 0; i < npages; i++) { if (m[i].valid != VM_PAGE_BITS_ALL) return (FALSE); } return (TRUE); } /* * Set the page's dirty bits if the page is modified.
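 * In effect, this propagates the pmap-level modified bit into the
 * machine-independent dirty field via vm_page_dirty().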
*/ void vm_page_test_dirty(vm_page_t m) { VM_OBJECT_ASSERT_WLOCKED(m->object); if (m->dirty != VM_PAGE_BITS_ALL && pmap_is_modified(m)) vm_page_dirty(m); } void vm_page_lock_KBI(vm_page_t m, const char *file, int line) { mtx_lock_flags_(vm_page_lockptr(m), 0, file, line); } void vm_page_unlock_KBI(vm_page_t m, const char *file, int line) { mtx_unlock_flags_(vm_page_lockptr(m), 0, file, line); } int vm_page_trylock_KBI(vm_page_t m, const char *file, int line) { return (mtx_trylock_flags_(vm_page_lockptr(m), 0, file, line)); } #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) void vm_page_assert_locked_KBI(vm_page_t m, const char *file, int line) { vm_page_lock_assert_KBI(m, MA_OWNED, file, line); } void vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line) { mtx_assert_(vm_page_lockptr(m), a, file, line); } #endif #ifdef INVARIANTS void vm_page_object_lock_assert(vm_page_t m) { /* * Certain of the page's fields may only be modified by the * holder of the containing object's lock or the exclusive busy * holder. Unfortunately, the holder of the write busy is * not recorded, and thus cannot be checked here. */ if (m->object != NULL && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_WLOCKED(m->object); } void vm_page_assert_pga_writeable(vm_page_t m, uint8_t bits) { if ((bits & PGA_WRITEABLE) == 0) return; /* * The PGA_WRITEABLE flag can only be set if the page is * managed, is exclusively busied or the object is locked. * Currently, this flag is only set by pmap_enter(). */ KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("PGA_WRITEABLE on unmanaged page")); if (!vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); } #endif #include "opt_ddb.h" #ifdef DDB #include <sys/kernel.h> #include <ddb/ddb.h> DB_SHOW_COMMAND(page, vm_page_print_page_info) { db_printf("vm_cnt.v_free_count: %d\n", vm_cnt.v_free_count); db_printf("vm_cnt.v_inactive_count: %d\n", vm_cnt.v_inactive_count); db_printf("vm_cnt.v_active_count: %d\n", vm_cnt.v_active_count); db_printf("vm_cnt.v_laundry_count: %d\n", vm_cnt.v_laundry_count); db_printf("vm_cnt.v_wire_count: %d\n", vm_cnt.v_wire_count); db_printf("vm_cnt.v_free_reserved: %d\n", vm_cnt.v_free_reserved); db_printf("vm_cnt.v_free_min: %d\n", vm_cnt.v_free_min); db_printf("vm_cnt.v_free_target: %d\n", vm_cnt.v_free_target); db_printf("vm_cnt.v_inactive_target: %d\n", vm_cnt.v_inactive_target); } DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info) { int dom; db_printf("pq_free %d\n", vm_cnt.v_free_count); for (dom = 0; dom < vm_ndomains; dom++) { db_printf( "dom %d page_cnt %d free %d pq_act %d pq_inact %d pq_laund %d pq_unsw %d\n", dom, vm_dom[dom].vmd_page_count, vm_dom[dom].vmd_free_count, vm_dom[dom].vmd_pagequeues[PQ_ACTIVE].pq_cnt, vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt, vm_dom[dom].vmd_pagequeues[PQ_LAUNDRY].pq_cnt, vm_dom[dom].vmd_pagequeues[PQ_UNSWAPPABLE].pq_cnt); } } DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo) { vm_page_t m; boolean_t phys; if (!have_addr) { db_printf("show pginfo addr\n"); return; } phys = strchr(modif, 'p') != NULL; if (phys) m = PHYS_TO_VM_PAGE(addr); else m = (vm_page_t)addr; db_printf( "page %p obj %p pidx 0x%jx phys 0x%jx q %d hold %d wire %d\n" " af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n", m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr, m->queue, m->hold_count, m->wire_count, m->aflags, m->oflags, m->flags, m->act_count, m->busy_lock, m->valid, m->dirty); } #endif /* DDB */ Index: projects/ipsec/sys/x86/acpica/acpi_wakeup.c =================================================================== ---
projects/ipsec/sys/x86/acpica/acpi_wakeup.c (revision 313186) +++ projects/ipsec/sys/x86/acpica/acpi_wakeup.c (revision 313187) @@ -1,427 +1,424 @@ /*- * Copyright (c) 2001 Takanori Watanabe * Copyright (c) 2001-2012 Mitsuru IWASAKI * Copyright (c) 2003 Peter Wemm * Copyright (c) 2008-2012 Jung-uk Kim * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #if defined(__amd64__) #define DEV_APIC #else #include "opt_apic.h" #endif -#ifdef __i386__ -#include "opt_npx.h" -#endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_APIC #include #include #endif #ifdef SMP #include #include #endif #include #include #include "acpi_wakecode.h" #include "acpi_wakedata.h" /* Make sure the code is less than a page and leave room for the stack. */ CTASSERT(sizeof(wakecode) < PAGE_SIZE - 1024); extern int acpi_resume_beep; extern int acpi_reset_video; #ifdef SMP extern struct susppcb **susppcbs; static cpuset_t suspcpus; #else static struct susppcb **susppcbs; #endif static void *acpi_alloc_wakeup_handler(void **); static void acpi_stop_beep(void *); #ifdef SMP static int acpi_wakeup_ap(struct acpi_softc *, int); static void acpi_wakeup_cpus(struct acpi_softc *); #endif #ifdef __amd64__ #define ACPI_WAKEPAGES 4 #else #define ACPI_WAKEPAGES 1 #endif #define WAKECODE_FIXUP(offset, type, val) do { \ type *addr; \ addr = (type *)(sc->acpi_wakeaddr + (offset)); \ *addr = val; \ } while (0) static void acpi_stop_beep(void *arg) { if (acpi_resume_beep != 0) timer_spkr_release(); } #ifdef SMP static int acpi_wakeup_ap(struct acpi_softc *sc, int cpu) { struct pcb *pcb; int vector = (sc->acpi_wakephys >> 12) & 0xff; int apic_id = cpu_apic_ids[cpu]; int ms; pcb = &susppcbs[cpu]->sp_pcb; WAKECODE_FIXUP(wakeup_pcb, struct pcb *, pcb); WAKECODE_FIXUP(wakeup_gdt, uint16_t, pcb->pcb_gdt.rd_limit); WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t, pcb->pcb_gdt.rd_base); ipi_startup(apic_id, vector); /* Wait up to 5 seconds for it to resume. 
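(i.e., up to 5000 polls of suspended_cpus, with a 1 ms DELAY between them).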
*/ for (ms = 0; ms < 5000; ms++) { if (!CPU_ISSET(cpu, &suspended_cpus)) return (1); /* return SUCCESS */ DELAY(1000); } return (0); /* return FAILURE */ } #define WARMBOOT_TARGET 0 #define WARMBOOT_OFF (KERNBASE + 0x0467) #define WARMBOOT_SEG (KERNBASE + 0x0469) #define CMOS_REG (0x70) #define CMOS_DATA (0x71) #define BIOS_RESET (0x0f) #define BIOS_WARM (0x0a) static void acpi_wakeup_cpus(struct acpi_softc *sc) { uint32_t mpbioswarmvec; int cpu; u_char mpbiosreason; /* save the current value of the warm-start vector */ mpbioswarmvec = *((uint32_t *)WARMBOOT_OFF); outb(CMOS_REG, BIOS_RESET); mpbiosreason = inb(CMOS_DATA); /* setup a vector to our boot code */ *((volatile u_short *)WARMBOOT_OFF) = WARMBOOT_TARGET; *((volatile u_short *)WARMBOOT_SEG) = sc->acpi_wakephys >> 4; outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ /* Wake up each AP. */ for (cpu = 1; cpu < mp_ncpus; cpu++) { if (!CPU_ISSET(cpu, &suspcpus)) continue; if (acpi_wakeup_ap(sc, cpu) == 0) { /* restore the warmstart vector */ *(uint32_t *)WARMBOOT_OFF = mpbioswarmvec; panic("acpi_wakeup: failed to resume AP #%d (PHY #%d)", cpu, cpu_apic_ids[cpu]); } } /* restore the warmstart vector */ *(uint32_t *)WARMBOOT_OFF = mpbioswarmvec; outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); } #endif int acpi_sleep_machdep(struct acpi_softc *sc, int state) { ACPI_STATUS status; struct pcb *pcb; if (sc->acpi_wakeaddr == 0ul) return (-1); /* couldn't alloc wake memory */ #ifdef SMP suspcpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &suspcpus); #endif if (acpi_resume_beep != 0) timer_spkr_acquire(); AcpiSetFirmwareWakingVector(sc->acpi_wakephys, 0); intr_suspend(); pcb = &susppcbs[0]->sp_pcb; if (savectx(pcb)) { #ifdef __amd64__ fpususpend(susppcbs[0]->sp_fpususpend); -#elif defined(DEV_NPX) +#else npxsuspend(susppcbs[0]->sp_fpususpend); #endif #ifdef SMP if (!CPU_EMPTY(&suspcpus) && suspend_cpus(suspcpus) == 0) { device_printf(sc->acpi_dev, "Failed to suspend APs\n"); return (0); /* couldn't sleep */ } #endif WAKECODE_FIXUP(resume_beep, uint8_t, (acpi_resume_beep != 0)); WAKECODE_FIXUP(reset_video, uint8_t, (acpi_reset_video != 0)); #ifndef __amd64__ WAKECODE_FIXUP(wakeup_cr4, register_t, pcb->pcb_cr4); #endif WAKECODE_FIXUP(wakeup_pcb, struct pcb *, pcb); WAKECODE_FIXUP(wakeup_gdt, uint16_t, pcb->pcb_gdt.rd_limit); WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t, pcb->pcb_gdt.rd_base); /* Call ACPICA to enter the desired sleep state */ if (state == ACPI_STATE_S4 && sc->acpi_s4bios) status = AcpiEnterSleepStateS4bios(); else status = AcpiEnterSleepState(state); if (ACPI_FAILURE(status)) { device_printf(sc->acpi_dev, "AcpiEnterSleepState failed - %s\n", AcpiFormatException(status)); return (0); /* couldn't sleep */ } for (;;) ia32_pause(); } else { #ifdef __amd64__ fpuresume(susppcbs[0]->sp_fpususpend); -#elif defined(DEV_NPX) +#else npxresume(susppcbs[0]->sp_fpususpend); #endif } return (1); /* wakeup successfully */ } int acpi_wakeup_machdep(struct acpi_softc *sc, int state, int sleep_result, int intr_enabled) { if (sleep_result == -1) return (sleep_result); if (!intr_enabled) { /* Wakeup MD procedures in interrupt disabled context */ if (sleep_result == 1) { pmap_init_pat(); initializecpu(); PCPU_SET(switchtime, 0); PCPU_SET(switchticks, ticks); #ifdef DEV_APIC lapic_xapic_mode(); #endif #ifdef SMP if (!CPU_EMPTY(&suspcpus)) acpi_wakeup_cpus(sc); #endif } #ifdef SMP if (!CPU_EMPTY(&suspcpus)) restart_cpus(suspcpus); #endif mca_resume(); #ifdef __amd64__ if (vmm_resume_p != NULL) vmm_resume_p(); #endif 
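/* With machine-dependent CPU state restored, re-enable interrupt controllers and clear the firmware waking vector. */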
intr_resume(/*suspend_cancelled*/false); AcpiSetFirmwareWakingVector(0, 0); } else { /* Wakeup MD procedures in interrupt enabled context */ if (sleep_result == 1 && mem_range_softc.mr_op != NULL && mem_range_softc.mr_op->reinit != NULL) mem_range_softc.mr_op->reinit(&mem_range_softc); } return (sleep_result); } static void * acpi_alloc_wakeup_handler(void *wakepages[ACPI_WAKEPAGES]) { int i; memset(wakepages, 0, ACPI_WAKEPAGES * sizeof(*wakepages)); /* * Specify the region for our wakeup code. We want it in the low 1 MB * region, excluding real mode IVT (0-0x3ff), BDA (0x400-0x4ff), EBDA * (less than 128KB, below 0xa0000, must be excluded by SMAP and DSDT), * and ROM area (0xa0000 and above). The temporary page tables must be * page-aligned. */ for (i = 0; i < ACPI_WAKEPAGES; i++) { wakepages[i] = contigmalloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT, 0x500, 0xa0000, PAGE_SIZE, 0ul); if (wakepages[i] == NULL) { printf("%s: can't alloc wake memory\n", __func__); goto freepages; } } if (EVENTHANDLER_REGISTER(power_resume, acpi_stop_beep, NULL, EVENTHANDLER_PRI_LAST) == NULL) { printf("%s: can't register event handler\n", __func__); goto freepages; } susppcbs = malloc(mp_ncpus * sizeof(*susppcbs), M_DEVBUF, M_WAITOK); for (i = 0; i < mp_ncpus; i++) { susppcbs[i] = malloc(sizeof(**susppcbs), M_DEVBUF, M_WAITOK); susppcbs[i]->sp_fpususpend = alloc_fpusave(M_WAITOK); } return (wakepages); freepages: for (i = 0; i < ACPI_WAKEPAGES; i++) if (wakepages[i] != NULL) contigfree(wakepages[i], PAGE_SIZE, M_DEVBUF); return (NULL); } void acpi_install_wakeup_handler(struct acpi_softc *sc) { static void *wakeaddr; void *wakepages[ACPI_WAKEPAGES]; #ifdef __amd64__ uint64_t *pt4, *pt3, *pt2; vm_paddr_t pt4pa, pt3pa, pt2pa; int i; #endif if (wakeaddr != NULL) return; if (acpi_alloc_wakeup_handler(wakepages) == NULL) return; wakeaddr = wakepages[0]; sc->acpi_wakeaddr = (vm_offset_t)wakeaddr; sc->acpi_wakephys = vtophys(wakeaddr); #ifdef __amd64__ pt4 = wakepages[1]; pt3 = wakepages[2]; pt2 = wakepages[3]; pt4pa = vtophys(pt4); pt3pa = vtophys(pt3); pt2pa = vtophys(pt2); #endif bcopy(wakecode, (void *)sc->acpi_wakeaddr, sizeof(wakecode)); /* Patch GDT base address, ljmp targets. */ WAKECODE_FIXUP((bootgdtdesc + 2), uint32_t, sc->acpi_wakephys + bootgdt); WAKECODE_FIXUP((wakeup_sw32 + 2), uint32_t, sc->acpi_wakephys + wakeup_32); #ifdef __amd64__ WAKECODE_FIXUP((wakeup_sw64 + 1), uint32_t, sc->acpi_wakephys + wakeup_64); WAKECODE_FIXUP(wakeup_pagetables, uint32_t, pt4pa); #endif /* Save pointers to some global data. */ WAKECODE_FIXUP(wakeup_ret, void *, resumectx); #ifndef __amd64__ #if defined(PAE) || defined(PAE_TABLES) WAKECODE_FIXUP(wakeup_cr3, register_t, vtophys(kernel_pmap->pm_pdpt)); #else WAKECODE_FIXUP(wakeup_cr3, register_t, vtophys(kernel_pmap->pm_pdir)); #endif #else /* __amd64__ */ /* Create the initial 1GB replicated page tables */ for (i = 0; i < 512; i++) { /* * Each slot of the level 4 pages points * to the same level 3 page */ pt4[i] = (uint64_t)pt3pa; pt4[i] |= PG_V | PG_RW | PG_U; /* * Each slot of the level 3 pages points * to the same level 2 page */ pt3[i] = (uint64_t)pt2pa; pt3[i] |= PG_V | PG_RW | PG_U; /* The level 2 page slots are mapped with 2MB pages for 1GB. 
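 * (Slot i maps physical address i * 2MB; 512 such slots identity-map
 * the first 1GB for the wakeup trampoline.)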
*/ pt2[i] = i * (2 * 1024 * 1024); pt2[i] |= PG_V | PG_RW | PG_PS | PG_U; } #endif /* !__amd64__ */ if (bootverbose) device_printf(sc->acpi_dev, "wakeup code va %#jx pa %#jx\n", (uintmax_t)sc->acpi_wakeaddr, (uintmax_t)sc->acpi_wakephys); } Index: projects/ipsec/sys/x86/x86/cpu_machdep.c =================================================================== --- projects/ipsec/sys/x86/x86/cpu_machdep.c (revision 313186) +++ projects/ipsec/sys/x86/x86/cpu_machdep.c (revision 313187) @@ -1,571 +1,570 @@ /*- * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_atpic.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_isa.h" #include "opt_kdb.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_mp_watchdog.h" #include "opt_platform.h" #ifdef __i386__ -#include "opt_npx.h" #include "opt_apic.h" #include "opt_xbox.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include #include #define STATE_RUNNING 0x0 #define STATE_MWAIT 0x1 #define STATE_SLEEPING 0x2 /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. 
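 * (x86 hardware keeps the data and instruction caches coherent, so the
 * body below is intentionally empty.)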
*/ void cpu_flush_dcache(void *ptr, size_t len) { /* Not applicable */ } void acpi_cpu_c1(void) { __asm __volatile("sti; hlt"); } void acpi_cpu_idle_mwait(uint32_t mwait_hint) { int *state; /* * XXXKIB. Software coordination mode should be supported, * but all Intel CPUs provide hardware coordination. */ state = (int *)PCPU_PTR(monitorbuf); KASSERT(*state == STATE_SLEEPING, ("cpu_mwait_cx: wrong monitorbuf state")); *state = STATE_MWAIT; cpu_monitor(state, 0, 0); if (*state == STATE_MWAIT) cpu_mwait(MWAIT_INTRBREAK, mwait_hint); /* * We should exit on any event that interrupts mwait, because * that event might be a wanted interrupt. */ *state = STATE_RUNNING; } /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { uint64_t tsc1, tsc2; uint64_t acnt, mcnt, perf; register_t reg; if (pcpu_find(cpu_id) == NULL || rate == NULL) return (EINVAL); #ifdef __i386__ if ((cpu_feature & CPUID_TSC) == 0) return (EOPNOTSUPP); #endif /* * If TSC is P-state invariant and APERF/MPERF MSRs do not exist, * DELAY(9) based logic fails. */ if (tsc_is_invariant && !tsc_perf_stat) return (EOPNOTSUPP); #ifdef SMP if (smp_cpus > 1) { /* Schedule ourselves on the indicated cpu. */ thread_lock(curthread); sched_bind(curthread, cpu_id); thread_unlock(curthread); } #endif /* Calibrate by measuring a short delay. */ reg = intr_disable(); if (tsc_is_invariant) { wrmsr(MSR_MPERF, 0); wrmsr(MSR_APERF, 0); tsc1 = rdtsc(); DELAY(1000); mcnt = rdmsr(MSR_MPERF); acnt = rdmsr(MSR_APERF); tsc2 = rdtsc(); intr_restore(reg); perf = 1000 * acnt / mcnt; *rate = (tsc2 - tsc1) * perf; } else { tsc1 = rdtsc(); DELAY(1000); tsc2 = rdtsc(); intr_restore(reg); *rate = (tsc2 - tsc1) * 1000; } #ifdef SMP if (smp_cpus > 1) { thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); } #endif return (0); } /* * Shut down the CPU as much as possible */ void cpu_halt(void) { for (;;) halt(); } bool cpu_mwait_usable(void) { return ((cpu_feature2 & CPUID2_MON) != 0 && ((cpu_mon_mwait_flags & (CPUID5_MON_MWAIT_EXT | CPUID5_MWAIT_INTRBREAK)) == (CPUID5_MON_MWAIT_EXT | CPUID5_MWAIT_INTRBREAK))); } void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */ static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */ static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */ SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RWTUN, &idle_mwait, 0, "Use MONITOR/MWAIT for short idle"); static void cpu_idle_acpi(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_SLEEPING; /* See comments in cpu_idle_hlt(). */ disable_intr(); if (sched_runnable()) enable_intr(); else if (cpu_idle_hook) cpu_idle_hook(sbt); else acpi_cpu_c1(); *state = STATE_RUNNING; } static void cpu_idle_hlt(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_SLEEPING; /* * Since we may be in a critical section from cpu_idle(), if * an interrupt fires during that critical section we may have * a pending preemption. If the CPU halts, then that thread * may not execute until a later interrupt awakens the CPU. * To handle this race, check for a runnable thread after * disabling interrupts and immediately return if one is * found. Also, we must absolutely guarantee that hlt is * the next instruction after sti. This ensures that any * interrupt that fires after the call to disable_intr() will * immediately awaken the CPU from hlt.
Finally, note * that on x86 this works because interrupts are delivered only * after the instruction following sti executes, while IF is set * to 1 immediately, allowing the hlt instruction to acknowledge the * interrupt. */ disable_intr(); if (sched_runnable()) enable_intr(); else acpi_cpu_c1(); *state = STATE_RUNNING; } static void cpu_idle_mwait(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_MWAIT; /* See comments in cpu_idle_hlt(). */ disable_intr(); if (sched_runnable()) { enable_intr(); *state = STATE_RUNNING; return; } cpu_monitor(state, 0, 0); if (*state == STATE_MWAIT) __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0)); else enable_intr(); *state = STATE_RUNNING; } static void cpu_idle_spin(sbintime_t sbt) { int *state; int i; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_RUNNING; /* * The sched_runnable() call is racy, but since it sits inside * a loop, missing it one time will have little impact if any * (and it is much better than not checking at all). */ for (i = 0; i < 1000; i++) { if (sched_runnable()) return; cpu_spinwait(); } } /* * C1E renders the local APIC timer dead, so we disable it by * reading the Interrupt Pending Message register and clearing * both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27). * * Reference: * "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors" * #32559 revision 3.00+ */ #define MSR_AMDK8_IPM 0xc0010055 #define AMDK8_SMIONCMPHALT (1ULL << 27) #define AMDK8_C1EONCMPHALT (1ULL << 28) #define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT) void cpu_probe_amdc1e(void) { /* * Detect the presence of the C1E capability, mostly on the latest * dual-core (or later) K8 family CPUs. */ if (cpu_vendor_id == CPU_VENDOR_AMD && (cpu_id & 0x00000f00) == 0x00000f00 && (cpu_id & 0x0fff0000) >= 0x00040000) { cpu_ident_amdc1e = 1; } } void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi; void cpu_idle(int busy) { uint64_t msr; sbintime_t sbt = -1; CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); #ifdef MP_WATCHDOG ap_watchdog(PCPU_GET(cpuid)); #endif /* If we are busy, try to use fast methods. */ if (busy) { if ((cpu_feature2 & CPUID2_MON) && idle_mwait) { cpu_idle_mwait(busy); goto out; } } /* If we have time, switch timers into idle mode. */ if (!busy) { critical_enter(); sbt = cpu_idleclock(); } /* Apply AMD APIC timer C1E workaround. */ if (cpu_ident_amdc1e && cpu_disable_c3_sleep) { msr = rdmsr(MSR_AMDK8_IPM); if (msr & AMDK8_CMPHALT) wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT); } /* Call main idle method. */ cpu_idle_fn(sbt); /* Switch timers back into active mode. */ if (!busy) { cpu_activeclock(); critical_exit(); } out: CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", busy, curcpu); } int cpu_idle_wakeup(int cpu) { struct pcpu *pcpu; int *state; pcpu = pcpu_find(cpu); state = (int *)pcpu->pc_monitorbuf; /* * This doesn't need to be atomic since missing the race will * simply result in unnecessary IPIs. */ if (*state == STATE_SLEEPING) return (0); if (*state == STATE_MWAIT) *state = STATE_RUNNING; return (1); } /* * Ordered by speed/power consumption.
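 * ("spin" has roughly the lowest wakeup latency but burns the most
 * power; "acpi" is roughly the reverse. The sysctl handlers below walk
 * this table in order.)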
*/ struct { void *id_fn; char *id_name; } idle_tbl[] = { { cpu_idle_spin, "spin" }, { cpu_idle_mwait, "mwait" }, { cpu_idle_hlt, "hlt" }, #if !defined(__i386__) { cpu_idle_acpi, "acpi" }, #endif { NULL, NULL } }; static int idle_sysctl_available(SYSCTL_HANDLER_ARGS) { char *avail, *p; int error; int i; avail = malloc(256, M_TEMP, M_WAITOK); p = avail; for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (strstr(idle_tbl[i].id_name, "mwait") && (cpu_feature2 & CPUID2_MON) == 0) continue; #if !defined(__i386__) if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && cpu_idle_hook == NULL) continue; #endif p += sprintf(p, "%s%s", p != avail ? ", " : "", idle_tbl[i].id_name); } error = sysctl_handle_string(oidp, avail, 0, req); free(avail, M_TEMP); return (error); } SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, idle_sysctl_available, "A", "list of available idle functions"); static int idle_sysctl(SYSCTL_HANDLER_ARGS) { char buf[16]; int error; char *p; int i; p = "unknown"; for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (idle_tbl[i].id_fn == cpu_idle_fn) { p = idle_tbl[i].id_name; break; } } strncpy(buf, p, sizeof(buf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (strstr(idle_tbl[i].id_name, "mwait") && (cpu_feature2 & CPUID2_MON) == 0) continue; #if !defined(__i386__) if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && cpu_idle_hook == NULL) continue; #endif if (strcmp(idle_tbl[i].id_name, buf)) continue; cpu_idle_fn = idle_tbl[i].id_fn; return (0); } return (EINVAL); } SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, idle_sysctl, "A", "currently selected idle function"); static int panic_on_nmi = 1; SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RWTUN, &panic_on_nmi, 0, "Panic on NMI"); int nmi_is_broadcast = 1; SYSCTL_INT(_machdep, OID_AUTO, nmi_is_broadcast, CTLFLAG_RWTUN, &nmi_is_broadcast, 0, "Chipset NMI is broadcast"); #ifdef KDB int kdb_on_nmi = 1; SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RWTUN, &kdb_on_nmi, 0, "Go to KDB on NMI"); #endif #ifdef DEV_ISA void nmi_call_kdb(u_int cpu, u_int type, struct trapframe *frame) { /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(frame->tf_err) == 0) { #ifdef KDB /* * NMI can be hooked up to a pushbutton for debugging. */ if (kdb_on_nmi) { printf("NMI/cpu%d ... going to debugger\n", cpu); kdb_trap(type, 0, frame); } #endif /* KDB */ } else if (panic_on_nmi) { panic("NMI indicates hardware failure"); } } #endif void nmi_handle_intr(u_int type, struct trapframe *frame) { #ifdef DEV_ISA #ifdef SMP if (nmi_is_broadcast) { nmi_call_kdb_smp(type, frame); return; } #endif nmi_call_kdb(PCPU_GET(cpuid), type, frame); #endif } Index: projects/ipsec/sys/x86/x86/identcpu.c =================================================================== --- projects/ipsec/sys/x86/x86/identcpu.c (revision 313186) +++ projects/ipsec/sys/x86/x86/identcpu.c (revision 313187) @@ -1,2420 +1,2414 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * Copyright (c) 1997 KATO Takenori. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Id: machdep.c,v 1.193 1996/06/18 01:22:04 bde Exp */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __i386__ -#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) -#define CPU_ENABLE_SSE -#endif - #define IDENTBLUE_CYRIX486 0 #define IDENTBLUE_IBMCPU 1 #define IDENTBLUE_CYRIXM2 2 static void identifycyrix(void); static void print_transmeta_info(void); #endif static u_int find_cpu_vendor_id(void); static void print_AMD_info(void); static void print_INTEL_info(void); static void print_INTEL_TLB(u_int data); static void print_hypervisor_info(void); static void print_svm_info(void); static void print_via_padlock_info(void); static void print_vmx_info(void); #ifdef __i386__ int cpu; /* Are we 386, 386sx, 486, etc? 
*/ int cpu_class; #endif u_int cpu_feature; /* Feature flags */ u_int cpu_feature2; /* Feature flags */ u_int amd_feature; /* AMD feature flags */ u_int amd_feature2; /* AMD feature flags */ u_int amd_pminfo; /* AMD advanced power management info */ u_int via_feature_rng; /* VIA RNG features */ u_int via_feature_xcrypt; /* VIA ACE features */ u_int cpu_high; /* Highest arg to CPUID */ u_int cpu_exthigh; /* Highest arg to extended CPUID */ u_int cpu_id; /* Stepping ID */ u_int cpu_procinfo; /* HyperThreading Info / Brand Index / CLFLUSH */ u_int cpu_procinfo2; /* Multicore info */ char cpu_vendor[20]; /* CPU Origin code */ u_int cpu_vendor_id; /* CPU vendor ID */ -#if defined(__amd64__) || defined(CPU_ENABLE_SSE) u_int cpu_fxsr; /* SSE enabled */ u_int cpu_mxcsr_mask; /* Valid bits in mxcsr */ -#endif u_int cpu_clflush_line_size = 32; u_int cpu_stdext_feature; u_int cpu_stdext_feature2; u_int cpu_max_ext_state_size; u_int cpu_mon_mwait_flags; /* MONITOR/MWAIT flags (CPUID.05H.ECX) */ u_int cpu_mon_min_size; /* MONITOR minimum range size, bytes */ u_int cpu_mon_max_size; /* MONITOR maximum range size, bytes */ u_int cpu_maxphyaddr; /* Max phys addr width in bits */ char machine[] = MACHINE; SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD, &via_feature_rng, 0, "VIA RNG feature available in CPU"); SYSCTL_UINT(_hw, OID_AUTO, via_feature_xcrypt, CTLFLAG_RD, &via_feature_xcrypt, 0, "VIA xcrypt feature available in CPU"); #ifdef __amd64__ #ifdef SCTL_MASK32 extern int adaptive_machine_arch; #endif static int sysctl_hw_machine(SYSCTL_HANDLER_ARGS) { #ifdef SCTL_MASK32 static const char machine32[] = "i386"; #endif int error; #ifdef SCTL_MASK32 if ((req->flags & SCTL_MASK32) != 0 && adaptive_machine_arch) error = SYSCTL_OUT(req, machine32, sizeof(machine32)); else #endif error = SYSCTL_OUT(req, machine, sizeof(machine)); return (error); } SYSCTL_PROC(_hw, HW_MACHINE, machine, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_hw_machine, "A", "Machine class"); #else SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "Machine class"); #endif static char cpu_model[128]; SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD | CTLFLAG_MPSAFE, cpu_model, 0, "Machine model"); static int hw_clockrate; SYSCTL_INT(_hw, OID_AUTO, clockrate, CTLFLAG_RD, &hw_clockrate, 0, "CPU instruction clock rate"); u_int hv_high; char hv_vendor[16]; SYSCTL_STRING(_hw, OID_AUTO, hv_vendor, CTLFLAG_RD | CTLFLAG_MPSAFE, hv_vendor, 0, "Hypervisor vendor"); static eventhandler_tag tsc_post_tag; static char cpu_brand[48]; #ifdef __i386__ #define MAX_BRAND_INDEX 8 static const char *cpu_brandtable[MAX_BRAND_INDEX + 1] = { NULL, /* No brand */ "Intel Celeron", "Intel Pentium III", "Intel Pentium III Xeon", NULL, NULL, NULL, NULL, "Intel Pentium 4" }; static struct { char *cpu_name; int cpu_class; } cpus[] = { { "Intel 80286", CPUCLASS_286 }, /* CPU_286 */ { "i386SX", CPUCLASS_386 }, /* CPU_386SX */ { "i386DX", CPUCLASS_386 }, /* CPU_386 */ { "i486SX", CPUCLASS_486 }, /* CPU_486SX */ { "i486DX", CPUCLASS_486 }, /* CPU_486 */ { "Pentium", CPUCLASS_586 }, /* CPU_586 */ { "Cyrix 486", CPUCLASS_486 }, /* CPU_486DLC */ { "Pentium Pro", CPUCLASS_686 }, /* CPU_686 */ { "Cyrix 5x86", CPUCLASS_486 }, /* CPU_M1SC */ { "Cyrix 6x86", CPUCLASS_486 }, /* CPU_M1 */ { "Blue Lightning", CPUCLASS_486 }, /* CPU_BLUE */ { "Cyrix 6x86MX", CPUCLASS_686 }, /* CPU_M2 */ { "NexGen 586", CPUCLASS_386 }, /* CPU_NX586 (XXX) */ { "Cyrix 486S/DX", CPUCLASS_486 }, /* CPU_CY486DX */ { "Pentium II", CPUCLASS_686 }, /* CPU_PII */ { "Pentium 
III", CPUCLASS_686 }, /* CPU_PIII */ { "Pentium 4", CPUCLASS_686 }, /* CPU_P4 */ }; #endif static struct { char *vendor; u_int vendor_id; } cpu_vendors[] = { { INTEL_VENDOR_ID, CPU_VENDOR_INTEL }, /* GenuineIntel */ { AMD_VENDOR_ID, CPU_VENDOR_AMD }, /* AuthenticAMD */ { CENTAUR_VENDOR_ID, CPU_VENDOR_CENTAUR }, /* CentaurHauls */ #ifdef __i386__ { NSC_VENDOR_ID, CPU_VENDOR_NSC }, /* Geode by NSC */ { CYRIX_VENDOR_ID, CPU_VENDOR_CYRIX }, /* CyrixInstead */ { TRANSMETA_VENDOR_ID, CPU_VENDOR_TRANSMETA }, /* GenuineTMx86 */ { SIS_VENDOR_ID, CPU_VENDOR_SIS }, /* SiS SiS SiS */ { UMC_VENDOR_ID, CPU_VENDOR_UMC }, /* UMC UMC UMC */ { NEXGEN_VENDOR_ID, CPU_VENDOR_NEXGEN }, /* NexGenDriven */ { RISE_VENDOR_ID, CPU_VENDOR_RISE }, /* RiseRiseRise */ #if 0 /* XXX CPUID 8000_0000h and 8086_0000h, not 0000_0000h */ { "TransmetaCPU", CPU_VENDOR_TRANSMETA }, #endif #endif }; void printcpuinfo(void) { u_int regs[4], i; char *brand; printf("CPU: "); #ifdef __i386__ cpu_class = cpus[cpu].cpu_class; strncpy(cpu_model, cpus[cpu].cpu_name, sizeof (cpu_model)); #else strncpy(cpu_model, "Hammer", sizeof (cpu_model)); #endif /* Check for extended CPUID information and a processor name. */ if (cpu_exthigh >= 0x80000004) { brand = cpu_brand; for (i = 0x80000002; i < 0x80000005; i++) { do_cpuid(i, regs); memcpy(brand, regs, sizeof(regs)); brand += sizeof(regs); } } switch (cpu_vendor_id) { case CPU_VENDOR_INTEL: #ifdef __i386__ if ((cpu_id & 0xf00) > 0x300) { u_int brand_index; cpu_model[0] = '\0'; switch (cpu_id & 0x3000) { case 0x1000: strcpy(cpu_model, "Overdrive "); break; case 0x2000: strcpy(cpu_model, "Dual "); break; } switch (cpu_id & 0xf00) { case 0x400: strcat(cpu_model, "i486 "); /* Check the particular flavor of 486 */ switch (cpu_id & 0xf0) { case 0x00: case 0x10: strcat(cpu_model, "DX"); break; case 0x20: strcat(cpu_model, "SX"); break; case 0x30: strcat(cpu_model, "DX2"); break; case 0x40: strcat(cpu_model, "SL"); break; case 0x50: strcat(cpu_model, "SX2"); break; case 0x70: strcat(cpu_model, "DX2 Write-Back Enhanced"); break; case 0x80: strcat(cpu_model, "DX4"); break; } break; case 0x500: /* Check the particular flavor of 586 */ strcat(cpu_model, "Pentium"); switch (cpu_id & 0xf0) { case 0x00: strcat(cpu_model, " A-step"); break; case 0x10: strcat(cpu_model, "/P5"); break; case 0x20: strcat(cpu_model, "/P54C"); break; case 0x30: strcat(cpu_model, "/P24T"); break; case 0x40: strcat(cpu_model, "/P55C"); break; case 0x70: strcat(cpu_model, "/P54C"); break; case 0x80: strcat(cpu_model, "/P55C (quarter-micron)"); break; default: /* nothing */ break; } #if defined(I586_CPU) && !defined(NO_F00F_HACK) /* * XXX - If/when Intel fixes the bug, this * should also check the version of the * CPU, not just that it's a Pentium. */ has_f00f_bug = 1; #endif break; case 0x600: /* Check the particular flavor of 686 */ switch (cpu_id & 0xf0) { case 0x00: strcat(cpu_model, "Pentium Pro A-step"); break; case 0x10: strcat(cpu_model, "Pentium Pro"); break; case 0x30: case 0x50: case 0x60: strcat(cpu_model, "Pentium II/Pentium II Xeon/Celeron"); cpu = CPU_PII; break; case 0x70: case 0x80: case 0xa0: case 0xb0: strcat(cpu_model, "Pentium III/Pentium III Xeon/Celeron"); cpu = CPU_PIII; break; default: strcat(cpu_model, "Unknown 80686"); break; } break; case 0xf00: strcat(cpu_model, "Pentium 4"); cpu = CPU_P4; break; default: strcat(cpu_model, "unknown"); break; } /* * If we didn't get a brand name from the extended * CPUID, try to look it up in the brand table. 
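 * (The brand index is the low byte of CPUID leaf 1 %ebx, cached in
 * cpu_procinfo and masked with CPUID_BRAND_INDEX below.)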
*/ if (cpu_high > 0 && *cpu_brand == '\0') { brand_index = cpu_procinfo & CPUID_BRAND_INDEX; if (brand_index <= MAX_BRAND_INDEX && cpu_brandtable[brand_index] != NULL) strcpy(cpu_brand, cpu_brandtable[brand_index]); } } #else /* Please make up your mind folks! */ strcat(cpu_model, "EM64T"); #endif break; case CPU_VENDOR_AMD: /* * Values taken from AMD Processor Recognition * http://www.amd.com/K6/k6docs/pdf/20734g.pdf * (also describes ``Features'' encodings. */ strcpy(cpu_model, "AMD "); #ifdef __i386__ switch (cpu_id & 0xFF0) { case 0x410: strcat(cpu_model, "Standard Am486DX"); break; case 0x430: strcat(cpu_model, "Enhanced Am486DX2 Write-Through"); break; case 0x470: strcat(cpu_model, "Enhanced Am486DX2 Write-Back"); break; case 0x480: strcat(cpu_model, "Enhanced Am486DX4/Am5x86 Write-Through"); break; case 0x490: strcat(cpu_model, "Enhanced Am486DX4/Am5x86 Write-Back"); break; case 0x4E0: strcat(cpu_model, "Am5x86 Write-Through"); break; case 0x4F0: strcat(cpu_model, "Am5x86 Write-Back"); break; case 0x500: strcat(cpu_model, "K5 model 0"); break; case 0x510: strcat(cpu_model, "K5 model 1"); break; case 0x520: strcat(cpu_model, "K5 PR166 (model 2)"); break; case 0x530: strcat(cpu_model, "K5 PR200 (model 3)"); break; case 0x560: strcat(cpu_model, "K6"); break; case 0x570: strcat(cpu_model, "K6 266 (model 1)"); break; case 0x580: strcat(cpu_model, "K6-2"); break; case 0x590: strcat(cpu_model, "K6-III"); break; case 0x5a0: strcat(cpu_model, "Geode LX"); break; default: strcat(cpu_model, "Unknown"); break; } #else if ((cpu_id & 0xf00) == 0xf00) strcat(cpu_model, "AMD64 Processor"); else strcat(cpu_model, "Unknown"); #endif break; #ifdef __i386__ case CPU_VENDOR_CYRIX: strcpy(cpu_model, "Cyrix "); switch (cpu_id & 0xff0) { case 0x440: strcat(cpu_model, "MediaGX"); break; case 0x520: strcat(cpu_model, "6x86"); break; case 0x540: cpu_class = CPUCLASS_586; strcat(cpu_model, "GXm"); break; case 0x600: strcat(cpu_model, "6x86MX"); break; default: /* * Even though CPU supports the cpuid * instruction, it can be disabled. * Therefore, this routine supports all Cyrix * CPUs. */ switch (cyrix_did & 0xf0) { case 0x00: switch (cyrix_did & 0x0f) { case 0x00: strcat(cpu_model, "486SLC"); break; case 0x01: strcat(cpu_model, "486DLC"); break; case 0x02: strcat(cpu_model, "486SLC2"); break; case 0x03: strcat(cpu_model, "486DLC2"); break; case 0x04: strcat(cpu_model, "486SRx"); break; case 0x05: strcat(cpu_model, "486DRx"); break; case 0x06: strcat(cpu_model, "486SRx2"); break; case 0x07: strcat(cpu_model, "486DRx2"); break; case 0x08: strcat(cpu_model, "486SRu"); break; case 0x09: strcat(cpu_model, "486DRu"); break; case 0x0a: strcat(cpu_model, "486SRu2"); break; case 0x0b: strcat(cpu_model, "486DRu2"); break; default: strcat(cpu_model, "Unknown"); break; } break; case 0x10: switch (cyrix_did & 0x0f) { case 0x00: strcat(cpu_model, "486S"); break; case 0x01: strcat(cpu_model, "486S2"); break; case 0x02: strcat(cpu_model, "486Se"); break; case 0x03: strcat(cpu_model, "486S2e"); break; case 0x0a: strcat(cpu_model, "486DX"); break; case 0x0b: strcat(cpu_model, "486DX2"); break; case 0x0f: strcat(cpu_model, "486DX4"); break; default: strcat(cpu_model, "Unknown"); break; } break; case 0x20: if ((cyrix_did & 0x0f) < 8) strcat(cpu_model, "6x86"); /* Where did you get it? 
*/ else strcat(cpu_model, "5x86"); break; case 0x30: strcat(cpu_model, "6x86"); break; case 0x40: if ((cyrix_did & 0xf000) == 0x3000) { cpu_class = CPUCLASS_586; strcat(cpu_model, "GXm"); } else strcat(cpu_model, "MediaGX"); break; case 0x50: strcat(cpu_model, "6x86MX"); break; case 0xf0: switch (cyrix_did & 0x0f) { case 0x0d: strcat(cpu_model, "Overdrive CPU"); break; case 0x0e: strcpy(cpu_model, "Texas Instruments 486SXL"); break; case 0x0f: strcat(cpu_model, "486SLC/DLC"); break; default: strcat(cpu_model, "Unknown"); break; } break; default: strcat(cpu_model, "Unknown"); break; } break; } break; case CPU_VENDOR_RISE: strcpy(cpu_model, "Rise "); switch (cpu_id & 0xff0) { case 0x500: /* 6401 and 6441 (Kirin) */ case 0x520: /* 6510 (Lynx) */ strcat(cpu_model, "mP6"); break; default: strcat(cpu_model, "Unknown"); } break; #endif case CPU_VENDOR_CENTAUR: #ifdef __i386__ switch (cpu_id & 0xff0) { case 0x540: strcpy(cpu_model, "IDT WinChip C6"); break; case 0x580: strcpy(cpu_model, "IDT WinChip 2"); break; case 0x590: strcpy(cpu_model, "IDT WinChip 3"); break; case 0x660: strcpy(cpu_model, "VIA C3 Samuel"); break; case 0x670: if (cpu_id & 0x8) strcpy(cpu_model, "VIA C3 Ezra"); else strcpy(cpu_model, "VIA C3 Samuel 2"); break; case 0x680: strcpy(cpu_model, "VIA C3 Ezra-T"); break; case 0x690: strcpy(cpu_model, "VIA C3 Nehemiah"); break; case 0x6a0: case 0x6d0: strcpy(cpu_model, "VIA C7 Esther"); break; case 0x6f0: strcpy(cpu_model, "VIA Nano"); break; default: strcpy(cpu_model, "VIA/IDT Unknown"); } #else strcpy(cpu_model, "VIA "); if ((cpu_id & 0xff0) == 0x6f0) strcat(cpu_model, "Nano Processor"); else strcat(cpu_model, "Unknown"); #endif break; #ifdef __i386__ case CPU_VENDOR_IBM: strcpy(cpu_model, "Blue Lightning CPU"); break; case CPU_VENDOR_NSC: switch (cpu_id & 0xff0) { case 0x540: strcpy(cpu_model, "Geode SC1100"); cpu = CPU_GEODE1100; break; default: strcpy(cpu_model, "Geode/NSC unknown"); break; } break; #endif default: strcat(cpu_model, "Unknown"); break; } /* * Replace cpu_model with cpu_brand minus leading spaces if * we have one. */ brand = cpu_brand; while (*brand == ' ') ++brand; if (*brand != '\0') strcpy(cpu_model, brand); printf("%s (", cpu_model); if (tsc_freq != 0) { hw_clockrate = (tsc_freq + 5000) / 1000000; printf("%jd.%02d-MHz ", (intmax_t)(tsc_freq + 4999) / 1000000, (u_int)((tsc_freq + 4999) / 10000) % 100); } #ifdef __i386__ switch(cpu_class) { case CPUCLASS_286: printf("286"); break; case CPUCLASS_386: printf("386"); break; #if defined(I486_CPU) case CPUCLASS_486: printf("486"); break; #endif #if defined(I586_CPU) case CPUCLASS_586: printf("586"); break; #endif #if defined(I686_CPU) case CPUCLASS_686: printf("686"); break; #endif default: printf("Unknown"); /* will panic below... 
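 * (see panicifcpuunsupported() later in this file)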
*/ } #else printf("K8"); #endif printf("-class CPU)\n"); if (*cpu_vendor) printf(" Origin=\"%s\"", cpu_vendor); if (cpu_id) printf(" Id=0x%x", cpu_id); if (cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD || cpu_vendor_id == CPU_VENDOR_CENTAUR || #ifdef __i386__ cpu_vendor_id == CPU_VENDOR_TRANSMETA || cpu_vendor_id == CPU_VENDOR_RISE || cpu_vendor_id == CPU_VENDOR_NSC || (cpu_vendor_id == CPU_VENDOR_CYRIX && ((cpu_id & 0xf00) > 0x500)) || #endif 0) { printf(" Family=0x%x", CPUID_TO_FAMILY(cpu_id)); printf(" Model=0x%x", CPUID_TO_MODEL(cpu_id)); printf(" Stepping=%u", cpu_id & CPUID_STEPPING); #ifdef __i386__ if (cpu_vendor_id == CPU_VENDOR_CYRIX) printf("\n DIR=0x%04x", cyrix_did); #endif /* * AMD CPUID Specification * http://support.amd.com/us/Embedded_TechDocs/25481.pdf * * Intel Processor Identification and CPUID Instruction * http://www.intel.com/assets/pdf/appnote/241618.pdf */ if (cpu_high > 0) { /* * Here we should probably set up flags indicating * whether or not various features are available. * The interesting ones are probably VME, PSE, PAE, * and PGE. The code already assumes without bothering * to check that all CPUs >= Pentium have a TSC and * MSRs. */ printf("\n Features=0x%b", cpu_feature, "\020" "\001FPU" /* Integral FPU */ "\002VME" /* Extended VM86 mode support */ "\003DE" /* Debugging Extensions (CR4.DE) */ "\004PSE" /* 4MByte page tables */ "\005TSC" /* Timestamp counter */ "\006MSR" /* Machine specific registers */ "\007PAE" /* Physical address extension */ "\010MCE" /* Machine Check support */ "\011CX8" /* CMPEXCH8 instruction */ "\012APIC" /* SMP local APIC */ "\013oldMTRR" /* Previous implementation of MTRR */ "\014SEP" /* Fast System Call */ "\015MTRR" /* Memory Type Range Registers */ "\016PGE" /* PG_G (global bit) support */ "\017MCA" /* Machine Check Architecture */ "\020CMOV" /* CMOV instruction */ "\021PAT" /* Page attributes table */ "\022PSE36" /* 36 bit address space support */ "\023PN" /* Processor Serial number */ "\024CLFLUSH" /* Has the CLFLUSH instruction */ "\025" "\026DTS" /* Debug Trace Store */ "\027ACPI" /* ACPI support */ "\030MMX" /* MMX instructions */ "\031FXSR" /* FXSAVE/FXRSTOR */ "\032SSE" /* Streaming SIMD Extensions */ "\033SSE2" /* Streaming SIMD Extensions #2 */ "\034SS" /* Self snoop */ "\035HTT" /* Hyperthreading (see EBX bit 16-23) */ "\036TM" /* Thermal Monitor clock slowdown */ "\037IA64" /* CPU can execute IA64 instructions */ "\040PBE" /* Pending Break Enable */ ); if (cpu_feature2 != 0) { printf("\n Features2=0x%b", cpu_feature2, "\020" "\001SSE3" /* SSE3 */ "\002PCLMULQDQ" /* Carry-Less Mul Quadword */ "\003DTES64" /* 64-bit Debug Trace */ "\004MON" /* MONITOR/MWAIT Instructions */ "\005DS_CPL" /* CPL Qualified Debug Store */ "\006VMX" /* Virtual Machine Extensions */ "\007SMX" /* Safer Mode Extensions */ "\010EST" /* Enhanced SpeedStep */ "\011TM2" /* Thermal Monitor 2 */ "\012SSSE3" /* SSSE3 */ "\013CNXT-ID" /* L1 context ID available */ "\014SDBG" /* IA32 silicon debug */ "\015FMA" /* Fused Multiply Add */ "\016CX16" /* CMPXCHG16B Instruction */ "\017xTPR" /* Send Task Priority Messages*/ "\020PDCM" /* Perf/Debug Capability MSR */ "\021" "\022PCID" /* Process-context Identifiers*/ "\023DCA" /* Direct Cache Access */ "\024SSE4.1" /* SSE 4.1 */ "\025SSE4.2" /* SSE 4.2 */ "\026x2APIC" /* xAPIC Extensions */ "\027MOVBE" /* MOVBE Instruction */ "\030POPCNT" /* POPCNT Instruction */ "\031TSCDLT" /* TSC-Deadline Timer */ "\032AESNI" /* AES Crypto */ "\033XSAVE" /* XSAVE/XRSTOR States */ "\034OSXSAVE" /* 
OS-Enabled State Management*/ "\035AVX" /* Advanced Vector Extensions */ "\036F16C" /* Half-precision conversions */ "\037RDRAND" /* RDRAND Instruction */ "\040HV" /* Hypervisor */ ); } if (amd_feature != 0) { printf("\n AMD Features=0x%b", amd_feature, "\020" /* in hex */ "\001" /* Same */ "\002" /* Same */ "\003" /* Same */ "\004" /* Same */ "\005" /* Same */ "\006" /* Same */ "\007" /* Same */ "\010" /* Same */ "\011" /* Same */ "\012" /* Same */ "\013" /* Undefined */ "\014SYSCALL" /* Have SYSCALL/SYSRET */ "\015" /* Same */ "\016" /* Same */ "\017" /* Same */ "\020" /* Same */ "\021" /* Same */ "\022" /* Same */ "\023" /* Reserved, unknown */ "\024MP" /* Multiprocessor Capable */ "\025NX" /* Has EFER.NXE, NX */ "\026" /* Undefined */ "\027MMX+" /* AMD MMX Extensions */ "\030" /* Same */ "\031" /* Same */ "\032FFXSR" /* Fast FXSAVE/FXRSTOR */ "\033Page1GB" /* 1-GB large page support */ "\034RDTSCP" /* RDTSCP */ "\035" /* Undefined */ "\036LM" /* 64 bit long mode */ "\0373DNow!+" /* AMD 3DNow! Extensions */ "\0403DNow!" /* AMD 3DNow! */ ); } if (amd_feature2 != 0) { printf("\n AMD Features2=0x%b", amd_feature2, "\020" "\001LAHF" /* LAHF/SAHF in long mode */ "\002CMP" /* CMP legacy */ "\003SVM" /* Secure Virtual Mode */ "\004ExtAPIC" /* Extended APIC register */ "\005CR8" /* CR8 in legacy mode */ "\006ABM" /* LZCNT instruction */ "\007SSE4A" /* SSE4A */ "\010MAS" /* Misaligned SSE mode */ "\011Prefetch" /* 3DNow! Prefetch/PrefetchW */ "\012OSVW" /* OS visible workaround */ "\013IBS" /* Instruction based sampling */ "\014XOP" /* XOP extended instructions */ "\015SKINIT" /* SKINIT/STGI */ "\016WDT" /* Watchdog timer */ "\017" "\020LWP" /* Lightweight Profiling */ "\021FMA4" /* 4-operand FMA instructions */ "\022TCE" /* Translation Cache Extension */ "\023" "\024NodeId" /* NodeId MSR support */ "\025" "\026TBM" /* Trailing Bit Manipulation */ "\027Topology" /* Topology Extensions */ "\030PCXC" /* Core perf count */ "\031PNXC" /* NB perf count */ "\032" "\033DBE" /* Data Breakpoint extension */ "\034PTSC" /* Performance TSC */ "\035PL2I" /* L2I perf count */ "\036" "\037" "\040" ); } if (cpu_stdext_feature != 0) { printf("\n Structured Extended Features=0x%b", cpu_stdext_feature, "\020" /* RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ "\001FSGSBASE" "\002TSCADJ" "\003SGX" /* Bit Manipulation Instructions */ "\004BMI1" /* Hardware Lock Elision */ "\005HLE" /* Advanced Vector Instructions 2 */ "\006AVX2" /* FDP_EXCPTN_ONLY */ "\007FDPEXC" /* Supervisor Mode Execution Prot. 
*/ "\010SMEP" /* Bit Manipulation Instructions */ "\011BMI2" "\012ERMS" /* Invalidate Processor Context ID */ "\013INVPCID" /* Restricted Transactional Memory */ "\014RTM" "\015PQM" "\016NFPUSG" /* Intel Memory Protection Extensions */ "\017MPX" "\020PQE" /* AVX512 Foundation */ "\021AVX512F" "\022AVX512DQ" /* Enhanced NRBG */ "\023RDSEED" /* ADCX + ADOX */ "\024ADX" /* Supervisor Mode Access Prevention */ "\025SMAP" "\026AVX512IFMA" "\027PCOMMIT" "\030CLFLUSHOPT" "\031CLWB" "\032PROCTRACE" "\033AVX512PF" "\034AVX512ER" "\035AVX512CD" "\036SHA" "\037AVX512BW" ); } if (cpu_stdext_feature2 != 0) { printf("\n Structured Extended Features2=0x%b", cpu_stdext_feature2, "\020" "\001PREFETCHWT1" "\002AVX512VBMI" "\003UMIP" "\004PKU" "\005OSPKE" "\027RDPID" "\037SGXLC" ); } if ((cpu_feature2 & CPUID2_XSAVE) != 0) { cpuid_count(0xd, 0x1, regs); if (regs[0] != 0) { printf("\n XSAVE Features=0x%b", regs[0], "\020" "\001XSAVEOPT" "\002XSAVEC" "\003XINUSE" "\004XSAVES"); } } if (via_feature_rng != 0 || via_feature_xcrypt != 0) print_via_padlock_info(); if (cpu_feature2 & CPUID2_VMX) print_vmx_info(); if (amd_feature2 & AMDID2_SVM) print_svm_info(); if ((cpu_feature & CPUID_HTT) && cpu_vendor_id == CPU_VENDOR_AMD) cpu_feature &= ~CPUID_HTT; /* * If this CPU supports P-state invariant TSC then * mention the capability. */ if (tsc_is_invariant) { printf("\n TSC: P-state invariant"); if (tsc_perf_stat) printf(", performance statistics"); } } #ifdef __i386__ } else if (cpu_vendor_id == CPU_VENDOR_CYRIX) { printf(" DIR=0x%04x", cyrix_did); printf(" Stepping=%u", (cyrix_did & 0xf000) >> 12); printf(" Revision=%u", (cyrix_did & 0x0f00) >> 8); #ifndef CYRIX_CACHE_REALLY_WORKS if (cpu == CPU_M1 && (cyrix_did & 0xff00) < 0x1700) printf("\n CPU cache: write-through mode"); #endif #endif } /* Avoid ugly blank lines: only print newline when we have to. */ if (*cpu_vendor || cpu_id) printf("\n"); if (bootverbose) { if (cpu_vendor_id == CPU_VENDOR_AMD) print_AMD_info(); else if (cpu_vendor_id == CPU_VENDOR_INTEL) print_INTEL_info(); #ifdef __i386__ else if (cpu_vendor_id == CPU_VENDOR_TRANSMETA) print_transmeta_info(); #endif } print_hypervisor_info(); } #ifdef __i386__ void panicifcpuunsupported(void) { #if !defined(lint) #if !defined(I486_CPU) && !defined(I586_CPU) && !defined(I686_CPU) #error This kernel is not configured for one of the supported CPUs #endif #else /* lint */ #endif /* lint */ /* * Now that we have told the user what they have, * let them know if that machine type isn't configured. */ switch (cpu_class) { case CPUCLASS_286: /* a 286 should not make it this far, anyway */ case CPUCLASS_386: #if !defined(I486_CPU) case CPUCLASS_486: #endif #if !defined(I586_CPU) case CPUCLASS_586: #endif #if !defined(I686_CPU) case CPUCLASS_686: #endif panic("CPU class not configured"); default: break; } } static volatile u_int trap_by_rdmsr; /* * Special exception 6 handler. * The rdmsr instruction generates invalid opcodes fault on 486-class * Cyrix CPU. Stacked eip register points the rdmsr instruction in the * function identblue() when this handler is called. Stacked eip should * be advanced. */ inthand_t bluetrap6; #ifdef __GNUCLIKE_ASM __asm (" \n\ .text \n\ .p2align 2,0x90 \n\ .type " __XSTRING(CNAME(bluetrap6)) ",@function \n\ " __XSTRING(CNAME(bluetrap6)) ": \n\ ss \n\ movl $0xa8c1d," __XSTRING(CNAME(trap_by_rdmsr)) " \n\ addl $2, (%esp) /* rdmsr is a 2-byte instruction */ \n\ iret \n\ "); #endif /* * Special exception 13 handler. * Accessing non-existent MSR generates general protection fault. 
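 * Unlike the invalid-opcode case, #GP pushes an error code, so the
 * handler below discards it before advancing the saved eip past the
 * two-byte rdmsr instruction.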
*/ inthand_t bluetrap13; #ifdef __GNUCLIKE_ASM __asm (" \n\ .text \n\ .p2align 2,0x90 \n\ .type " __XSTRING(CNAME(bluetrap13)) ",@function \n\ " __XSTRING(CNAME(bluetrap13)) ": \n\ ss \n\ movl $0xa89c4," __XSTRING(CNAME(trap_by_rdmsr)) " \n\ popl %eax /* discard error code */ \n\ addl $2, (%esp) /* rdmsr is a 2-byte instruction */ \n\ iret \n\ "); #endif /* * Distinguish IBM Blue Lightning CPU from Cyrix CPUs that does not * support cpuid instruction. This function should be called after * loading interrupt descriptor table register. * * I don't like this method that handles fault, but I couldn't get * information for any other methods. Does blue giant know? */ static int identblue(void) { trap_by_rdmsr = 0; /* * Cyrix 486-class CPU does not support rdmsr instruction. * The rdmsr instruction generates invalid opcode fault, and exception * will be trapped by bluetrap6() on Cyrix 486-class CPU. The * bluetrap6() set the magic number to trap_by_rdmsr. */ setidt(IDT_UD, bluetrap6, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* * Certain BIOS disables cpuid instruction of Cyrix 6x86MX CPU. * In this case, rdmsr generates general protection fault, and * exception will be trapped by bluetrap13(). */ setidt(IDT_GP, bluetrap13, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); rdmsr(0x1002); /* Cyrix CPU generates fault. */ if (trap_by_rdmsr == 0xa8c1d) return IDENTBLUE_CYRIX486; else if (trap_by_rdmsr == 0xa89c4) return IDENTBLUE_CYRIXM2; return IDENTBLUE_IBMCPU; } /* * identifycyrix() set lower 16 bits of cyrix_did as follows: * * F E D C B A 9 8 7 6 5 4 3 2 1 0 * +-------+-------+---------------+ * | SID | RID | Device ID | * | (DIR 1) | (DIR 0) | * +-------+-------+---------------+ */ static void identifycyrix(void) { register_t saveintr; int ccr2_test = 0, dir_test = 0; u_char ccr2, ccr3; saveintr = intr_disable(); ccr2 = read_cyrix_reg(CCR2); write_cyrix_reg(CCR2, ccr2 ^ CCR2_LOCK_NW); read_cyrix_reg(CCR2); if (read_cyrix_reg(CCR2) != ccr2) ccr2_test = 1; write_cyrix_reg(CCR2, ccr2); ccr3 = read_cyrix_reg(CCR3); write_cyrix_reg(CCR3, ccr3 ^ CCR3_MAPEN3); read_cyrix_reg(CCR3); if (read_cyrix_reg(CCR3) != ccr3) dir_test = 1; /* CPU supports DIRs. */ write_cyrix_reg(CCR3, ccr3); if (dir_test) { /* Device ID registers are available. */ cyrix_did = read_cyrix_reg(DIR1) << 8; cyrix_did += read_cyrix_reg(DIR0); } else if (ccr2_test) cyrix_did = 0x0010; /* 486S A-step */ else cyrix_did = 0x00ff; /* Old 486SLC/DLC and TI486SXLC/SXL */ intr_restore(saveintr); } #endif /* Update TSC freq with the value indicated by the caller. */ static void tsc_freq_changed(void *arg __unused, const struct cf_level *level, int status) { /* If there was an error during the transition, don't do anything. */ if (status != 0) return; /* Total setting for this level gives the new frequency in MHz. 
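 * (This keeps the hw.clockrate sysctl in sync after cpufreq
 * transitions.)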
*/ hw_clockrate = level->total_set.freq; } static void hook_tsc_freq(void *arg __unused) { if (tsc_is_invariant) return; tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change, tsc_freq_changed, NULL, EVENTHANDLER_PRI_ANY); } SYSINIT(hook_tsc_freq, SI_SUB_CONFIGURE, SI_ORDER_ANY, hook_tsc_freq, NULL); static const char *const vm_bnames[] = { "QEMU", /* QEMU */ "Plex86", /* Plex86 */ "Bochs", /* Bochs */ "Xen", /* Xen */ "BHYVE", /* bhyve */ "Seabios", /* KVM */ NULL }; static const char *const vm_pnames[] = { "VMware Virtual Platform", /* VMWare VM */ "Virtual Machine", /* Microsoft VirtualPC */ "VirtualBox", /* Sun xVM VirtualBox */ "Parallels Virtual Platform", /* Parallels VM */ "KVM", /* KVM */ NULL }; static void identify_hypervisor(void) { u_int regs[4]; char *p; int i; /* * [RFC] CPUID usage for interaction between Hypervisors and Linux. * http://lkml.org/lkml/2008/10/1/246 * * KB1009458: Mechanisms to determine if software is running in * a VMware virtual machine * http://kb.vmware.com/kb/1009458 */ if (cpu_feature2 & CPUID2_HV) { vm_guest = VM_GUEST_VM; do_cpuid(0x40000000, regs); if (regs[0] >= 0x40000000) { hv_high = regs[0]; ((u_int *)&hv_vendor)[0] = regs[1]; ((u_int *)&hv_vendor)[1] = regs[2]; ((u_int *)&hv_vendor)[2] = regs[3]; hv_vendor[12] = '\0'; if (strcmp(hv_vendor, "VMwareVMware") == 0) vm_guest = VM_GUEST_VMWARE; else if (strcmp(hv_vendor, "Microsoft Hv") == 0) vm_guest = VM_GUEST_HV; else if (strcmp(hv_vendor, "KVMKVMKVM") == 0) vm_guest = VM_GUEST_KVM; } return; } /* * Examine SMBIOS strings for older hypervisors. */ p = kern_getenv("smbios.system.serial"); if (p != NULL) { if (strncmp(p, "VMware-", 7) == 0 || strncmp(p, "VMW", 3) == 0) { vmware_hvcall(VMW_HVCMD_GETVERSION, regs); if (regs[1] == VMW_HVMAGIC) { vm_guest = VM_GUEST_VMWARE; freeenv(p); return; } } freeenv(p); } /* * XXX: Some of these entries may not be needed since they were * added to FreeBSD before the checks above. */ p = kern_getenv("smbios.bios.vendor"); if (p != NULL) { for (i = 0; vm_bnames[i] != NULL; i++) if (strcmp(p, vm_bnames[i]) == 0) { vm_guest = VM_GUEST_VM; freeenv(p); return; } freeenv(p); } p = kern_getenv("smbios.system.product"); if (p != NULL) { for (i = 0; vm_pnames[i] != NULL; i++) if (strcmp(p, vm_pnames[i]) == 0) { vm_guest = VM_GUEST_VM; freeenv(p); return; } freeenv(p); } } bool fix_cpuid(void) { uint64_t msr; /* * Clear "Limit CPUID Maxval" bit and return true if the caller should * get the largest standard CPUID function number again if it is set * from BIOS. It is necessary for probing correct CPU topology later * and for the correct operation of the AVX-aware userspace. */ if (cpu_vendor_id == CPU_VENDOR_INTEL && ((CPUID_TO_FAMILY(cpu_id) == 0xf && CPUID_TO_MODEL(cpu_id) >= 0x3) || (CPUID_TO_FAMILY(cpu_id) == 0x6 && CPUID_TO_MODEL(cpu_id) >= 0xe))) { msr = rdmsr(MSR_IA32_MISC_ENABLE); if ((msr & IA32_MISC_EN_LIMCPUID) != 0) { msr &= ~IA32_MISC_EN_LIMCPUID; wrmsr(MSR_IA32_MISC_ENABLE, msr); return (true); } } /* * Re-enable AMD Topology Extension that could be disabled by BIOS * on some notebook processors. Without the extension it's really * hard to determine the correct CPU cache topology. * See BIOS and Kernel Developer’s Guide (BKDG) for AMD Family 15h * Models 60h-6Fh Processors, Publication # 50742. 
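 * (Bit 54 of MSR_EXTFEATURES corresponds to the TopologyExtensions
 * CPUID flag, Fn8000_0001 %ecx bit 22.)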
*/ if (cpu_vendor_id == CPU_VENDOR_AMD && CPUID_TO_FAMILY(cpu_id) == 0x15) { msr = rdmsr(MSR_EXTFEATURES); if ((msr & ((uint64_t)1 << 54)) == 0) { msr |= (uint64_t)1 << 54; wrmsr(MSR_EXTFEATURES, msr); return (true); } } return (false); } /* * Final stage of CPU identification. */ #ifdef __i386__ void finishidentcpu(void) #else void identify_cpu(void) #endif { u_int regs[4], cpu_stdext_disable; #ifdef __i386__ u_char ccr3; #endif #ifdef __amd64__ do_cpuid(0, regs); cpu_high = regs[0]; ((u_int *)&cpu_vendor)[0] = regs[1]; ((u_int *)&cpu_vendor)[1] = regs[3]; ((u_int *)&cpu_vendor)[2] = regs[2]; cpu_vendor[12] = '\0'; do_cpuid(1, regs); cpu_id = regs[0]; cpu_procinfo = regs[1]; cpu_feature = regs[3]; cpu_feature2 = regs[2]; #endif identify_hypervisor(); cpu_vendor_id = find_cpu_vendor_id(); if (fix_cpuid()) { do_cpuid(0, regs); cpu_high = regs[0]; } if (cpu_high >= 5 && (cpu_feature2 & CPUID2_MON) != 0) { do_cpuid(5, regs); cpu_mon_mwait_flags = regs[2]; cpu_mon_min_size = regs[0] & CPUID5_MON_MIN_SIZE; cpu_mon_max_size = regs[1] & CPUID5_MON_MAX_SIZE; } if (cpu_high >= 7) { cpuid_count(7, 0, regs); cpu_stdext_feature = regs[1]; /* * Some hypervisors fail to filter out unsupported * extended features. For now, disable the * extensions, activation of which requires setting a * bit in CR4, and which VM monitors do not support. */ if (cpu_feature2 & CPUID2_HV) { cpu_stdext_disable = CPUID_STDEXT_FSGSBASE | CPUID_STDEXT_SMEP; } else cpu_stdext_disable = 0; TUNABLE_INT_FETCH("hw.cpu_stdext_disable", &cpu_stdext_disable); cpu_stdext_feature &= ~cpu_stdext_disable; cpu_stdext_feature2 = regs[2]; } #ifdef __i386__ if (cpu_high > 0 && (cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD || cpu_vendor_id == CPU_VENDOR_TRANSMETA || cpu_vendor_id == CPU_VENDOR_CENTAUR || cpu_vendor_id == CPU_VENDOR_NSC)) { do_cpuid(0x80000000, regs); if (regs[0] >= 0x80000000) cpu_exthigh = regs[0]; } #else if (cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD || cpu_vendor_id == CPU_VENDOR_CENTAUR) { do_cpuid(0x80000000, regs); cpu_exthigh = regs[0]; } #endif if (cpu_exthigh >= 0x80000001) { do_cpuid(0x80000001, regs); amd_feature = regs[3] & ~(cpu_feature & 0x0183f3ff); amd_feature2 = regs[2]; } if (cpu_exthigh >= 0x80000007) { do_cpuid(0x80000007, regs); amd_pminfo = regs[3]; } if (cpu_exthigh >= 0x80000008) { do_cpuid(0x80000008, regs); cpu_maxphyaddr = regs[0] & 0xff; cpu_procinfo2 = regs[2]; } else { cpu_maxphyaddr = (cpu_feature & CPUID_PAE) != 0 ? 36 : 32; } #ifdef __i386__ if (cpu_vendor_id == CPU_VENDOR_CYRIX) { if (cpu == CPU_486) { /* * These conditions are equivalent to: * - CPU does not support cpuid instruction. * - Cyrix/IBM CPU is detected. */ if (identblue() == IDENTBLUE_IBMCPU) { strcpy(cpu_vendor, "IBM"); cpu_vendor_id = CPU_VENDOR_IBM; cpu = CPU_BLUE; return; } } switch (cpu_id & 0xf00) { case 0x600: /* * Cyrix's datasheet does not describe DIRs. * Therefor, I assume it does not have them * and use the result of the cpuid instruction. * XXX they seem to have it for now at least. -Peter */ identifycyrix(); cpu = CPU_M2; break; default: identifycyrix(); /* * This routine contains a trick. * Don't check (cpu_id & 0x00f0) == 0x50 to detect M2, now. 
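 * (The DIR0 nibble gathered by identifycyrix() is decoded below
 * instead.)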
*/ switch (cyrix_did & 0x00f0) { case 0x00: case 0xf0: cpu = CPU_486DLC; break; case 0x10: cpu = CPU_CY486DX; break; case 0x20: if ((cyrix_did & 0x000f) < 8) cpu = CPU_M1; else cpu = CPU_M1SC; break; case 0x30: cpu = CPU_M1; break; case 0x40: /* MediaGX CPU */ cpu = CPU_M1SC; break; default: /* M2 and later CPUs are treated as M2. */ cpu = CPU_M2; /* * enable cpuid instruction. */ ccr3 = read_cyrix_reg(CCR3); write_cyrix_reg(CCR3, CCR3_MAPEN0); write_cyrix_reg(CCR4, read_cyrix_reg(CCR4) | CCR4_CPUID); write_cyrix_reg(CCR3, ccr3); do_cpuid(0, regs); cpu_high = regs[0]; /* eax */ do_cpuid(1, regs); cpu_id = regs[0]; /* eax */ cpu_feature = regs[3]; /* edx */ break; } } } else if (cpu == CPU_486 && *cpu_vendor == '\0') { /* * There are BlueLightning CPUs that do not change * undefined flags by dividing 5 by 2. In this case, * the CPU identification routine in locore.s leaves * cpu_vendor null string and puts CPU_486 into the * cpu. */ if (identblue() == IDENTBLUE_IBMCPU) { strcpy(cpu_vendor, "IBM"); cpu_vendor_id = CPU_VENDOR_IBM; cpu = CPU_BLUE; return; } } #endif } static u_int find_cpu_vendor_id(void) { int i; for (i = 0; i < nitems(cpu_vendors); i++) if (strcmp(cpu_vendor, cpu_vendors[i].vendor) == 0) return (cpu_vendors[i].vendor_id); return (0); } static void print_AMD_assoc(int i) { if (i == 255) printf(", fully associative\n"); else printf(", %d-way associative\n", i); } static void print_AMD_l2_assoc(int i) { switch (i & 0x0f) { case 0: printf(", disabled/not present\n"); break; case 1: printf(", direct mapped\n"); break; case 2: printf(", 2-way associative\n"); break; case 4: printf(", 4-way associative\n"); break; case 6: printf(", 8-way associative\n"); break; case 8: printf(", 16-way associative\n"); break; case 15: printf(", fully associative\n"); break; default: printf(", reserved configuration\n"); break; } } static void print_AMD_info(void) { #ifdef __i386__ uint64_t amd_whcr; #endif u_int regs[4]; if (cpu_exthigh >= 0x80000005) { do_cpuid(0x80000005, regs); printf("L1 2MB data TLB: %d entries", (regs[0] >> 16) & 0xff); print_AMD_assoc(regs[0] >> 24); printf("L1 2MB instruction TLB: %d entries", regs[0] & 0xff); print_AMD_assoc((regs[0] >> 8) & 0xff); printf("L1 4KB data TLB: %d entries", (regs[1] >> 16) & 0xff); print_AMD_assoc(regs[1] >> 24); printf("L1 4KB instruction TLB: %d entries", regs[1] & 0xff); print_AMD_assoc((regs[1] >> 8) & 0xff); printf("L1 data cache: %d kbytes", regs[2] >> 24); printf(", %d bytes/line", regs[2] & 0xff); printf(", %d lines/tag", (regs[2] >> 8) & 0xff); print_AMD_assoc((regs[2] >> 16) & 0xff); printf("L1 instruction cache: %d kbytes", regs[3] >> 24); printf(", %d bytes/line", regs[3] & 0xff); printf(", %d lines/tag", (regs[3] >> 8) & 0xff); print_AMD_assoc((regs[3] >> 16) & 0xff); } if (cpu_exthigh >= 0x80000006) { do_cpuid(0x80000006, regs); if ((regs[0] >> 16) != 0) { printf("L2 2MB data TLB: %d entries", (regs[0] >> 16) & 0xfff); print_AMD_l2_assoc(regs[0] >> 28); printf("L2 2MB instruction TLB: %d entries", regs[0] & 0xfff); print_AMD_l2_assoc((regs[0] >> 28) & 0xf); } else { printf("L2 2MB unified TLB: %d entries", regs[0] & 0xfff); print_AMD_l2_assoc((regs[0] >> 28) & 0xf); } if ((regs[1] >> 16) != 0) { printf("L2 4KB data TLB: %d entries", (regs[1] >> 16) & 0xfff); print_AMD_l2_assoc(regs[1] >> 28); printf("L2 4KB instruction TLB: %d entries", (regs[1] >> 16) & 0xfff); print_AMD_l2_assoc((regs[1] >> 28) & 0xf); } else { printf("L2 4KB unified TLB: %d entries", (regs[1] >> 16) & 0xfff); print_AMD_l2_assoc((regs[1] >> 28) & 0xf); } 
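	/*
	 * CPUID 0x80000006 %ecx describes the unified L2: size in KB in
	 * bits 31:16, associativity code in 15:12, lines/tag in 11:8 and
	 * line size in 7:0, decoded below.
	 */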
printf("L2 unified cache: %d kbytes", regs[2] >> 16); printf(", %d bytes/line", regs[2] & 0xff); printf(", %d lines/tag", (regs[2] >> 8) & 0x0f); print_AMD_l2_assoc((regs[2] >> 12) & 0x0f); } #ifdef __i386__ if (((cpu_id & 0xf00) == 0x500) && (((cpu_id & 0x0f0) > 0x80) || (((cpu_id & 0x0f0) == 0x80) && (cpu_id & 0x00f) > 0x07))) { /* K6-2(new core [Stepping 8-F]), K6-III or later */ amd_whcr = rdmsr(0xc0000082); if (!(amd_whcr & (0x3ff << 22))) { printf("Write Allocate Disable\n"); } else { printf("Write Allocate Enable Limit: %dM bytes\n", (u_int32_t)((amd_whcr & (0x3ff << 22)) >> 22) * 4); printf("Write Allocate 15-16M bytes: %s\n", (amd_whcr & (1 << 16)) ? "Enable" : "Disable"); } } else if (((cpu_id & 0xf00) == 0x500) && ((cpu_id & 0x0f0) > 0x50)) { /* K6, K6-2(old core) */ amd_whcr = rdmsr(0xc0000082); if (!(amd_whcr & (0x7f << 1))) { printf("Write Allocate Disable\n"); } else { printf("Write Allocate Enable Limit: %dM bytes\n", (u_int32_t)((amd_whcr & (0x7f << 1)) >> 1) * 4); printf("Write Allocate 15-16M bytes: %s\n", (amd_whcr & 0x0001) ? "Enable" : "Disable"); printf("Hardware Write Allocate Control: %s\n", (amd_whcr & 0x0100) ? "Enable" : "Disable"); } } #endif /* * Opteron Rev E shows a bug as in very rare occasions a read memory * barrier is not performed as expected if it is followed by a * non-atomic read-modify-write instruction. * As long as that bug pops up very rarely (intensive machine usage * on other operating systems generally generates one unexplainable * crash any 2 months) and as long as a model specific fix would be * impractical at this stage, print out a warning string if the broken * model and family are identified. */ if (CPUID_TO_FAMILY(cpu_id) == 0xf && CPUID_TO_MODEL(cpu_id) >= 0x20 && CPUID_TO_MODEL(cpu_id) <= 0x3f) printf("WARNING: This architecture revision has known SMP " "hardware bugs which may cause random instability\n"); } static void print_INTEL_info(void) { u_int regs[4]; u_int rounds, regnum; u_int nwaycode, nway; if (cpu_high >= 2) { rounds = 0; do { do_cpuid(0x2, regs); if (rounds == 0 && (rounds = (regs[0] & 0xff)) == 0) break; /* we have a buggy CPU */ for (regnum = 0; regnum <= 3; ++regnum) { if (regs[regnum] & (1<<31)) continue; if (regnum != 0) print_INTEL_TLB(regs[regnum] & 0xff); print_INTEL_TLB((regs[regnum] >> 8) & 0xff); print_INTEL_TLB((regs[regnum] >> 16) & 0xff); print_INTEL_TLB((regs[regnum] >> 24) & 0xff); } } while (--rounds > 0); } if (cpu_exthigh >= 0x80000006) { do_cpuid(0x80000006, regs); nwaycode = (regs[2] >> 12) & 0x0f; if (nwaycode >= 0x02 && nwaycode <= 0x08) nway = 1 << (nwaycode / 2); else nway = 0; printf("L2 cache: %u kbytes, %u-way associative, %u bytes/line\n", (regs[2] >> 16) & 0xffff, nway, regs[2] & 0xff); } } static void print_INTEL_TLB(u_int data) { switch (data) { case 0x0: case 0x40: default: break; case 0x1: printf("Instruction TLB: 4 KB pages, 4-way set associative, 32 entries\n"); break; case 0x2: printf("Instruction TLB: 4 MB pages, fully associative, 2 entries\n"); break; case 0x3: printf("Data TLB: 4 KB pages, 4-way set associative, 64 entries\n"); break; case 0x4: printf("Data TLB: 4 MB Pages, 4-way set associative, 8 entries\n"); break; case 0x6: printf("1st-level instruction cache: 8 KB, 4-way set associative, 32 byte line size\n"); break; case 0x8: printf("1st-level instruction cache: 16 KB, 4-way set associative, 32 byte line size\n"); break; case 0x9: printf("1st-level instruction cache: 32 KB, 4-way set associative, 64 byte line size\n"); break; case 0xa: printf("1st-level data cache: 8 KB, 
2-way set associative, 32 byte line size\n"); break; case 0xb: printf("Instruction TLB: 4 MByte pages, 4-way set associative, 4 entries\n"); break; case 0xc: printf("1st-level data cache: 16 KB, 4-way set associative, 32 byte line size\n"); break; case 0xd: printf("1st-level data cache: 16 KBytes, 4-way set associative, 64 byte line size"); break; case 0xe: printf("1st-level data cache: 24 KBytes, 6-way set associative, 64 byte line size\n"); break; case 0x1d: printf("2nd-level cache: 128 KBytes, 2-way set associative, 64 byte line size\n"); break; case 0x21: printf("2nd-level cache: 256 KBytes, 8-way set associative, 64 byte line size\n"); break; case 0x22: printf("3rd-level cache: 512 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x23: printf("3rd-level cache: 1 MB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x24: printf("2nd-level cache: 1 MBytes, 16-way set associative, 64 byte line size\n"); break; case 0x25: printf("3rd-level cache: 2 MB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x29: printf("3rd-level cache: 4 MB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x2c: printf("1st-level data cache: 32 KB, 8-way set associative, 64 byte line size\n"); break; case 0x30: printf("1st-level instruction cache: 32 KB, 8-way set associative, 64 byte line size\n"); break; case 0x39: /* De-listed in SDM rev. 54 */ printf("2nd-level cache: 128 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x3b: /* De-listed in SDM rev. 54 */ printf("2nd-level cache: 128 KB, 2-way set associative, sectored cache, 64 byte line size\n"); break; case 0x3c: /* De-listed in SDM rev. 54 */ printf("2nd-level cache: 256 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x41: printf("2nd-level cache: 128 KB, 4-way set associative, 32 byte line size\n"); break; case 0x42: printf("2nd-level cache: 256 KB, 4-way set associative, 32 byte line size\n"); break; case 0x43: printf("2nd-level cache: 512 KB, 4-way set associative, 32 byte line size\n"); break; case 0x44: printf("2nd-level cache: 1 MB, 4-way set associative, 32 byte line size\n"); break; case 0x45: printf("2nd-level cache: 2 MB, 4-way set associative, 32 byte line size\n"); break; case 0x46: printf("3rd-level cache: 4 MB, 4-way set associative, 64 byte line size\n"); break; case 0x47: printf("3rd-level cache: 8 MB, 8-way set associative, 64 byte line size\n"); break; case 0x48: printf("2nd-level cache: 3MByte, 12-way set associative, 64 byte line size\n"); break; case 0x49: if (CPUID_TO_FAMILY(cpu_id) == 0xf && CPUID_TO_MODEL(cpu_id) == 0x6) printf("3rd-level cache: 4MB, 16-way set associative, 64-byte line size\n"); else printf("2nd-level cache: 4 MByte, 16-way set associative, 64 byte line size"); break; case 0x4a: printf("3rd-level cache: 6MByte, 12-way set associative, 64 byte line size\n"); break; case 0x4b: printf("3rd-level cache: 8MByte, 16-way set associative, 64 byte line size\n"); break; case 0x4c: printf("3rd-level cache: 12MByte, 12-way set associative, 64 byte line size\n"); break; case 0x4d: printf("3rd-level cache: 16MByte, 16-way set associative, 64 byte line size\n"); break; case 0x4e: printf("2nd-level cache: 6MByte, 24-way set associative, 64 byte line size\n"); break; case 0x4f: printf("Instruction TLB: 4 KByte pages, 32 entries\n"); break; case 0x50: printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 64 entries\n"); break; case 0x51: 
printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 128 entries\n"); break; case 0x52: printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 256 entries\n"); break; case 0x55: printf("Instruction TLB: 2-MByte or 4-MByte pages, fully associative, 7 entries\n"); break; case 0x56: printf("Data TLB0: 4 MByte pages, 4-way set associative, 16 entries\n"); break; case 0x57: printf("Data TLB0: 4 KByte pages, 4-way associative, 16 entries\n"); break; case 0x59: printf("Data TLB0: 4 KByte pages, fully associative, 16 entries\n"); break; case 0x5a: printf("Data TLB0: 2-MByte or 4 MByte pages, 4-way set associative, 32 entries\n"); break; case 0x5b: printf("Data TLB: 4 KB or 4 MB pages, fully associative, 64 entries\n"); break; case 0x5c: printf("Data TLB: 4 KB or 4 MB pages, fully associative, 128 entries\n"); break; case 0x5d: printf("Data TLB: 4 KB or 4 MB pages, fully associative, 256 entries\n"); break; case 0x60: printf("1st-level data cache: 16 KB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x61: printf("Instruction TLB: 4 KByte pages, fully associative, 48 entries\n"); break; case 0x63: printf("Data TLB: 2 MByte or 4 MByte pages, 4-way set associative, 32 entries and a separate array with 1 GByte pages, 4-way set associative, 4 entries\n"); break; case 0x64: printf("Data TLB: 4 KBytes pages, 4-way set associative, 512 entries\n"); break; case 0x66: printf("1st-level data cache: 8 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x67: printf("1st-level data cache: 16 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x68: printf("1st-level data cache: 32 KB, 4 way set associative, sectored cache, 64 byte line size\n"); break; case 0x6a: printf("uTLB: 4KByte pages, 8-way set associative, 64 entries\n"); break; case 0x6b: printf("DTLB: 4KByte pages, 8-way set associative, 256 entries\n"); break; case 0x6c: printf("DTLB: 2M/4M pages, 8-way set associative, 128 entries\n"); break; case 0x6d: printf("DTLB: 1 GByte pages, fully associative, 16 entries\n"); break; case 0x70: printf("Trace cache: 12K-uops, 8-way set associative\n"); break; case 0x71: printf("Trace cache: 16K-uops, 8-way set associative\n"); break; case 0x72: printf("Trace cache: 32K-uops, 8-way set associative\n"); break; case 0x76: printf("Instruction TLB: 2M/4M pages, fully associative, 8 entries\n"); break; case 0x78: printf("2nd-level cache: 1 MB, 4-way set associative, 64-byte line size\n"); break; case 0x79: printf("2nd-level cache: 128 KB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x7a: printf("2nd-level cache: 256 KB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x7b: printf("2nd-level cache: 512 KB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x7c: printf("2nd-level cache: 1 MB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x7d: printf("2nd-level cache: 2-MB, 8-way set associative, 64-byte line size\n"); break; case 0x7f: printf("2nd-level cache: 512-KB, 2-way set associative, 64-byte line size\n"); break; case 0x80: printf("2nd-level cache: 512 KByte, 8-way set associative, 64-byte line size\n"); break; case 0x82: printf("2nd-level cache: 256 KB, 8-way set associative, 32 byte line size\n"); break; case 0x83: printf("2nd-level cache: 512 KB, 8-way set associative, 32 byte line size\n"); break; case 0x84: printf("2nd-level cache: 1 MB, 8-way set associative, 32 byte line size\n"); break; 
case 0x85: printf("2nd-level cache: 2 MB, 8-way set associative, 32 byte line size\n"); break; case 0x86: printf("2nd-level cache: 512 KB, 4-way set associative, 64 byte line size\n"); break; case 0x87: printf("2nd-level cache: 1 MB, 8-way set associative, 64 byte line size\n"); break; case 0xa0: printf("DTLB: 4k pages, fully associative, 32 entries\n"); break; case 0xb0: printf("Instruction TLB: 4 KB Pages, 4-way set associative, 128 entries\n"); break; case 0xb1: printf("Instruction TLB: 2M pages, 4-way, 8 entries or 4M pages, 4-way, 4 entries\n"); break; case 0xb2: printf("Instruction TLB: 4KByte pages, 4-way set associative, 64 entries\n"); break; case 0xb3: printf("Data TLB: 4 KB Pages, 4-way set associative, 128 entries\n"); break; case 0xb4: printf("Data TLB1: 4 KByte pages, 4-way associative, 256 entries\n"); break; case 0xb5: printf("Instruction TLB: 4KByte pages, 8-way set associative, 64 entries\n"); break; case 0xb6: printf("Instruction TLB: 4KByte pages, 8-way set associative, 128 entries\n"); break; case 0xba: printf("Data TLB1: 4 KByte pages, 4-way associative, 64 entries\n"); break; case 0xc0: printf("Data TLB: 4 KByte and 4 MByte pages, 4-way associative, 8 entries\n"); break; case 0xc1: printf("Shared 2nd-Level TLB: 4 KByte/2MByte pages, 8-way associative, 1024 entries\n"); break; case 0xc2: printf("DTLB: 4 KByte/2 MByte pages, 4-way associative, 16 entries\n"); break; case 0xc3: printf("Shared 2nd-Level TLB: 4 KByte /2 MByte pages, 6-way associative, 1536 entries. Also 1GBbyte pages, 4-way, 16 entries\n"); break; case 0xc4: printf("DTLB: 2M/4M Byte pages, 4-way associative, 32 entries\n"); break; case 0xca: printf("Shared 2nd-Level TLB: 4 KByte pages, 4-way associative, 512 entries\n"); break; case 0xd0: printf("3rd-level cache: 512 KByte, 4-way set associative, 64 byte line size\n"); break; case 0xd1: printf("3rd-level cache: 1 MByte, 4-way set associative, 64 byte line size\n"); break; case 0xd2: printf("3rd-level cache: 2 MByte, 4-way set associative, 64 byte line size\n"); break; case 0xd6: printf("3rd-level cache: 1 MByte, 8-way set associative, 64 byte line size\n"); break; case 0xd7: printf("3rd-level cache: 2 MByte, 8-way set associative, 64 byte line size\n"); break; case 0xd8: printf("3rd-level cache: 4 MByte, 8-way set associative, 64 byte line size\n"); break; case 0xdc: printf("3rd-level cache: 1.5 MByte, 12-way set associative, 64 byte line size\n"); break; case 0xdd: printf("3rd-level cache: 3 MByte, 12-way set associative, 64 byte line size\n"); break; case 0xde: printf("3rd-level cache: 6 MByte, 12-way set associative, 64 byte line size\n"); break; case 0xe2: printf("3rd-level cache: 2 MByte, 16-way set associative, 64 byte line size\n"); break; case 0xe3: printf("3rd-level cache: 4 MByte, 16-way set associative, 64 byte line size\n"); break; case 0xe4: printf("3rd-level cache: 8 MByte, 16-way set associative, 64 byte line size\n"); break; case 0xea: printf("3rd-level cache: 12MByte, 24-way set associative, 64 byte line size\n"); break; case 0xeb: printf("3rd-level cache: 18MByte, 24-way set associative, 64 byte line size\n"); break; case 0xec: printf("3rd-level cache: 24MByte, 24-way set associative, 64 byte line size\n"); break; case 0xf0: printf("64-Byte prefetching\n"); break; case 0xf1: printf("128-Byte prefetching\n"); break; } } static void print_svm_info(void) { u_int features, regs[4]; uint64_t msr; int comma; printf("\n SVM: "); do_cpuid(0x8000000A, regs); features = regs[3]; msr = rdmsr(MSR_VM_CR); if ((msr & VM_CR_SVMDIS) == VM_CR_SVMDIS) 
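		/* SVMDIS set (usually by BIOS) blocks setting EFER.SVME. */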
printf("(disabled in BIOS) "); if (!bootverbose) { comma = 0; if (features & (1 << 0)) { printf("%sNP", comma ? "," : ""); comma = 1; } if (features & (1 << 3)) { printf("%sNRIP", comma ? "," : ""); comma = 1; } if (features & (1 << 5)) { printf("%sVClean", comma ? "," : ""); comma = 1; } if (features & (1 << 6)) { printf("%sAFlush", comma ? "," : ""); comma = 1; } if (features & (1 << 7)) { printf("%sDAssist", comma ? "," : ""); comma = 1; } printf("%sNAsids=%d", comma ? "," : "", regs[1]); return; } printf("Features=0x%b", features, "\020" "\001NP" /* Nested paging */ "\002LbrVirt" /* LBR virtualization */ "\003SVML" /* SVM lock */ "\004NRIPS" /* NRIP save */ "\005TscRateMsr" /* MSR based TSC rate control */ "\006VmcbClean" /* VMCB clean bits */ "\007FlushByAsid" /* Flush by ASID */ "\010DecodeAssist" /* Decode assist */ "\011" "\012" "\013PauseFilter" /* PAUSE intercept filter */ "\014" "\015PauseFilterThreshold" /* PAUSE filter threshold */ "\016AVIC" /* virtual interrupt controller */ ); printf("\nRevision=%d, ASIDs=%d", regs[0] & 0xff, regs[1]); } #ifdef __i386__ static void print_transmeta_info(void) { u_int regs[4], nreg = 0; do_cpuid(0x80860000, regs); nreg = regs[0]; if (nreg >= 0x80860001) { do_cpuid(0x80860001, regs); printf(" Processor revision %u.%u.%u.%u\n", (regs[1] >> 24) & 0xff, (regs[1] >> 16) & 0xff, (regs[1] >> 8) & 0xff, regs[1] & 0xff); } if (nreg >= 0x80860002) { do_cpuid(0x80860002, regs); printf(" Code Morphing Software revision %u.%u.%u-%u-%u\n", (regs[1] >> 24) & 0xff, (regs[1] >> 16) & 0xff, (regs[1] >> 8) & 0xff, regs[1] & 0xff, regs[2]); } if (nreg >= 0x80860006) { char info[65]; do_cpuid(0x80860003, (u_int*) &info[0]); do_cpuid(0x80860004, (u_int*) &info[16]); do_cpuid(0x80860005, (u_int*) &info[32]); do_cpuid(0x80860006, (u_int*) &info[48]); info[64] = 0; printf(" %s\n", info); } } #endif static void print_via_padlock_info(void) { u_int regs[4]; do_cpuid(0xc0000001, regs); printf("\n VIA Padlock Features=0x%b", regs[3], "\020" "\003RNG" /* RNG */ "\007AES" /* ACE */ "\011AES-CTR" /* ACE2 */ "\013SHA1,SHA256" /* PHE */ "\015RSA" /* PMM */ ); } static uint32_t vmx_settable(uint64_t basic, int msr, int true_msr) { uint64_t val; if (basic & (1ULL << 55)) val = rdmsr(true_msr); else val = rdmsr(msr); /* Just report the controls that can be set to 1. */ return (val >> 32); } static void print_vmx_info(void) { uint64_t basic, msr; uint32_t entry, exit, mask, pin, proc, proc2; int comma; printf("\n VT-x: "); msr = rdmsr(MSR_IA32_FEATURE_CONTROL); if (!(msr & IA32_FEATURE_CONTROL_VMX_EN)) printf("(disabled in BIOS) "); basic = rdmsr(MSR_VMX_BASIC); pin = vmx_settable(basic, MSR_VMX_PINBASED_CTLS, MSR_VMX_TRUE_PINBASED_CTLS); proc = vmx_settable(basic, MSR_VMX_PROCBASED_CTLS, MSR_VMX_TRUE_PROCBASED_CTLS); if (proc & PROCBASED_SECONDARY_CONTROLS) proc2 = vmx_settable(basic, MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2); else proc2 = 0; exit = vmx_settable(basic, MSR_VMX_EXIT_CTLS, MSR_VMX_TRUE_EXIT_CTLS); entry = vmx_settable(basic, MSR_VMX_ENTRY_CTLS, MSR_VMX_TRUE_ENTRY_CTLS); if (!bootverbose) { comma = 0; if (exit & VM_EXIT_SAVE_PAT && exit & VM_EXIT_LOAD_PAT && entry & VM_ENTRY_LOAD_PAT) { printf("%sPAT", comma ? "," : ""); comma = 1; } if (proc & PROCBASED_HLT_EXITING) { printf("%sHLT", comma ? "," : ""); comma = 1; } if (proc & PROCBASED_MTF) { printf("%sMTF", comma ? "," : ""); comma = 1; } if (proc & PROCBASED_PAUSE_EXITING) { printf("%sPAUSE", comma ? "," : ""); comma = 1; } if (proc2 & PROCBASED2_ENABLE_EPT) { printf("%sEPT", comma ? 
"," : ""); comma = 1; } if (proc2 & PROCBASED2_UNRESTRICTED_GUEST) { printf("%sUG", comma ? "," : ""); comma = 1; } if (proc2 & PROCBASED2_ENABLE_VPID) { printf("%sVPID", comma ? "," : ""); comma = 1; } if (proc & PROCBASED_USE_TPR_SHADOW && proc2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES && proc2 & PROCBASED2_VIRTUALIZE_X2APIC_MODE && proc2 & PROCBASED2_APIC_REGISTER_VIRTUALIZATION && proc2 & PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY) { printf("%sVID", comma ? "," : ""); comma = 1; if (pin & PINBASED_POSTED_INTERRUPT) printf(",PostIntr"); } return; } mask = basic >> 32; printf("Basic Features=0x%b", mask, "\020" "\02132PA" /* 32-bit physical addresses */ "\022SMM" /* SMM dual-monitor */ "\027INS/OUTS" /* VM-exit info for INS and OUTS */ "\030TRUE" /* TRUE_CTLS MSRs */ ); printf("\n Pin-Based Controls=0x%b", pin, "\020" "\001ExtINT" /* External-interrupt exiting */ "\004NMI" /* NMI exiting */ "\006VNMI" /* Virtual NMIs */ "\007PreTmr" /* Activate VMX-preemption timer */ "\010PostIntr" /* Process posted interrupts */ ); printf("\n Primary Processor Controls=0x%b", proc, "\020" "\003INTWIN" /* Interrupt-window exiting */ "\004TSCOff" /* Use TSC offsetting */ "\010HLT" /* HLT exiting */ "\012INVLPG" /* INVLPG exiting */ "\013MWAIT" /* MWAIT exiting */ "\014RDPMC" /* RDPMC exiting */ "\015RDTSC" /* RDTSC exiting */ "\020CR3-LD" /* CR3-load exiting */ "\021CR3-ST" /* CR3-store exiting */ "\024CR8-LD" /* CR8-load exiting */ "\025CR8-ST" /* CR8-store exiting */ "\026TPR" /* Use TPR shadow */ "\027NMIWIN" /* NMI-window exiting */ "\030MOV-DR" /* MOV-DR exiting */ "\031IO" /* Unconditional I/O exiting */ "\032IOmap" /* Use I/O bitmaps */ "\034MTF" /* Monitor trap flag */ "\035MSRmap" /* Use MSR bitmaps */ "\036MONITOR" /* MONITOR exiting */ "\037PAUSE" /* PAUSE exiting */ ); if (proc & PROCBASED_SECONDARY_CONTROLS) printf("\n Secondary Processor Controls=0x%b", proc2, "\020" "\001APIC" /* Virtualize APIC accesses */ "\002EPT" /* Enable EPT */ "\003DT" /* Descriptor-table exiting */ "\004RDTSCP" /* Enable RDTSCP */ "\005x2APIC" /* Virtualize x2APIC mode */ "\006VPID" /* Enable VPID */ "\007WBINVD" /* WBINVD exiting */ "\010UG" /* Unrestricted guest */ "\011APIC-reg" /* APIC-register virtualization */ "\012VID" /* Virtual-interrupt delivery */ "\013PAUSE-loop" /* PAUSE-loop exiting */ "\014RDRAND" /* RDRAND exiting */ "\015INVPCID" /* Enable INVPCID */ "\016VMFUNC" /* Enable VM functions */ "\017VMCS" /* VMCS shadowing */ "\020EPT#VE" /* EPT-violation #VE */ "\021XSAVES" /* Enable XSAVES/XRSTORS */ ); printf("\n Exit Controls=0x%b", mask, "\020" "\003DR" /* Save debug controls */ /* Ignore Host address-space size */ "\015PERF" /* Load MSR_PERF_GLOBAL_CTRL */ "\020AckInt" /* Acknowledge interrupt on exit */ "\023PAT-SV" /* Save MSR_PAT */ "\024PAT-LD" /* Load MSR_PAT */ "\025EFER-SV" /* Save MSR_EFER */ "\026EFER-LD" /* Load MSR_EFER */ "\027PTMR-SV" /* Save VMX-preemption timer value */ ); printf("\n Entry Controls=0x%b", mask, "\020" "\003DR" /* Save debug controls */ /* Ignore IA-32e mode guest */ /* Ignore Entry to SMM */ /* Ignore Deactivate dual-monitor treatment */ "\016PERF" /* Load MSR_PERF_GLOBAL_CTRL */ "\017PAT" /* Load MSR_PAT */ "\020EFER" /* Load MSR_EFER */ ); if (proc & PROCBASED_SECONDARY_CONTROLS && (proc2 & (PROCBASED2_ENABLE_EPT | PROCBASED2_ENABLE_VPID)) != 0) { msr = rdmsr(MSR_VMX_EPT_VPID_CAP); mask = msr; printf("\n EPT Features=0x%b", mask, "\020" "\001XO" /* Execute-only translations */ "\007PW4" /* Page-walk length of 4 */ "\011UC" /* EPT paging-structure mem can be UC */ "\017WB" 
/* EPT paging-structure mem can be WB */ "\0212M" /* EPT PDE can map a 2-Mbyte page */ "\0221G" /* EPT PDPTE can map a 1-Gbyte page */ "\025INVEPT" /* INVEPT is supported */ "\026AD" /* Accessed and dirty flags for EPT */ "\032single" /* INVEPT single-context type */ "\033all" /* INVEPT all-context type */ ); mask = msr >> 32; printf("\n VPID Features=0x%b", mask, "\020" "\001INVVPID" /* INVVPID is supported */ "\011individual" /* INVVPID individual-address type */ "\012single" /* INVVPID single-context type */ "\013all" /* INVVPID all-context type */ /* INVVPID single-context-retaining-globals type */ "\014single-globals" ); } } static void print_hypervisor_info(void) { if (*hv_vendor) printf("Hypervisor: Origin = \"%s\"\n", hv_vendor); } Index: projects/ipsec/tools/build/options/WITHOUT_BLACKLIST =================================================================== --- projects/ipsec/tools/build/options/WITHOUT_BLACKLIST (revision 313186) +++ projects/ipsec/tools/build/options/WITHOUT_BLACKLIST (revision 313187) @@ -1,2 +1,2 @@ .\" $FreeBSD$ -Set this if you do not want to build blacklistd / blacklistctl. +Set this if you do not want to build blacklistd/blacklistctl. Index: projects/ipsec/tools/build/options/WITHOUT_CLANG_BOOTSTRAP =================================================================== --- projects/ipsec/tools/build/options/WITHOUT_CLANG_BOOTSTRAP (revision 313186) +++ projects/ipsec/tools/build/options/WITHOUT_CLANG_BOOTSTRAP (revision 313187) @@ -1,5 +1,5 @@ .\" $FreeBSD$ -Set to not build the Clang C/C++ compiler during the bootstrap phase of the build. -You must enable either gcc or clang bootstrap to be able to build the system, -unless an alternative compiler is provided via -XCC. +Set to not build the Clang C/C++ compiler during the bootstrap phase of +the build. +To be able to build the system, either gcc or clang bootstrap must be +enabled unless an alternate compiler is provided via XCC. Index: projects/ipsec/tools/build/options/WITHOUT_CROSS_COMPILER =================================================================== --- projects/ipsec/tools/build/options/WITHOUT_CROSS_COMPILER (revision 313186) +++ projects/ipsec/tools/build/options/WITHOUT_CROSS_COMPILER (revision 313187) @@ -1,13 +1,13 @@ .\" $FreeBSD$ Set to not build any cross compiler in the cross-tools stage of buildworld. -If you are compiling a different version of +When compiling a different version of .Fx -than what is installed on the system, you will need to provide an alternate +than what is installed on the system, provide an alternate compiler with XCC to ensure success. -If you are compiling with an identical version of +When compiling with an identical version of .Fx to the host, this option may be safely used. This option may also be safe when the host version of .Fx is close to the sources being built, but all bets are off if there have been any changes to the toolchain between the versions. Index: projects/ipsec/tools/build/options/WITHOUT_DIALOG =================================================================== --- projects/ipsec/tools/build/options/WITHOUT_DIALOG (revision 313186) +++ projects/ipsec/tools/build/options/WITHOUT_DIALOG (revision 313187) @@ -1,2 +1,7 @@ .\" $FreeBSD$ -Set to not build dialog(1), dialog(1,3), and dpv(1,3). +Set to not build +.Xr dialog 1 , +.Xr dialog 3 , +.Xr dpv 1 , +and +.Xr dpv 3 . 
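The WITHOUT_CLANG_BOOTSTRAP and WITHOUT_CROSS_COMPILER descriptions above both defer to XCC for supplying an external compiler. A minimal sketch of that usage, assuming an installed clang at an illustrative path (the path and the companion XCXX variable are assumptions, not part of this change):

    # Build with an externally provided compiler instead of
    # bootstrapping one (paths are illustrative):
    make XCC=/usr/local/bin/clang XCXX=/usr/local/bin/clang++ \
        -DWITHOUT_CROSS_COMPILER buildworld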
Index: projects/ipsec/tools/build/options/WITHOUT_DMAGENT =================================================================== --- projects/ipsec/tools/build/options/WITHOUT_DMAGENT (revision 313186) +++ projects/ipsec/tools/build/options/WITHOUT_DMAGENT (revision 313187) @@ -1,2 +1,2 @@ .\" $FreeBSD$ -Set to not build dma Mail Transport Agent +Set to not build dma Mail Transport Agent. Index: projects/ipsec/tools/build/options/WITHOUT_DOCCOMPRESS =================================================================== --- projects/ipsec/tools/build/options/WITHOUT_DOCCOMPRESS (revision 313186) +++ projects/ipsec/tools/build/options/WITHOUT_DOCCOMPRESS (revision 313187) @@ -1,3 +1,3 @@ .\" $FreeBSD$ -Set to not to install compressed system documentation. +Set to not install compressed system documentation. Only the uncompressed version will be installed. Index: projects/ipsec/tools/build/options/WITHOUT_GPL_DTC =================================================================== --- projects/ipsec/tools/build/options/WITHOUT_GPL_DTC (revision 313186) +++ projects/ipsec/tools/build/options/WITHOUT_GPL_DTC (revision 313187) @@ -1,3 +1,3 @@ .\" $FreeBSD$ -Set to build the BSD licensed version of the device tree compiler, instead of the -GPL'd one from elinux.org. +Set to build the BSD licensed version of the device tree compiler rather +than the GPLed one from elinux.org. Index: projects/ipsec/tools/build/options/WITHOUT_INSTALLLIB =================================================================== --- projects/ipsec/tools/build/options/WITHOUT_INSTALLLIB (revision 313186) +++ projects/ipsec/tools/build/options/WITHOUT_INSTALLLIB (revision 313187) @@ -1,8 +1,8 @@ .\" $FreeBSD$ -Set this if you do not want to install optional libraries. -For example when creating a +Set this to not install optional libraries. +For example, when creating a .Xr nanobsd 8 image. .Bf -symbolic The option does not work for build targets. .Ef Index: projects/ipsec/tools/build/options/WITHOUT_KERBEROS =================================================================== --- projects/ipsec/tools/build/options/WITHOUT_KERBEROS (revision 313186) +++ projects/ipsec/tools/build/options/WITHOUT_KERBEROS (revision 313187) @@ -1,2 +1,2 @@ .\" $FreeBSD$ -Set this if you do not want to build Kerberos 5 (KTH Heimdal). +Set this to not build Kerberos 5 (KTH Heimdal). Index: projects/ipsec/tools/build/options/WITH_CCACHE_BUILD =================================================================== --- projects/ipsec/tools/build/options/WITH_CCACHE_BUILD (revision 313186) +++ projects/ipsec/tools/build/options/WITH_CCACHE_BUILD (revision 313187) @@ -1,39 +1,39 @@ .\" $FreeBSD$ Set to use .Xr ccache 1 for the build. No configuration is required except to install the .Sy devel/ccache package. -Using with -.Xr distcc 1 -should set +When using with +.Xr distcc 1 , +set .Sy CCACHE_PREFIX=/usr/local/bin/distcc . The default cache directory of .Pa $HOME/.ccache will be used, which can be overridden by setting .Sy CCACHE_DIR . The .Sy CCACHE_COMPILERCHECK option defaults to .Sy content when using the in-tree bootstrap compiler, and .Sy mtime when using an external compiler. The .Sy CCACHE_CPP2 option is used for Clang but not GCC. .Pp Sharing a cache between multiple work directories requires using a layout similar to .Pa /some/prefix/src .Pa /some/prefix/obj and an environment such as: .Bd -literal -offset indent CCACHE_BASEDIR='${SRCTOP:H}' MAKEOBJDIRPREFIX='${SRCTOP:H}/obj' .Ed .Pp See .Xr ccache 1 for more configuration options. 
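Sharing one ccache across several work directories, as the WITH_CCACHE_BUILD text above describes, might look like the following sketch in practice (/some/prefix stands in for a real path, per the layout named in the text; the distcc line applies only when combining the two tools):

    export CCACHE_BASEDIR=/some/prefix
    export MAKEOBJDIRPREFIX=/some/prefix/obj
    export CCACHE_PREFIX=/usr/local/bin/distcc   # only with distcc(1)
    cd /some/prefix/src && make -DWITH_CCACHE_BUILD buildworld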
Index: projects/ipsec/tools/build/options/WITH_DIRDEPS_BUILD =================================================================== --- projects/ipsec/tools/build/options/WITH_DIRDEPS_BUILD (revision 313186) +++ projects/ipsec/tools/build/options/WITH_DIRDEPS_BUILD (revision 313187) @@ -1,35 +1,36 @@ .\" $FreeBSD$ This is an experimental build system. For details see http://www.crufty.net/sjg/docs/freebsd-meta-mode.htm. Build commands can be seen from the top-level with: .Dl make show-valid-targets The build is driven by dirdeps.mk using .Va DIRDEPS stored in Makefile.depend files found in each directory. .Pp The build can be started from anywhere, and behaves the same. The initial instance of -.Xr make 1 +.Xr make 1 recursively reads .Va DIRDEPS -from Makefile.depend +from +.Pa Makefile.depend , computing a graph of tree dependencies from the current origin. Setting .Va NO_DIRDEPS -will skip checking dirdep dependencies and will only build in the current +skips checking dirdep dependencies and will only build in the current and child directories. .Va NO_DIRDEPS_BELOW -will skip building any dirdeps and only build the current directory. +skips building any dirdeps and only build the current directory. .Pp This also utilizes the .Va WITH_META_MODE logic for incremental builds. .Pp -The build will hide commands ran unless +The build hides commands executed unless .Va NO_SILENT is defined. .Pp Note that there is currently no mass install feature for this. .Pp Index: projects/ipsec/tools/build/options/WITH_INSTALL_AS_USER =================================================================== --- projects/ipsec/tools/build/options/WITH_INSTALL_AS_USER (revision 313186) +++ projects/ipsec/tools/build/options/WITH_INSTALL_AS_USER (revision 313187) @@ -1,9 +1,9 @@ .\" $FreeBSD$ Set to make install targets succeed for non-root users by installing files with owner and group attributes set to that of the user running the .Xr make 1 command. -The user still has to set the +The user still must set the .Va DESTDIR variable to point to a directory where the user has write permissions. Index: projects/ipsec/tools/build/options/WITH_META_MODE =================================================================== --- projects/ipsec/tools/build/options/WITH_META_MODE (revision 313186) +++ projects/ipsec/tools/build/options/WITH_META_MODE (revision 313187) @@ -1,51 +1,51 @@ .\" $FreeBSD$ -Creates +Create .Xr make 1 meta files when building, which can provide a reliable incremental build when using .Xr filemon 4 . -The meta file is created in the OBJDIR as +The meta file is created in OBJDIR as .Pa target.meta . -These meta files track the command ran, its output, and the current directory. +These meta files track the command that was executed, its output, and the +current directory. The .Xr filemon 4 module is required unless .Va NO_FILEMON is defined. -When the module is loaded, any files used by the commands executed will be -tracked as -dependencies for the target in its meta file. -The target will be considered out-of-date and rebuilt if any of the following -are true compared to the last build: +When the module is loaded, any files used by the commands executed are +tracked as dependencies for the target in its meta file. +The target is considered out-of-date and rebuilt if any of these +conditions are true compared to the last build: .Bl -bullet -compact .It The command to execute changes. .It The current working directory changes. .It The target's meta file is missing. 
.It The target's meta file is missing filemon data when filemon is loaded and a previous run did not have it loaded. .It [requires .Xr filemon 4 ] Files read, executed or linked to are newer than the target. .It [requires .Xr filemon 4 ] Files read, written, executed or linked are missing. .El The meta files can also be useful for debugging. .Pp -The build will hide commands ran unless +The build hides commands that are executed unless .Va NO_SILENT is defined. -Errors will cause +Errors cause .Xr make 1 to show some of its environment for further debugging. .Pp The build operates as it normally would otherwise. This option originally invoked a different build system but that was renamed to .Va WITH_DIRDEPS_BUILD . Index: projects/ipsec/tools/build/options/WITH_STAGING_MAN =================================================================== --- projects/ipsec/tools/build/options/WITH_STAGING_MAN (revision 313186) +++ projects/ipsec/tools/build/options/WITH_STAGING_MAN (revision 313187) @@ -1,2 +1,2 @@ .\" $FreeBSD$ -Enable staging of MAN pages to stage tree. +Enable staging of man pages to stage tree. Index: projects/ipsec/tools/build/options/makeman =================================================================== --- projects/ipsec/tools/build/options/makeman (revision 313186) +++ projects/ipsec/tools/build/options/makeman (revision 313187) @@ -1,352 +1,353 @@ #!/bin/sh # # This file is in the public domain. set -o errexit export LC_ALL=C ident='$FreeBSD$' t=$(mktemp -d -t makeman) trap 'test -d $t && rm -rf $t' exit srcdir=$(realpath ../../..) make="make -C $srcdir -m $srcdir/share/mk" # # usage: no_targets all_targets yes_targets # no_targets() { for t1 in $1 ; do for t2 in $2 ; do if [ "${t1}" = "${t2}" ] ; then continue 2 fi done echo ${t1} done } show_options() { ALL_TARGETS=$(echo $(${make} targets | tail -n +2)) rm -f $t/settings for target in ${ALL_TARGETS} ; do prev_opt= env -i ${make} showconfig \ SRC_ENV_CONF=/dev/null SRCCONF=/dev/null \ __MAKE_CONF=/dev/null \ TARGET_ARCH=${target#*/} TARGET=${target%/*} | while read var _ val ; do opt=${var#MK_} if [ $opt = "$prev_opt" ]; then echo "$target: ignoring duplicate option $opt" >&2 continue fi prev_opt=$opt case ${val} in yes) echo ${opt} ${target} ;; no) echo ${opt} ;; *) echo "make showconfig broken: ${var} ${_} ${val} (not yes or no)" >&2 exit 1 ;; esac done > $t/settings.target if [ -r $t/settings ] ; then join -t\ $t/settings $t/settings.target > $t/settings.new mv $t/settings.new $t/settings else mv $t/settings.target $t/settings fi done while read opt targets ; do if [ "${targets}" = "${ALL_TARGETS}" ] ; then echo "WITHOUT_${opt}" elif [ -z "${targets}" ] ; then echo "WITH_${opt}" else echo "WITHOUT_${opt}" $(no_targets "${ALL_TARGETS}" "${targets}") echo "WITH_${opt} ${targets}" fi done < $t/settings } # # usage: show { settings | with | without } ... 
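# An illustrative aside (not part of the original script): with mode
# "settings", each option is printed as WITH_x or WITHOUT_x according
# to the configuration produced by the extra make(1) arguments, e.g.
#     show settings SRC_ENV_CONF=/dev/null -DWITH_FOO
# (WITH_FOO being a hypothetical option), while modes "with" and
# "without" force a fixed WITH_/WITHOUT_ prefix regardless of value,
# which lets main() below diff two configurations option by option.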
# show() { mode=$1 ; shift case ${mode} in settings) yes_prefix=WITH no_prefix=WITHOUT ;; with) yes_prefix=WITH no_prefix=WITH ;; without) yes_prefix=WITHOUT no_prefix=WITHOUT ;; *) echo 'internal error' >&2 exit 1 ;; esac env -i ${make} .MAKE.MODE=normal "$@" showconfig __MAKE_CONF=/dev/null \ SRCCONF=/dev/null | while read var _ val ; do opt=${var#MK_} case ${val} in yes) echo ${yes_prefix}_${opt} ;; no) echo ${no_prefix}_${opt} ;; *) echo "make showconfig broken: ${var} ${_} ${val} (not yes or no)" >&2 exit 1 ;; esac done } main() { + echo "building src.conf.5 man page from files in ${PWD}" >&2 ident=${ident#$} ident=${ident% $} fbsdid='$'FreeBSD'$' cat < $t/config_default # Work around WITH_LDNS_UTILS forcing BIND_UTILS off by parsing the # actual config that results from enabling every WITH_ option. This # can be reverted if/when we no longer have options that disable # others. show with SRC_ENV_CONF=/dev/null | sort | sed 's/$/=/' > $t/src.conf show settings SRC_ENV_CONF=$t/src.conf | sort > $t/config_WITH_ALL show without SRC_ENV_CONF=/dev/null | sort > $t/config_WITHOUT_ALL env_only_options="$(${make} -V __ENV_ONLY_OPTIONS)" show_options | while read opt targets ; do if [ ! -f ${opt} ] ; then echo "no description found for ${opt}, skipping" >&2 continue fi echo ".It Va ${opt}" sed -e's/\$\(FreeBSD: .*\) \$/from \1/' ${opt} if [ -n "${targets}" ] ; then echo '.Pp' - echo 'It is a default setting on' + echo 'This is a default setting on' echo $(echo ${targets} | sed -e's/ /, /g' -e's/\(.*\), /\1 and /'). fi if [ "${opt%%_*}" = 'WITHOUT' ] ; then sed -n "/^WITH_${opt#WITHOUT_}$/!s/$/=/p" $t/config_WITH_ALL > $t/src.conf show settings SRC_ENV_CONF=$t/src.conf -D${opt} | sort > $t/config_WITH_ALL_${opt} comm -13 $t/config_WITH_ALL $t/config_WITH_ALL_${opt} | sed -n "/^${opt}$/!p" > $t/deps elif [ "${opt%%_*}" = 'WITH' ] ; then sed -n "/^WITHOUT${opt#WITH}$/!s/$/=/p" $t/config_WITHOUT_ALL > $t/src.conf show settings SRC_ENV_CONF=$t/src.conf -D${opt} | sort > $t/config_WITHOUT_ALL_${opt} comm -13 $t/config_WITHOUT_ALL $t/config_WITHOUT_ALL_${opt} | sed -n "/^${opt}$/!p" > $t/deps else echo 'internal error' >&2 exit 1 fi show settings SRC_ENV_CONF=/dev/null -D${opt} | sort > $t/config_${opt} comm -13 $t/config_default $t/config_${opt} | sed -n "/^${opt}$/!p" | comm -13 $t/deps - > $t/deps2 # Work around BIND_UTILS=no being the default when every WITH_ # option is enabled. if [ "$(cat $t/deps2)" = WITHOUT_BIND_UTILS ]; then sort $t/deps $t/deps2 > $t/_deps mv $t/_deps $t/deps :> $t/deps2 fi havedeps=0 if [ -s $t/deps ] ; then havedeps=1 - echo 'When set, it also enforces the following options:' + echo 'When set, it enforces these options:' echo '.Pp' echo '.Bl -item -compact' while read opt2 ; do echo '.It' echo ".Va ${opt2}" done < $t/deps echo '.El' fi if [ -s $t/deps2 ] ; then if [ ${havedeps} -eq 1 ] ; then echo '.Pp' fi - echo 'When set, the following options are also in effect:' + echo 'When set, these options are also in effect:' echo '.Pp' echo '.Bl -inset -compact' while read opt2 ; do echo ".It Va ${opt2}" noopt=$(echo ${opt2} | sed -e's/WITH_/WITHOUT_/;t' -e's/WITHOUT_/WITH_/') echo '(unless' echo ".Va ${noopt}" echo 'is set explicitly)' done < $t/deps2 echo '.El' fi case " ${env_only_options} " in *\ ${opt#*_}\ *) echo ".Pp" echo "This must be set in the environment, make command line, or" echo ".Pa /etc/src-env.conf ," echo "not" echo ".Pa /etc/src.conf ." ;; esac twiddle >&2 done cat < .\" All rights reserved. 
.\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions, and the following disclaimer, .\" without modification. .\" 2. Redistributions in binary form must reproduce at minimum a disclaimer .\" substantially similar to the "NO WARRANTY" disclaimer below .\" ("Disclaimer") and any redistribution must be conditioned upon .\" including a substantially similar Disclaimer requirement for further .\" binary redistribution. .\" .\" NO WARRANTY .\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS .\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT .\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR .\" A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT .\" HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, .\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING .\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGES. .\" .\" ctladm utility man page. .\" .\" Author: Ken Merry .\" .\" $Id: //depot/users/kenm/FreeBSD-test2/usr.sbin/ctladm/ctladm.8#3 $ .\" $FreeBSD$ .\" .Dd January 23, 2017 .Dt CTLADM 8 .Os .Sh NAME .Nm ctladm .Nd CAM Target Layer control utility .Sh SYNOPSIS .Nm .Aq Ar command .Op lun .Op generic args .Op command args .Nm .Ic tur .Aq lun .Op general options .Nm .Ic inquiry .Aq lun .Op general options .Nm .Ic reqsense .Aq lun .Op general options .Nm .Ic reportluns .Aq lun .Op general options .Nm .Ic read .Aq lun .Op general options .Aq Fl l Ar lba .Aq Fl d Ar datalen .Aq Fl f Ar file|- .Aq Fl b Ar blocksize_bytes .Op Fl c Ar cdbsize .Op Fl N .Nm .Ic write .Aq lun .Op general options .Aq Fl l Ar lba .Aq Fl d Ar datalen .Aq Fl f Ar file|- .Aq Fl b Ar blocksize_bytes .Op Fl c Ar cdbsize .Op Fl N .Nm .Ic readcap .Aq lun .Op general options .Op Fl c Ar cdbsize .Nm .Ic modesense .Aq lun .Aq Fl m Ar page | Fl l .Op Fl P Ar pc .Op Fl d .Op Fl S Ar subpage .Op Fl c Ar size .Nm .Ic start .Aq lun .Op general options .Op Fl i .Op Fl o .Nm .Ic stop .Aq lun .Op general options .Op Fl i .Op Fl o .Nm .Ic synccache .Aq lun .Op general options .Op Fl l Ar lba .Op Fl b Ar blockcount .Op Fl r .Op Fl i .Op Fl c Ar cdbsize .Nm .Ic lunlist .Nm .Ic delay .Aq lun .Aq Fl l Ar datamove|done .Aq Fl t Ar secs .Op Fl T Ar oneshot|cont .Nm .Ic inject .Aq Fl i Ar action .Aq Fl p Ar pattern .Op Fl r Ar lba,len .Op Fl s Ar len fmt Op Ar args .Op Fl c .Op Fl d Ar delete_id .Nm .Ic create .Aq Fl b Ar backend .Op Fl B Ar blocksize .Op Fl d Ar device_id .Op Fl l Ar lun_id .Op Fl o Ar name=value .Op Fl s Ar size_bytes .Op Fl S Ar serial_num .Op Fl t Ar device_type .Nm .Ic remove .Aq Fl b Ar backend .Aq Fl l Ar lun_id .Op Fl o Ar name=value .Nm .Ic modify .Aq Fl b Ar backend .Aq Fl l Ar lun_id .Op Fl o Ar name=value .Aq Fl s Ar size_bytes .Nm .Ic devlist .Op Fl b Ar backend .Op Fl v .Op Fl x .Nm .Ic port .Op Fl o Ar on|off .Op Fl w Ar wwpn .Op Fl W Ar wwnn .Op Fl p Ar targ_port .Op Fl t Ar fe_type .Nm .Ic portlist .Op Fl f Ar frontend .Op Fl i .Op Fl l .Op Fl p Ar targ_port .Op Fl q .Op Fl v .Op Fl x .Nm .Ic lunmap .Aq Fl p Ar targ_port 
.Op Fl l Ar pLUN .Op Fl L Ar cLUN .Nm .Ic dumpooa .Nm .Ic dumpstructs .Nm .Ic islist .Op Fl v .Op Fl x .Nm .Ic islogout .Aq Fl a | Fl c Ar connection-id | Fl i Ar name | Fl p Ar portal .Nm .Ic isterminate .Aq Fl a | Fl c Ar connection-id | Fl i Ar name | Fl p Ar portal .Nm .Ic help .Sh DESCRIPTION The .Nm utility is designed to provide a way to access and control the CAM Target Layer (CTL). It provides a way to send .Tn SCSI commands to the CTL layer, and also provides some meta-commands that utilize .Tn SCSI commands. (For instance, the .Ic lunlist command is implemented using the .Tn SCSI REPORT LUNS and INQUIRY commands.) .Pp The .Nm utility has a number of primary functions, many of which require a device identifier. The device identifier takes the following form: .Bl -tag -width 14n .It lun Specify the LUN number to operate on. .El Many of the primary functions of the .Nm utility take the following optional arguments: .Bl -tag -width 10n .It Fl C Ar retries Specify the number of times to retry a command in the event of failure. .It Fl D Ar device Specify the device to open. This allows a device other than the default, .Pa /dev/cam/ctl , to be opened for sending commands. .It Fl I Ar id Specify the initiator number to use. By default, .Nm will use 7 as the initiator number. .El .Pp Primary commands: .Bl -tag -width 11n .It Ic tur Send the .Tn SCSI TEST UNIT READY command to the device and report whether or not it is ready. .It Ic inquiry Send the .Tn SCSI INQUIRY command to the device and display some of the returned inquiry data. .It Ic reqsense Send the .Tn SCSI REQUEST SENSE command to the device and display the returned sense information. .It Ic reportluns Send the .Tn SCSI REPORT LUNS command to the device and display supported LUNs. .It Ic read Send a .Tn SCSI READ command to the device, and write the requested data to a file or stdout. .Bl -tag -width 12n .It Fl l Ar lba Specify the starting Logical Block Address for the READ. This can be specified in decimal, octal (starting with 0), hexadecimal (starting with 0x) or any other base supported by .Xr strtoull 3 . .It Fl d Ar datalen Specify the length, in 512 byte blocks, of the READ request. .It Fl f Ar file Specify the destination for the data read by the READ command. Either a filename or .Sq - for stdout may be specified. .It Fl c Ar cdbsize Specify the minimum .Tn SCSI CDB (Command Data Block) size to be used for the READ request. Allowable values are 6, 10, 12 and 16. Depending upon the LBA and amount of data requested, a larger CDB size may be used to satisfy the request. (e.g., for LBAs above 0xffffffff, READ(16) must be used to satisfy the request.) .It Fl b Ar blocksize Specify the blocksize of the underlying .Tn SCSI device, so the transfer length can be calculated accurately. The blocksize can be obtained via the .Tn SCSI READ CAPACITY command. .It Fl N Do not copy data to .Nm from the kernel when doing a read, just execute the command without copying data. This is to be used for performance testing. .El .It Ic write Read data from a file or stdin, and write the data to the device using the .Tn SCSI WRITE command. .Bl -tag -width 12n .It Fl l Ar lba Specify the starting Logical Block Address for the WRITE. This can be specified in decimal, octal (starting with 0), hexadecimal (starting with 0x) or any other base supported by .Xr strtoull 3 . .It Fl d Ar datalen Specify the length, in 512 byte blocks, of the WRITE request.
.It Fl f Ar file Specify the source for the data to be written by the WRITE command. Either a filename or .Sq - for stdin may be specified. .It Fl c Ar cdbsize Specify the minimum .Tn SCSI CDB (Command Data Block) size to be used for the WRITE request. Allowable values are 6, 10, 12 and 16. Depending upon the LBA and amount of data requested, a larger CDB size may be used to satisfy the request. (e.g., for LBAs above 0xffffffff, WRITE(16) must be used to satisfy the request.) .It Fl b Ar blocksize Specify the blocksize of the underlying .Tn SCSI device, so the transfer length can be calculated accurately. The blocksize can be obtained via the .Tn SCSI READ CAPACITY command. .It Fl N Do not copy data from .Nm to the kernel when doing a write, just execute the command without copying data. This is to be used for performance testing. .El .It Ic readcap Send the .Tn SCSI READ CAPACITY command to the device and display the device size and device block size. By default, READ CAPACITY(10) is used. If the device returns a maximum LBA of 0xffffffff, however, .Nm will automatically issue a READ CAPACITY(16), which is implemented as a service action of the SERVICE ACTION IN(16) opcode. The user can specify the minimum CDB size with the .Fl c argument. Valid values for the .Fl c option are 10 and 16. If a 10 byte CDB is specified, the request will be automatically reissued with a 16 byte CDB if the maximum LBA returned is 0xffffffff. .It Ic modesense Send a .Tn SCSI MODE SENSE command to the device, and display the requested mode page(s) or page list. .Bl -tag -width 10n .It Fl m Ar page Specify the mode page to display. This option and the .Fl l option are mutually exclusive; one of the two must be specified. Mode page numbers may be specified in decimal or hexadecimal. .It Fl l Request that the list of mode pages supported by the device be returned. This option and the .Fl m option are mutually exclusive; one of the two must be specified. .It Fl P Ar pc Specify the mode page control value. Possible values are: .Bl -tag -width 2n -compact .It 0 Current values. .It 1 Changeable value bitmask. .It 2 Default values. .It 3 Saved values. .El .It Fl d Disable block descriptors when sending the mode sense request. .It Fl S Ar subpage Specify the subpage used with the mode sense request. .It Fl c Ar cdbsize Specify the CDB size used for the mode sense request. Supported values are 6 and 10. .El .It Ic start Send the .Tn SCSI START STOP UNIT command to the specified LUN with the start bit set. .Bl -tag -width 4n .It Fl i Set the immediate bit in the CDB. Note that CTL does not support the immediate bit, so this is primarily useful for making sure that CTL returns the proper error. .El .It Ic stop Send the .Tn SCSI START STOP UNIT command to the specified LUN with the start bit cleared. We use an ordered tag to stop the LUN, so we can guarantee that all pending I/O executes before it is stopped. (CTL guarantees this anyway, but .Nm sends an ordered tag for completeness.) .Bl -tag -width 4n .It Fl i Set the immediate bit in the CDB. Note that CTL does not support the immediate bit, so this is primarily useful for making sure that CTL returns the proper error. .El .It Ic synccache Send the .Tn SCSI SYNCHRONIZE CACHE command to the device. By default, SYNCHRONIZE CACHE(10) is used. If the specified starting LBA is greater than 0xffffffff or the length is greater than 0xffff, though, SYNCHRONIZE CACHE(16) will be used.
The 16 byte command will also be used if the user specifies a 16 byte CDB with the .Fl c argument. .Bl -tag -width 14n .It Fl l Ar lba Specify the starting LBA of the cache region to synchronize. This option is a no-op for CTL. If you send a SYNCHRONIZE CACHE command, it will sync the cache for the entire LUN. .It Fl b Ar blockcount Specify the length of the cache region to synchronize. This option is a no-op for CTL. If you send a SYNCHRONIZE CACHE command, it will sync the cache for the entire LUN. .It Fl r Specify relative addressing for the starting LBA. CTL does not support relative addressing, since it only works for linked commands, and CTL does not support linked commands. .It Fl i Tell the target to return status immediately after issuing the SYNCHRONIZE CACHE command rather than waiting for the cache to finish syncing. CTL does not support this bit. .It Fl c Ar cdbsize Specify the minimum CDB size. Valid values are 10 and 16 bytes. .El .It Ic lunlist List all LUNs registered with CTL. Because this command uses the ioctl port, it will only work when the FETDs (Front End Target Drivers) are enabled. This command is the equivalent of doing a REPORT LUNS on one LUN and then an INQUIRY on each LUN in the system. .It Ic delay Delay commands at the given location. There are currently two places where commands may be delayed: before data is transferred .Pq Dq datamove and just prior to sending status to the host .Pq Dq done . One of the two must be supplied as an argument to the .Fl l option. The .Fl t option must also be specified. .Bl -tag -width 12n .It Fl l Ar delayloc Delay command(s) at the specified location. This can either be at the data movement stage (datamove) or prior to command completion (done). .It Fl t Ar delaytime Delay command(s) for the specified number of seconds. This must be specified. If set to 0, it will clear out any previously set delay for this particular location (datamove or done). .It Fl T Ar delaytype Specify the delay type. By default, the .Ic delay option will delay the next command sent to the given LUN. With the .Fl T Ar cont option, every command will be delayed by the specified period of time. With the .Fl T Ar oneshot option, the next command sent to the given LUN will be delayed and all subsequent commands will be completed normally. This is the default. .El .It Ic inject Inject the specified type of error for the LUN specified, when a command that matches the given pattern is seen. The sense data returned is in either fixed or descriptor format, depending upon the status of the D_SENSE bit in the control mode page (page 0xa) for the LUN. .Pp Errors are only injected for commands that have not already failed for other reasons. By default, only the first command matching the pattern specified is returned with the supplied error. .Pp If the .Fl c flag is specified, all commands matching the pattern will be returned with the specified error until the error injection command is deleted with the .Fl d flag. .Bl -tag -width 17n .It Fl i Ar action Specify the error to return: .Bl -tag -width 10n .It aborted Return the next matching command on the specified LUN with the sense key ABORTED COMMAND (0x0b), and the ASC/ASCQ 0x45,0x00 ("Select or reselect failure"). .It mediumerr Return the next matching command on the specified LUN with the sense key MEDIUM ERROR (0x03) and the ASC/ASCQ 0x11,0x00 ("Unrecovered read error") for reads, or ASC/ASCQ 0x0c,0x02 ("Write error - auto reallocation failed") for write errors.
.It ua Return the next matching command on the specified LUN with the sense key UNIT ATTENTION (0x06) and the ASC/ASCQ 0x29,0x00 ("POWER ON, RESET, OR BUS DEVICE RESET OCCURRED"). .It custom Return the next matching command on the specified LUN with the supplied sense data. The .Fl s argument must be specified. .El .It Fl p Ar pattern Specify which commands should be returned with the given error. .Bl -tag -width 10n .It read The error should apply to READ(6), READ(10), READ(12), READ(16), etc. .It write The error should apply to WRITE(6), WRITE(10), WRITE(12), WRITE(16), WRITE AND VERIFY(10), etc. .It rw The error should apply to both read and write type commands. .It readcap The error should apply to READ CAPACITY(10) and READ CAPACITY(16) commands. .It tur The error should apply to TEST UNIT READY commands. .It any The error should apply to any command. .El .It Fl r Ar lba,len Specify the starting LBA and length of the range of LBAs which should trigger an error. This option only applies when read and/or write patterns are specified. If used with other command types, the error will never be triggered. .It Fl s Ar len fmt Op Ar args Specify the sense data that is to be returned for custom actions. If the format is .Sq - , len bytes of sense data will be read from standard input and written to the sense buffer. If len is longer than 252 bytes (the maximum allowable .Tn SCSI sense data length), it will be truncated to that length. The sense data format is described in .Xr cam_cdbparse 3 . .It Fl c The error injection should be persistent, instead of happening once. Persistent errors must be deleted with the .Fl d argument. .It Fl d Ar delete_id Delete the specified error injection serial number. The serial number is returned when the error is injected. .El .It Ic port Perform one of several CTL frontend port operations. Either get a list of frontend ports .Pq Fl l , turn one or more frontends on or off .Pq Fl o Ar on|off , or set the World Wide Node Name .Pq Fl w Ar wwnn or World Wide Port Name .Pq Fl W Ar wwpn for a given port. One of .Fl l , .Fl o , .Fl w or .Fl W must be specified. The WWNN and WWPN may both be specified at the same time, but cannot be combined with enabling/disabling or listing ports. .Bl -tag -width 12n .It Fl o Ar on|off Turn the specified CTL frontend ports off or on. If no port number or port type is specified, all ports are turned on or off. .It Fl p Ar targ_port Specify the frontend port number. The port numbers can be found in the frontend port list. .It Fl t Ar fe_type Specify the frontend type. Currently defined port types are .Dq fc (Fibre Channel), .Dq scsi (Parallel SCSI), .Dq ioctl (CTL ioctl interface), and .Dq internal (CTL CAM SIM). .It Fl w Ar wwnn Set the World Wide Node Name for the given port. The .Fl p argument must be specified, since this is only possible to implement on a single port. As a general rule, the WWNN should be the same across all ports on the system. .It Fl W Ar wwpn Set the World Wide Port Name for the given port. The .Fl p argument must be specified, since this is only possible to implement on a single port. As a general rule, the WWPN must be different for every port in the system. .El .It Ic portlist List CTL frontend ports. .Bl -tag -width 12n .It Fl f Ar frontend Specify the frontend type. .It Fl i Report target and connected initiator addresses. .It Fl l Report LUN mapping. .It Fl p Ar targ_port Specify the frontend port number. .It Fl q Omit the header in the port list output.
.It Fl v Enable verbose output (report all port options). .It Fl x Output the port list in XML format. .El .It Ic lunmap Change LUN mapping for the specified port. If both .Ar pLUN and .Ar cLUN are specified, the LUN will be mapped. If .Ar pLUN is specified but .Ar cLUN is not, the LUN will be unmapped. If neither .Ar pLUN nor .Ar cLUN is specified, LUN mapping will be disabled, exposing all CTL LUNs. .Bl -tag -width 12n .It Fl p Ar targ_port Specify the frontend port number. .It Fl l Ar pLUN LUN number visible by the specified port. .It Fl L Ar cLUN CTL LUN number. .El .It Ic dumpooa Dump the OOA (Order Of Arrival) queue for each LUN registered with CTL. .It Ic dumpstructs Dump the CTL structures to the console. .It Ic create Create a new LUN. The backend must be specified, and depending upon the backend requested, some of the other options may be required. If the LUN is created successfully, the LUN configuration will be displayed. If LUN creation fails, a message will be displayed describing the failure. .Bl -tag -width 14n .It Fl b Ar backend The .Fl b flag is required. This specifies the name of the backend to use when creating the LUN. Examples are .Dq ramdisk and .Dq block . .It Fl B Ar blocksize Specify the blocksize of the backend in bytes. .It Fl d Ar device_id Specify the LUN-associated string to use in the .Tn SCSI INQUIRY VPD page 0x83 data. .It Fl l Ar lun_id Request that a particular LUN number be assigned. If the requested LUN number is not available, the request will fail. .It Fl o Ar name=value Specify a backend-specific name/value pair. Multiple .Fl o arguments may be specified. Refer to the backend documentation for arguments that may be used. .It Fl s Ar size_bytes Specify the size of the LUN in bytes. Some backends may allow setting the size (e.g. the ramdisk backend) and for others the size may be implicit (e.g. the block backend). .It Fl S Ar serial_num Specify the serial number to be used in the .Tn SCSI INQUIRY VPD page 0x80 data. .It Fl t Ar device_type Specify the numeric SCSI device type to use when creating the LUN. If this flag is not used, the type of LUN created is backend-specific. Not all LUN types are supported. Currently CTL supports Direct Access (type 0), Processor (type 3) and CD/DVD (type 5) LUNs. The backend requested may or may not support all of the LUN types that CTL supports. .El .It Ic remove Remove a LUN. The backend must be specified, and the LUN number must also be specified. Backend-specific options may also be specified with the .Fl o flag. .Bl -tag -width 14n .It Fl b Ar backend Specify the backend that owns the LUN to be removed. Examples are .Dq ramdisk and .Dq block . .It Fl l Ar lun_id Specify the LUN number to remove. .It Fl o Ar name=value Specify a backend-specific name/value pair. Multiple .Fl o arguments may be specified. Refer to the backend documentation for arguments that may be used. .El .It Ic modify Modify a LUN's size. The backend, the LUN number, and the size must be specified. .Bl -tag -width 14n .It Fl b Ar backend Specify the backend that owns the LUN to be modified. Examples are .Dq ramdisk and .Dq block . .It Fl l Ar lun_id Specify the LUN number to modify. .It Fl o Ar name=value Specify a backend-specific name/value pair. Multiple .Fl o arguments may be specified. Refer to the backend documentation for arguments that may be used. .It Fl s Ar size_bytes Specify the size of the LUN in bytes. For the .Dq block backend, an .Dq auto keyword may be passed instead; this will make CTL use the size of the backing file or device.
.El .It Ic devlist Get a list of all configured LUNs. This also includes the LUN size and blocksize, serial number and device ID. .Bl -tag -width 11n .It Fl b Ar backend Specify the backend. This restricts the LUN list to the named backend. Examples are .Dq ramdisk and .Dq block . .It Fl v Be verbose. This will also display any backend-specific LUN attributes in addition to the standard per-LUN information. .It Fl x Dump the raw XML. The LUN list information from the kernel comes in XML format, and this option allows the display of the raw XML data. This option and the .Fl v and .Fl b options are mutually exclusive. If you specify .Fl x , the entire LUN database is displayed in XML format. .El .It Ic islist Get a list of currently running iSCSI sessions. This includes initiator and target names and the unique connection IDs. .Bl -tag -width 11n .It Fl v Verbose mode. .It Fl x Dump the raw XML. The sessions list information from the kernel comes in XML format, and this option allows the display of the raw XML data. .El .It Ic islogout Ask the initiator to log out iSCSI sessions matching criteria. .Bl -tag -width 11n .It Fl a Log out all sessions. .It Fl c Specify connection ID. .It Fl i Specify initiator name. .It Fl p Specify initiator portal (hostname or IP address). .El .It Ic isterminate Forcibly terminate iSCSI sessions matching criteria. .Bl -tag -width 11n .It Fl a Terminate all sessions. .It Fl c Specify connection ID. .It Fl i Specify initiator name. .It Fl p Specify initiator portal (hostname or IP address). .El .It Ic help Display .Nm usage information. .El .Sh OPTIONS A number of additional configuration options may be specified for LUNs. Some options are global, others are backend-specific. .Pp Global options: .Bl -tag -width 12n .It Va vendor Specifies the LUN vendor string, up to 8 chars. .It Va product Specifies the LUN product string, up to 16 chars. .It Va revision Specifies the LUN revision string, up to 4 chars. .It Va scsiname Specifies the LUN SCSI name string. .It Va eui Specifies the LUN EUI-64 identifier. .It Va naa Specifies the LUN NAA identifier. .It Va uuid Specifies the LUN's locally assigned RFC 4122 UUID identifier. The EUI, NAA or UUID identifier should be set to a unique value to allow the EXTENDED COPY command to access the LUN. Non-unique LUN identifiers may lead to data corruption. Some initiators may not support the later-introduced UUID identifiers. .It Va ha_role Setting this to "primary" or "secondary" overrides the default role of the node in an HA cluster, as set by the kern.cam.ctl.ha_role sysctl. .It Va insecure_tpc Setting this to "on" allows an EXTENDED COPY command sent to this LUN to access other LUNs on this host that are not otherwise accessible. This makes it possible to offload copying between different iSCSI targets residing on the same host in trusted environments. .It Va readcache Setting this to "off" disables read caching for the LUN, if supported by the backend. .It Va readonly Setting this to "on" blocks all media write operations to the LUN and reports it as write protected. .It Va removable Setting this to "on" makes the LUN removable. .It Va reordering Setting this to "unrestricted" allows the target to process commands with the SIMPLE task attribute in arbitrary order. Any data integrity exposures related to command sequence order shall be explicitly handled by the application client through the selection of appropriate commands and task attributes. The default value is "restricted"; it improves data integrity but may introduce some additional delays. .It Va serseq -Set to "on" to serialize conseсutive reads/writes. -Set to "read" to serialize conseсutive reads.
+Set to "on" to serialize consecutive reads/writes. +Set to "read" to serialize consecutive reads. Set to "off" to allow them be issued in parallel. Parallel issue of consecutive operations may confuse logic of the backing file system, hurting performance; but it may improve performance of backing stores without prefetch/write-back. .It Va pblocksize .It Va pblockoffset Specify physical block size and offset of the device. .It Va ublocksize .It Va ublockoffset Specify UNMAP block size and offset of the device. .It Va rpm Specifies medium rotation rate of the device: 0 -- not reported, 1 -- non-rotating (SSD), >1024 -- value in revolutions per minute. .It Va formfactor Specifies nominal form factor of the device: 0 -- not reported, 1 -- 5.25", 2 -- 3.5", 3 -- 2.5", 4 -- 1.8", 5 -- less then 1.8". .It Va provisioning_type When UNMAP support is enabled, this option specifies provisioning type: "resource", "thin" or "unknown". Default value is "thin". Logical units without UNMAP support are reported as fully provisioned. .It Va unmap Setting to "on" or "off" controls UNMAP support for the logical unit. Default value is "on" if supported by the backend. .It Va unmap_max_lba .It Va unmap_max_descr Specify maximum allowed number of LBAs and block descriptors per UNMAP command to report in Block Limits VPD page. .It Va write_same_max_lba Specify maximum allowed number of LBAs per WRITE SAME command to report in Block Limits VPD page. .It Va avail-threshold .It Va used-threshold .It Va pool-avail-threshold .It Va pool-used-threshold Set per-LUN/-pool thin provisioning soft thresholds. LUN will establish UNIT ATTENTION condition if its or pool available space get below configured avail values, or its or pool used space get above configured used values. Pool thresholds are working only for ZVOL-backed LUNs. .It Va writecache Set to "off", disables write caching for the LUN, if supported by the backend. .El .Pp Options specific for block backend: .Bl -tag -width 12n .It Va file Specifies file or device name to use for backing store. .It Va num_threads Specifies number of backend threads to use for this LUN. .El .Pp Options specific for ramdisk backend: .Bl -tag -width 12n .It Va capacity Specifies capacity of backing store (maximum RAM for data). The default value is zero, that disables backing store completely, making all writes go to nowhere, while all reads return zeroes. .El .Sh EXAMPLES .Dl ctladm tur 1 .Pp Send a .Tn SCSI TEST UNIT READY command to LUN 1. .Pp .Dl ctladm modesense 1 -l .Pp Display the list of mode pages supported by LUN 1. .Pp .Dl ctladm modesense 0 -m 10 -P 3 -d -c 10 .Pp Display the saved version of the Control mode page (page 10) on LUN 0. Disable fetching block descriptors, and use a 10 byte MODE SENSE command instead of the default 6 byte command. .Bd -literal ctladm read 2 -l 0 -d 1 -b 512 -f - > foo .Ed .Pp Read the first 512 byte block from LUN 2 and dump it to the file .Pa foo . .Bd -literal ctladm write 3 -l 0xff432140 -d 20 -b 512 -f /tmp/bar .Ed .Pp Read 10240 bytes from the file .Pa /tmp/bar and write it to LUN 3. starting at LBA 0xff432140. .Pp .Dl ctladm create -b ramdisk -s 10485760000000000 .Pp Create a LUN with the .Dq fake ramdisk as a backing store. The LUN will claim to have a size of approximately 10 terabytes, while having no real data store (all written data are lost). .Pp .Dl ctladm create -b ramdisk -s 10T -o capacity=10G .Pp Create a thin provisioned LUN with a ramdisk as a backing store. 
The LUN will have a maximum backing store capacity of 10 gigabytes while reporting a size of 10 terabytes. .Pp .Dl ctladm create -b block -o file=src/usr.sbin/ctladm/ctladm.8 .Pp Create a LUN using the block backend, and specify the file .Pa src/usr.sbin/ctladm/ctladm.8 as the backing store. The size of the LUN will be derived from the size of the file. .Pp .Dl ctladm create -b block -o file=src/usr.sbin/ctladm/ctladm.8 -S MYSERIAL321 -d MYDEVID123 .Pp Create a LUN using the block backend, specify the file .Pa src/usr.sbin/ctladm/ctladm.8 as the backing store, and specify the .Tn SCSI VPD page 0x80 and 0x83 serial number .Fl ( S ) and device ID .Fl ( d ) . .Pp .Dl ctladm remove -b block -l 12 .Pp Remove LUN 12, which is handled by the block backend, from the system. .Pp .Dl ctladm devlist .Pp List configured LUNs in the system, along with their backend and serial number. This works whether the Front End Target Drivers are enabled or disabled. .Pp .Dl ctladm lunlist .Pp List all LUNs in the system, along with their inquiry data and device type. This only works when the FETDs are enabled, since the commands go through the ioctl port. .Pp .Dl ctladm inject 6 -i mediumerr -p read -r 0,512 -c .Pp Inject a medium error on LUN 6 for every read that covers the first 512 blocks of the LUN. .Bd -literal -offset indent ctladm inject 6 -i custom -p tur -s 18 "f0 0 02 s12 04 02" .Ed .Pp Inject a custom error on LUN 6 for the next TEST UNIT READY command only. This will result in a sense key of NOT READY (0x02), and an ASC/ASCQ of 0x04,0x02 ("Logical unit not ready, initializing command required"). .Sh SEE ALSO .Xr cam 3 , .Xr cam_cdbparse 3 , .Xr cam 4 , .Xr ctl 4 , .Xr xpt 4 , .Xr camcontrol 8 , .Xr ctld 8 , .Xr ctlstat 8 .Sh HISTORY The .Nm utility was originally written during the Winter/Spring of 2003 as an interface to CTL. .Sh AUTHORS .An Ken Merry Aq Mt ken@FreeBSD.org Index: projects/ipsec =================================================================== --- projects/ipsec (revision 313186) +++ projects/ipsec (revision 313187) Property changes on: projects/ipsec ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r313116-313186
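As a worked complement to the ctladm EXAMPLES section above, a complete file-backed LUN round trip might look like the following sketch (the image path, and the assumption that the new LUN is assigned number 0, are illustrative and not taken from the page):

    truncate -s 1G /var/tmp/lun0.img     # create a 1 GB backing file
    ctladm create -b block -o file=/var/tmp/lun0.img
    ctladm devlist                       # confirm the LUN and note its number
    ctladm remove -b block -l 0          # remove it again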