Index: vendor/llvm/dist-release_80/CMakeLists.txt =================================================================== --- vendor/llvm/dist-release_80/CMakeLists.txt (revision 348931) +++ vendor/llvm/dist-release_80/CMakeLists.txt (revision 348932) @@ -1,1082 +1,1082 @@ # See docs/CMake.html for instructions about how to build LLVM with CMake. cmake_minimum_required(VERSION 3.4.3) if(POLICY CMP0068) cmake_policy(SET CMP0068 NEW) set(CMAKE_BUILD_WITH_INSTALL_NAME_DIR ON) endif() if(POLICY CMP0075) cmake_policy(SET CMP0075 NEW) endif() if(NOT DEFINED LLVM_VERSION_MAJOR) set(LLVM_VERSION_MAJOR 8) endif() if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 0) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 0) + set(LLVM_VERSION_PATCH 1) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX "") endif() if (NOT PACKAGE_VERSION) set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}") endif() if ((CMAKE_GENERATOR MATCHES "Visual Studio") AND (CMAKE_GENERATOR_TOOLSET STREQUAL "")) message(WARNING "Visual Studio generators use the x86 host compiler by " "default, even for 64-bit targets. This can result in linker " "instability and out of memory errors. To use the 64-bit " "host compiler, pass -Thost=x64 on the CMake command line.") endif() project(LLVM VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH} LANGUAGES C CXX ASM) if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) message(STATUS "No build type selected, default to Debug") set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type (default Debug)" FORCE) endif() # This should only apply if you are both on an Apple host, and targeting Apple. if(CMAKE_HOST_APPLE AND APPLE) # if CMAKE_LIBTOOL is not set, try and find it with xcrun or find_program if(NOT CMAKE_LIBTOOL) if(NOT CMAKE_XCRUN) find_program(CMAKE_XCRUN NAMES xcrun) endif() if(CMAKE_XCRUN) execute_process(COMMAND ${CMAKE_XCRUN} -find libtool OUTPUT_VARIABLE CMAKE_LIBTOOL OUTPUT_STRIP_TRAILING_WHITESPACE) endif() if(NOT CMAKE_LIBTOOL OR NOT EXISTS CMAKE_LIBTOOL) find_program(CMAKE_LIBTOOL NAMES libtool) endif() endif() get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES) if(CMAKE_LIBTOOL) set(CMAKE_LIBTOOL ${CMAKE_LIBTOOL} CACHE PATH "libtool executable") message(STATUS "Found libtool - ${CMAKE_LIBTOOL}") execute_process(COMMAND ${CMAKE_LIBTOOL} -V OUTPUT_VARIABLE LIBTOOL_V_OUTPUT OUTPUT_STRIP_TRAILING_WHITESPACE) if("${LIBTOOL_V_OUTPUT}" MATCHES ".*cctools-([0-9.]+).*") string(REGEX REPLACE ".*cctools-([0-9.]+).*" "\\1" LIBTOOL_VERSION ${LIBTOOL_V_OUTPUT}) if(NOT LIBTOOL_VERSION VERSION_LESS "862") set(LIBTOOL_NO_WARNING_FLAG "-no_warning_for_no_symbols") endif() endif() foreach(lang ${languages}) set(CMAKE_${lang}_CREATE_STATIC_LIBRARY "\"${CMAKE_LIBTOOL}\" -static ${LIBTOOL_NO_WARNING_FLAG} -o \ ") endforeach() endif() # If DYLD_LIBRARY_PATH is set we need to set it on archiver commands if(DYLD_LIBRARY_PATH) set(dyld_envar "DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}") foreach(lang ${languages}) foreach(cmd ${CMAKE_${lang}_CREATE_STATIC_LIBRARY}) list(APPEND CMAKE_${lang}_CREATE_STATIC_LIBRARY_NEW "${dyld_envar} ${cmd}") endforeach() set(CMAKE_${lang}_CREATE_STATIC_LIBRARY ${CMAKE_${lang}_CREATE_STATIC_LIBRARY_NEW}) endforeach() endif() endif() # Side-by-side subprojects layout: automatically set the # LLVM_EXTERNAL_${project}_SOURCE_DIR using LLVM_ALL_PROJECTS # This allows an easy way of setting up a build directory for llvm and another # one for llvm+clang+... 
using the same sources. set(LLVM_ALL_PROJECTS "clang;libcxx;libcxxabi;libunwind;lldb;compiler-rt;lld;polly;debuginfo-tests") set(LLVM_ENABLE_PROJECTS "" CACHE STRING "Semicolon-separated list of projects to build (${LLVM_ALL_PROJECTS}), or \"all\".") if( LLVM_ENABLE_PROJECTS STREQUAL "all" ) set( LLVM_ENABLE_PROJECTS ${LLVM_ALL_PROJECTS}) endif() foreach(proj ${LLVM_ENABLE_PROJECTS}) set(PROJ_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${proj}") if(NOT EXISTS "${PROJ_DIR}" OR NOT IS_DIRECTORY "${PROJ_DIR}") message(FATAL_ERROR "LLVM_ENABLE_PROJECTS requests ${proj} but directory not found: ${PROJ_DIR}") endif() string(TOUPPER "${proj}" upper_proj) STRING(REGEX REPLACE "-" "_" upper_proj ${upper_proj}) set(LLVM_EXTERNAL_${upper_proj}_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${proj}") # There is a widely spread opinion that clang-tools-extra should be merged # into clang. The following simulates it by always enabling clang-tools-extra # when enabling clang. if (proj STREQUAL "clang") set(LLVM_EXTERNAL_CLANG_TOOLS_EXTRA_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../clang-tools-extra") endif() endforeach() # Build llvm with ccache if the package is present set(LLVM_CCACHE_BUILD OFF CACHE BOOL "Set to ON for a ccache enabled build") if(LLVM_CCACHE_BUILD) find_program(CCACHE_PROGRAM ccache) if(CCACHE_PROGRAM) set(LLVM_CCACHE_MAXSIZE "" CACHE STRING "Size of ccache") set(LLVM_CCACHE_DIR "" CACHE STRING "Directory to keep ccached data") set(LLVM_CCACHE_PARAMS "CCACHE_CPP2=yes CCACHE_HASHDIR=yes" CACHE STRING "Parameters to pass through to ccache") set(CCACHE_PROGRAM "${LLVM_CCACHE_PARAMS} ${CCACHE_PROGRAM}") if (LLVM_CCACHE_MAXSIZE) set(CCACHE_PROGRAM "CCACHE_MAXSIZE=${LLVM_CCACHE_MAXSIZE} ${CCACHE_PROGRAM}") endif() if (LLVM_CCACHE_DIR) set(CCACHE_PROGRAM "CCACHE_DIR=${LLVM_CCACHE_DIR} ${CCACHE_PROGRAM}") endif() set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PROGRAM}) else() message(FATAL_ERROR "Unable to find the program ccache. Set LLVM_CCACHE_BUILD to OFF") endif() endif() option(LLVM_DEPENDENCY_DEBUGGING "Dependency debugging mode to verify correctly expressed library dependencies (Darwin only)" OFF) # Some features of the LLVM build may be disallowed when dependency debugging is # enabled. In particular you cannot use ccache because we want to force compile # operations to always happen. if(LLVM_DEPENDENCY_DEBUGGING) if(NOT CMAKE_HOST_APPLE) message(FATAL_ERROR "Dependency debugging is only currently supported on Darwin hosts.") endif() if(LLVM_CCACHE_BUILD) message(FATAL_ERROR "Cannot enable dependency debugging while using ccache.") endif() endif() option(LLVM_ENABLE_DAGISEL_COV "Debug: Prints tablegen patterns that were used for selecting" OFF) option(LLVM_ENABLE_GISEL_COV "Enable collection of GlobalISel rule coverage" OFF) if(LLVM_ENABLE_GISEL_COV) set(LLVM_GISEL_COV_PREFIX "${CMAKE_BINARY_DIR}/gisel-coverage-" CACHE STRING "Provide a filename prefix to collect the GlobalISel rule coverage") endif() # Add path for custom modules set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules" ) # Generate a CompilationDatabase (compile_commands.json file) for our build, # for use by clang_complete, YouCompleteMe, etc. set(CMAKE_EXPORT_COMPILE_COMMANDS 1) option(LLVM_INSTALL_BINUTILS_SYMLINKS "Install symlinks from the binutils tool names to the corresponding LLVM tools." OFF) option(LLVM_INSTALL_UTILS "Include utility binaries in the 'install' target." 
OFF) option(LLVM_INSTALL_TOOLCHAIN_ONLY "Only include toolchain files in the 'install' target." OFF) # Unfortunatly Clang is too eager to search directories for module maps, which can cause the # installed version of the maps to be found when building LLVM from source. Therefore we turn off # the installation by default. See llvm.org/PR31905. option(LLVM_INSTALL_MODULEMAPS "Install the modulemap files in the 'install' target." OFF) option(LLVM_USE_FOLDERS "Enable solution folders in Visual Studio. Disable for Express versions." ON) if ( LLVM_USE_FOLDERS ) set_property(GLOBAL PROPERTY USE_FOLDERS ON) endif() include(VersionFromVCS) option(LLVM_APPEND_VC_REV "Embed the version control system revision id in LLVM" ON) set(PACKAGE_NAME LLVM) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") set(PACKAGE_BUGREPORT "https://bugs.llvm.org/") set(BUG_REPORT_URL "${PACKAGE_BUGREPORT}" CACHE STRING "Default URL where bug reports are to be submitted.") # Configure CPack. set(CPACK_PACKAGE_INSTALL_DIRECTORY "LLVM") set(CPACK_PACKAGE_VENDOR "LLVM") set(CPACK_PACKAGE_VERSION_MAJOR ${LLVM_VERSION_MAJOR}) set(CPACK_PACKAGE_VERSION_MINOR ${LLVM_VERSION_MINOR}) set(CPACK_PACKAGE_VERSION_PATCH ${LLVM_VERSION_PATCH}) set(CPACK_PACKAGE_VERSION ${PACKAGE_VERSION}) set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.TXT") set(CPACK_NSIS_COMPRESSOR "/SOLID lzma \r\n SetCompressorDictSize 32") if(WIN32 AND NOT UNIX) set(CPACK_PACKAGE_INSTALL_REGISTRY_KEY "LLVM") set(CPACK_PACKAGE_ICON "${CMAKE_CURRENT_SOURCE_DIR}\\\\cmake\\\\nsis_logo.bmp") set(CPACK_NSIS_MUI_ICON "${CMAKE_CURRENT_SOURCE_DIR}\\\\cmake\\\\nsis_icon.ico") set(CPACK_NSIS_MUI_UNIICON "${CMAKE_CURRENT_SOURCE_DIR}\\\\cmake\\\\nsis_icon.ico") set(CPACK_NSIS_MODIFY_PATH "ON") set(CPACK_NSIS_ENABLE_UNINSTALL_BEFORE_INSTALL "ON") if( CMAKE_CL_64 ) set(CPACK_NSIS_INSTALL_ROOT "$PROGRAMFILES64") endif() endif() include(CPack) # Sanity check our source directory to make sure that we are not trying to # generate an in-source build (unless on MSVC_IDE, where it is ok), and to make # sure that we don't have any stray generated files lying around in the tree # (which would end up getting picked up by header search, instead of the correct # versions). if( CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR AND NOT MSVC_IDE ) message(FATAL_ERROR "In-source builds are not allowed. Please create a directory and run cmake from there, passing the path to this source directory as the last argument. This process created the file `CMakeCache.txt' and the directory `CMakeFiles'. Please delete them.") endif() string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) if (CMAKE_BUILD_TYPE AND NOT uppercase_CMAKE_BUILD_TYPE MATCHES "^(DEBUG|RELEASE|RELWITHDEBINFO|MINSIZEREL)$") message(FATAL_ERROR "Invalid value for CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") endif() set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" ) set(LLVM_TOOLS_INSTALL_DIR "bin" CACHE STRING "Path for binary subdirectory (defaults to 'bin')") mark_as_advanced(LLVM_TOOLS_INSTALL_DIR) set(LLVM_UTILS_INSTALL_DIR "${LLVM_TOOLS_INSTALL_DIR}" CACHE STRING "Path to install LLVM utilities (enabled by LLVM_INSTALL_UTILS=ON) (defaults to LLVM_TOOLS_INSTALL_DIR)") mark_as_advanced(LLVM_UTILS_INSTALL_DIR) # They are used as destination of target generators. 
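# A minimal configure-line sketch (generator, paths and option values are
# placeholders, not part of this file): the cache options declared above are
# normally set when CMake is first invoked, for example
#
#   cmake -G Ninja ../llvm \
#     -DCMAKE_BUILD_TYPE=Release \
#     -DLLVM_ENABLE_PROJECTS="clang;lld" \
#     -DLLVM_CCACHE_BUILD=ON \
#     -DLLVM_INSTALL_UTILS=ON
#
# The target-generator destination directories mentioned above are set next.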
set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin) set(LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/lib${LLVM_LIBDIR_SUFFIX}) if(WIN32 OR CYGWIN) # DLL platform -- put DLLs into bin. set(LLVM_SHLIB_OUTPUT_INTDIR ${LLVM_RUNTIME_OUTPUT_INTDIR}) else() set(LLVM_SHLIB_OUTPUT_INTDIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) endif() # Each of them corresponds to llvm-config's. set(LLVM_TOOLS_BINARY_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR}) # --bindir set(LLVM_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) # --libdir set(LLVM_MAIN_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR} ) # --src-root set(LLVM_MAIN_INCLUDE_DIR ${LLVM_MAIN_SRC_DIR}/include ) # --includedir set(LLVM_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} ) # --prefix # Note: LLVM_CMAKE_PATH does not include generated files set(LLVM_CMAKE_PATH ${LLVM_MAIN_SRC_DIR}/cmake/modules) set(LLVM_EXAMPLES_BINARY_DIR ${LLVM_BINARY_DIR}/examples) set(LLVM_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/include) # List of all targets to be built by default: set(LLVM_ALL_TARGETS AArch64 AMDGPU ARM BPF Hexagon Lanai Mips MSP430 NVPTX PowerPC Sparc SystemZ WebAssembly X86 XCore ) # List of targets with JIT support: set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM Mips SystemZ) set(LLVM_TARGETS_TO_BUILD "all" CACHE STRING "Semicolon-separated list of targets to build, or \"all\".") set(LLVM_EXPERIMENTAL_TARGETS_TO_BUILD "" CACHE STRING "Semicolon-separated list of experimental targets to build.") option(BUILD_SHARED_LIBS "Build all libraries as shared libraries instead of static" OFF) option(LLVM_ENABLE_BACKTRACES "Enable embedding backtraces on crash." ON) if(LLVM_ENABLE_BACKTRACES) set(ENABLE_BACKTRACES 1) endif() option(LLVM_ENABLE_CRASH_OVERRIDES "Enable crash overrides." ON) if(LLVM_ENABLE_CRASH_OVERRIDES) set(ENABLE_CRASH_OVERRIDES 1) endif() option(LLVM_ENABLE_CRASH_DUMPS "Turn on memory dumps on crashes. Currently only implemented on Windows." OFF) option(LLVM_ENABLE_FFI "Use libffi to call external functions from the interpreter" OFF) set(FFI_LIBRARY_DIR "" CACHE PATH "Additional directory, where CMake should search for libffi.so") set(FFI_INCLUDE_DIR "" CACHE PATH "Additional directory, where CMake should search for ffi.h or ffi/ffi.h") set(LLVM_TARGET_ARCH "host" CACHE STRING "Set target to use for LLVM JIT or use \"host\" for automatic detection.") option(LLVM_ENABLE_TERMINFO "Use terminfo database if available." ON) set(LLVM_ENABLE_LIBXML2 "ON" CACHE STRING "Use libxml2 if available. Can be ON, OFF, or FORCE_ON") option(LLVM_ENABLE_LIBEDIT "Use libedit if available." ON) option(LLVM_ENABLE_LIBPFM "Use libpfm for performance counters if available." ON) option(LLVM_ENABLE_THREADS "Use threads if available." ON) option(LLVM_ENABLE_ZLIB "Use zlib for compression/decompression if available." ON) if( LLVM_TARGETS_TO_BUILD STREQUAL "all" ) set( LLVM_TARGETS_TO_BUILD ${LLVM_ALL_TARGETS} ) endif() set(LLVM_TARGETS_TO_BUILD ${LLVM_TARGETS_TO_BUILD} ${LLVM_EXPERIMENTAL_TARGETS_TO_BUILD}) list(REMOVE_DUPLICATES LLVM_TARGETS_TO_BUILD) option(LLVM_ENABLE_PIC "Build Position-Independent Code" ON) option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." ON) option(LLVM_ENABLE_MODULES "Compile with C++ modules enabled." OFF) if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") option(LLVM_ENABLE_MODULE_DEBUGGING "Compile with -gmodules." ON) option(LLVM_ENABLE_LOCAL_SUBMODULE_VISIBILITY "Compile with -fmodules-local-submodule-visibility." OFF) else() option(LLVM_ENABLE_MODULE_DEBUGGING "Compile with -gmodules." 
OFF) option(LLVM_ENABLE_LOCAL_SUBMODULE_VISIBILITY "Compile with -fmodules-local-submodule-visibility." ON) endif() option(LLVM_ENABLE_CXX1Y "Compile with C++1y enabled." OFF) option(LLVM_ENABLE_CXX1Z "Compile with C++1z enabled." OFF) option(LLVM_ENABLE_LIBCXX "Use libc++ if available." OFF) option(LLVM_ENABLE_LLD "Use lld as C and C++ linker." OFF) option(LLVM_ENABLE_PEDANTIC "Compile with pedantic enabled." ON) option(LLVM_ENABLE_WERROR "Fail and stop if a warning is triggered." OFF) option(LLVM_ENABLE_DUMP "Enable dump functions even when assertions are disabled" OFF) if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" ) option(LLVM_ENABLE_ASSERTIONS "Enable assertions" OFF) else() option(LLVM_ENABLE_ASSERTIONS "Enable assertions" ON) endif() option(LLVM_ENABLE_EXPENSIVE_CHECKS "Enable expensive checks" OFF) set(LLVM_ABI_BREAKING_CHECKS "WITH_ASSERTS" CACHE STRING "Enable abi-breaking checks. Can be WITH_ASSERTS, FORCE_ON or FORCE_OFF.") option(LLVM_FORCE_USE_OLD_TOOLCHAIN "Set to ON to force using an old, unsupported host toolchain." OFF) option(LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN "Set to ON to only warn when using a toolchain which is about to be deprecated, instead of emitting an error." OFF) option(LLVM_USE_INTEL_JITEVENTS "Use Intel JIT API to inform Intel(R) VTune(TM) Amplifier XE 2011 about JIT code" OFF) if( LLVM_USE_INTEL_JITEVENTS ) # Verify we are on a supported platform if( NOT CMAKE_SYSTEM_NAME MATCHES "Windows" AND NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) message(FATAL_ERROR "Intel JIT API support is available on Linux and Windows only.") endif() endif( LLVM_USE_INTEL_JITEVENTS ) option(LLVM_USE_OPROFILE "Use opagent JIT interface to inform OProfile about JIT code" OFF) option(LLVM_EXTERNALIZE_DEBUGINFO "Generate dSYM files and strip executables and libraries (Darwin Only)" OFF) set(LLVM_CODESIGNING_IDENTITY "" CACHE STRING "Sign executables and dylibs with the given identity or skip if empty (Darwin Only)") # If enabled, verify we are on a platform that supports oprofile. if( LLVM_USE_OPROFILE ) if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) message(FATAL_ERROR "OProfile support is available on Linux only.") endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) endif( LLVM_USE_OPROFILE ) option(LLVM_USE_PERF "Use perf JIT interface to inform perf about JIT code" OFF) # If enabled, verify we are on a platform that supports perf. if( LLVM_USE_PERF ) if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) message(FATAL_ERROR "perf support is available on Linux only.") endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) endif( LLVM_USE_PERF ) set(LLVM_USE_SANITIZER "" CACHE STRING "Define the sanitizer used to build binaries and tests.") option(LLVM_OPTIMIZE_SANITIZED_BUILDS "Pass -O1 on debug sanitizer builds" ON) set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH "Path to fuzzing library for linking with fuzz targets") option(LLVM_USE_SPLIT_DWARF "Use -gsplit-dwarf when compiling llvm." 
OFF) option(LLVM_POLLY_LINK_INTO_TOOLS "Statically link Polly into tools (if available)" ON) option(LLVM_POLLY_BUILD "Build LLVM with Polly" ON) if (EXISTS ${LLVM_MAIN_SRC_DIR}/tools/polly/CMakeLists.txt) set(POLLY_IN_TREE TRUE) elseif(LLVM_EXTERNAL_POLLY_SOURCE_DIR) set(POLLY_IN_TREE TRUE) else() set(POLLY_IN_TREE FALSE) endif() if (LLVM_POLLY_BUILD AND POLLY_IN_TREE) set(WITH_POLLY ON) else() set(WITH_POLLY OFF) endif() if (LLVM_POLLY_LINK_INTO_TOOLS AND WITH_POLLY) set(LINK_POLLY_INTO_TOOLS ON) else() set(LINK_POLLY_INTO_TOOLS OFF) endif() # Define an option controlling whether we should build for 32-bit on 64-bit # platforms, where supported. if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 ) # TODO: support other platforms and toolchains. option(LLVM_BUILD_32_BITS "Build 32 bits executables and libraries." OFF) endif() # Define the default arguments to use with 'lit', and an option for the user to # override. set(LIT_ARGS_DEFAULT "-sv") if (MSVC_IDE OR XCODE) set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar") endif() set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit") # On Win32 hosts, provide an option to specify the path to the GnuWin32 tools. if( WIN32 AND NOT CYGWIN ) set(LLVM_LIT_TOOLS_DIR "" CACHE PATH "Path to GnuWin32 tools") endif() # Define options to control the inclusion and default build behavior for # components which may not strictly be necessary (tools, examples, and tests). # # This is primarily to support building smaller or faster project files. option(LLVM_INCLUDE_TOOLS "Generate build targets for the LLVM tools." ON) option(LLVM_BUILD_TOOLS "Build the LLVM tools. If OFF, just generate build targets." ON) option(LLVM_INCLUDE_UTILS "Generate build targets for the LLVM utils." ON) option(LLVM_BUILD_UTILS "Build LLVM utility binaries. If OFF, just generate build targets." ON) option(LLVM_INCLUDE_RUNTIMES "Generate build targets for the LLVM runtimes." ON) option(LLVM_BUILD_RUNTIMES "Build the LLVM runtimes. If OFF, just generate build targets." ON) option(LLVM_BUILD_RUNTIME "Build the LLVM runtime libraries." ON) option(LLVM_BUILD_EXAMPLES "Build the LLVM example programs. If OFF, just generate build targets." OFF) option(LLVM_INCLUDE_EXAMPLES "Generate build targets for the LLVM examples" ON) option(LLVM_BUILD_TESTS "Build LLVM unit tests. If OFF, just generate build targets." OFF) option(LLVM_INCLUDE_TESTS "Generate build targets for the LLVM unit tests." ON) option(LLVM_INCLUDE_GO_TESTS "Include the Go bindings tests in test build targets." ON) option(LLVM_BUILD_BENCHMARKS "Add LLVM benchmark targets to the list of default targets. If OFF, benchmarks still could be built using Benchmarks target." OFF) option(LLVM_INCLUDE_BENCHMARKS "Generate benchmark targets. If OFF, benchmarks can't be built." ON) option (LLVM_BUILD_DOCS "Build the llvm documentation." OFF) option (LLVM_INCLUDE_DOCS "Generate build targets for llvm documentation." ON) option (LLVM_ENABLE_DOXYGEN "Use doxygen to generate llvm API documentation." OFF) option (LLVM_ENABLE_SPHINX "Use Sphinx to generate llvm documentation." OFF) option (LLVM_ENABLE_OCAMLDOC "Build OCaml bindings documentation." ON) option (LLVM_ENABLE_BINDINGS "Build bindings." 
ON) set(LLVM_INSTALL_DOXYGEN_HTML_DIR "share/doc/llvm/doxygen-html" CACHE STRING "Doxygen-generated HTML documentation install directory") set(LLVM_INSTALL_OCAMLDOC_HTML_DIR "share/doc/llvm/ocaml-html" CACHE STRING "OCamldoc-generated HTML documentation install directory") option (LLVM_BUILD_EXTERNAL_COMPILER_RT "Build compiler-rt as an external project." OFF) option (LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO "Show target and host info when tools are invoked with --version." ON) # You can configure which libraries from LLVM you want to include in the # shared library by setting LLVM_DYLIB_COMPONENTS to a semi-colon delimited # list of LLVM components. All component names handled by llvm-config are valid. if(NOT DEFINED LLVM_DYLIB_COMPONENTS) set(LLVM_DYLIB_COMPONENTS "all" CACHE STRING "Semicolon-separated list of components to include in libLLVM, or \"all\".") endif() option(LLVM_LINK_LLVM_DYLIB "Link tools against the libllvm dynamic library" OFF) if(MSVC) option(LLVM_BUILD_LLVM_C_DYLIB "Build LLVM-C.dll (Windows only)" OFF) else() option(LLVM_BUILD_LLVM_C_DYLIB "Build libllvm-c re-export library (Darwin only)" OFF) endif() set(LLVM_BUILD_LLVM_DYLIB_default OFF) if(LLVM_LINK_LLVM_DYLIB OR (LLVM_BUILD_LLVM_C_DYLIB AND NOT MSVC)) set(LLVM_BUILD_LLVM_DYLIB_default ON) endif() option(LLVM_BUILD_LLVM_DYLIB "Build libllvm dynamic library" ${LLVM_BUILD_LLVM_DYLIB_default}) option(LLVM_OPTIMIZED_TABLEGEN "Force TableGen to be built with optimization" OFF) if(CMAKE_CROSSCOMPILING OR (LLVM_OPTIMIZED_TABLEGEN AND (LLVM_ENABLE_ASSERTIONS OR CMAKE_CONFIGURATION_TYPES))) set(LLVM_USE_HOST_TOOLS ON) endif() if (MSVC_IDE AND NOT (MSVC_VERSION LESS 1900)) option(LLVM_ADD_NATIVE_VISUALIZERS_TO_SOLUTION "Configure project to use Visual Studio native visualizers" TRUE) else() set(LLVM_ADD_NATIVE_VISUALIZERS_TO_SOLUTION FALSE CACHE INTERNAL "For Visual Studio 2013, manually copy natvis files to Documents\\Visual Studio 2013\\Visualizers" FORCE) endif() if (LLVM_BUILD_INSTRUMENTED OR LLVM_BUILD_INSTRUMENTED_COVERAGE OR LLVM_ENABLE_IR_PGO) if(NOT LLVM_PROFILE_MERGE_POOL_SIZE) # A pool size of 1-2 is probably sufficient on a SSD. 3-4 should be fine # for spining disks. Anything higher may only help on slower mediums. set(LLVM_PROFILE_MERGE_POOL_SIZE "4") endif() if(NOT LLVM_PROFILE_FILE_PATTERN) if(NOT LLVM_PROFILE_DATA_DIR) file(TO_NATIVE_PATH "${LLVM_BINARY_DIR}/profiles" LLVM_PROFILE_DATA_DIR) endif() file(TO_NATIVE_PATH "${LLVM_PROFILE_DATA_DIR}/%${LLVM_PROFILE_MERGE_POOL_SIZE}m.profraw" LLVM_PROFILE_FILE_PATTERN) endif() endif() if (LLVM_BUILD_STATIC) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static") endif() # Override the default target with an environment variable named by LLVM_TARGET_TRIPLE_ENV. set(LLVM_TARGET_TRIPLE_ENV CACHE STRING "The name of environment variable to override default target. Disabled by blank.") mark_as_advanced(LLVM_TARGET_TRIPLE_ENV) set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR OFF CACHE BOOL "Enable per-target runtimes directory") # All options referred to from HandleLLVMOptions have to be specified # BEFORE this include, otherwise options will not be correctly set on # first cmake run include(config-ix) string(REPLACE "Native" ${LLVM_NATIVE_ARCH} LLVM_TARGETS_TO_BUILD "${LLVM_TARGETS_TO_BUILD}") list(REMOVE_DUPLICATES LLVM_TARGETS_TO_BUILD) # By default, we target the host, but this can be overridden at CMake # invocation time. set(LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_HOST_TRIPLE}" CACHE STRING "Default target for which LLVM will generate code." 
) set(TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}") message(STATUS "LLVM host triple: ${LLVM_HOST_TRIPLE}") message(STATUS "LLVM default target triple: ${LLVM_DEFAULT_TARGET_TRIPLE}") include(HandleLLVMOptions) # Verify that we can find a Python 2 interpreter. Python 3 is unsupported. # FIXME: We should support systems with only Python 3, but that requires work # on LLDB. set(Python_ADDITIONAL_VERSIONS 2.7) include(FindPythonInterp) if( NOT PYTHONINTERP_FOUND ) message(FATAL_ERROR "Unable to find Python interpreter, required for builds and testing. Please install Python or specify the PYTHON_EXECUTABLE CMake variable.") endif() if( ${PYTHON_VERSION_STRING} VERSION_LESS 2.7 ) message(FATAL_ERROR "Python 2.7 or newer is required") endif() ###### # LLVMBuild Integration # # We use llvm-build to generate all the data required by the CMake based # build system in one swoop: # # - We generate a file (a CMake fragment) in the object root which contains # all the definitions that are required by CMake. # # - We generate the library table used by llvm-config. # # - We generate the dependencies for the CMake fragment, so that we will # automatically reconfigure ourselves. set(LLVMBUILDTOOL "${LLVM_MAIN_SRC_DIR}/utils/llvm-build/llvm-build") set(LLVMCONFIGLIBRARYDEPENDENCIESINC "${LLVM_BINARY_DIR}/tools/llvm-config/LibraryDependencies.inc") set(LLVMBUILDCMAKEFRAG "${LLVM_BINARY_DIR}/LLVMBuild.cmake") # Create the list of optional components that are enabled if (LLVM_USE_INTEL_JITEVENTS) set(LLVMOPTIONALCOMPONENTS IntelJITEvents) endif (LLVM_USE_INTEL_JITEVENTS) if (LLVM_USE_OPROFILE) set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} OProfileJIT) endif (LLVM_USE_OPROFILE) if (LLVM_USE_PERF) set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} PerfJITEvents) endif (LLVM_USE_PERF) message(STATUS "Constructing LLVMBuild project information") execute_process( COMMAND ${PYTHON_EXECUTABLE} -B ${LLVMBUILDTOOL} --native-target "${LLVM_NATIVE_ARCH}" --enable-targets "${LLVM_TARGETS_TO_BUILD}" --enable-optional-components "${LLVMOPTIONALCOMPONENTS}" --write-library-table ${LLVMCONFIGLIBRARYDEPENDENCIESINC} --write-cmake-fragment ${LLVMBUILDCMAKEFRAG} OUTPUT_VARIABLE LLVMBUILDOUTPUT ERROR_VARIABLE LLVMBUILDERRORS OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_STRIP_TRAILING_WHITESPACE RESULT_VARIABLE LLVMBUILDRESULT) # On Win32, CMake doesn't properly handle piping the default output/error # streams into the GUI console. So, we explicitly catch and report them. if( NOT "${LLVMBUILDOUTPUT}" STREQUAL "") message(STATUS "llvm-build output: ${LLVMBUILDOUTPUT}") endif() if( NOT "${LLVMBUILDRESULT}" STREQUAL "0" ) message(FATAL_ERROR "Unexpected failure executing llvm-build: ${LLVMBUILDERRORS}") endif() # Include the generated CMake fragment. This will define properties from the # LLVMBuild files in a format which is easy to consume from CMake, and will add # the dependencies so that CMake will reconfigure properly when the LLVMBuild # files change. include(${LLVMBUILDCMAKEFRAG}) ###### # Configure all of the various header file fragments LLVM uses which depend on # configuration variables. 
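# For reference only (values are examples; the real arguments come from the
# cache variables above), the execute_process() call above amounts to running
# the LLVMBuild generator by hand from the build directory:
#
#   python -B <src>/utils/llvm-build/llvm-build \
#     --native-target X86 \
#     --enable-targets "X86;ARM" \
#     --write-library-table tools/llvm-config/LibraryDependencies.inc \
#     --write-cmake-fragment LLVMBuild.cmake
#
# The per-target header fragments that depend on this configuration follow.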
set(LLVM_ENUM_TARGETS "") set(LLVM_ENUM_ASM_PRINTERS "") set(LLVM_ENUM_ASM_PARSERS "") set(LLVM_ENUM_DISASSEMBLERS "") foreach(t ${LLVM_TARGETS_TO_BUILD}) set( td ${LLVM_MAIN_SRC_DIR}/lib/Target/${t} ) list(FIND LLVM_ALL_TARGETS ${t} idx) list(FIND LLVM_EXPERIMENTAL_TARGETS_TO_BUILD ${t} idy) # At this point, LLVMBUILDTOOL already checked all the targets passed in # LLVM_TARGETS_TO_BUILD and LLVM_EXPERIMENTAL_TARGETS_TO_BUILD, so # this test just makes sure that any experimental targets were passed via # LLVM_EXPERIMENTAL_TARGETS_TO_BUILD, not LLVM_TARGETS_TO_BUILD. if( idx LESS 0 AND idy LESS 0 ) message(FATAL_ERROR "The target `${t}' is experimental and must be passed " "via LLVM_EXPERIMENTAL_TARGETS_TO_BUILD.") else() set(LLVM_ENUM_TARGETS "${LLVM_ENUM_TARGETS}LLVM_TARGET(${t})\n") endif() file(GLOB asmp_file "${td}/*AsmPrinter.cpp") if( asmp_file ) set(LLVM_ENUM_ASM_PRINTERS "${LLVM_ENUM_ASM_PRINTERS}LLVM_ASM_PRINTER(${t})\n") endif() if( EXISTS ${td}/AsmParser/CMakeLists.txt ) set(LLVM_ENUM_ASM_PARSERS "${LLVM_ENUM_ASM_PARSERS}LLVM_ASM_PARSER(${t})\n") endif() if( EXISTS ${td}/Disassembler/CMakeLists.txt ) set(LLVM_ENUM_DISASSEMBLERS "${LLVM_ENUM_DISASSEMBLERS}LLVM_DISASSEMBLER(${t})\n") endif() endforeach(t) # Produce the target definition files, which provide a way for clients to easily # include various classes of targets. configure_file( ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmPrinters.def.in ${LLVM_INCLUDE_DIR}/llvm/Config/AsmPrinters.def ) configure_file( ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmParsers.def.in ${LLVM_INCLUDE_DIR}/llvm/Config/AsmParsers.def ) configure_file( ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/Disassemblers.def.in ${LLVM_INCLUDE_DIR}/llvm/Config/Disassemblers.def ) configure_file( ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/Targets.def.in ${LLVM_INCLUDE_DIR}/llvm/Config/Targets.def ) # Configure the three LLVM configuration header files. configure_file( ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/config.h.cmake ${LLVM_INCLUDE_DIR}/llvm/Config/config.h) configure_file( ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/llvm-config.h.cmake ${LLVM_INCLUDE_DIR}/llvm/Config/llvm-config.h) configure_file( ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/abi-breaking.h.cmake ${LLVM_INCLUDE_DIR}/llvm/Config/abi-breaking.h) # Add target for generating source rpm package. set(LLVM_SRPM_USER_BINARY_SPECFILE ${CMAKE_CURRENT_SOURCE_DIR}/llvm.spec.in CACHE FILEPATH ".spec file to use for srpm generation") set(LLVM_SRPM_BINARY_SPECFILE ${CMAKE_CURRENT_BINARY_DIR}/llvm.spec) set(LLVM_SRPM_DIR "${CMAKE_CURRENT_BINARY_DIR}/srpm") # SVN_REVISION and GIT_COMMIT get set by the call to add_version_info_from_vcs. # DUMMY_VAR contains a version string which we don't care about. add_version_info_from_vcs(DUMMY_VAR) if ( SVN_REVISION ) set(LLVM_RPM_SPEC_REVISION "r${SVN_REVISION}") elseif ( GIT_COMMIT ) set (LLVM_RPM_SPEC_REVISION "g${GIT_COMMIT}") endif() configure_file( ${LLVM_SRPM_USER_BINARY_SPECFILE} ${LLVM_SRPM_BINARY_SPECFILE} @ONLY) add_custom_target(srpm COMMAND cpack -G TGZ --config CPackSourceConfig.cmake -B ${LLVM_SRPM_DIR}/SOURCES COMMAND rpmbuild -bs --define '_topdir ${LLVM_SRPM_DIR}' ${LLVM_SRPM_BINARY_SPECFILE}) set_target_properties(srpm PROPERTIES FOLDER "Misc") # They are not referenced. See set_output_directory(). 
set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/bin ) set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX} ) set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX} ) if(APPLE AND DARWIN_LTO_LIBRARY) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-lto_library -Wl,${DARWIN_LTO_LIBRARY}") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-lto_library -Wl,${DARWIN_LTO_LIBRARY}") set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,-lto_library -Wl,${DARWIN_LTO_LIBRARY}") endif() # Work around a broken bfd ld behavior. When linking a binary with a # foo.so library, it will try to find any library that foo.so uses and # check its symbols. This is wasteful (the check was done when foo.so # was created) and can fail since it is not the dynamic linker and # doesn't know how to handle search paths correctly. if (UNIX AND NOT APPLE AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "SunOS|AIX") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-allow-shlib-undefined") endif() set(CMAKE_INCLUDE_CURRENT_DIR ON) include_directories( ${LLVM_INCLUDE_DIR} ${LLVM_MAIN_INCLUDE_DIR}) # when crosscompiling import the executable targets from a file if(LLVM_USE_HOST_TOOLS) include(CrossCompile) endif(LLVM_USE_HOST_TOOLS) if(LLVM_TARGET_IS_CROSSCOMPILE_HOST) # Dummy use to avoid CMake Warning: Manually-specified variables were not used # (this is a variable that CrossCompile sets on recursive invocations) endif() if(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)") # On FreeBSD, /usr/local/* is not used by default. In order to build LLVM # with libxml2, iconv.h, etc., we must add /usr/local paths. include_directories(SYSTEM "/usr/local/include") link_directories("/usr/local/lib") endif(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)") if( ${CMAKE_SYSTEM_NAME} MATCHES SunOS ) # special hack for Solaris to handle crazy system sys/regset.h include_directories("${LLVM_MAIN_INCLUDE_DIR}/llvm/Support/Solaris") endif( ${CMAKE_SYSTEM_NAME} MATCHES SunOS ) # Make sure we don't get -rdynamic in every binary. For those that need it, # use export_executable_symbols(target). set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "") set(LLVM_PROFDATA_FILE "" CACHE FILEPATH "Profiling data file to use when compiling in order to improve runtime performance.") if(LLVM_PROFDATA_FILE AND EXISTS ${LLVM_PROFDATA_FILE}) if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" ) add_definitions("-fprofile-instr-use=${LLVM_PROFDATA_FILE}") else() message(FATAL_ERROR "LLVM_PROFDATA_FILE can only be specified when compiling with clang") endif() endif() include(AddLLVM) include(TableGen) if( MINGW AND NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" ) # People report that -O3 is unreliable on MinGW. The traditional # build also uses -O2 for that reason: llvm_replace_compiler_option(CMAKE_CXX_FLAGS_RELEASE "-O3" "-O2") endif() # Put this before tblgen. Else we have a circular dependence. add_subdirectory(lib/Demangle) add_subdirectory(lib/Support) add_subdirectory(lib/TableGen) add_subdirectory(utils/TableGen) add_subdirectory(include/llvm) add_subdirectory(lib) if( LLVM_INCLUDE_UTILS ) add_subdirectory(utils/FileCheck) add_subdirectory(utils/PerfectShuffle) add_subdirectory(utils/count) add_subdirectory(utils/not) add_subdirectory(utils/yaml-bench) else() if ( LLVM_INCLUDE_TESTS ) message(FATAL_ERROR "Including tests when not building utils will not work. 
Either set LLVM_INCLUDE_UTILS to On, or set LLVM_INCLUDE_TESTS to Off.") endif() endif() # Use LLVM_ADD_NATIVE_VISUALIZERS_TO_SOLUTION instead of LLVM_INCLUDE_UTILS because it is not really a util if (LLVM_ADD_NATIVE_VISUALIZERS_TO_SOLUTION) add_subdirectory(utils/LLVMVisualizers) endif() foreach( binding ${LLVM_BINDINGS_LIST} ) if( EXISTS "${LLVM_MAIN_SRC_DIR}/bindings/${binding}/CMakeLists.txt" ) add_subdirectory(bindings/${binding}) endif() endforeach() add_subdirectory(projects) if( LLVM_INCLUDE_TOOLS ) add_subdirectory(tools) endif() if( LLVM_INCLUDE_RUNTIMES ) add_subdirectory(runtimes) endif() if( LLVM_INCLUDE_EXAMPLES ) add_subdirectory(examples) endif() if( LLVM_INCLUDE_TESTS ) if(EXISTS ${LLVM_MAIN_SRC_DIR}/projects/test-suite AND TARGET clang) include(LLVMExternalProjectUtils) llvm_ExternalProject_Add(test-suite ${LLVM_MAIN_SRC_DIR}/projects/test-suite USE_TOOLCHAIN EXCLUDE_FROM_ALL NO_INSTALL ALWAYS_CLEAN) endif() add_subdirectory(utils/lit) add_subdirectory(test) add_subdirectory(unittests) if( LLVM_INCLUDE_UTILS ) add_subdirectory(utils/unittest) endif() if (WIN32) # This utility is used to prevent crashing tests from calling Dr. Watson on # Windows. add_subdirectory(utils/KillTheDoctor) endif() # Add a global check rule now that all subdirectories have been traversed # and we know the total set of lit testsuites. get_property(LLVM_LIT_TESTSUITES GLOBAL PROPERTY LLVM_LIT_TESTSUITES) get_property(LLVM_LIT_PARAMS GLOBAL PROPERTY LLVM_LIT_PARAMS) get_property(LLVM_LIT_DEPENDS GLOBAL PROPERTY LLVM_LIT_DEPENDS) get_property(LLVM_LIT_EXTRA_ARGS GLOBAL PROPERTY LLVM_LIT_EXTRA_ARGS) get_property(LLVM_ADDITIONAL_TEST_TARGETS GLOBAL PROPERTY LLVM_ADDITIONAL_TEST_TARGETS) get_property(LLVM_ADDITIONAL_TEST_DEPENDS GLOBAL PROPERTY LLVM_ADDITIONAL_TEST_DEPENDS) add_lit_target(check-all "Running all regression tests" ${LLVM_LIT_TESTSUITES} PARAMS ${LLVM_LIT_PARAMS} DEPENDS ${LLVM_LIT_DEPENDS} ${LLVM_ADDITIONAL_TEST_TARGETS} ARGS ${LLVM_LIT_EXTRA_ARGS} ) if(TARGET check-runtimes) add_dependencies(check-all check-runtimes) endif() add_custom_target(test-depends DEPENDS ${LLVM_LIT_DEPENDS} ${LLVM_ADDITIONAL_TEST_DEPENDS}) set_target_properties(test-depends PROPERTIES FOLDER "Tests") endif() if (LLVM_INCLUDE_DOCS) add_subdirectory(docs) endif() add_subdirectory(cmake/modules) # Do this last so that all lit targets have already been created. if (LLVM_INCLUDE_UTILS) add_subdirectory(utils/llvm-lit) endif() if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) install(DIRECTORY include/llvm include/llvm-c DESTINATION include COMPONENT llvm-headers FILES_MATCHING PATTERN "*.def" PATTERN "*.h" PATTERN "*.td" PATTERN "*.inc" PATTERN "LICENSE.TXT" PATTERN ".svn" EXCLUDE ) install(DIRECTORY ${LLVM_INCLUDE_DIR}/llvm ${LLVM_INCLUDE_DIR}/llvm-c DESTINATION include COMPONENT llvm-headers FILES_MATCHING PATTERN "*.def" PATTERN "*.h" PATTERN "*.gen" PATTERN "*.inc" # Exclude include/llvm/CMakeFiles/intrinsics_gen.dir, matched by "*.def" PATTERN "CMakeFiles" EXCLUDE PATTERN "config.h" EXCLUDE PATTERN ".svn" EXCLUDE ) if (LLVM_INSTALL_MODULEMAPS) install(DIRECTORY include/llvm include/llvm-c DESTINATION include COMPONENT llvm-headers FILES_MATCHING PATTERN "module.modulemap" ) install(FILES include/llvm/module.install.modulemap DESTINATION include/llvm COMPONENT llvm-headers RENAME "module.extern.modulemap" ) endif(LLVM_INSTALL_MODULEMAPS) # Installing the headers needs to depend on generating any public # tablegen'd headers. 
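# Typical use of the aggregated test targets above (assuming a Ninja build
# directory; other generators expose the same targets):
#
#   ninja check-all      # run every registered lit testsuite
#   ninja test-depends   # build test dependencies without running the tests
#
# The llvm-headers target below provides the tablegen'd-header dependency
# mentioned above.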
add_custom_target(llvm-headers DEPENDS intrinsics_gen) set_target_properties(llvm-headers PROPERTIES FOLDER "Misc") if (NOT LLVM_ENABLE_IDE) add_llvm_install_targets(install-llvm-headers DEPENDS llvm-headers COMPONENT llvm-headers) endif() # Custom target to install all libraries. add_custom_target(llvm-libraries) set_target_properties(llvm-libraries PROPERTIES FOLDER "Misc") if (NOT LLVM_ENABLE_IDE) add_llvm_install_targets(install-llvm-libraries DEPENDS llvm-libraries COMPONENT llvm-libraries) endif() get_property(LLVM_LIBS GLOBAL PROPERTY LLVM_LIBS) if(LLVM_LIBS) list(REMOVE_DUPLICATES LLVM_LIBS) foreach(lib ${LLVM_LIBS}) add_dependencies(llvm-libraries ${lib}) if (NOT LLVM_ENABLE_IDE) add_dependencies(install-llvm-libraries install-${lib}) endif() endforeach() endif() endif() # This must be at the end of the LLVM root CMakeLists file because it must run # after all targets are created. if(LLVM_DISTRIBUTION_COMPONENTS) if(LLVM_ENABLE_IDE) message(FATAL_ERROR "LLVM_DISTRIBUTION_COMPONENTS cannot be specified with multi-configuration generators (i.e. Xcode or Visual Studio)") endif() add_custom_target(distribution) add_custom_target(install-distribution) add_custom_target(install-distribution-stripped) foreach(target ${LLVM_DISTRIBUTION_COMPONENTS} ${LLVM_RUNTIME_DISTRIBUTION_COMPONENTS}) if(TARGET ${target}) add_dependencies(distribution ${target}) else() message(SEND_ERROR "Specified distribution component '${target}' doesn't have a target") endif() if(TARGET install-${target}) add_dependencies(install-distribution install-${target}) else() message(SEND_ERROR "Specified distribution component '${target}' doesn't have an install target") endif() if(TARGET install-${target}-stripped) add_dependencies(install-distribution-stripped install-${target}-stripped) else() message(SEND_ERROR "Specified distribution component '${target}' doesn't have an install-stripped target." " Its installation target creation should be changed to use add_llvm_install_targets," " or you should manually create the 'install-${target}-stripped' target.") endif() endforeach() endif() # This allows us to deploy the Universal CRT DLLs by passing -DCMAKE_INSTALL_UCRT_LIBRARIES=ON to CMake if (MSVC AND CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows" AND CMAKE_INSTALL_UCRT_LIBRARIES) include(InstallRequiredSystemLibraries) endif() if (LLVM_INCLUDE_BENCHMARKS) # Override benchmark defaults so that when the library itself is updated these # modifications are not lost. set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing" FORCE) set(BENCHMARK_ENABLE_EXCEPTIONS OFF CACHE BOOL "Disable benchmark exceptions" FORCE) set(BENCHMARK_ENABLE_INSTALL OFF CACHE BOOL "Don't install benchmark" FORCE) set(BENCHMARK_DOWNLOAD_DEPENDENCIES OFF CACHE BOOL "Don't download dependencies" FORCE) set(BENCHMARK_ENABLE_GTEST_TESTS OFF CACHE BOOL "Disable Google Test in benchmark" FORCE) # Since LLVM requires C++11 it is safe to assume that std::regex is available. 
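# Sketch of how the distribution machinery above is driven (component names
# are placeholders; any component whose install targets were created with
# add_llvm_install_targets works):
#
#   cmake ... -DLLVM_DISTRIBUTION_COMPONENTS="llvm-headers;llvm-libraries"
#
# For each listed component the loop above hooks <name>, install-<name> and
# install-<name>-stripped into the distribution, install-distribution and
# install-distribution-stripped umbrella targets, and raises SEND_ERROR when
# one of those targets is missing.
# HAVE_STD_REGEX below is forced on because of the C++11 requirement noted
# above.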
set(HAVE_STD_REGEX ON CACHE BOOL "OK" FORCE) add_subdirectory(utils/benchmark) add_subdirectory(benchmarks) endif() Index: vendor/llvm/dist-release_80/cmake/modules/AddLLVM.cmake =================================================================== --- vendor/llvm/dist-release_80/cmake/modules/AddLLVM.cmake (revision 348931) +++ vendor/llvm/dist-release_80/cmake/modules/AddLLVM.cmake (revision 348932) @@ -1,1741 +1,1742 @@ include(LLVMProcessSources) include(LLVM-Config) include(DetermineGCCCompatible) function(llvm_update_compile_flags name) get_property(sources TARGET ${name} PROPERTY SOURCES) if("${sources}" MATCHES "\\.c(;|$)") set(update_src_props ON) endif() # LLVM_REQUIRES_EH is an internal flag that individual targets can use to # force EH if(LLVM_REQUIRES_EH OR LLVM_ENABLE_EH) if(NOT (LLVM_REQUIRES_RTTI OR LLVM_ENABLE_RTTI)) message(AUTHOR_WARNING "Exception handling requires RTTI. Enabling RTTI for ${name}") set(LLVM_REQUIRES_RTTI ON) endif() if(MSVC) list(APPEND LLVM_COMPILE_FLAGS "/EHsc") endif() else() if(LLVM_COMPILER_IS_GCC_COMPATIBLE) list(APPEND LLVM_COMPILE_FLAGS "-fno-exceptions") elseif(MSVC) list(APPEND LLVM_COMPILE_DEFINITIONS _HAS_EXCEPTIONS=0) list(APPEND LLVM_COMPILE_FLAGS "/EHs-c-") endif() endif() # LLVM_REQUIRES_RTTI is an internal flag that individual # targets can use to force RTTI set(LLVM_CONFIG_HAS_RTTI YES CACHE INTERNAL "") if(NOT (LLVM_REQUIRES_RTTI OR LLVM_ENABLE_RTTI)) set(LLVM_CONFIG_HAS_RTTI NO CACHE INTERNAL "") list(APPEND LLVM_COMPILE_DEFINITIONS GTEST_HAS_RTTI=0) if (LLVM_COMPILER_IS_GCC_COMPATIBLE) list(APPEND LLVM_COMPILE_FLAGS "-fno-rtti") elseif (MSVC) list(APPEND LLVM_COMPILE_FLAGS "/GR-") endif () elseif(MSVC) list(APPEND LLVM_COMPILE_FLAGS "/GR") endif() # Assume that; # - LLVM_COMPILE_FLAGS is list. # - PROPERTY COMPILE_FLAGS is string. string(REPLACE ";" " " target_compile_flags " ${LLVM_COMPILE_FLAGS}") if(update_src_props) foreach(fn ${sources}) get_filename_component(suf ${fn} EXT) if("${suf}" STREQUAL ".cpp") set_property(SOURCE ${fn} APPEND_STRING PROPERTY COMPILE_FLAGS "${target_compile_flags}") endif() endforeach() else() # Update target props, since all sources are C++. set_property(TARGET ${name} APPEND_STRING PROPERTY COMPILE_FLAGS "${target_compile_flags}") endif() set_property(TARGET ${name} APPEND PROPERTY COMPILE_DEFINITIONS ${LLVM_COMPILE_DEFINITIONS}) endfunction() function(add_llvm_symbol_exports target_name export_file) if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") set(native_export_file "${target_name}.exports") add_custom_command(OUTPUT ${native_export_file} COMMAND sed -e "s/^/_/" < ${export_file} > ${native_export_file} DEPENDS ${export_file} VERBATIM COMMENT "Creating export file for ${target_name}") set_property(TARGET ${target_name} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,-exported_symbols_list,\"${CMAKE_CURRENT_BINARY_DIR}/${native_export_file}\"") elseif(${CMAKE_SYSTEM_NAME} MATCHES "AIX") set_property(TARGET ${target_name} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,-bE:${export_file}") elseif(LLVM_HAVE_LINK_VERSION_SCRIPT) # Gold and BFD ld require a version script rather than a plain list. set(native_export_file "${target_name}.exports") # FIXME: Don't write the "local:" line on OpenBSD. 
# in the export file, also add a linker script to version LLVM symbols (form: LLVM_N.M) add_custom_command(OUTPUT ${native_export_file} COMMAND echo "LLVM_${LLVM_VERSION_MAJOR} {" > ${native_export_file} COMMAND grep -q "[[:alnum:]]" ${export_file} && echo " global:" >> ${native_export_file} || : COMMAND sed -e "s/$/;/" -e "s/^/ /" < ${export_file} >> ${native_export_file} COMMAND echo " local: *;" >> ${native_export_file} COMMAND echo "};" >> ${native_export_file} DEPENDS ${export_file} VERBATIM COMMENT "Creating export file for ${target_name}") if (${LLVM_LINKER_IS_SOLARISLD}) set_property(TARGET ${target_name} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,-M,\"${CMAKE_CURRENT_BINARY_DIR}/${native_export_file}\"") else() set_property(TARGET ${target_name} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,--version-script,\"${CMAKE_CURRENT_BINARY_DIR}/${native_export_file}\"") endif() else() set(native_export_file "${target_name}.def") add_custom_command(OUTPUT ${native_export_file} COMMAND ${PYTHON_EXECUTABLE} -c "import sys;print(''.join(['EXPORTS\\n']+sys.stdin.readlines(),))" < ${export_file} > ${native_export_file} DEPENDS ${export_file} VERBATIM COMMENT "Creating export file for ${target_name}") set(export_file_linker_flag "${CMAKE_CURRENT_BINARY_DIR}/${native_export_file}") if(MSVC) set(export_file_linker_flag "/DEF:\"${export_file_linker_flag}\"") endif() set_property(TARGET ${target_name} APPEND_STRING PROPERTY LINK_FLAGS " ${export_file_linker_flag}") endif() add_custom_target(${target_name}_exports DEPENDS ${native_export_file}) set_target_properties(${target_name}_exports PROPERTIES FOLDER "Misc") get_property(srcs TARGET ${target_name} PROPERTY SOURCES) foreach(src ${srcs}) get_filename_component(extension ${src} EXT) if(extension STREQUAL ".cpp") set(first_source_file ${src}) break() endif() endforeach() # Force re-linking when the exports file changes. Actually, it # forces recompilation of the source file. The LINK_DEPENDS target # property only works for makefile-based generators. # FIXME: This is not safe because this will create the same target # ${native_export_file} in several different file: # - One where we emitted ${target_name}_exports # - One where we emitted the build command for the following object. 
# set_property(SOURCE ${first_source_file} APPEND PROPERTY # OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${native_export_file}) set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${native_export_file}) add_dependencies(${target_name} ${target_name}_exports) # Add dependency to *_exports later -- CMake issue 14747 list(APPEND LLVM_COMMON_DEPENDS ${target_name}_exports) set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} PARENT_SCOPE) endfunction(add_llvm_symbol_exports) if(APPLE) execute_process( COMMAND "${CMAKE_LINKER}" -v ERROR_VARIABLE stderr ) set(LLVM_LINKER_DETECTED YES) if("${stderr}" MATCHES "PROJECT:ld64") set(LLVM_LINKER_IS_LD64 YES) message(STATUS "Linker detection: ld64") else() set(LLVM_LINKER_DETECTED NO) message(STATUS "Linker detection: unknown") endif() elseif(NOT WIN32) # Detect what linker we have here if( LLVM_USE_LINKER ) set(command ${CMAKE_C_COMPILER} -fuse-ld=${LLVM_USE_LINKER} -Wl,--version) else() separate_arguments(flags UNIX_COMMAND "${CMAKE_EXE_LINKER_FLAGS}") set(command ${CMAKE_C_COMPILER} ${flags} -Wl,--version) endif() execute_process( COMMAND ${command} OUTPUT_VARIABLE stdout ERROR_VARIABLE stderr ) set(LLVM_LINKER_DETECTED YES) if("${stdout}" MATCHES "GNU gold") set(LLVM_LINKER_IS_GOLD YES) message(STATUS "Linker detection: GNU Gold") elseif("${stdout}" MATCHES "^LLD") set(LLVM_LINKER_IS_LLD YES) message(STATUS "Linker detection: LLD") elseif("${stdout}" MATCHES "GNU ld") set(LLVM_LINKER_IS_GNULD YES) message(STATUS "Linker detection: GNU ld") elseif("${stderr}" MATCHES "Solaris Link Editors" OR "${stdout}" MATCHES "Solaris Link Editors") set(LLVM_LINKER_IS_SOLARISLD YES) message(STATUS "Linker detection: Solaris ld") else() set(LLVM_LINKER_DETECTED NO) message(STATUS "Linker detection: unknown") endif() endif() function(add_link_opts target_name) # Don't use linker optimizations in debug builds since it slows down the # linker in a context where the optimizations are not important. if (NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG") # Pass -O3 to the linker. This enabled different optimizations on different # linkers. if(NOT (${CMAKE_SYSTEM_NAME} MATCHES "Darwin|SunOS|AIX" OR WIN32)) set_property(TARGET ${target_name} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,-O3") endif() if(LLVM_LINKER_IS_GOLD) # With gold gc-sections is always safe. set_property(TARGET ${target_name} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,--gc-sections") # Note that there is a bug with -Wl,--icf=safe so it is not safe # to enable. See https://sourceware.org/bugzilla/show_bug.cgi?id=17704. endif() if(NOT LLVM_NO_DEAD_STRIP) if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") # ld64's implementation of -dead_strip breaks tools that use plugins. set_property(TARGET ${target_name} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,-dead_strip") elseif(${CMAKE_SYSTEM_NAME} MATCHES "SunOS") set_property(TARGET ${target_name} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,-z -Wl,discard-unused=sections") elseif(NOT WIN32 AND NOT LLVM_LINKER_IS_GOLD AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "OpenBSD") # Object files are compiled with -ffunction-data-sections. # Versions of bfd ld < 2.23.1 have a bug in --gc-sections that breaks # tools that use plugins. Always pass --gc-sections once we require # a newer linker. set_property(TARGET ${target_name} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,--gc-sections") endif() endif() endif() endfunction(add_link_opts) # Set each output directory according to ${CMAKE_CONFIGURATION_TYPES}. 
# Note: Don't set variables CMAKE_*_OUTPUT_DIRECTORY any more, # or a certain builder, for eaxample, msbuild.exe, would be confused. function(set_output_directory target) cmake_parse_arguments(ARG "" "BINARY_DIR;LIBRARY_DIR" "" ${ARGN}) # module_dir -- corresponding to LIBRARY_OUTPUT_DIRECTORY. # It affects output of add_library(MODULE). if(WIN32 OR CYGWIN) # DLL platform set(module_dir ${ARG_BINARY_DIR}) else() set(module_dir ${ARG_LIBRARY_DIR}) endif() if(NOT "${CMAKE_CFG_INTDIR}" STREQUAL ".") foreach(build_mode ${CMAKE_CONFIGURATION_TYPES}) string(TOUPPER "${build_mode}" CONFIG_SUFFIX) if(ARG_BINARY_DIR) string(REPLACE ${CMAKE_CFG_INTDIR} ${build_mode} bi ${ARG_BINARY_DIR}) set_target_properties(${target} PROPERTIES "RUNTIME_OUTPUT_DIRECTORY_${CONFIG_SUFFIX}" ${bi}) endif() if(ARG_LIBRARY_DIR) string(REPLACE ${CMAKE_CFG_INTDIR} ${build_mode} li ${ARG_LIBRARY_DIR}) set_target_properties(${target} PROPERTIES "ARCHIVE_OUTPUT_DIRECTORY_${CONFIG_SUFFIX}" ${li}) endif() if(module_dir) string(REPLACE ${CMAKE_CFG_INTDIR} ${build_mode} mi ${module_dir}) set_target_properties(${target} PROPERTIES "LIBRARY_OUTPUT_DIRECTORY_${CONFIG_SUFFIX}" ${mi}) endif() endforeach() else() if(ARG_BINARY_DIR) set_target_properties(${target} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${ARG_BINARY_DIR}) endif() if(ARG_LIBRARY_DIR) set_target_properties(${target} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${ARG_LIBRARY_DIR}) endif() if(module_dir) set_target_properties(${target} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${module_dir}) endif() endif() endfunction() # If on Windows and building with MSVC, add the resource script containing the # VERSIONINFO data to the project. This embeds version resource information # into the output .exe or .dll. # TODO: Enable for MinGW Windows builds too. # function(add_windows_version_resource_file OUT_VAR) set(sources ${ARGN}) if (MSVC AND CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") set(resource_file ${LLVM_SOURCE_DIR}/resources/windows_version_resource.rc) if(EXISTS ${resource_file}) set(sources ${sources} ${resource_file}) source_group("Resource Files" ${resource_file}) set(windows_resource_file ${resource_file} PARENT_SCOPE) endif() endif(MSVC AND CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") set(${OUT_VAR} ${sources} PARENT_SCOPE) endfunction(add_windows_version_resource_file) # set_windows_version_resource_properties(name resource_file... 
# VERSION_MAJOR int # Optional major version number (defaults to LLVM_VERSION_MAJOR) # VERSION_MINOR int # Optional minor version number (defaults to LLVM_VERSION_MINOR) # VERSION_PATCHLEVEL int # Optional patchlevel version number (defaults to LLVM_VERSION_PATCH) # VERSION_STRING # Optional version string (defaults to PACKAGE_VERSION) # PRODUCT_NAME # Optional product name string (defaults to "LLVM") # ) function(set_windows_version_resource_properties name resource_file) cmake_parse_arguments(ARG "" "VERSION_MAJOR;VERSION_MINOR;VERSION_PATCHLEVEL;VERSION_STRING;PRODUCT_NAME" "" ${ARGN}) if (NOT DEFINED ARG_VERSION_MAJOR) set(ARG_VERSION_MAJOR ${LLVM_VERSION_MAJOR}) endif() if (NOT DEFINED ARG_VERSION_MINOR) set(ARG_VERSION_MINOR ${LLVM_VERSION_MINOR}) endif() if (NOT DEFINED ARG_VERSION_PATCHLEVEL) set(ARG_VERSION_PATCHLEVEL ${LLVM_VERSION_PATCH}) endif() if (NOT DEFINED ARG_VERSION_STRING) set(ARG_VERSION_STRING ${PACKAGE_VERSION}) endif() if (NOT DEFINED ARG_PRODUCT_NAME) set(ARG_PRODUCT_NAME "LLVM") endif() set_property(SOURCE ${resource_file} PROPERTY COMPILE_FLAGS /nologo) set_property(SOURCE ${resource_file} PROPERTY COMPILE_DEFINITIONS "RC_VERSION_FIELD_1=${ARG_VERSION_MAJOR}" "RC_VERSION_FIELD_2=${ARG_VERSION_MINOR}" "RC_VERSION_FIELD_3=${ARG_VERSION_PATCHLEVEL}" "RC_VERSION_FIELD_4=0" "RC_FILE_VERSION=\"${ARG_VERSION_STRING}\"" "RC_INTERNAL_NAME=\"${name}\"" "RC_PRODUCT_NAME=\"${ARG_PRODUCT_NAME}\"" "RC_PRODUCT_VERSION=\"${ARG_VERSION_STRING}\"") endfunction(set_windows_version_resource_properties) # llvm_add_library(name sources... # SHARED;STATIC # STATIC by default w/o BUILD_SHARED_LIBS. # SHARED by default w/ BUILD_SHARED_LIBS. # OBJECT # Also create an OBJECT library target. Default if STATIC && SHARED. # MODULE # Target ${name} might not be created on unsupported platforms. # Check with "if(TARGET ${name})". # DISABLE_LLVM_LINK_LLVM_DYLIB # Do not link this library to libLLVM, even if # LLVM_LINK_LLVM_DYLIB is enabled. # OUTPUT_NAME name # Corresponds to OUTPUT_NAME in target properties. # DEPENDS targets... # Same semantics as add_dependencies(). # LINK_COMPONENTS components... # Same as the variable LLVM_LINK_COMPONENTS. # LINK_LIBS lib_targets... # Same semantics as target_link_libraries(). # ADDITIONAL_HEADERS # May specify header files for IDE generators. # SONAME # Should set SONAME link flags and create symlinks # NO_INSTALL_RPATH # Suppress default RPATH settings in shared libraries. # PLUGIN_TOOL # The tool (i.e. cmake target) that this plugin will link against # ) function(llvm_add_library name) cmake_parse_arguments(ARG "MODULE;SHARED;STATIC;OBJECT;DISABLE_LLVM_LINK_LLVM_DYLIB;SONAME;NO_INSTALL_RPATH" "OUTPUT_NAME;PLUGIN_TOOL" "ADDITIONAL_HEADERS;DEPENDS;LINK_COMPONENTS;LINK_LIBS;OBJLIBS" ${ARGN}) list(APPEND LLVM_COMMON_DEPENDS ${ARG_DEPENDS}) if(ARG_ADDITIONAL_HEADERS) # Pass through ADDITIONAL_HEADERS. 
set(ARG_ADDITIONAL_HEADERS ADDITIONAL_HEADERS ${ARG_ADDITIONAL_HEADERS}) endif() if(ARG_OBJLIBS) set(ALL_FILES ${ARG_OBJLIBS}) else() llvm_process_sources(ALL_FILES ${ARG_UNPARSED_ARGUMENTS} ${ARG_ADDITIONAL_HEADERS}) endif() if(ARG_MODULE) if(ARG_SHARED OR ARG_STATIC) message(WARNING "MODULE with SHARED|STATIC doesn't make sense.") endif() # Plugins that link against a tool are allowed even when plugins in general are not if(NOT LLVM_ENABLE_PLUGINS AND NOT (ARG_PLUGIN_TOOL AND LLVM_EXPORT_SYMBOLS_FOR_PLUGINS)) message(STATUS "${name} ignored -- Loadable modules not supported on this platform.") return() endif() else() if(ARG_PLUGIN_TOOL) message(WARNING "PLUGIN_TOOL without MODULE doesn't make sense.") endif() if(BUILD_SHARED_LIBS AND NOT ARG_STATIC) set(ARG_SHARED TRUE) endif() if(NOT ARG_SHARED) set(ARG_STATIC TRUE) endif() endif() # Generate objlib if((ARG_SHARED AND ARG_STATIC) OR ARG_OBJECT) # Generate an obj library for both targets. set(obj_name "obj.${name}") add_library(${obj_name} OBJECT EXCLUDE_FROM_ALL ${ALL_FILES} ) llvm_update_compile_flags(${obj_name}) set(ALL_FILES "$") # Do add_dependencies(obj) later due to CMake issue 14747. list(APPEND objlibs ${obj_name}) set_target_properties(${obj_name} PROPERTIES FOLDER "Object Libraries") endif() if(ARG_SHARED AND ARG_STATIC) # static set(name_static "${name}_static") if(ARG_OUTPUT_NAME) set(output_name OUTPUT_NAME "${ARG_OUTPUT_NAME}") endif() # DEPENDS has been appended to LLVM_COMMON_LIBS. llvm_add_library(${name_static} STATIC ${output_name} OBJLIBS ${ALL_FILES} # objlib LINK_LIBS ${ARG_LINK_LIBS} LINK_COMPONENTS ${ARG_LINK_COMPONENTS} ) # FIXME: Add name_static to anywhere in TARGET ${name}'s PROPERTY. set(ARG_STATIC) endif() if(ARG_MODULE) add_library(${name} MODULE ${ALL_FILES}) elseif(ARG_SHARED) add_windows_version_resource_file(ALL_FILES ${ALL_FILES}) add_library(${name} SHARED ${ALL_FILES}) else() add_library(${name} STATIC ${ALL_FILES}) endif() if(NOT ARG_NO_INSTALL_RPATH) if(ARG_MODULE OR ARG_SHARED) llvm_setup_rpath(${name}) endif() endif() setup_dependency_debugging(${name} ${LLVM_COMMON_DEPENDS}) if(DEFINED windows_resource_file) set_windows_version_resource_properties(${name} ${windows_resource_file}) set(windows_resource_file ${windows_resource_file} PARENT_SCOPE) endif() set_output_directory(${name} BINARY_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR} LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) # $ doesn't require compile flags. if(NOT obj_name) llvm_update_compile_flags(${name}) endif() add_link_opts( ${name} ) if(ARG_OUTPUT_NAME) set_target_properties(${name} PROPERTIES OUTPUT_NAME ${ARG_OUTPUT_NAME} ) endif() if(ARG_MODULE) set_target_properties(${name} PROPERTIES PREFIX "" SUFFIX ${LLVM_PLUGIN_EXT} ) endif() if(ARG_SHARED) if(WIN32) set_target_properties(${name} PROPERTIES PREFIX "" ) endif() # Set SOVERSION on shared libraries that lack explicit SONAME # specifier, on *nix systems that are not Darwin. if(UNIX AND NOT APPLE AND NOT ARG_SONAME) set_target_properties(${name} PROPERTIES # Since 4.0.0, the ABI version is indicated by the major version SOVERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX} VERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX}) endif() endif() if(ARG_MODULE OR ARG_SHARED) # Do not add -Dname_EXPORTS to the command-line when building files in this # target. Doing so is actively harmful for the modules build because it # creates extra module variants, and not useful because we don't use these # macros. 
set_target_properties( ${name} PROPERTIES DEFINE_SYMBOL "" ) if (LLVM_EXPORTED_SYMBOL_FILE) add_llvm_symbol_exports( ${name} ${LLVM_EXPORTED_SYMBOL_FILE} ) endif() endif() if(ARG_SHARED AND UNIX) if(NOT APPLE AND ARG_SONAME) get_target_property(output_name ${name} OUTPUT_NAME) if(${output_name} STREQUAL "output_name-NOTFOUND") set(output_name ${name}) endif() set(library_name ${output_name}-${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX}) set(api_name ${output_name}-${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}) set_target_properties(${name} PROPERTIES OUTPUT_NAME ${library_name}) llvm_install_library_symlink(${api_name} ${library_name} SHARED COMPONENT ${name} ALWAYS_GENERATE) llvm_install_library_symlink(${output_name} ${library_name} SHARED COMPONENT ${name} ALWAYS_GENERATE) endif() endif() if(ARG_MODULE AND LLVM_EXPORT_SYMBOLS_FOR_PLUGINS AND ARG_PLUGIN_TOOL AND (WIN32 OR CYGWIN)) # On DLL platforms symbols are imported from the tool by linking against it. set(llvm_libs ${ARG_PLUGIN_TOOL}) elseif (DEFINED LLVM_LINK_COMPONENTS OR DEFINED ARG_LINK_COMPONENTS) if (LLVM_LINK_LLVM_DYLIB AND NOT ARG_DISABLE_LLVM_LINK_LLVM_DYLIB) set(llvm_libs LLVM) else() llvm_map_components_to_libnames(llvm_libs ${ARG_LINK_COMPONENTS} ${LLVM_LINK_COMPONENTS} ) endif() else() # Components have not been defined explicitly in CMake, so add the # dependency information for this library as defined by LLVMBuild. # # It would be nice to verify that we have the dependencies for this library # name, but using get_property(... SET) doesn't suffice to determine if a # property has been set to an empty value. get_property(lib_deps GLOBAL PROPERTY LLVMBUILD_LIB_DEPS_${name}) endif() if(ARG_STATIC) set(libtype INTERFACE) else() # We can use PRIVATE since SO knows its dependent libs. set(libtype PRIVATE) endif() target_link_libraries(${name} ${libtype} ${ARG_LINK_LIBS} ${lib_deps} ${llvm_libs} ) if(LLVM_COMMON_DEPENDS) add_dependencies(${name} ${LLVM_COMMON_DEPENDS}) # Add dependencies also to objlibs. # CMake issue 14747 -- add_dependencies() might be ignored to objlib's user. foreach(objlib ${objlibs}) add_dependencies(${objlib} ${LLVM_COMMON_DEPENDS}) endforeach() endif() if(ARG_SHARED OR ARG_MODULE) llvm_externalize_debuginfo(${name}) llvm_codesign(${name}) endif() endfunction() function(add_llvm_install_targets target) cmake_parse_arguments(ARG "" "COMPONENT;PREFIX" "DEPENDS" ${ARGN}) if(ARG_COMPONENT) set(component_option -DCMAKE_INSTALL_COMPONENT="${ARG_COMPONENT}") endif() if(ARG_PREFIX) set(prefix_option -DCMAKE_INSTALL_PREFIX="${ARG_PREFIX}") endif() add_custom_target(${target} DEPENDS ${ARG_DEPENDS} COMMAND "${CMAKE_COMMAND}" ${component_option} ${prefix_option} -P "${CMAKE_BINARY_DIR}/cmake_install.cmake" USES_TERMINAL) add_custom_target(${target}-stripped DEPENDS ${ARG_DEPENDS} COMMAND "${CMAKE_COMMAND}" ${component_option} ${prefix_option} -DCMAKE_INSTALL_DO_STRIP=1 -P "${CMAKE_BINARY_DIR}/cmake_install.cmake" USES_TERMINAL) endfunction() macro(add_llvm_library name) cmake_parse_arguments(ARG "SHARED;BUILDTREE_ONLY;MODULE" "" "" ${ARGN}) if(ARG_MODULE) llvm_add_library(${name} MODULE ${ARG_UNPARSED_ARGUMENTS}) elseif( BUILD_SHARED_LIBS OR ARG_SHARED ) llvm_add_library(${name} SHARED ${ARG_UNPARSED_ARGUMENTS}) else() llvm_add_library(${name} ${ARG_UNPARSED_ARGUMENTS}) endif() # Libraries that are meant to only be exposed via the build tree only are # never installed and are only exported as a target in the special build tree # config file. 
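# Illustrative sketch (hypothetical names, not part of this macro):
#   add_llvm_library(LLVMFoo Foo.cpp)
#     - typically installed and exported (see the install logic below)
#   add_llvm_library(FooTestingSupport BUILDTREE_ONLY TestingSupport.cpp)
#     - never installed, only recorded in LLVM_EXPORTS_BUILDTREE_ONLY below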
if (NOT ARG_BUILDTREE_ONLY AND NOT ARG_MODULE) set_property( GLOBAL APPEND PROPERTY LLVM_LIBS ${name} ) endif() if (ARG_MODULE AND NOT TARGET ${name}) # Add empty "phony" target add_custom_target(${name}) elseif( EXCLUDE_FROM_ALL ) set_target_properties( ${name} PROPERTIES EXCLUDE_FROM_ALL ON) elseif(ARG_BUILDTREE_ONLY) set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS_BUILDTREE_ONLY ${name}) else() if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY OR ${name} STREQUAL "LTO" OR ${name} STREQUAL "OptRemarks" OR (LLVM_LINK_LLVM_DYLIB AND ${name} STREQUAL "LLVM")) set(install_dir lib${LLVM_LIBDIR_SUFFIX}) if(ARG_MODULE OR ARG_SHARED OR BUILD_SHARED_LIBS) if(WIN32 OR CYGWIN OR MINGW) set(install_type RUNTIME) set(install_dir bin) else() set(install_type LIBRARY) endif() else() set(install_type ARCHIVE) endif() if (ARG_MODULE) set(install_type LIBRARY) endif() if(${name} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR NOT LLVM_DISTRIBUTION_COMPONENTS) set(export_to_llvmexports EXPORT LLVMExports) set_property(GLOBAL PROPERTY LLVM_HAS_EXPORTS True) endif() install(TARGETS ${name} ${export_to_llvmexports} ${install_type} DESTINATION ${install_dir} COMPONENT ${name}) if (NOT LLVM_ENABLE_IDE) add_llvm_install_targets(install-${name} DEPENDS ${name} COMPONENT ${name}) endif() endif() set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${name}) endif() if (ARG_MODULE) set_target_properties(${name} PROPERTIES FOLDER "Loadable modules") else() set_target_properties(${name} PROPERTIES FOLDER "Libraries") endif() endmacro(add_llvm_library name) macro(add_llvm_executable name) cmake_parse_arguments(ARG "DISABLE_LLVM_LINK_LLVM_DYLIB;IGNORE_EXTERNALIZE_DEBUGINFO;NO_INSTALL_RPATH" "ENTITLEMENTS" "DEPENDS" ${ARGN}) llvm_process_sources( ALL_FILES ${ARG_UNPARSED_ARGUMENTS} ) list(APPEND LLVM_COMMON_DEPENDS ${ARG_DEPENDS}) # Generate objlib if(LLVM_ENABLE_OBJLIB) # Generate an obj library for both targets. set(obj_name "obj.${name}") add_library(${obj_name} OBJECT EXCLUDE_FROM_ALL ${ALL_FILES} ) llvm_update_compile_flags(${obj_name}) set(ALL_FILES "$") set_target_properties(${obj_name} PROPERTIES FOLDER "Object Libraries") endif() add_windows_version_resource_file(ALL_FILES ${ALL_FILES}) if(XCODE) # Note: the dummy.cpp source file provides no definitions. However, # it forces Xcode to properly link the static library. list(APPEND ALL_FILES "${LLVM_MAIN_SRC_DIR}/cmake/dummy.cpp") endif() if( EXCLUDE_FROM_ALL ) add_executable(${name} EXCLUDE_FROM_ALL ${ALL_FILES}) else() add_executable(${name} ${ALL_FILES}) endif() setup_dependency_debugging(${name} ${LLVM_COMMON_DEPENDS}) if(NOT ARG_NO_INSTALL_RPATH) llvm_setup_rpath(${name}) endif() if(DEFINED windows_resource_file) set_windows_version_resource_properties(${name} ${windows_resource_file}) endif() # $ doesn't require compile flags. if(NOT LLVM_ENABLE_OBJLIB) llvm_update_compile_flags(${name}) endif() add_link_opts( ${name} ) # Do not add -Dname_EXPORTS to the command-line when building files in this # target. Doing so is actively harmful for the modules build because it # creates extra module variants, and not useful because we don't use these # macros. 
set_target_properties( ${name} PROPERTIES DEFINE_SYMBOL "" ) if (LLVM_EXPORTED_SYMBOL_FILE) add_llvm_symbol_exports( ${name} ${LLVM_EXPORTED_SYMBOL_FILE} ) endif(LLVM_EXPORTED_SYMBOL_FILE) if (LLVM_LINK_LLVM_DYLIB AND NOT ARG_DISABLE_LLVM_LINK_LLVM_DYLIB) set(USE_SHARED USE_SHARED) endif() set(EXCLUDE_FROM_ALL OFF) set_output_directory(${name} BINARY_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR} LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) llvm_config( ${name} ${USE_SHARED} ${LLVM_LINK_COMPONENTS} ) if( LLVM_COMMON_DEPENDS ) add_dependencies( ${name} ${LLVM_COMMON_DEPENDS} ) endif( LLVM_COMMON_DEPENDS ) if(NOT ARG_IGNORE_EXTERNALIZE_DEBUGINFO) llvm_externalize_debuginfo(${name}) endif() if (LLVM_PTHREAD_LIB) # libpthreads overrides some standard library symbols, so main # executable must be linked with it in order to provide consistent # API for all shared libaries loaded by this executable. target_link_libraries(${name} PRIVATE ${LLVM_PTHREAD_LIB}) endif() llvm_codesign(${name} ENTITLEMENTS ${ARG_ENTITLEMENTS}) endmacro(add_llvm_executable name) function(export_executable_symbols target) if (LLVM_EXPORTED_SYMBOL_FILE) # The symbol file should contain the symbols we want the executable to # export set_target_properties(${target} PROPERTIES ENABLE_EXPORTS 1) elseif (LLVM_EXPORT_SYMBOLS_FOR_PLUGINS) # Extract the symbols to export from the static libraries that the # executable links against. set_target_properties(${target} PROPERTIES ENABLE_EXPORTS 1) set(exported_symbol_file ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/${target}.symbols) # We need to consider not just the direct link dependencies, but also the # transitive link dependencies. Do this by starting with the set of direct # dependencies, then the dependencies of those dependencies, and so on. get_target_property(new_libs ${target} LINK_LIBRARIES) set(link_libs ${new_libs}) while(NOT "${new_libs}" STREQUAL "") foreach(lib ${new_libs}) if(TARGET ${lib}) get_target_property(lib_type ${lib} TYPE) if("${lib_type}" STREQUAL "STATIC_LIBRARY") list(APPEND static_libs ${lib}) else() list(APPEND other_libs ${lib}) endif() get_target_property(transitive_libs ${lib} INTERFACE_LINK_LIBRARIES) foreach(transitive_lib ${transitive_libs}) list(FIND link_libs ${transitive_lib} idx) if(TARGET ${transitive_lib} AND idx EQUAL -1) list(APPEND newer_libs ${transitive_lib}) list(APPEND link_libs ${transitive_lib}) endif() endforeach(transitive_lib) endif() endforeach(lib) set(new_libs ${newer_libs}) set(newer_libs "") endwhile() if (MSVC) set(mangling microsoft) else() set(mangling itanium) endif() add_custom_command(OUTPUT ${exported_symbol_file} COMMAND ${PYTHON_EXECUTABLE} ${LLVM_MAIN_SRC_DIR}/utils/extract_symbols.py --mangling=${mangling} ${static_libs} -o ${exported_symbol_file} WORKING_DIRECTORY ${LLVM_LIBRARY_OUTPUT_INTDIR} DEPENDS ${LLVM_MAIN_SRC_DIR}/utils/extract_symbols.py ${static_libs} VERBATIM COMMENT "Generating export list for ${target}") add_llvm_symbol_exports( ${target} ${exported_symbol_file} ) # If something links against this executable then we want a # transitive link against only the libraries whose symbols # we aren't exporting. set_target_properties(${target} PROPERTIES INTERFACE_LINK_LIBRARIES "${other_libs}") # The default import library suffix that cmake uses for cygwin/mingw is # ".dll.a", but for clang.exe that causes a collision with libclang.dll, # where the import libraries of both get named libclang.dll.a. Use a suffix # of ".exe.a" to avoid this. 
if(CYGWIN OR MINGW) set_target_properties(${target} PROPERTIES IMPORT_SUFFIX ".exe.a") endif() elseif(NOT (WIN32 OR CYGWIN)) # On Windows auto-exporting everything doesn't work because of the limit on # the size of the exported symbol table, but on other platforms we can do # it without any trouble. set_target_properties(${target} PROPERTIES ENABLE_EXPORTS 1) if (APPLE) set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS " -rdynamic") endif() endif() endfunction() if(NOT LLVM_TOOLCHAIN_TOOLS) set (LLVM_TOOLCHAIN_TOOLS llvm-ar llvm-ranlib llvm-lib llvm-objdump llvm-rc + llvm-profdata ) endif() macro(add_llvm_tool name) if( NOT LLVM_BUILD_TOOLS ) set(EXCLUDE_FROM_ALL ON) endif() add_llvm_executable(${name} ${ARGN}) if ( ${name} IN_LIST LLVM_TOOLCHAIN_TOOLS OR NOT LLVM_INSTALL_TOOLCHAIN_ONLY) if( LLVM_BUILD_TOOLS ) if(${name} IN_LIST LLVM_DISTRIBUTION_COMPONENTS OR NOT LLVM_DISTRIBUTION_COMPONENTS) set(export_to_llvmexports EXPORT LLVMExports) set_property(GLOBAL PROPERTY LLVM_HAS_EXPORTS True) endif() install(TARGETS ${name} ${export_to_llvmexports} RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR} COMPONENT ${name}) if (NOT LLVM_ENABLE_IDE) add_llvm_install_targets(install-${name} DEPENDS ${name} COMPONENT ${name}) endif() endif() endif() if( LLVM_BUILD_TOOLS ) set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${name}) endif() set_target_properties(${name} PROPERTIES FOLDER "Tools") endmacro(add_llvm_tool name) macro(add_llvm_example name) if( NOT LLVM_BUILD_EXAMPLES ) set(EXCLUDE_FROM_ALL ON) endif() add_llvm_executable(${name} ${ARGN}) if( LLVM_BUILD_EXAMPLES ) install(TARGETS ${name} RUNTIME DESTINATION examples) endif() set_target_properties(${name} PROPERTIES FOLDER "Examples") endmacro(add_llvm_example name) # This is a macro that is used to create targets for executables that are needed # for development, but that are not intended to be installed by default. 
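# For instance (illustrative, hypothetical names), a development-only helper
# would be registered as
#   add_llvm_utility(count-tokens CountTokens.cpp)
# and is only installed when both LLVM_INSTALL_UTILS and LLVM_BUILD_UTILS are
# ON, as implemented below.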
macro(add_llvm_utility name) if ( NOT LLVM_BUILD_UTILS ) set(EXCLUDE_FROM_ALL ON) endif() add_llvm_executable(${name} DISABLE_LLVM_LINK_LLVM_DYLIB ${ARGN}) set_target_properties(${name} PROPERTIES FOLDER "Utils") if( LLVM_INSTALL_UTILS AND LLVM_BUILD_UTILS ) install (TARGETS ${name} RUNTIME DESTINATION ${LLVM_UTILS_INSTALL_DIR} COMPONENT ${name}) if (NOT LLVM_ENABLE_IDE) add_llvm_install_targets(install-${name} DEPENDS ${name} COMPONENT ${name}) endif() set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${name}) elseif( LLVM_BUILD_UTILS ) set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS_BUILDTREE_ONLY ${name}) endif() endmacro(add_llvm_utility name) macro(add_llvm_fuzzer name) cmake_parse_arguments(ARG "" "DUMMY_MAIN" "" ${ARGN}) if( LLVM_LIB_FUZZING_ENGINE ) set(LLVM_OPTIONAL_SOURCES ${ARG_DUMMY_MAIN}) add_llvm_executable(${name} ${ARG_UNPARSED_ARGUMENTS}) target_link_libraries(${name} PRIVATE ${LLVM_LIB_FUZZING_ENGINE}) set_target_properties(${name} PROPERTIES FOLDER "Fuzzers") elseif( LLVM_USE_SANITIZE_COVERAGE ) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer") set(LLVM_OPTIONAL_SOURCES ${ARG_DUMMY_MAIN}) add_llvm_executable(${name} ${ARG_UNPARSED_ARGUMENTS}) set_target_properties(${name} PROPERTIES FOLDER "Fuzzers") elseif( ARG_DUMMY_MAIN ) add_llvm_executable(${name} ${ARG_DUMMY_MAIN} ${ARG_UNPARSED_ARGUMENTS}) set_target_properties(${name} PROPERTIES FOLDER "Fuzzers") endif() endmacro() macro(add_llvm_target target_name) include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) add_llvm_library(LLVM${target_name} ${ARGN}) set( CURRENT_LLVM_TARGET LLVM${target_name} ) endmacro(add_llvm_target) function(canonicalize_tool_name name output) string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "" nameStrip ${name}) string(REPLACE "-" "_" nameUNDERSCORE ${nameStrip}) string(TOUPPER ${nameUNDERSCORE} nameUPPER) set(${output} "${nameUPPER}" PARENT_SCOPE) endfunction(canonicalize_tool_name) # Custom add_subdirectory wrapper # Takes in a project name (i.e. LLVM), the subdirectory name, and an optional # path if it differs from the name. function(add_llvm_subdirectory project type name) set(add_llvm_external_dir "${ARGN}") if("${add_llvm_external_dir}" STREQUAL "") set(add_llvm_external_dir ${name}) endif() canonicalize_tool_name(${name} nameUPPER) set(canonical_full_name ${project}_${type}_${nameUPPER}) get_property(already_processed GLOBAL PROPERTY ${canonical_full_name}_PROCESSED) if(already_processed) return() endif() set_property(GLOBAL PROPERTY ${canonical_full_name}_PROCESSED YES) if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${add_llvm_external_dir}/CMakeLists.txt) # Treat it as in-tree subproject. 
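# Example (illustrative): add_llvm_external_project(clang) with an in-tree
# checkout at tools/clang resolves the canonical name to LLVM_TOOL_CLANG, so
# the cache option created below is LLVM_TOOL_CLANG_BUILD.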
option(${canonical_full_name}_BUILD "Whether to build ${name} as part of ${project}" On) mark_as_advanced(${project}_${type}_${name}_BUILD) if(${canonical_full_name}_BUILD) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${add_llvm_external_dir} ${add_llvm_external_dir}) endif() else() set(LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR "${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR}" CACHE PATH "Path to ${name} source directory") set(${canonical_full_name}_BUILD_DEFAULT ON) if(NOT LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR OR NOT EXISTS ${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR}) set(${canonical_full_name}_BUILD_DEFAULT OFF) endif() if("${LLVM_EXTERNAL_${nameUPPER}_BUILD}" STREQUAL "OFF") set(${canonical_full_name}_BUILD_DEFAULT OFF) endif() option(${canonical_full_name}_BUILD "Whether to build ${name} as part of LLVM" ${${canonical_full_name}_BUILD_DEFAULT}) if (${canonical_full_name}_BUILD) if(EXISTS ${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR}) add_subdirectory(${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR} ${add_llvm_external_dir}) elseif(NOT "${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR}" STREQUAL "") message(WARNING "Nonexistent directory for ${name}: ${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR}") endif() endif() endif() endfunction() # Add external project that may want to be built as part of llvm such as Clang, # lld, and Polly. This adds two options. One for the source directory of the # project, which defaults to ${CMAKE_CURRENT_SOURCE_DIR}/${name}. Another to # enable or disable building it with everything else. # Additional parameter can be specified as the name of directory. macro(add_llvm_external_project name) add_llvm_subdirectory(LLVM TOOL ${name} ${ARGN}) endmacro() macro(add_llvm_tool_subdirectory name) add_llvm_external_project(${name}) endmacro(add_llvm_tool_subdirectory) function(get_project_name_from_src_var var output) string(REGEX MATCH "LLVM_EXTERNAL_(.*)_SOURCE_DIR" MACHED_TOOL "${var}") if(MACHED_TOOL) set(${output} ${CMAKE_MATCH_1} PARENT_SCOPE) else() set(${output} PARENT_SCOPE) endif() endfunction() function(create_subdirectory_options project type) file(GLOB sub-dirs "${CMAKE_CURRENT_SOURCE_DIR}/*") foreach(dir ${sub-dirs}) if(IS_DIRECTORY "${dir}" AND EXISTS "${dir}/CMakeLists.txt") canonicalize_tool_name(${dir} name) option(${project}_${type}_${name}_BUILD "Whether to build ${name} as part of ${project}" On) mark_as_advanced(${project}_${type}_${name}_BUILD) endif() endforeach() endfunction(create_subdirectory_options) function(create_llvm_tool_options) create_subdirectory_options(LLVM TOOL) endfunction(create_llvm_tool_options) function(llvm_add_implicit_projects project) set(list_of_implicit_subdirs "") file(GLOB sub-dirs "${CMAKE_CURRENT_SOURCE_DIR}/*") foreach(dir ${sub-dirs}) if(IS_DIRECTORY "${dir}" AND EXISTS "${dir}/CMakeLists.txt") canonicalize_tool_name(${dir} name) if (${project}_TOOL_${name}_BUILD) get_filename_component(fn "${dir}" NAME) list(APPEND list_of_implicit_subdirs "${fn}") endif() endif() endforeach() foreach(external_proj ${list_of_implicit_subdirs}) add_llvm_subdirectory(${project} TOOL "${external_proj}" ${ARGN}) endforeach() endfunction(llvm_add_implicit_projects) function(add_llvm_implicit_projects) llvm_add_implicit_projects(LLVM) endfunction(add_llvm_implicit_projects) # Generic support for adding a unittest. function(add_unittest test_suite test_name) if( NOT LLVM_BUILD_TESTS ) set(EXCLUDE_FROM_ALL ON) endif() # Our current version of gtest does not properly recognize C++11 support # with MSVC, so it falls back to tr1 / experimental classes. 
Since LLVM # itself requires C++11, we can safely force it on unconditionally so that # we don't have to fight with the buggy gtest check. add_definitions(-DGTEST_LANG_CXX11=1) add_definitions(-DGTEST_HAS_TR1_TUPLE=0) include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include) include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googlemock/include) if (NOT LLVM_ENABLE_THREADS) list(APPEND LLVM_COMPILE_DEFINITIONS GTEST_HAS_PTHREAD=0) endif () if (SUPPORTS_VARIADIC_MACROS_FLAG) list(APPEND LLVM_COMPILE_FLAGS "-Wno-variadic-macros") endif () # Some parts of gtest rely on this GNU extension, don't warn on it. if(SUPPORTS_GNU_ZERO_VARIADIC_MACRO_ARGUMENTS_FLAG) list(APPEND LLVM_COMPILE_FLAGS "-Wno-gnu-zero-variadic-macro-arguments") endif() set(LLVM_REQUIRES_RTTI OFF) list(APPEND LLVM_LINK_COMPONENTS Support) # gtest needs it for raw_ostream add_llvm_executable(${test_name} IGNORE_EXTERNALIZE_DEBUGINFO NO_INSTALL_RPATH ${ARGN}) set(outdir ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}) set_output_directory(${test_name} BINARY_DIR ${outdir} LIBRARY_DIR ${outdir}) # libpthreads overrides some standard library symbols, so main # executable must be linked with it in order to provide consistent # API for all shared libaries loaded by this executable. target_link_libraries(${test_name} PRIVATE gtest_main gtest ${LLVM_PTHREAD_LIB}) add_dependencies(${test_suite} ${test_name}) get_target_property(test_suite_folder ${test_suite} FOLDER) if (NOT ${test_suite_folder} STREQUAL "NOTFOUND") set_property(TARGET ${test_name} PROPERTY FOLDER "${test_suite_folder}") endif () endfunction() # Use for test binaries that call llvm::getInputFileDirectory(). Use of this # is discouraged. function(add_unittest_with_input_files test_suite test_name) set(LLVM_UNITTEST_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) configure_file( ${LLVM_MAIN_SRC_DIR}/unittests/unittest.cfg.in ${CMAKE_CURRENT_BINARY_DIR}/llvm.srcdir.txt) add_unittest(${test_suite} ${test_name} ${ARGN}) endfunction() # Generic support for adding a benchmark. function(add_benchmark benchmark_name) if( NOT LLVM_BUILD_BENCHMARKS ) set(EXCLUDE_FROM_ALL ON) endif() add_llvm_executable(${benchmark_name} IGNORE_EXTERNALIZE_DEBUGINFO NO_INSTALL_RPATH ${ARGN}) set(outdir ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}) set_output_directory(${benchmark_name} BINARY_DIR ${outdir} LIBRARY_DIR ${outdir}) set_property(TARGET ${benchmark_name} PROPERTY FOLDER "Utils") target_link_libraries(${benchmark_name} PRIVATE benchmark) endfunction() function(llvm_add_go_executable binary pkgpath) cmake_parse_arguments(ARG "ALL" "" "DEPENDS;GOFLAGS" ${ARGN}) if(LLVM_BINDINGS MATCHES "go") # FIXME: This should depend only on the libraries Go needs. 
get_property(llvmlibs GLOBAL PROPERTY LLVM_LIBS) set(binpath ${CMAKE_BINARY_DIR}/bin/${binary}${CMAKE_EXECUTABLE_SUFFIX}) set(cc "${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_ARG1}") set(cxx "${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_ARG1}") set(cppflags "") get_property(include_dirs DIRECTORY PROPERTY INCLUDE_DIRECTORIES) foreach(d ${include_dirs}) set(cppflags "${cppflags} -I${d}") endforeach(d) set(ldflags "${CMAKE_EXE_LINKER_FLAGS}") add_custom_command(OUTPUT ${binpath} COMMAND ${CMAKE_BINARY_DIR}/bin/llvm-go "go=${GO_EXECUTABLE}" "cc=${cc}" "cxx=${cxx}" "cppflags=${cppflags}" "ldflags=${ldflags}" "packages=${LLVM_GO_PACKAGES}" ${ARG_GOFLAGS} build -o ${binpath} ${pkgpath} DEPENDS llvm-config ${CMAKE_BINARY_DIR}/bin/llvm-go${CMAKE_EXECUTABLE_SUFFIX} ${llvmlibs} ${ARG_DEPENDS} COMMENT "Building Go executable ${binary}" VERBATIM) if (ARG_ALL) add_custom_target(${binary} ALL DEPENDS ${binpath}) else() add_custom_target(${binary} DEPENDS ${binpath}) endif() endif() endfunction() # This function canonicalizes the CMake variables passed by name # from CMake booleans to 0/1, suitable for passing into Python or C++, # in place. function(llvm_canonicalize_cmake_booleans) foreach(var ${ARGN}) if(${var}) set(${var} 1 PARENT_SCOPE) else() set(${var} 0 PARENT_SCOPE) endif() endforeach() endfunction(llvm_canonicalize_cmake_booleans) macro(set_llvm_build_mode) # Configuration-time: See Unit/lit.site.cfg.in if (CMAKE_CFG_INTDIR STREQUAL ".") set(LLVM_BUILD_MODE ".") else () set(LLVM_BUILD_MODE "%(build_mode)s") endif () endmacro() # This function provides an automatic way to 'configure'-like generate a file # based on a set of common and custom variables, specifically targeting the # variables needed for the 'lit.site.cfg' files. This function bundles the # common variables that any Lit instance is likely to need, and custom # variables can be passed in. function(configure_lit_site_cfg site_in site_out) cmake_parse_arguments(ARG "" "" "MAIN_CONFIG;OUTPUT_MAPPING" ${ARGN}) if ("${ARG_MAIN_CONFIG}" STREQUAL "") get_filename_component(INPUT_DIR ${site_in} DIRECTORY) set(ARG_MAIN_CONFIG "${INPUT_DIR}/lit.cfg") endif() if ("${ARG_OUTPUT_MAPPING}" STREQUAL "") set(ARG_OUTPUT_MAPPING "${site_out}") endif() foreach(c ${LLVM_TARGETS_TO_BUILD}) set(TARGETS_BUILT "${TARGETS_BUILT} ${c}") endforeach(c) set(TARGETS_TO_BUILD ${TARGETS_BUILT}) set(SHLIBEXT "${LTDL_SHLIB_EXT}") set_llvm_build_mode() # The values below might come from a provided binary tree rather than the build tree. set(LLVM_SOURCE_DIR ${LLVM_MAIN_SRC_DIR}) set(LLVM_BINARY_DIR ${LLVM_BINARY_DIR}) string(REPLACE "${CMAKE_CFG_INTDIR}" "${LLVM_BUILD_MODE}" LLVM_TOOLS_DIR "${LLVM_TOOLS_BINARY_DIR}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLVM_LIBS_DIR "${LLVM_LIBRARY_DIR}") # SHLIBDIR points to the build tree. string(REPLACE "${CMAKE_CFG_INTDIR}" "${LLVM_BUILD_MODE}" SHLIBDIR "${LLVM_SHLIB_OUTPUT_INTDIR}") set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE}) # FIXME: "ENABLE_SHARED" doesn't make sense, since it is used just for # plugins. We may rename it.
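# Sketch of how these values are consumed (assuming a typical lit.site.cfg.in;
# exact config names vary by project): the template references them through
# @VAR@ substitutions, for example
#   config.enable_shared = @ENABLE_SHARED@
#   config.enable_assertions = @ENABLE_ASSERTIONS@
# and the configure_file() call below fills them in at configuration time.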
if(LLVM_ENABLE_PLUGINS) set(ENABLE_SHARED "1") else() set(ENABLE_SHARED "0") endif() if(LLVM_ENABLE_ASSERTIONS AND NOT MSVC_IDE) set(ENABLE_ASSERTIONS "1") else() set(ENABLE_ASSERTIONS "0") endif() set(HOST_OS ${CMAKE_SYSTEM_NAME}) set(HOST_ARCH ${CMAKE_SYSTEM_PROCESSOR}) set(HOST_CC "${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_ARG1}") set(HOST_CXX "${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_ARG1}") set(HOST_LDFLAGS "${CMAKE_EXE_LINKER_FLAGS}") set(LIT_SITE_CFG_IN_HEADER "## Autogenerated from ${site_in}\n## Do not edit!") # Override config_target_triple (and the env) if(LLVM_TARGET_TRIPLE_ENV) # This is expanded into the heading. string(CONCAT LIT_SITE_CFG_IN_HEADER "${LIT_SITE_CFG_IN_HEADER}\n\n" "import os\n" "target_env = \"${LLVM_TARGET_TRIPLE_ENV}\"\n" "config.target_triple = config.environment[target_env] = os.environ.get(target_env, \"${TARGET_TRIPLE}\")\n" ) # This is expanded to; config.target_triple = ""+config.target_triple+"" set(TARGET_TRIPLE "\"+config.target_triple+\"") endif() configure_file(${site_in} ${site_out} @ONLY) if (EXISTS "${ARG_MAIN_CONFIG}") set(PYTHON_STATEMENT "map_config('${ARG_MAIN_CONFIG}', '${site_out}')") get_property(LLVM_LIT_CONFIG_MAP GLOBAL PROPERTY LLVM_LIT_CONFIG_MAP) set(LLVM_LIT_CONFIG_MAP "${LLVM_LIT_CONFIG_MAP}\n${PYTHON_STATEMENT}") set_property(GLOBAL PROPERTY LLVM_LIT_CONFIG_MAP ${LLVM_LIT_CONFIG_MAP}) endif() endfunction() function(dump_all_cmake_variables) get_cmake_property(_variableNames VARIABLES) foreach (_variableName ${_variableNames}) message(STATUS "${_variableName}=${${_variableName}}") endforeach() endfunction() function(get_llvm_lit_path base_dir file_name) cmake_parse_arguments(ARG "ALLOW_EXTERNAL" "" "" ${ARGN}) if (ARG_ALLOW_EXTERNAL) set (LLVM_EXTERNAL_LIT "" CACHE STRING "Command used to spawn lit") if ("${LLVM_EXTERNAL_LIT}" STREQUAL "") set(LLVM_EXTERNAL_LIT "${LLVM_DEFAULT_EXTERNAL_LIT}") endif() if (NOT "${LLVM_EXTERNAL_LIT}" STREQUAL "") if (EXISTS ${LLVM_EXTERNAL_LIT}) get_filename_component(LIT_FILE_NAME ${LLVM_EXTERNAL_LIT} NAME) get_filename_component(LIT_BASE_DIR ${LLVM_EXTERNAL_LIT} DIRECTORY) set(${file_name} ${LIT_FILE_NAME} PARENT_SCOPE) set(${base_dir} ${LIT_BASE_DIR} PARENT_SCOPE) return() else() message(WARN "LLVM_EXTERNAL_LIT set to ${LLVM_EXTERNAL_LIT}, but the path does not exist.") endif() endif() endif() set(lit_file_name "llvm-lit") if (CMAKE_HOST_WIN32 AND NOT CYGWIN) # llvm-lit needs suffix.py for multiprocess to find a main module. set(lit_file_name "${lit_file_name}.py") endif () set(${file_name} ${lit_file_name} PARENT_SCOPE) get_property(LLVM_LIT_BASE_DIR GLOBAL PROPERTY LLVM_LIT_BASE_DIR) if (NOT "${LLVM_LIT_BASE_DIR}" STREQUAL "") set(${base_dir} ${LLVM_LIT_BASE_DIR} PARENT_SCOPE) endif() # Allow individual projects to provide an override if (NOT "${LLVM_LIT_OUTPUT_DIR}" STREQUAL "") set(LLVM_LIT_BASE_DIR ${LLVM_LIT_OUTPUT_DIR}) elseif(NOT "${LLVM_RUNTIME_OUTPUT_INTDIR}" STREQUAL "") set(LLVM_LIT_BASE_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR}) else() set(LLVM_LIT_BASE_DIR "") endif() # Cache this so we don't have to do it again and have subsequent calls # potentially disagree on the value. set_property(GLOBAL PROPERTY LLVM_LIT_BASE_DIR ${LLVM_LIT_BASE_DIR}) set(${base_dir} ${LLVM_LIT_BASE_DIR} PARENT_SCOPE) endfunction() # A raw function to create a lit target. This is used to implement the testuite # management functions. 
function(add_lit_target target comment) cmake_parse_arguments(ARG "" "" "PARAMS;DEPENDS;ARGS" ${ARGN}) set(LIT_ARGS "${ARG_ARGS} ${LLVM_LIT_ARGS}") separate_arguments(LIT_ARGS) if (NOT CMAKE_CFG_INTDIR STREQUAL ".") list(APPEND LIT_ARGS --param build_mode=${CMAKE_CFG_INTDIR}) endif () # Get the path to the lit to *run* tests with. This can be overriden by # the user by specifying -DLLVM_EXTERNAL_LIT= get_llvm_lit_path( lit_base_dir lit_file_name ALLOW_EXTERNAL ) set(LIT_COMMAND "${PYTHON_EXECUTABLE};${lit_base_dir}/${lit_file_name}") list(APPEND LIT_COMMAND ${LIT_ARGS}) foreach(param ${ARG_PARAMS}) list(APPEND LIT_COMMAND --param ${param}) endforeach() if (ARG_UNPARSED_ARGUMENTS) add_custom_target(${target} COMMAND ${LIT_COMMAND} ${ARG_UNPARSED_ARGUMENTS} COMMENT "${comment}" USES_TERMINAL ) else() add_custom_target(${target} COMMAND ${CMAKE_COMMAND} -E echo "${target} does nothing, no tools built.") message(STATUS "${target} does nothing.") endif() if (ARG_DEPENDS) add_dependencies(${target} ${ARG_DEPENDS}) endif() # Tests should be excluded from "Build Solution". set_target_properties(${target} PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD ON) endfunction() # A function to add a set of lit test suites to be driven through 'check-*' targets. function(add_lit_testsuite target comment) cmake_parse_arguments(ARG "" "" "PARAMS;DEPENDS;ARGS" ${ARGN}) # EXCLUDE_FROM_ALL excludes the test ${target} out of check-all. if(NOT EXCLUDE_FROM_ALL) # Register the testsuites, params and depends for the global check rule. set_property(GLOBAL APPEND PROPERTY LLVM_LIT_TESTSUITES ${ARG_UNPARSED_ARGUMENTS}) set_property(GLOBAL APPEND PROPERTY LLVM_LIT_PARAMS ${ARG_PARAMS}) set_property(GLOBAL APPEND PROPERTY LLVM_LIT_DEPENDS ${ARG_DEPENDS}) set_property(GLOBAL APPEND PROPERTY LLVM_LIT_EXTRA_ARGS ${ARG_ARGS}) endif() # Produce a specific suffixed check rule. add_lit_target(${target} ${comment} ${ARG_UNPARSED_ARGUMENTS} PARAMS ${ARG_PARAMS} DEPENDS ${ARG_DEPENDS} ARGS ${ARG_ARGS} ) endfunction() function(add_lit_testsuites project directory) if (NOT LLVM_ENABLE_IDE) cmake_parse_arguments(ARG "" "" "PARAMS;DEPENDS;ARGS" ${ARGN}) # Search recursively for test directories by assuming anything not # in a directory called Inputs contains tests. file(GLOB_RECURSE to_process LIST_DIRECTORIES true ${directory}/*) foreach(lit_suite ${to_process}) if(NOT IS_DIRECTORY ${lit_suite}) continue() endif() string(FIND ${lit_suite} Inputs is_inputs) string(FIND ${lit_suite} Output is_output) if (NOT (is_inputs EQUAL -1 AND is_output EQUAL -1)) continue() endif() # Create a check- target for the directory. 
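# Example (illustrative): when invoked as add_lit_testsuites(LLVM ... ) on the
# llvm/test directory, a suite directory such as test/CodeGen/ARM produces a
# target named check-llvm-codegen-arm; the string handling below derives that
# name from the directory path.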
string(REPLACE ${directory} "" name_slash ${lit_suite}) if (name_slash) string(REPLACE "/" "-" name_slash ${name_slash}) string(REPLACE "\\" "-" name_dashes ${name_slash}) string(TOLOWER "${project}${name_dashes}" name_var) add_lit_target("check-${name_var}" "Running lit suite ${lit_suite}" ${lit_suite} PARAMS ${ARG_PARAMS} DEPENDS ${ARG_DEPENDS} ARGS ${ARG_ARGS} ) endif() endforeach() endif() endfunction() function(llvm_install_library_symlink name dest type) cmake_parse_arguments(ARG "ALWAYS_GENERATE" "COMPONENT" "" ${ARGN}) foreach(path ${CMAKE_MODULE_PATH}) if(EXISTS ${path}/LLVMInstallSymlink.cmake) set(INSTALL_SYMLINK ${path}/LLVMInstallSymlink.cmake) break() endif() endforeach() set(component ${ARG_COMPONENT}) if(NOT component) set(component ${name}) endif() set(full_name ${CMAKE_${type}_LIBRARY_PREFIX}${name}${CMAKE_${type}_LIBRARY_SUFFIX}) set(full_dest ${CMAKE_${type}_LIBRARY_PREFIX}${dest}${CMAKE_${type}_LIBRARY_SUFFIX}) set(output_dir lib${LLVM_LIBDIR_SUFFIX}) if(WIN32 AND "${type}" STREQUAL "SHARED") set(output_dir bin) endif() install(SCRIPT ${INSTALL_SYMLINK} CODE "install_symlink(${full_name} ${full_dest} ${output_dir})" COMPONENT ${component}) if (NOT LLVM_ENABLE_IDE AND NOT ARG_ALWAYS_GENERATE) add_llvm_install_targets(install-${name} DEPENDS ${name} ${dest} install-${dest} COMPONENT ${name}) endif() endfunction() function(llvm_install_symlink name dest) cmake_parse_arguments(ARG "ALWAYS_GENERATE" "COMPONENT" "" ${ARGN}) foreach(path ${CMAKE_MODULE_PATH}) if(EXISTS ${path}/LLVMInstallSymlink.cmake) set(INSTALL_SYMLINK ${path}/LLVMInstallSymlink.cmake) break() endif() endforeach() if(ARG_COMPONENT) set(component ${ARG_COMPONENT}) else() if(ARG_ALWAYS_GENERATE) set(component ${dest}) else() set(component ${name}) endif() endif() set(full_name ${name}${CMAKE_EXECUTABLE_SUFFIX}) set(full_dest ${dest}${CMAKE_EXECUTABLE_SUFFIX}) install(SCRIPT ${INSTALL_SYMLINK} CODE "install_symlink(${full_name} ${full_dest} ${LLVM_TOOLS_INSTALL_DIR})" COMPONENT ${component}) if (NOT LLVM_ENABLE_IDE AND NOT ARG_ALWAYS_GENERATE) add_llvm_install_targets(install-${name} DEPENDS ${name} ${dest} install-${dest} COMPONENT ${name}) endif() endfunction() function(add_llvm_tool_symlink link_name target) cmake_parse_arguments(ARG "ALWAYS_GENERATE" "OUTPUT_DIR" "" ${ARGN}) set(dest_binary "$") # This got a bit gross... For multi-configuration generators the target # properties return the resolved value of the string, not the build system # expression. To reconstruct the platform-agnostic path we have to do some # magic. First we grab one of the types, and a type-specific path. Then from # the type-specific path we find the last occurrence of the type in the path, # and replace it with CMAKE_CFG_INTDIR. This allows the build step to be type # agnostic again. if(NOT ARG_OUTPUT_DIR) # If you're not overriding the OUTPUT_DIR, we can make the link relative in # the same directory. 
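# Illustrative usage (a sketch of the common pattern, e.g. for the llvm-ar
# family of tools):
#   add_llvm_tool_symlink(llvm-ranlib llvm-ar)
# creates bin/llvm-ranlib pointing at the llvm-ar binary (or a copy on hosts
# without symlinks), using the output directory computed below.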
if(CMAKE_HOST_UNIX) set(dest_binary "$") endif() if(CMAKE_CONFIGURATION_TYPES) list(GET CMAKE_CONFIGURATION_TYPES 0 first_type) string(TOUPPER ${first_type} first_type_upper) set(first_type_suffix _${first_type_upper}) endif() get_target_property(target_type ${target} TYPE) if(${target_type} STREQUAL "STATIC_LIBRARY") get_target_property(ARG_OUTPUT_DIR ${target} ARCHIVE_OUTPUT_DIRECTORY${first_type_suffix}) elseif(UNIX AND ${target_type} STREQUAL "SHARED_LIBRARY") get_target_property(ARG_OUTPUT_DIR ${target} LIBRARY_OUTPUT_DIRECTORY${first_type_suffix}) else() get_target_property(ARG_OUTPUT_DIR ${target} RUNTIME_OUTPUT_DIRECTORY${first_type_suffix}) endif() if(CMAKE_CONFIGURATION_TYPES) string(FIND "${ARG_OUTPUT_DIR}" "/${first_type}/" type_start REVERSE) string(SUBSTRING "${ARG_OUTPUT_DIR}" 0 ${type_start} path_prefix) string(SUBSTRING "${ARG_OUTPUT_DIR}" ${type_start} -1 path_suffix) string(REPLACE "/${first_type}/" "/${CMAKE_CFG_INTDIR}/" path_suffix ${path_suffix}) set(ARG_OUTPUT_DIR ${path_prefix}${path_suffix}) endif() endif() if(CMAKE_HOST_UNIX) set(LLVM_LINK_OR_COPY create_symlink) else() set(LLVM_LINK_OR_COPY copy) endif() set(output_path "${ARG_OUTPUT_DIR}/${link_name}${CMAKE_EXECUTABLE_SUFFIX}") set(target_name ${link_name}) if(TARGET ${link_name}) set(target_name ${link_name}-link) endif() if(ARG_ALWAYS_GENERATE) set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${dest_binary}) add_custom_command(TARGET ${target} POST_BUILD COMMAND ${CMAKE_COMMAND} -E ${LLVM_LINK_OR_COPY} "${dest_binary}" "${output_path}") else() add_custom_command(OUTPUT ${output_path} COMMAND ${CMAKE_COMMAND} -E ${LLVM_LINK_OR_COPY} "${dest_binary}" "${output_path}" DEPENDS ${target}) add_custom_target(${target_name} ALL DEPENDS ${target} ${output_path}) set_target_properties(${target_name} PROPERTIES FOLDER Tools) # Make sure both the link and target are toolchain tools if (${link_name} IN_LIST LLVM_TOOLCHAIN_TOOLS AND ${target} IN_LIST LLVM_TOOLCHAIN_TOOLS) set(TOOL_IS_TOOLCHAIN ON) endif() if ((TOOL_IS_TOOLCHAIN OR NOT LLVM_INSTALL_TOOLCHAIN_ONLY) AND LLVM_BUILD_TOOLS) llvm_install_symlink(${link_name} ${target}) endif() endif() endfunction() function(llvm_externalize_debuginfo name) if(NOT LLVM_EXTERNALIZE_DEBUGINFO) return() endif() if(NOT LLVM_EXTERNALIZE_DEBUGINFO_SKIP_STRIP) if(APPLE) if(NOT CMAKE_STRIP) set(CMAKE_STRIP xcrun strip) endif() set(strip_command COMMAND ${CMAKE_STRIP} -Sxl $) else() set(strip_command COMMAND ${CMAKE_STRIP} -g -x $) endif() endif() if(LLVM_EXTERNALIZE_DEBUGINFO_OUTPUT_DIR) if(APPLE) set(output_name "$.dSYM") set(output_path "-o=${LLVM_EXTERNALIZE_DEBUGINFO_OUTPUT_DIR}/${output_name}") endif() endif() if(APPLE) if(CMAKE_CXX_FLAGS MATCHES "-flto" OR CMAKE_CXX_FLAGS_${uppercase_CMAKE_BUILD_TYPE} MATCHES "-flto") set(lto_object ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/${name}-lto.o) set_property(TARGET ${name} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,-object_path_lto,${lto_object}") endif() if(NOT CMAKE_DSYMUTIL) set(CMAKE_DSYMUTIL xcrun dsymutil) endif() add_custom_command(TARGET ${name} POST_BUILD COMMAND ${CMAKE_DSYMUTIL} ${output_path} $ ${strip_command} ) else() add_custom_command(TARGET ${name} POST_BUILD COMMAND ${CMAKE_OBJCOPY} --only-keep-debug $ $.debug ${strip_command} -R .gnu_debuglink COMMAND ${CMAKE_OBJCOPY} --add-gnu-debuglink=$.debug $ ) endif() endfunction() # Usage: llvm_codesign(name [ENTITLEMENTS file]) function(llvm_codesign name) cmake_parse_arguments(ARG "" "ENTITLEMENTS" "" ${ARGN}) if(NOT LLVM_CODESIGNING_IDENTITY) return() 
endif() if(CMAKE_GENERATOR STREQUAL "Xcode") set_target_properties(${name} PROPERTIES XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY ${LLVM_CODESIGNING_IDENTITY} ) if(DEFINED ARG_ENTITLEMENTS) set_target_properties(${name} PROPERTIES XCODE_ATTRIBUTE_CODE_SIGN_ENTITLEMENTS ${ARG_ENTITLEMENTS} ) endif() elseif(APPLE) if(NOT CMAKE_CODESIGN) set(CMAKE_CODESIGN xcrun codesign) endif() if(NOT CMAKE_CODESIGN_ALLOCATE) execute_process( COMMAND xcrun -f codesign_allocate OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE CMAKE_CODESIGN_ALLOCATE ) endif() if(DEFINED ARG_ENTITLEMENTS) set(pass_entitlements --entitlements ${ARG_ENTITLEMENTS}) endif() add_custom_command( TARGET ${name} POST_BUILD COMMAND ${CMAKE_COMMAND} -E env CODESIGN_ALLOCATE=${CMAKE_CODESIGN_ALLOCATE} ${CMAKE_CODESIGN} -s ${LLVM_CODESIGNING_IDENTITY} ${pass_entitlements} $ ) endif() endfunction() function(llvm_setup_rpath name) if(CMAKE_INSTALL_RPATH) return() endif() if(LLVM_INSTALL_PREFIX AND NOT (LLVM_INSTALL_PREFIX STREQUAL CMAKE_INSTALL_PREFIX)) set(extra_libdir ${LLVM_LIBRARY_DIR}) elseif(LLVM_BUILD_LIBRARY_DIR) set(extra_libdir ${LLVM_LIBRARY_DIR}) endif() if (APPLE) set(_install_name_dir INSTALL_NAME_DIR "@rpath") set(_install_rpath "@loader_path/../lib" ${extra_libdir}) elseif(UNIX) set(_install_rpath "\$ORIGIN/../lib${LLVM_LIBDIR_SUFFIX}" ${extra_libdir}) if(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)") set_property(TARGET ${name} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,-z,origin ") endif() if(LLVM_LINKER_IS_GNULD) # $ORIGIN is not interpreted at link time by ld.bfd set_property(TARGET ${name} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,-rpath-link,${LLVM_LIBRARY_OUTPUT_INTDIR} ") endif() else() return() endif() set_target_properties(${name} PROPERTIES BUILD_WITH_INSTALL_RPATH On INSTALL_RPATH "${_install_rpath}" ${_install_name_dir}) endfunction() function(setup_dependency_debugging name) if(NOT LLVM_DEPENDENCY_DEBUGGING) return() endif() if("intrinsics_gen" IN_LIST ARGN) return() endif() set(deny_attributes_inc "(deny file* (literal \"${LLVM_BINARY_DIR}/include/llvm/IR/Attributes.inc\"))") set(deny_intrinsics_inc "(deny file* (literal \"${LLVM_BINARY_DIR}/include/llvm/IR/Intrinsics.inc\"))") set(sandbox_command "sandbox-exec -p '(version 1) (allow default) ${deny_attributes_inc} ${deny_intrinsics_inc}'") set_target_properties(${name} PROPERTIES RULE_LAUNCH_COMPILE ${sandbox_command}) endfunction() # Figure out if we can track VC revisions. function(find_first_existing_file out_var) foreach(file ${ARGN}) if(EXISTS "${file}") set(${out_var} "${file}" PARENT_SCOPE) return() endif() endforeach() endfunction() macro(find_first_existing_vc_file out_var path) find_program(git_executable NAMES git git.exe git.cmd) # Run from a subdirectory to force git to print an absolute path. execute_process(COMMAND ${git_executable} rev-parse --git-dir WORKING_DIRECTORY ${path}/cmake RESULT_VARIABLE git_result OUTPUT_VARIABLE git_dir ERROR_QUIET) if(git_result EQUAL 0) string(STRIP "${git_dir}" git_dir) set(${out_var} "${git_dir}/logs/HEAD") # some branchless cases (e.g. 
'repo') may not yet have .git/logs/HEAD if (NOT EXISTS "${git_dir}/logs/HEAD") file(WRITE "${git_dir}/logs/HEAD" "") endif() else() find_first_existing_file(${out_var} "${path}/.svn/wc.db" # SVN 1.7 "${path}/.svn/entries" # SVN 1.6 ) endif() endmacro() Index: vendor/llvm/dist-release_80/cmake/modules/LLVMProcessSources.cmake =================================================================== --- vendor/llvm/dist-release_80/cmake/modules/LLVMProcessSources.cmake (revision 348931) +++ vendor/llvm/dist-release_80/cmake/modules/LLVMProcessSources.cmake (revision 348932) @@ -1,110 +1,118 @@ include(AddFileDependencies) include(CMakeParseArguments) function(llvm_replace_compiler_option var old new) # Replaces a compiler option or switch `old' in `var' by `new'. # If `old' is not in `var', appends `new' to `var'. # Example: llvm_replace_compiler_option(CMAKE_CXX_FLAGS_RELEASE "-O3" "-O2") # If the option already is on the variable, don't add it: if( "${${var}}" MATCHES "(^| )${new}($| )" ) set(n "") else() set(n "${new}") endif() if( "${${var}}" MATCHES "(^| )${old}($| )" ) string( REGEX REPLACE "(^| )${old}($| )" " ${n} " ${var} "${${var}}" ) else() set( ${var} "${${var}} ${n}" ) endif() set( ${var} "${${var}}" PARENT_SCOPE ) endfunction(llvm_replace_compiler_option) macro(add_td_sources srcs) file(GLOB tds *.td) if( tds ) source_group("TableGen descriptions" FILES ${tds}) set_source_files_properties(${tds} PROPERTIES HEADER_FILE_ONLY ON) list(APPEND ${srcs} ${tds}) endif() endmacro(add_td_sources) function(add_header_files_for_glob hdrs_out glob) file(GLOB hds ${glob}) - set(${hdrs_out} ${hds} PARENT_SCOPE) + set(filtered) + foreach(file ${hds}) + # Explicit existence check is necessary to filter dangling symlinks + # out. See https://bugs.gentoo.org/674662. 
+ if(EXISTS ${file}) + list(APPEND filtered ${file}) + endif() + endforeach() + set(${hdrs_out} ${filtered} PARENT_SCOPE) endfunction(add_header_files_for_glob) function(find_all_header_files hdrs_out additional_headerdirs) add_header_files_for_glob(hds *.h) list(APPEND all_headers ${hds}) foreach(additional_dir ${additional_headerdirs}) add_header_files_for_glob(hds "${additional_dir}/*.h") list(APPEND all_headers ${hds}) add_header_files_for_glob(hds "${additional_dir}/*.inc") list(APPEND all_headers ${hds}) endforeach(additional_dir) set( ${hdrs_out} ${all_headers} PARENT_SCOPE ) endfunction(find_all_header_files) function(llvm_process_sources OUT_VAR) cmake_parse_arguments(ARG "" "" "ADDITIONAL_HEADERS;ADDITIONAL_HEADER_DIRS" ${ARGN}) set(sources ${ARG_UNPARSED_ARGUMENTS}) llvm_check_source_file_list( ${sources} ) # This adds .td and .h files to the Visual Studio solution: add_td_sources(sources) find_all_header_files(hdrs "${ARG_ADDITIONAL_HEADER_DIRS}") if (hdrs) set_source_files_properties(${hdrs} PROPERTIES HEADER_FILE_ONLY ON) endif() set_source_files_properties(${ARG_ADDITIONAL_HEADERS} PROPERTIES HEADER_FILE_ONLY ON) list(APPEND sources ${ARG_ADDITIONAL_HEADERS} ${hdrs}) set( ${OUT_VAR} ${sources} PARENT_SCOPE ) endfunction(llvm_process_sources) function(llvm_check_source_file_list) cmake_parse_arguments(ARG "" "SOURCE_DIR" "" ${ARGN}) foreach(l ${ARG_UNPARSED_ARGUMENTS}) get_filename_component(fp ${l} REALPATH) list(APPEND listed ${fp}) endforeach() if(ARG_SOURCE_DIR) file(GLOB globbed "${ARG_SOURCE_DIR}/*.c" "${ARG_SOURCE_DIR}/*.cpp") else() file(GLOB globbed *.c *.cpp) endif() foreach(g ${globbed}) get_filename_component(fn ${g} NAME) if(ARG_SOURCE_DIR) set(entry "${g}") else() set(entry "${fn}") endif() get_filename_component(gp ${g} REALPATH) # Don't reject hidden files. Some editors create backups in the # same directory as the file. if (NOT "${fn}" MATCHES "^\\.") list(FIND LLVM_OPTIONAL_SOURCES ${entry} idx) if( idx LESS 0 ) list(FIND listed ${gp} idx) if( idx LESS 0 ) if(ARG_SOURCE_DIR) set(fn_relative "${ARG_SOURCE_DIR}/${fn}") else() set(fn_relative "${fn}") endif() message(SEND_ERROR "Found unknown source file ${fn_relative} Please update ${CMAKE_CURRENT_LIST_FILE}\n") endif() endif() endif() endforeach() endfunction(llvm_check_source_file_list) Index: vendor/llvm/dist-release_80/docs/ReleaseNotes.rst =================================================================== --- vendor/llvm/dist-release_80/docs/ReleaseNotes.rst (revision 348931) +++ vendor/llvm/dist-release_80/docs/ReleaseNotes.rst (revision 348932) @@ -1,316 +1,336 @@ ======================== LLVM 8.0.0 Release Notes ======================== .. contents:: :local: Introduction ============ This document contains the release notes for the LLVM Compiler Infrastructure, release 8.0.0. Here we describe the status of LLVM, including major improvements from the previous release, improvements in various subprojects of LLVM, and some of the current users of the code. All LLVM releases may be downloaded from the `LLVM releases web site `_. For more information about LLVM, including information about the latest release, please check out the `main LLVM web site `_. If you have questions or comments, the `LLVM Developer's Mailing List `_ is a good place to send them. 
Minimum Required Compiler Version ================================= As `discussed on the mailing list `_, building LLVM will soon require more recent toolchains as follows: ============= ==== Clang 3.5 Apple Clang 6.0 GCC 5.1 Visual Studio 2017 ============= ==== A new CMake check when configuring LLVM provides a soft-error if your toolchain will become unsupported soon. You can opt out of the soft-error by setting the ``LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN`` CMake variable to ``ON``. Known Issues ============ These are issues that couldn't be fixed before the release. See the bug reports for the latest status. * `PR40547 `_ Clang gets miscompiled by trunk GCC. * `PR40761 `_ "asan-dynamic" doesn't work on FreeBSD. Non-comprehensive list of changes in this release ================================================= * The **llvm-cov** tool can now export lcov trace files using the `-format=lcov` option of the `export` command. * The ``add_llvm_loadable_module`` CMake macro has been removed. The ``add_llvm_library`` macro with the ``MODULE`` argument now provides the same functionality. See `Writing an LLVM Pass `_. * For MinGW, references to data variables that might need to be imported from a dll are accessed via a stub, to allow the linker to convert it to a dllimport if needed. * Added support for labels as offsets in ``.reloc`` directive. * Support for precise identification of X86 instructions with memory operands, by using debug information. This supports profile-driven cache prefetching. It is enabled with the ``-x86-discriminate-memops`` LLVM Flag. * Support for profile-driven software cache prefetching on X86. This is part of a larger system, consisting of: an offline cache prefetches recommender, AutoFDO tooling, and LLVM. In this system, a binary compiled with ``-x86-discriminate-memops`` is run under the observation of the recommender. The recommender identifies certain memory access instructions by their binary file address, and recommends a prefetch of a specific type (NTA, T0, etc) be performed at a specified fixed offset from such an instruction's memory operand. Next, this information needs to be converted to the AutoFDO syntax and the resulting profile may be passed back to the compiler with the LLVM flag ``-prefetch-hints-file``, together with the exact same set of compilation parameters used for the original binary. More information is available in the `RFC `_. * Windows support for libFuzzer (x86_64). Changes to the LLVM IR ---------------------- * Function attribute ``speculative_load_hardening`` has been introduced to allow indicating that `Speculative Load Hardening `_ must be enabled for the function body. Changes to the JIT APIs ----------------------- The ORC (On Request Compilation) JIT APIs have been updated to support concurrent compilation. The existing (non-concurrent) ORC layer classes and related APIs are deprecated, have been renamed with a "Legacy" prefix (e.g. LegacyIRCompileLayer). The deprecated clasess will be removed in LLVM 9. An example JIT stack using the concurrent ORC APIs, called LLJIT, has been added (see include/llvm/ExecutionEngine/Orc/LLJIT.h). The lli tool has been updated to use LLJIT. MCJIT and ExecutionEngine continue to be supported, though ORC should be preferred for new projects. 
+Changes to the C++ APIs +----------------------- + +Three of the IR library methods related to debugging information for +functions and methods have changed their prototypes: + + DIBuilder::createMethod + DIBuilder::createFunction + DIBuilder::createTempFunctionFwdDecl + +In all cases, several individual parameters were removed, and replaced +by a single 'SPFlags' (subprogram flags) parameter. The individual +parameters are: 'isLocalToUnit'; 'isDefinition'; 'isOptimized'; and +for 'createMethod', 'Virtuality'. The new 'SPFlags' parameter has a +default value equivalent to passing 'false' for the three 'bool' +parameters, and zero (non-virtual) to the 'Virtuality' parameter. For +any old-style API call that passed 'true' or a non-zero virtuality to +these methods, you will need to substitute the correct 'SPFlags' value. +The helper method 'DISubprogram::toSPFlags()' might be useful in making +this conversion. Changes to the AArch64 Target ----------------------------- * Support for Speculative Load Hardening has been added. * Initial support for the Tiny code model, where code and its statically defined symbols must live within 1MB of each other. * Added support for the ``.arch_extension`` assembler directive, just like on ARM. Changes to the Hexagon Target ----------------------------- * Added support for Hexagon/HVX V66 ISA. Changes to the MIPS Target -------------------------- * Improved support of GlobalISel instruction selection framework. * Implemented emission of ``R_MIPS_JALR`` and ``R_MICROMIPS_JALR`` relocations. These relocations provide hints to a linker for optimization of jumps to protected symbols. * ORC JIT has been supported for MIPS and MIPS64 architectures. * Assembler now suggests alternative MIPS instruction mnemonics when an invalid one is specified. * Improved support for MIPS N32 ABI. * Added new instructions (``pll.ps``, ``plu.ps``, ``cvt.s.pu``, ``cvt.s.pl``, ``cvt.ps``, ``sigrie``). * Numerous bug fixes and code cleanups. Changes to the PowerPC Target ----------------------------- * Switched to non-PIC default * Deprecated Darwin support * Enabled Out-of-Order scheduling for P9 * Better overload rules for compatible vector type parameter * Support constraint 'wi', modifier 'x' and VSX registers in inline asm * More ``__float128`` support * Added new builtins like vector int128 ``pack``/``unpack`` and ``stxvw4x.be``/``stxvd2x.be`` * Provided significant improvements to the automatic vectorizer * Code-gen improvements (especially for Power9) * Fixed some long-standing bugs in the back end * Added experimental prologue/epilogue improvements * Enabled builtins tests in compiler-rt * Add ``___fixunstfti``/``floattitf`` in compiler-rt to support conversion between IBM double-double and unsigned int128 * Disable randomized address space when running the sanitizers on Linux ppc64le * Completed support in LLD for ELFv2 * Enabled llvm-exegesis latency mode for PPC Changes to the SystemZ Target ----------------------------- * A number of bugs related to C/C++ language vector extension support were fixed: the ``-mzvector`` option now actually enables the ``__vector`` and ``__bool`` keywords, the ``vec_step`` intrinsic now works, and the ``vec_insert_and_zero`` and ``vec_orc`` intrinsics now generate correct code. * The ``__float128`` keyword, which had been accidentally enabled in some earlier releases, is now no longer supported. On SystemZ, the ``long double`` data type itself already uses the IEEE 128-bit floating-point format. 
* When the compiler inlines ``strcmp`` or ``memcmp``, the generated code no longer returns ``INT_MIN`` as the negative result value under any circumstances. * Various code-gen improvements, in particular related to improved auto-vectorization, inlining, and instruction scheduling. Changes to the X86 Target ------------------------- * Machine model for AMD bdver2 (Piledriver) CPU was added. It is used to support instruction scheduling and other instruction cost heuristics. * New AVX512F gather and scatter intrinsics were added that take a mask instead of a scalar integer. This removes the need for a bitcast in IR. The new intrinsics are named like the old intrinsics with ``llvm.avx512.`` replaced with ``llvm.avx512.mask.``. The old intrinsics will be removed in a future release. * Added ``cascadelake`` as a CPU name for -march. This is ``skylake-avx512`` with the addition of the ``avx512vnni`` instruction set. * ADCX instruction will no longer be emitted. This instruction is rarely better than the legacy ADC instruction and just increased code size. Changes to the WebAssembly Target --------------------------------- The WebAssembly target is no longer "experimental"! It's now built by default, rather than needing to be enabled with LLVM_EXPERIMENTAL_TARGETS_TO_BUILD. The object file format and core C ABI are now considered stable. That said, the object file format has an ABI versioning capability, and one anticipated use for it will be to add support for returning small structs as multiple return values, once the underlying WebAssembly platform itself supports it. Additionally, multithreading support is not yet included in the stable ABI. Changes to the Nios2 Target --------------------------- * The Nios2 target was removed from this release. Changes to LLDB =============== * Printed source code is now syntax highlighted in the terminal (only for C languages). * The expression command now supports tab completing expressions. External Open Source Projects Using LLVM 8 ========================================== LDC - the LLVM-based D compiler ------------------------------- `D `_ is a language with C-like syntax and static typing. It pragmatically combines efficiency, control, and modeling power, with safety and programmer productivity. D supports powerful concepts like Compile-Time Function Execution (CTFE) and Template Meta-Programming, provides an innovative approach to concurrency and offers many classical paradigms. `LDC `_ uses the frontend from the reference compiler combined with LLVM as backend to produce efficient native code. LDC targets x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on ARM and PowerPC (32/64 bit). Ports to other architectures like AArch64 and MIPS64 are underway. Open Dylan Compiler ------------------- `Dylan `_ is a multi-paradigm functional and object-oriented programming language. It is dynamic while providing a programming model designed to support efficient machine code generation, including fine-grained control over dynamic and static behavior. Dylan also features a powerful macro facility for expressive metaprogramming. The Open Dylan compiler can use LLVM as one of its code-generating back-ends, including full support for debug info generation. (Open Dylan generates LLVM bitcode directly using a native Dylan IR and bitcode library.) Development of a Dylan debugger and interactive REPL making use of the LLDB libraries is in progress. 
Zig Programming Language ------------------------ `Zig `_ is a system programming language intended to be an alternative to C. It provides high level features such as generics, compile time function execution, and partial evaluation, while exposing low level LLVM IR features such as aliases and intrinsics. Zig uses Clang to provide automatic import of .h symbols, including inline functions and simple macros. Zig uses LLD combined with lazily building compiler-rt to provide out-of-the-box cross-compiling for all supported targets. Additional Information ====================== A wide variety of additional information is available on the `LLVM web page `_, in particular in the `documentation `_ section. The web page also contains versions of the API documentation which is up-to-date with the Subversion version of the source code. You can access versions of these documents specific to this release by going into the ``llvm/docs/`` directory in the LLVM tree. If you have any questions or comments about LLVM, please feel free to contact us via the `mailing lists `_. Index: vendor/llvm/dist-release_80/lib/CodeGen/TargetRegisterInfo.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/CodeGen/TargetRegisterInfo.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/CodeGen/TargetRegisterInfo.cpp (revision 348932) @@ -1,512 +1,518 @@ //==- TargetRegisterInfo.cpp - Target Register Information Implementation --==// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the TargetRegisterInfo interface. 
// //===----------------------------------------------------------------------===// #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" #include #include #define DEBUG_TYPE "target-reg-info" using namespace llvm; TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, regclass_iterator RCB, regclass_iterator RCE, const char *const *SRINames, const LaneBitmask *SRILaneMasks, LaneBitmask SRICoveringLanes, const RegClassInfo *const RCIs, unsigned Mode) : InfoDesc(ID), SubRegIndexNames(SRINames), SubRegIndexLaneMasks(SRILaneMasks), RegClassBegin(RCB), RegClassEnd(RCE), CoveringLanes(SRICoveringLanes), RCInfos(RCIs), HwMode(Mode) { } TargetRegisterInfo::~TargetRegisterInfo() = default; void TargetRegisterInfo::markSuperRegs(BitVector &RegisterSet, unsigned Reg) const { for (MCSuperRegIterator AI(Reg, this, true); AI.isValid(); ++AI) RegisterSet.set(*AI); } bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet, ArrayRef Exceptions) const { // Check that all super registers of reserved regs are reserved as well. BitVector Checked(getNumRegs()); for (unsigned Reg : RegisterSet.set_bits()) { if (Checked[Reg]) continue; for (MCSuperRegIterator SR(Reg, this); SR.isValid(); ++SR) { if (!RegisterSet[*SR] && !is_contained(Exceptions, Reg)) { dbgs() << "Error: Super register " << printReg(*SR, this) << " of reserved register " << printReg(Reg, this) << " is not reserved.\n"; return false; } // We transitively check superregs. So we can remember this for later // to avoid compiletime explosion in deep register hierarchies. Checked.set(*SR); } } return true; } namespace llvm { Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI, unsigned SubIdx, const MachineRegisterInfo *MRI) { return Printable([Reg, TRI, SubIdx, MRI](raw_ostream &OS) { if (!Reg) OS << "$noreg"; else if (TargetRegisterInfo::isStackSlot(Reg)) OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); else if (TargetRegisterInfo::isVirtualRegister(Reg)) { StringRef Name = MRI ? MRI->getVRegName(Reg) : ""; if (Name != "") { OS << '%' << Name; } else { OS << '%' << TargetRegisterInfo::virtReg2Index(Reg); } } else if (!TRI) OS << '$' << "physreg" << Reg; else if (Reg < TRI->getNumRegs()) { OS << '$'; printLowerCase(TRI->getName(Reg), OS); } else llvm_unreachable("Register kind is unsupported."); if (SubIdx) { if (TRI) OS << ':' << TRI->getSubRegIndexName(SubIdx); else OS << ":sub(" << SubIdx << ')'; } }); } Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { return Printable([Unit, TRI](raw_ostream &OS) { // Generic printout when TRI is missing. if (!TRI) { OS << "Unit~" << Unit; return; } // Check for invalid register units. 
if (Unit >= TRI->getNumRegUnits()) { OS << "BadUnit~" << Unit; return; } // Normal units have at least one root. MCRegUnitRootIterator Roots(Unit, TRI); assert(Roots.isValid() && "Unit has no roots."); OS << TRI->getName(*Roots); for (++Roots; Roots.isValid(); ++Roots) OS << '~' << TRI->getName(*Roots); }); } Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { return Printable([Unit, TRI](raw_ostream &OS) { if (TRI && TRI->isVirtualRegister(Unit)) { OS << '%' << TargetRegisterInfo::virtReg2Index(Unit); } else { OS << printRegUnit(Unit, TRI); } }); } Printable printRegClassOrBank(unsigned Reg, const MachineRegisterInfo &RegInfo, const TargetRegisterInfo *TRI) { return Printable([Reg, &RegInfo, TRI](raw_ostream &OS) { if (RegInfo.getRegClassOrNull(Reg)) OS << StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower(); else if (RegInfo.getRegBankOrNull(Reg)) OS << StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower(); else { OS << "_"; assert((RegInfo.def_empty(Reg) || RegInfo.getType(Reg).isValid()) && "Generic registers must have a valid type"); } }); } } // end namespace llvm /// getAllocatableClass - Return the maximal subclass of the given register /// class that is alloctable, or NULL. const TargetRegisterClass * TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const { if (!RC || RC->isAllocatable()) return RC; for (BitMaskClassIterator It(RC->getSubClassMask(), *this); It.isValid(); ++It) { const TargetRegisterClass *SubRC = getRegClass(It.getID()); if (SubRC->isAllocatable()) return SubRC; } return nullptr; } /// getMinimalPhysRegClass - Returns the Register Class of a physical /// register of the given type, picking the most sub register class of /// the right type that contains this physreg. const TargetRegisterClass * TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const { assert(isPhysicalRegister(reg) && "reg must be a physical register"); // Pick the most sub register class of the right type that contains // this physreg. const TargetRegisterClass* BestRC = nullptr; for (const TargetRegisterClass* RC : regclasses()) { if ((VT == MVT::Other || isTypeLegalForClass(*RC, VT)) && RC->contains(reg) && (!BestRC || BestRC->hasSubClass(RC))) BestRC = RC; } assert(BestRC && "Couldn't find the register class"); return BestRC; } /// getAllocatableSetForRC - Toggle the bits that represent allocatable /// registers for the specific register class. static void getAllocatableSetForRC(const MachineFunction &MF, const TargetRegisterClass *RC, BitVector &R){ assert(RC->isAllocatable() && "invalid for nonallocatable sets"); ArrayRef Order = RC->getRawAllocationOrder(MF); for (unsigned i = 0; i != Order.size(); ++i) R.set(Order[i]); } BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, const TargetRegisterClass *RC) const { BitVector Allocatable(getNumRegs()); if (RC) { // A register class with no allocatable subclass returns an empty set. 
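// Reserved registers are not filtered here; they are masked out below once
// the allocation orders have been accumulated.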
const TargetRegisterClass *SubClass = getAllocatableClass(RC); if (SubClass) getAllocatableSetForRC(MF, SubClass, Allocatable); } else { for (const TargetRegisterClass *C : regclasses()) if (C->isAllocatable()) getAllocatableSetForRC(MF, C, Allocatable); } // Mask out the reserved registers BitVector Reserved = getReservedRegs(MF); Allocatable &= Reserved.flip(); return Allocatable; } static inline const TargetRegisterClass *firstCommonClass(const uint32_t *A, const uint32_t *B, const TargetRegisterInfo *TRI, const MVT::SimpleValueType SVT = MVT::SimpleValueType::Any) { const MVT VT(SVT); for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32) if (unsigned Common = *A++ & *B++) { const TargetRegisterClass *RC = TRI->getRegClass(I + countTrailingZeros(Common)); if (SVT == MVT::SimpleValueType::Any || TRI->isTypeLegalForClass(*RC, VT)) return RC; } return nullptr; } const TargetRegisterClass * TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, const TargetRegisterClass *B, const MVT::SimpleValueType SVT) const { // First take care of the trivial cases. if (A == B) return A; if (!A || !B) return nullptr; // Register classes are ordered topologically, so the largest common // sub-class it the common sub-class with the smallest ID. return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this, SVT); } const TargetRegisterClass * TargetRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass *B, unsigned Idx) const { assert(A && B && "Missing register class"); assert(Idx && "Bad sub-register index"); // Find Idx in the list of super-register indices. for (SuperRegClassIterator RCI(B, this); RCI.isValid(); ++RCI) if (RCI.getSubReg() == Idx) // The bit mask contains all register classes that are projected into B // by Idx. Find a class that is also a sub-class of A. return firstCommonClass(RCI.getMask(), A->getSubClassMask(), this); return nullptr; } const TargetRegisterClass *TargetRegisterInfo:: getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, const TargetRegisterClass *RCB, unsigned SubB, unsigned &PreA, unsigned &PreB) const { assert(RCA && SubA && RCB && SubB && "Invalid arguments"); // Search all pairs of sub-register indices that project into RCA and RCB // respectively. This is quadratic, but usually the sets are very small. On // most targets like X86, there will only be a single sub-register index // (e.g., sub_16bit projecting into GR16). // // The worst case is a register class like DPR on ARM. // We have indices dsub_0..dsub_7 projecting into that class. // // It is very common that one register class is a sub-register of the other. // Arrange for RCA to be the larger register so the answer will be found in // the first iteration. This makes the search linear for the most common // case. const TargetRegisterClass *BestRC = nullptr; unsigned *BestPreA = &PreA; unsigned *BestPreB = &PreB; if (getRegSizeInBits(*RCA) < getRegSizeInBits(*RCB)) { std::swap(RCA, RCB); std::swap(SubA, SubB); std::swap(BestPreA, BestPreB); } // Also terminate the search one we have found a register class as small as // RCA. unsigned MinSize = getRegSizeInBits(*RCA); for (SuperRegClassIterator IA(RCA, this, true); IA.isValid(); ++IA) { unsigned FinalA = composeSubRegIndices(IA.getSubReg(), SubA); for (SuperRegClassIterator IB(RCB, this, true); IB.isValid(); ++IB) { // Check if a common super-register class exists for this index pair. 
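// firstCommonClass scans the two sub-class bit masks one word at a time and
// returns the class with the smallest ID among the common bits, which by the
// topological ordering of register classes is the largest common class.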
const TargetRegisterClass *RC = firstCommonClass(IA.getMask(), IB.getMask(), this); if (!RC || getRegSizeInBits(*RC) < MinSize) continue; // The indexes must compose identically: PreA+SubA == PreB+SubB. unsigned FinalB = composeSubRegIndices(IB.getSubReg(), SubB); if (FinalA != FinalB) continue; // Is RC a better candidate than BestRC? if (BestRC && getRegSizeInBits(*RC) >= getRegSizeInBits(*BestRC)) continue; // Yes, RC is the smallest super-register seen so far. BestRC = RC; *BestPreA = IA.getSubReg(); *BestPreB = IB.getSubReg(); // Bail early if we reached MinSize. We won't find a better candidate. if (getRegSizeInBits(*BestRC) == MinSize) return BestRC; } } return BestRC; } /// Check if the registers defined by the pair (RegisterClass, SubReg) /// share the same register file. static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) { // Same register class. if (DefRC == SrcRC) return true; // Both operands are sub registers. Check if they share a register class. unsigned SrcIdx, DefIdx; if (SrcSubReg && DefSubReg) { return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg, SrcIdx, DefIdx) != nullptr; } // At most one of the register is a sub register, make it Src to avoid // duplicating the test. if (!SrcSubReg) { std::swap(DefSubReg, SrcSubReg); std::swap(DefRC, SrcRC); } // One of the register is a sub register, check if we can get a superclass. if (SrcSubReg) return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr; // Plain copy. return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr; } bool TargetRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const { // If this source does not incur a cross register bank copy, use it. return shareSameRegisterFile(*this, DefRC, DefSubReg, SrcRC, SrcSubReg); } // Compute target-independent register allocator hints to help eliminate copies. bool TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, ArrayRef Order, SmallVectorImpl &Hints, const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { const MachineRegisterInfo &MRI = MF.getRegInfo(); const std::pair> &Hints_MRI = MRI.getRegAllocationHints(VirtReg); + SmallSet HintedRegs; // First hint may be a target hint. bool Skip = (Hints_MRI.first != 0); for (auto Reg : Hints_MRI.second) { if (Skip) { Skip = false; continue; } // Target-independent hints are either a physical or a virtual register. unsigned Phys = Reg; if (VRM && isVirtualRegister(Phys)) Phys = VRM->getPhys(Phys); + // Don't add the same reg twice (Hints_MRI may contain multiple virtual + // registers allocated to the same physreg). + if (!HintedRegs.insert(Phys).second) + continue; // Check that Phys is a valid hint in VirtReg's register class. if (!isPhysicalRegister(Phys)) continue; if (MRI.isReserved(Phys)) continue; // Check that Phys is in the allocation order. We shouldn't heed hints // from VirtReg's register class if they aren't in the allocation order. The // target probably has a reason for removing the register. if (!is_contained(Order, Phys)) continue; // All clear, tell the register allocator to prefer this register. 
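// Note: this base implementation returns false, i.e. the hints are ordinary
// preferences moved to the front of the allocation order; targets overriding
// this hook may return true to ask the allocator to restrict itself to the
// hinted registers as far as possible.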
Hints.push_back(Phys); } return false; } bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const { return !MF.getFunction().hasFnAttribute("no-realign-stack"); } bool TargetRegisterInfo::needsStackRealignment( const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); const Function &F = MF.getFunction(); unsigned StackAlign = TFI->getStackAlignment(); bool requiresRealignment = ((MFI.getMaxAlignment() > StackAlign) || F.hasFnAttribute(Attribute::StackAlignment)); if (F.hasFnAttribute("stackrealign") || requiresRealignment) { if (canRealignStack(MF)) return true; LLVM_DEBUG(dbgs() << "Can't realign function's stack: " << F.getName() << "\n"); } return false; } bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0, const uint32_t *mask1) const { unsigned N = (getNumRegs()+31) / 32; for (unsigned I = 0; I < N; ++I) if ((mask0[I] & mask1[I]) != mask0[I]) return false; return true; } unsigned TargetRegisterInfo::getRegSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI) const { const TargetRegisterClass *RC{}; if (isPhysicalRegister(Reg)) { // The size is not directly available for physical registers. // Instead, we need to access a register class that contains Reg and // get the size of that register class. RC = getMinimalPhysRegClass(Reg); } else { LLT Ty = MRI.getType(Reg); unsigned RegSize = Ty.isValid() ? Ty.getSizeInBits() : 0; // If Reg is not a generic register, query the register class to // get its size. if (RegSize) return RegSize; // Since Reg is not a generic register, it must have a register class. RC = MRI.getRegClass(Reg); } assert(RC && "Unable to deduce the register class"); return getRegSizeInBits(*RC); } unsigned TargetRegisterInfo::lookThruCopyLike(unsigned SrcReg, const MachineRegisterInfo *MRI) const { while (true) { const MachineInstr *MI = MRI->getVRegDef(SrcReg); if (!MI->isCopyLike()) return SrcReg; unsigned CopySrcReg; if (MI->isCopy()) CopySrcReg = MI->getOperand(1).getReg(); else { assert(MI->isSubregToReg() && "Bad opcode for lookThruCopyLike"); CopySrcReg = MI->getOperand(2).getReg(); } if (!isVirtualRegister(CopySrcReg)) return CopySrcReg; SrcReg = CopySrcReg; } } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex, const TargetRegisterInfo *TRI) { dbgs() << printReg(Reg, TRI, SubRegIndex) << "\n"; } #endif Index: vendor/llvm/dist-release_80/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp (revision 348932) @@ -1,556 +1,556 @@ //===- DWARFDebugFrame.h - Parsing of .debug_frame ------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
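//
// This file implements parsing of DWARF call frame information: the CIE and
// FDE records found in .debug_frame and .eh_frame, together with the CFI
// instruction programs they contain.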
// //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include #include using namespace llvm; using namespace dwarf; // See DWARF standard v3, section 7.23 const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0; const uint8_t DWARF_CFI_PRIMARY_OPERAND_MASK = 0x3f; Error CFIProgram::parse(DataExtractor Data, uint32_t *Offset, uint32_t EndOffset) { while (*Offset < EndOffset) { uint8_t Opcode = Data.getU8(Offset); // Some instructions have a primary opcode encoded in the top bits. uint8_t Primary = Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK; if (Primary) { // If it's a primary opcode, the first operand is encoded in the bottom // bits of the opcode itself. uint64_t Op1 = Opcode & DWARF_CFI_PRIMARY_OPERAND_MASK; switch (Primary) { default: return createStringError(errc::illegal_byte_sequence, "Invalid primary CFI opcode 0x%" PRIx8, Primary); case DW_CFA_advance_loc: case DW_CFA_restore: addInstruction(Primary, Op1); break; case DW_CFA_offset: addInstruction(Primary, Op1, Data.getULEB128(Offset)); break; } } else { // Extended opcode - its value is Opcode itself. switch (Opcode) { default: return createStringError(errc::illegal_byte_sequence, "Invalid extended CFI opcode 0x%" PRIx8, Opcode); case DW_CFA_nop: case DW_CFA_remember_state: case DW_CFA_restore_state: case DW_CFA_GNU_window_save: // No operands addInstruction(Opcode); break; case DW_CFA_set_loc: // Operands: Address addInstruction(Opcode, Data.getAddress(Offset)); break; case DW_CFA_advance_loc1: // Operands: 1-byte delta addInstruction(Opcode, Data.getU8(Offset)); break; case DW_CFA_advance_loc2: // Operands: 2-byte delta addInstruction(Opcode, Data.getU16(Offset)); break; case DW_CFA_advance_loc4: // Operands: 4-byte delta addInstruction(Opcode, Data.getU32(Offset)); break; case DW_CFA_restore_extended: case DW_CFA_undefined: case DW_CFA_same_value: case DW_CFA_def_cfa_register: case DW_CFA_def_cfa_offset: case DW_CFA_GNU_args_size: // Operands: ULEB128 addInstruction(Opcode, Data.getULEB128(Offset)); break; case DW_CFA_def_cfa_offset_sf: // Operands: SLEB128 addInstruction(Opcode, Data.getSLEB128(Offset)); break; case DW_CFA_offset_extended: case DW_CFA_register: case DW_CFA_def_cfa: case DW_CFA_val_offset: { // Operands: ULEB128, ULEB128 // Note: We can not embed getULEB128 directly into function // argument list. getULEB128 changes Offset and order of evaluation // for arguments is unspecified. 
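// For example, addInstruction(Opcode, Data.getULEB128(Offset),
//                             Data.getULEB128(Offset))
// could evaluate the second call first and swap the two operands.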
auto op1 = Data.getULEB128(Offset); auto op2 = Data.getULEB128(Offset); addInstruction(Opcode, op1, op2); break; } case DW_CFA_offset_extended_sf: case DW_CFA_def_cfa_sf: case DW_CFA_val_offset_sf: { // Operands: ULEB128, SLEB128 // Note: see comment for the previous case auto op1 = Data.getULEB128(Offset); auto op2 = (uint64_t)Data.getSLEB128(Offset); addInstruction(Opcode, op1, op2); break; } case DW_CFA_def_cfa_expression: { uint32_t ExprLength = Data.getULEB128(Offset); addInstruction(Opcode, 0); DataExtractor Extractor( Data.getData().slice(*Offset, *Offset + ExprLength), Data.isLittleEndian(), Data.getAddressSize()); Instructions.back().Expression = DWARFExpression( Extractor, Data.getAddressSize(), dwarf::DWARF_VERSION); *Offset += ExprLength; break; } case DW_CFA_expression: case DW_CFA_val_expression: { auto RegNum = Data.getULEB128(Offset); auto BlockLength = Data.getULEB128(Offset); addInstruction(Opcode, RegNum, 0); DataExtractor Extractor( Data.getData().slice(*Offset, *Offset + BlockLength), Data.isLittleEndian(), Data.getAddressSize()); Instructions.back().Expression = DWARFExpression( Extractor, Data.getAddressSize(), dwarf::DWARF_VERSION); *Offset += BlockLength; break; } } } } return Error::success(); } namespace { } // end anonymous namespace ArrayRef CFIProgram::getOperandTypes() { static OperandType OpTypes[DW_CFA_restore+1][2]; static bool Initialized = false; if (Initialized) { return ArrayRef(&OpTypes[0], DW_CFA_restore+1); } Initialized = true; #define DECLARE_OP2(OP, OPTYPE0, OPTYPE1) \ do { \ OpTypes[OP][0] = OPTYPE0; \ OpTypes[OP][1] = OPTYPE1; \ } while (false) #define DECLARE_OP1(OP, OPTYPE0) DECLARE_OP2(OP, OPTYPE0, OT_None) #define DECLARE_OP0(OP) DECLARE_OP1(OP, OT_None) DECLARE_OP1(DW_CFA_set_loc, OT_Address); DECLARE_OP1(DW_CFA_advance_loc, OT_FactoredCodeOffset); DECLARE_OP1(DW_CFA_advance_loc1, OT_FactoredCodeOffset); DECLARE_OP1(DW_CFA_advance_loc2, OT_FactoredCodeOffset); DECLARE_OP1(DW_CFA_advance_loc4, OT_FactoredCodeOffset); DECLARE_OP1(DW_CFA_MIPS_advance_loc8, OT_FactoredCodeOffset); DECLARE_OP2(DW_CFA_def_cfa, OT_Register, OT_Offset); DECLARE_OP2(DW_CFA_def_cfa_sf, OT_Register, OT_SignedFactDataOffset); DECLARE_OP1(DW_CFA_def_cfa_register, OT_Register); DECLARE_OP1(DW_CFA_def_cfa_offset, OT_Offset); DECLARE_OP1(DW_CFA_def_cfa_offset_sf, OT_SignedFactDataOffset); DECLARE_OP1(DW_CFA_def_cfa_expression, OT_Expression); DECLARE_OP1(DW_CFA_undefined, OT_Register); DECLARE_OP1(DW_CFA_same_value, OT_Register); DECLARE_OP2(DW_CFA_offset, OT_Register, OT_UnsignedFactDataOffset); DECLARE_OP2(DW_CFA_offset_extended, OT_Register, OT_UnsignedFactDataOffset); DECLARE_OP2(DW_CFA_offset_extended_sf, OT_Register, OT_SignedFactDataOffset); DECLARE_OP2(DW_CFA_val_offset, OT_Register, OT_UnsignedFactDataOffset); DECLARE_OP2(DW_CFA_val_offset_sf, OT_Register, OT_SignedFactDataOffset); DECLARE_OP2(DW_CFA_register, OT_Register, OT_Register); DECLARE_OP2(DW_CFA_expression, OT_Register, OT_Expression); DECLARE_OP2(DW_CFA_val_expression, OT_Register, OT_Expression); DECLARE_OP1(DW_CFA_restore, OT_Register); DECLARE_OP1(DW_CFA_restore_extended, OT_Register); DECLARE_OP0(DW_CFA_remember_state); DECLARE_OP0(DW_CFA_restore_state); DECLARE_OP0(DW_CFA_GNU_window_save); DECLARE_OP1(DW_CFA_GNU_args_size, OT_Offset); DECLARE_OP0(DW_CFA_nop); #undef DECLARE_OP0 #undef DECLARE_OP1 #undef DECLARE_OP2 return ArrayRef(&OpTypes[0], DW_CFA_restore+1); } /// Print \p Opcode's operand number \p OperandIdx which has value \p Operand. 
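/// How \p Operand is rendered depends on the OperandType reported by
/// getOperandTypes(): factored code offsets are scaled by the CIE's code
/// alignment factor and factored data offsets by its data alignment factor
/// whenever those factors are known.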
void CFIProgram::printOperand(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH, const Instruction &Instr, unsigned OperandIdx, uint64_t Operand) const { assert(OperandIdx < 2); uint8_t Opcode = Instr.Opcode; OperandType Type = getOperandTypes()[Opcode][OperandIdx]; switch (Type) { case OT_Unset: { OS << " Unsupported " << (OperandIdx ? "second" : "first") << " operand to"; auto OpcodeName = CallFrameString(Opcode, Arch); if (!OpcodeName.empty()) OS << " " << OpcodeName; else OS << format(" Opcode %x", Opcode); break; } case OT_None: break; case OT_Address: OS << format(" %" PRIx64, Operand); break; case OT_Offset: // The offsets are all encoded in a unsigned form, but in practice // consumers use them signed. It's most certainly legacy due to // the lack of signed variants in the first Dwarf standards. OS << format(" %+" PRId64, int64_t(Operand)); break; case OT_FactoredCodeOffset: // Always Unsigned if (CodeAlignmentFactor) OS << format(" %" PRId64, Operand * CodeAlignmentFactor); else OS << format(" %" PRId64 "*code_alignment_factor" , Operand); break; case OT_SignedFactDataOffset: if (DataAlignmentFactor) OS << format(" %" PRId64, int64_t(Operand) * DataAlignmentFactor); else OS << format(" %" PRId64 "*data_alignment_factor" , int64_t(Operand)); break; case OT_UnsignedFactDataOffset: if (DataAlignmentFactor) OS << format(" %" PRId64, Operand * DataAlignmentFactor); else OS << format(" %" PRId64 "*data_alignment_factor" , Operand); break; case OT_Register: OS << format(" reg%" PRId64, Operand); break; case OT_Expression: assert(Instr.Expression && "missing DWARFExpression object"); OS << " "; Instr.Expression->print(OS, MRI, IsEH); break; } } void CFIProgram::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH, unsigned IndentLevel) const { for (const auto &Instr : Instructions) { uint8_t Opcode = Instr.Opcode; if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK) Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK; OS.indent(2 * IndentLevel); OS << CallFrameString(Opcode, Arch) << ":"; for (unsigned i = 0; i < Instr.Ops.size(); ++i) printOperand(OS, MRI, IsEH, Instr, i, Instr.Ops[i]); OS << '\n'; } } void CIE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const { OS << format("%08x %08x %08x CIE", (uint32_t)Offset, (uint32_t)Length, DW_CIE_ID) << "\n"; OS << format(" Version: %d\n", Version); OS << " Augmentation: \"" << Augmentation << "\"\n"; if (Version >= 4) { OS << format(" Address size: %u\n", (uint32_t)AddressSize); OS << format(" Segment desc size: %u\n", (uint32_t)SegmentDescriptorSize); } OS << format(" Code alignment factor: %u\n", (uint32_t)CodeAlignmentFactor); OS << format(" Data alignment factor: %d\n", (int32_t)DataAlignmentFactor); OS << format(" Return address column: %d\n", (int32_t)ReturnAddressRegister); if (Personality) - OS << format(" Personality Address: %08x\n", *Personality); + OS << format(" Personality Address: %016" PRIx64 "\n", *Personality); if (!AugmentationData.empty()) { OS << " Augmentation data: "; for (uint8_t Byte : AugmentationData) OS << ' ' << hexdigit(Byte >> 4) << hexdigit(Byte & 0xf); OS << "\n"; } OS << "\n"; CFIs.dump(OS, MRI, IsEH); OS << "\n"; } void FDE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const { OS << format("%08x %08x %08x FDE ", (uint32_t)Offset, (uint32_t)Length, (int32_t)LinkedCIEOffset); OS << format("cie=%08x pc=%08x...%08x\n", (int32_t)LinkedCIEOffset, (uint32_t)InitialLocation, (uint32_t)InitialLocation + (uint32_t)AddressRange); if (LSDAAddress) - OS << format(" LSDA Address: %08x\n", *LSDAAddress); 
+ OS << format(" LSDA Address: %016" PRIx64 "\n", *LSDAAddress); CFIs.dump(OS, MRI, IsEH); OS << "\n"; } DWARFDebugFrame::DWARFDebugFrame(Triple::ArchType Arch, bool IsEH, uint64_t EHFrameAddress) : Arch(Arch), IsEH(IsEH), EHFrameAddress(EHFrameAddress) {} DWARFDebugFrame::~DWARFDebugFrame() = default; static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data, uint32_t Offset, int Length) { errs() << "DUMP: "; for (int i = 0; i < Length; ++i) { uint8_t c = Data.getU8(&Offset); errs().write_hex(c); errs() << " "; } errs() << "\n"; } // This is a workaround for old compilers which do not allow // noreturn attribute usage in lambdas. Once the support for those // compilers are phased out, we can remove this and return back to // a ReportError lambda: [StartOffset](const char *ErrorMsg). static void LLVM_ATTRIBUTE_NORETURN ReportError(uint32_t StartOffset, const char *ErrorMsg) { std::string Str; raw_string_ostream OS(Str); OS << format(ErrorMsg, StartOffset); OS.flush(); report_fatal_error(Str); } void DWARFDebugFrame::parse(DWARFDataExtractor Data) { uint32_t Offset = 0; DenseMap CIEs; while (Data.isValidOffset(Offset)) { uint32_t StartOffset = Offset; bool IsDWARF64 = false; uint64_t Length = Data.getU32(&Offset); uint64_t Id; if (Length == UINT32_MAX) { // DWARF-64 is distinguished by the first 32 bits of the initial length // field being 0xffffffff. Then, the next 64 bits are the actual entry // length. IsDWARF64 = true; Length = Data.getU64(&Offset); } // At this point, Offset points to the next field after Length. // Length is the structure size excluding itself. Compute an offset one // past the end of the structure (needed to know how many instructions to // read). // TODO: For honest DWARF64 support, DataExtractor will have to treat // offset_ptr as uint64_t* uint32_t StartStructureOffset = Offset; uint32_t EndStructureOffset = Offset + static_cast(Length); // The Id field's size depends on the DWARF format Id = Data.getUnsigned(&Offset, (IsDWARF64 && !IsEH) ? 8 : 4); bool IsCIE = ((IsDWARF64 && Id == DW64_CIE_ID) || Id == DW_CIE_ID || (IsEH && !Id)); if (IsCIE) { uint8_t Version = Data.getU8(&Offset); const char *Augmentation = Data.getCStr(&Offset); StringRef AugmentationString(Augmentation ? Augmentation : ""); uint8_t AddressSize = Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset); Data.setAddressSize(AddressSize); uint8_t SegmentDescriptorSize = Version < 4 ? 0 : Data.getU8(&Offset); uint64_t CodeAlignmentFactor = Data.getULEB128(&Offset); int64_t DataAlignmentFactor = Data.getSLEB128(&Offset); uint64_t ReturnAddressRegister = Version == 1 ? Data.getU8(&Offset) : Data.getULEB128(&Offset); // Parse the augmentation data for EH CIEs StringRef AugmentationData(""); uint32_t FDEPointerEncoding = DW_EH_PE_absptr; uint32_t LSDAPointerEncoding = DW_EH_PE_omit; Optional Personality; Optional PersonalityEncoding; if (IsEH) { Optional AugmentationLength; uint32_t StartAugmentationOffset; uint32_t EndAugmentationOffset; // Walk the augmentation string to get all the augmentation data. for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) { switch (AugmentationString[i]) { default: ReportError(StartOffset, "Unknown augmentation character in entry at %lx"); case 'L': LSDAPointerEncoding = Data.getU8(&Offset); break; case 'P': { if (Personality) ReportError(StartOffset, "Duplicate personality in entry at %lx"); PersonalityEncoding = Data.getU8(&Offset); Personality = Data.getEncodedPointer( &Offset, *PersonalityEncoding, EHFrameAddress ? 
EHFrameAddress + Offset : 0); break; } case 'R': FDEPointerEncoding = Data.getU8(&Offset); break; case 'S': // Current frame is a signal trampoline. break; case 'z': if (i) ReportError(StartOffset, "'z' must be the first character at %lx"); // Parse the augmentation length first. We only parse it if // the string contains a 'z'. AugmentationLength = Data.getULEB128(&Offset); StartAugmentationOffset = Offset; EndAugmentationOffset = Offset + static_cast(*AugmentationLength); break; case 'B': // B-Key is used for signing functions associated with this // augmentation string break; } } if (AugmentationLength.hasValue()) { if (Offset != EndAugmentationOffset) ReportError(StartOffset, "Parsing augmentation data at %lx failed"); AugmentationData = Data.getData().slice(StartAugmentationOffset, EndAugmentationOffset); } } auto Cie = llvm::make_unique( StartOffset, Length, Version, AugmentationString, AddressSize, SegmentDescriptorSize, CodeAlignmentFactor, DataAlignmentFactor, ReturnAddressRegister, AugmentationData, FDEPointerEncoding, LSDAPointerEncoding, Personality, PersonalityEncoding, Arch); CIEs[StartOffset] = Cie.get(); Entries.emplace_back(std::move(Cie)); } else { // FDE uint64_t CIEPointer = Id; uint64_t InitialLocation = 0; uint64_t AddressRange = 0; Optional LSDAAddress; CIE *Cie = CIEs[IsEH ? (StartStructureOffset - CIEPointer) : CIEPointer]; if (IsEH) { // The address size is encoded in the CIE we reference. if (!Cie) ReportError(StartOffset, "Parsing FDE data at %lx failed due to missing CIE"); if (auto Val = Data.getEncodedPointer( &Offset, Cie->getFDEPointerEncoding(), EHFrameAddress ? EHFrameAddress + Offset : 0)) { InitialLocation = *Val; } if (auto Val = Data.getEncodedPointer( &Offset, Cie->getFDEPointerEncoding(), 0)) { AddressRange = *Val; } StringRef AugmentationString = Cie->getAugmentationString(); if (!AugmentationString.empty()) { // Parse the augmentation length and data for this FDE. uint64_t AugmentationLength = Data.getULEB128(&Offset); uint32_t EndAugmentationOffset = Offset + static_cast(AugmentationLength); // Decode the LSDA if the CIE augmentation string said we should. if (Cie->getLSDAPointerEncoding() != DW_EH_PE_omit) { LSDAAddress = Data.getEncodedPointer( &Offset, Cie->getLSDAPointerEncoding(), EHFrameAddress ? 
Offset + EHFrameAddress : 0); } if (Offset != EndAugmentationOffset) ReportError(StartOffset, "Parsing augmentation data at %lx failed"); } } else { InitialLocation = Data.getAddress(&Offset); AddressRange = Data.getAddress(&Offset); } Entries.emplace_back(new FDE(StartOffset, Length, CIEPointer, InitialLocation, AddressRange, Cie, LSDAAddress, Arch)); } if (Error E = Entries.back()->cfis().parse(Data, &Offset, EndStructureOffset)) { report_fatal_error(toString(std::move(E))); } if (Offset != EndStructureOffset) ReportError(StartOffset, "Parsing entry instructions at %lx failed"); } } FrameEntry *DWARFDebugFrame::getEntryAtOffset(uint64_t Offset) const { auto It = std::lower_bound(Entries.begin(), Entries.end(), Offset, [](const std::unique_ptr &E, uint64_t Offset) { return E->getOffset() < Offset; }); if (It != Entries.end() && (*It)->getOffset() == Offset) return It->get(); return nullptr; } void DWARFDebugFrame::dump(raw_ostream &OS, const MCRegisterInfo *MRI, Optional Offset) const { if (Offset) { if (auto *Entry = getEntryAtOffset(*Offset)) Entry->dump(OS, MRI, IsEH); return; } OS << "\n"; for (const auto &Entry : Entries) Entry->dump(OS, MRI, IsEH); } Index: vendor/llvm/dist-release_80/lib/MC/ELFObjectWriter.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/MC/ELFObjectWriter.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/MC/ELFObjectWriter.cpp (revision 348932) @@ -1,1545 +1,1546 @@ //===- lib/MC/ELFObjectWriter.cpp - ELF File Writer -----------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements ELF object file writer information. 
// //===----------------------------------------------------------------------===// #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCFragment.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Host.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/StringSaver.h" #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include #include #include #include #include using namespace llvm; #undef DEBUG_TYPE #define DEBUG_TYPE "reloc-info" namespace { using SectionIndexMapTy = DenseMap; class ELFObjectWriter; struct ELFWriter; bool isDwoSection(const MCSectionELF &Sec) { return Sec.getSectionName().endswith(".dwo"); } class SymbolTableWriter { ELFWriter &EWriter; bool Is64Bit; // indexes we are going to write to .symtab_shndx. std::vector ShndxIndexes; // The numbel of symbols written so far. unsigned NumWritten; void createSymtabShndx(); template void write(T Value); public: SymbolTableWriter(ELFWriter &EWriter, bool Is64Bit); void writeSymbol(uint32_t name, uint8_t info, uint64_t value, uint64_t size, uint8_t other, uint32_t shndx, bool Reserved); ArrayRef getShndxIndexes() const { return ShndxIndexes; } }; struct ELFWriter { ELFObjectWriter &OWriter; support::endian::Writer W; enum DwoMode { AllSections, NonDwoOnly, DwoOnly, } Mode; static uint64_t SymbolValue(const MCSymbol &Sym, const MCAsmLayout &Layout); static bool isInSymtab(const MCAsmLayout &Layout, const MCSymbolELF &Symbol, bool Used, bool Renamed); /// Helper struct for containing some precomputed information on symbols. struct ELFSymbolData { const MCSymbolELF *Symbol; uint32_t SectionIndex; StringRef Name; // Support lexicographic sorting. bool operator<(const ELFSymbolData &RHS) const { unsigned LHSType = Symbol->getType(); unsigned RHSType = RHS.Symbol->getType(); if (LHSType == ELF::STT_SECTION && RHSType != ELF::STT_SECTION) return false; if (LHSType != ELF::STT_SECTION && RHSType == ELF::STT_SECTION) return true; if (LHSType == ELF::STT_SECTION && RHSType == ELF::STT_SECTION) return SectionIndex < RHS.SectionIndex; return Name < RHS.Name; } }; /// @} /// @name Symbol Table Data /// @{ StringTableBuilder StrTabBuilder{StringTableBuilder::ELF}; /// @} // This holds the symbol table index of the last local symbol. unsigned LastLocalSymbolIndex; // This holds the .strtab section index. unsigned StringTableIndex; // This holds the .symtab section index. 
unsigned SymbolTableIndex; // Sections in the order they are to be output in the section table. std::vector SectionTable; unsigned addToSectionTable(const MCSectionELF *Sec); // TargetObjectWriter wrappers. bool is64Bit() const; bool hasRelocationAddend() const; void align(unsigned Alignment); bool maybeWriteCompression(uint64_t Size, SmallVectorImpl &CompressedContents, bool ZLibStyle, unsigned Alignment); public: ELFWriter(ELFObjectWriter &OWriter, raw_pwrite_stream &OS, bool IsLittleEndian, DwoMode Mode) : OWriter(OWriter), W(OS, IsLittleEndian ? support::little : support::big), Mode(Mode) {} void WriteWord(uint64_t Word) { if (is64Bit()) W.write(Word); else W.write(Word); } template void write(T Val) { W.write(Val); } void writeHeader(const MCAssembler &Asm); void writeSymbol(SymbolTableWriter &Writer, uint32_t StringIndex, ELFSymbolData &MSD, const MCAsmLayout &Layout); // Start and end offset of each section using SectionOffsetsTy = std::map>; // Map from a signature symbol to the group section index using RevGroupMapTy = DenseMap; /// Compute the symbol table data /// /// \param Asm - The assembler. /// \param SectionIndexMap - Maps a section to its index. /// \param RevGroupMap - Maps a signature symbol to the group section. void computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout, const SectionIndexMapTy &SectionIndexMap, const RevGroupMapTy &RevGroupMap, SectionOffsetsTy &SectionOffsets); void writeAddrsigSection(); MCSectionELF *createRelocationSection(MCContext &Ctx, const MCSectionELF &Sec); const MCSectionELF *createStringTable(MCContext &Ctx); void writeSectionHeader(const MCAsmLayout &Layout, const SectionIndexMapTy &SectionIndexMap, const SectionOffsetsTy &SectionOffsets); void writeSectionData(const MCAssembler &Asm, MCSection &Sec, const MCAsmLayout &Layout); void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags, uint64_t Address, uint64_t Offset, uint64_t Size, uint32_t Link, uint32_t Info, uint64_t Alignment, uint64_t EntrySize); void writeRelocations(const MCAssembler &Asm, const MCSectionELF &Sec); uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout); void writeSection(const SectionIndexMapTy &SectionIndexMap, uint32_t GroupSymbolIndex, uint64_t Offset, uint64_t Size, const MCSectionELF &Section); }; class ELFObjectWriter : public MCObjectWriter { /// The target specific ELF writer instance. 
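/// Provided by the target as an MCELFObjectTargetWriter; it supplies the
/// e_machine value, the OSABI byte, relocation types, and whether RELA
/// addends are used.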
std::unique_ptr TargetObjectWriter; DenseMap> Relocations; DenseMap Renames; bool EmitAddrsigSection = false; std::vector AddrsigSyms; bool hasRelocationAddend() const; bool shouldRelocateWithSymbol(const MCAssembler &Asm, const MCSymbolRefExpr *RefA, const MCSymbolELF *Sym, uint64_t C, unsigned Type) const; public: ELFObjectWriter(std::unique_ptr MOTW) : TargetObjectWriter(std::move(MOTW)) {} void reset() override { Relocations.clear(); Renames.clear(); MCObjectWriter::reset(); } bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB, bool InSet, bool IsPCRel) const override; virtual bool checkRelocation(MCContext &Ctx, SMLoc Loc, const MCSectionELF *From, const MCSectionELF *To) { return true; } void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) override; void executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) override; void emitAddrsigSection() override { EmitAddrsigSection = true; } void addAddrsigSymbol(const MCSymbol *Sym) override { AddrsigSyms.push_back(Sym); } friend struct ELFWriter; }; class ELFSingleObjectWriter : public ELFObjectWriter { raw_pwrite_stream &OS; bool IsLittleEndian; public: ELFSingleObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS, bool IsLittleEndian) : ELFObjectWriter(std::move(MOTW)), OS(OS), IsLittleEndian(IsLittleEndian) {} uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override { return ELFWriter(*this, OS, IsLittleEndian, ELFWriter::AllSections) .writeObject(Asm, Layout); } friend struct ELFWriter; }; class ELFDwoObjectWriter : public ELFObjectWriter { raw_pwrite_stream &OS, &DwoOS; bool IsLittleEndian; public: ELFDwoObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS, raw_pwrite_stream &DwoOS, bool IsLittleEndian) : ELFObjectWriter(std::move(MOTW)), OS(OS), DwoOS(DwoOS), IsLittleEndian(IsLittleEndian) {} virtual bool checkRelocation(MCContext &Ctx, SMLoc Loc, const MCSectionELF *From, const MCSectionELF *To) override { if (isDwoSection(*From)) { Ctx.reportError(Loc, "A dwo section may not contain relocations"); return false; } if (To && isDwoSection(*To)) { Ctx.reportError(Loc, "A relocation may not refer to a dwo section"); return false; } return true; } uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override { uint64_t Size = ELFWriter(*this, OS, IsLittleEndian, ELFWriter::NonDwoOnly) .writeObject(Asm, Layout); Size += ELFWriter(*this, DwoOS, IsLittleEndian, ELFWriter::DwoOnly) .writeObject(Asm, Layout); return Size; } }; } // end anonymous namespace void ELFWriter::align(unsigned Alignment) { uint64_t Padding = OffsetToAlignment(W.OS.tell(), Alignment); W.OS.write_zeros(Padding); } unsigned ELFWriter::addToSectionTable(const MCSectionELF *Sec) { SectionTable.push_back(Sec); StrTabBuilder.add(Sec->getSectionName()); return SectionTable.size(); } void SymbolTableWriter::createSymtabShndx() { if (!ShndxIndexes.empty()) return; ShndxIndexes.resize(NumWritten); } template void SymbolTableWriter::write(T Value) { EWriter.write(Value); } SymbolTableWriter::SymbolTableWriter(ELFWriter &EWriter, bool Is64Bit) : EWriter(EWriter), Is64Bit(Is64Bit), NumWritten(0) {} void SymbolTableWriter::writeSymbol(uint32_t name, uint8_t info, uint64_t value, uint64_t size, uint8_t other, uint32_t shndx, bool Reserved) { bool LargeIndex = shndx >= ELF::SHN_LORESERVE && !Reserved; if (LargeIndex) createSymtabShndx(); if (!ShndxIndexes.empty()) { if 
(LargeIndex) ShndxIndexes.push_back(shndx); else ShndxIndexes.push_back(0); } uint16_t Index = LargeIndex ? uint16_t(ELF::SHN_XINDEX) : shndx; if (Is64Bit) { write(name); // st_name write(info); // st_info write(other); // st_other write(Index); // st_shndx write(value); // st_value write(size); // st_size } else { write(name); // st_name write(uint32_t(value)); // st_value write(uint32_t(size)); // st_size write(info); // st_info write(other); // st_other write(Index); // st_shndx } ++NumWritten; } bool ELFWriter::is64Bit() const { return OWriter.TargetObjectWriter->is64Bit(); } bool ELFWriter::hasRelocationAddend() const { return OWriter.hasRelocationAddend(); } // Emit the ELF header. void ELFWriter::writeHeader(const MCAssembler &Asm) { // ELF Header // ---------- // // Note // ---- // emitWord method behaves differently for ELF32 and ELF64, writing // 4 bytes in the former and 8 in the latter. W.OS << ELF::ElfMagic; // e_ident[EI_MAG0] to e_ident[EI_MAG3] W.OS << char(is64Bit() ? ELF::ELFCLASS64 : ELF::ELFCLASS32); // e_ident[EI_CLASS] // e_ident[EI_DATA] W.OS << char(W.Endian == support::little ? ELF::ELFDATA2LSB : ELF::ELFDATA2MSB); W.OS << char(ELF::EV_CURRENT); // e_ident[EI_VERSION] // e_ident[EI_OSABI] W.OS << char(OWriter.TargetObjectWriter->getOSABI()); W.OS << char(0); // e_ident[EI_ABIVERSION] W.OS.write_zeros(ELF::EI_NIDENT - ELF::EI_PAD); W.write(ELF::ET_REL); // e_type W.write(OWriter.TargetObjectWriter->getEMachine()); // e_machine = target W.write(ELF::EV_CURRENT); // e_version WriteWord(0); // e_entry, no entry point in .o file WriteWord(0); // e_phoff, no program header for .o WriteWord(0); // e_shoff = sec hdr table off in bytes // e_flags = whatever the target wants W.write(Asm.getELFHeaderEFlags()); // e_ehsize = ELF header size W.write(is64Bit() ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr)); W.write(0); // e_phentsize = prog header entry size W.write(0); // e_phnum = # prog header entries = 0 // e_shentsize = Section header entry size W.write(is64Bit() ? 
sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr)); // e_shnum = # of section header ents W.write(0); // e_shstrndx = Section # of '.shstrtab' assert(StringTableIndex < ELF::SHN_LORESERVE); W.write(StringTableIndex); } uint64_t ELFWriter::SymbolValue(const MCSymbol &Sym, const MCAsmLayout &Layout) { if (Sym.isCommon() && Sym.isExternal()) return Sym.getCommonAlignment(); uint64_t Res; if (!Layout.getSymbolOffset(Sym, Res)) return 0; if (Layout.getAssembler().isThumbFunc(&Sym)) Res |= 1; return Res; } static uint8_t mergeTypeForSet(uint8_t origType, uint8_t newType) { uint8_t Type = newType; // Propagation rules: // IFUNC > FUNC > OBJECT > NOTYPE // TLS_OBJECT > OBJECT > NOTYPE // // dont let the new type degrade the old type switch (origType) { default: break; case ELF::STT_GNU_IFUNC: if (Type == ELF::STT_FUNC || Type == ELF::STT_OBJECT || Type == ELF::STT_NOTYPE || Type == ELF::STT_TLS) Type = ELF::STT_GNU_IFUNC; break; case ELF::STT_FUNC: if (Type == ELF::STT_OBJECT || Type == ELF::STT_NOTYPE || Type == ELF::STT_TLS) Type = ELF::STT_FUNC; break; case ELF::STT_OBJECT: if (Type == ELF::STT_NOTYPE) Type = ELF::STT_OBJECT; break; case ELF::STT_TLS: if (Type == ELF::STT_OBJECT || Type == ELF::STT_NOTYPE || Type == ELF::STT_GNU_IFUNC || Type == ELF::STT_FUNC) Type = ELF::STT_TLS; break; } return Type; } void ELFWriter::writeSymbol(SymbolTableWriter &Writer, uint32_t StringIndex, ELFSymbolData &MSD, const MCAsmLayout &Layout) { const auto &Symbol = cast(*MSD.Symbol); const MCSymbolELF *Base = cast_or_null(Layout.getBaseSymbol(Symbol)); // This has to be in sync with when computeSymbolTable uses SHN_ABS or // SHN_COMMON. bool IsReserved = !Base || Symbol.isCommon(); // Binding and Type share the same byte as upper and lower nibbles uint8_t Binding = Symbol.getBinding(); uint8_t Type = Symbol.getType(); if (Base) { Type = mergeTypeForSet(Type, Base->getType()); } uint8_t Info = (Binding << 4) | Type; // Other and Visibility share the same byte with Visibility using the lower // 2 bits uint8_t Visibility = Symbol.getVisibility(); uint8_t Other = Symbol.getOther() | Visibility; uint64_t Value = SymbolValue(*MSD.Symbol, Layout); uint64_t Size = 0; const MCExpr *ESize = MSD.Symbol->getSize(); if (!ESize && Base) ESize = Base->getSize(); if (ESize) { int64_t Res; if (!ESize->evaluateKnownAbsolute(Res, Layout)) report_fatal_error("Size expression must be absolute."); Size = Res; } // Write out the symbol table entry Writer.writeSymbol(StringIndex, Info, Value, Size, Other, MSD.SectionIndex, IsReserved); } // True if the assembler knows nothing about the final value of the symbol. // This doesn't cover the comdat issues, since in those cases the assembler // can at least know that all symbols in the section will move together. static bool isWeak(const MCSymbolELF &Sym) { if (Sym.getType() == ELF::STT_GNU_IFUNC) return true; switch (Sym.getBinding()) { default: llvm_unreachable("Unknown binding"); case ELF::STB_LOCAL: return false; case ELF::STB_GLOBAL: return false; case ELF::STB_WEAK: case ELF::STB_GNU_UNIQUE: return true; } } bool ELFWriter::isInSymtab(const MCAsmLayout &Layout, const MCSymbolELF &Symbol, bool Used, bool Renamed) { if (Symbol.isVariable()) { const MCExpr *Expr = Symbol.getVariableValue(); if (const MCSymbolRefExpr *Ref = dyn_cast(Expr)) { if (Ref->getKind() == MCSymbolRefExpr::VK_WEAKREF) return false; } } if (Used) return true; if (Renamed) return false; if (Symbol.isVariable() && Symbol.isUndefined()) { // FIXME: this is here just to diagnose the case of a var = commmon_sym. 
Layout.getBaseSymbol(Symbol); return false; } if (Symbol.isUndefined() && !Symbol.isBindingSet()) return false; if (Symbol.isTemporary()) return false; if (Symbol.getType() == ELF::STT_SECTION) return false; return true; } void ELFWriter::computeSymbolTable( MCAssembler &Asm, const MCAsmLayout &Layout, const SectionIndexMapTy &SectionIndexMap, const RevGroupMapTy &RevGroupMap, SectionOffsetsTy &SectionOffsets) { MCContext &Ctx = Asm.getContext(); SymbolTableWriter Writer(*this, is64Bit()); // Symbol table unsigned EntrySize = is64Bit() ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32; MCSectionELF *SymtabSection = Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0, EntrySize, ""); SymtabSection->setAlignment(is64Bit() ? 8 : 4); SymbolTableIndex = addToSectionTable(SymtabSection); align(SymtabSection->getAlignment()); uint64_t SecStart = W.OS.tell(); // The first entry is the undefined symbol entry. Writer.writeSymbol(0, 0, 0, 0, 0, 0, false); std::vector LocalSymbolData; std::vector ExternalSymbolData; // Add the data for the symbols. bool HasLargeSectionIndex = false; for (const MCSymbol &S : Asm.symbols()) { const auto &Symbol = cast(S); bool Used = Symbol.isUsedInReloc(); bool WeakrefUsed = Symbol.isWeakrefUsedInReloc(); bool isSignature = Symbol.isSignature(); if (!isInSymtab(Layout, Symbol, Used || WeakrefUsed || isSignature, OWriter.Renames.count(&Symbol))) continue; if (Symbol.isTemporary() && Symbol.isUndefined()) { Ctx.reportError(SMLoc(), "Undefined temporary symbol"); continue; } ELFSymbolData MSD; MSD.Symbol = cast(&Symbol); bool Local = Symbol.getBinding() == ELF::STB_LOCAL; assert(Local || !Symbol.isTemporary()); if (Symbol.isAbsolute()) { MSD.SectionIndex = ELF::SHN_ABS; } else if (Symbol.isCommon()) { assert(!Local); MSD.SectionIndex = ELF::SHN_COMMON; } else if (Symbol.isUndefined()) { if (isSignature && !Used) { MSD.SectionIndex = RevGroupMap.lookup(&Symbol); if (MSD.SectionIndex >= ELF::SHN_LORESERVE) HasLargeSectionIndex = true; } else { MSD.SectionIndex = ELF::SHN_UNDEF; } } else { const MCSectionELF &Section = static_cast(Symbol.getSection()); // We may end up with a situation when section symbol is technically // defined, but should not be. That happens because we explicitly // pre-create few .debug_* sections to have accessors. // And if these sections were not really defined in the code, but were // referenced, we simply error out. if (!Section.isRegistered()) { assert(static_cast(Symbol).getType() == ELF::STT_SECTION); Ctx.reportError(SMLoc(), "Undefined section reference: " + Symbol.getName()); continue; } if (Mode == NonDwoOnly && isDwoSection(Section)) continue; MSD.SectionIndex = SectionIndexMap.lookup(&Section); assert(MSD.SectionIndex && "Invalid section index!"); if (MSD.SectionIndex >= ELF::SHN_LORESERVE) HasLargeSectionIndex = true; } StringRef Name = Symbol.getName(); // Sections have their own string table if (Symbol.getType() != ELF::STT_SECTION) { MSD.Name = Name; StrTabBuilder.add(Name); } if (Local) LocalSymbolData.push_back(MSD); else ExternalSymbolData.push_back(MSD); } // This holds the .symtab_shndx section index. 
unsigned SymtabShndxSectionIndex = 0; if (HasLargeSectionIndex) { MCSectionELF *SymtabShndxSection = Ctx.getELFSection(".symtab_shndxr", ELF::SHT_SYMTAB_SHNDX, 0, 4, ""); SymtabShndxSectionIndex = addToSectionTable(SymtabShndxSection); SymtabShndxSection->setAlignment(4); } ArrayRef FileNames = Asm.getFileNames(); for (const std::string &Name : FileNames) StrTabBuilder.add(Name); StrTabBuilder.finalize(); // File symbols are emitted first and handled separately from normal symbols, // i.e. a non-STT_FILE symbol with the same name may appear. for (const std::string &Name : FileNames) Writer.writeSymbol(StrTabBuilder.getOffset(Name), ELF::STT_FILE | ELF::STB_LOCAL, 0, 0, ELF::STV_DEFAULT, ELF::SHN_ABS, true); // Symbols are required to be in lexicographic order. array_pod_sort(LocalSymbolData.begin(), LocalSymbolData.end()); array_pod_sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); // Set the symbol indices. Local symbols must come before all other // symbols with non-local bindings. unsigned Index = FileNames.size() + 1; for (ELFSymbolData &MSD : LocalSymbolData) { unsigned StringIndex = MSD.Symbol->getType() == ELF::STT_SECTION ? 0 : StrTabBuilder.getOffset(MSD.Name); MSD.Symbol->setIndex(Index++); writeSymbol(Writer, StringIndex, MSD, Layout); } // Write the symbol table entries. LastLocalSymbolIndex = Index; for (ELFSymbolData &MSD : ExternalSymbolData) { unsigned StringIndex = StrTabBuilder.getOffset(MSD.Name); MSD.Symbol->setIndex(Index++); writeSymbol(Writer, StringIndex, MSD, Layout); assert(MSD.Symbol->getBinding() != ELF::STB_LOCAL); } uint64_t SecEnd = W.OS.tell(); SectionOffsets[SymtabSection] = std::make_pair(SecStart, SecEnd); ArrayRef ShndxIndexes = Writer.getShndxIndexes(); if (ShndxIndexes.empty()) { assert(SymtabShndxSectionIndex == 0); return; } assert(SymtabShndxSectionIndex != 0); SecStart = W.OS.tell(); const MCSectionELF *SymtabShndxSection = SectionTable[SymtabShndxSectionIndex - 1]; for (uint32_t Index : ShndxIndexes) write(Index); SecEnd = W.OS.tell(); SectionOffsets[SymtabShndxSection] = std::make_pair(SecStart, SecEnd); } void ELFWriter::writeAddrsigSection() { for (const MCSymbol *Sym : OWriter.AddrsigSyms) encodeULEB128(Sym->getIndex(), W.OS); } MCSectionELF *ELFWriter::createRelocationSection(MCContext &Ctx, const MCSectionELF &Sec) { if (OWriter.Relocations[&Sec].empty()) return nullptr; const StringRef SectionName = Sec.getSectionName(); std::string RelaSectionName = hasRelocationAddend() ? ".rela" : ".rel"; RelaSectionName += SectionName; unsigned EntrySize; if (hasRelocationAddend()) EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rela) : sizeof(ELF::Elf32_Rela); else EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel); unsigned Flags = 0; if (Sec.getFlags() & ELF::SHF_GROUP) Flags = ELF::SHF_GROUP; MCSectionELF *RelaSection = Ctx.createELFRelSection( RelaSectionName, hasRelocationAddend() ? ELF::SHT_RELA : ELF::SHT_REL, Flags, EntrySize, Sec.getGroup(), &Sec); RelaSection->setAlignment(is64Bit() ? 8 : 4); return RelaSection; } // Include the debug info compression header. bool ELFWriter::maybeWriteCompression( uint64_t Size, SmallVectorImpl &CompressedContents, bool ZLibStyle, unsigned Alignment) { if (ZLibStyle) { uint64_t HdrSize = is64Bit() ? sizeof(ELF::Elf32_Chdr) : sizeof(ELF::Elf64_Chdr); if (Size <= HdrSize + CompressedContents.size()) return false; // Platform specific header is followed by compressed data. if (is64Bit()) { // Write Elf64_Chdr header. 
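// Field order per the ELF gABI: ch_type, ch_reserved, ch_size, ch_addralign.
// (The 32-bit Elf32_Chdr written below has no ch_reserved field.)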
write(static_cast(ELF::ELFCOMPRESS_ZLIB)); write(static_cast(0)); // ch_reserved field. write(static_cast(Size)); write(static_cast(Alignment)); } else { // Write Elf32_Chdr header otherwise. write(static_cast(ELF::ELFCOMPRESS_ZLIB)); write(static_cast(Size)); write(static_cast(Alignment)); } return true; } // "ZLIB" followed by 8 bytes representing the uncompressed size of the section, // useful for consumers to preallocate a buffer to decompress into. const StringRef Magic = "ZLIB"; if (Size <= Magic.size() + sizeof(Size) + CompressedContents.size()) return false; W.OS << Magic; support::endian::write(W.OS, Size, support::big); return true; } void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec, const MCAsmLayout &Layout) { MCSectionELF &Section = static_cast(Sec); StringRef SectionName = Section.getSectionName(); auto &MC = Asm.getContext(); const auto &MAI = MC.getAsmInfo(); // Compressing debug_frame requires handling alignment fragments which is // more work (possibly generalizing MCAssembler.cpp:writeFragment to allow // for writing to arbitrary buffers) for little benefit. bool CompressionEnabled = MAI->compressDebugSections() != DebugCompressionType::None; if (!CompressionEnabled || !SectionName.startswith(".debug_") || SectionName == ".debug_frame") { Asm.writeSectionData(W.OS, &Section, Layout); return; } assert((MAI->compressDebugSections() == DebugCompressionType::Z || MAI->compressDebugSections() == DebugCompressionType::GNU) && "expected zlib or zlib-gnu style compression"); SmallVector UncompressedData; raw_svector_ostream VecOS(UncompressedData); Asm.writeSectionData(VecOS, &Section, Layout); SmallVector CompressedContents; if (Error E = zlib::compress( StringRef(UncompressedData.data(), UncompressedData.size()), CompressedContents)) { consumeError(std::move(E)); W.OS << UncompressedData; return; } bool ZlibStyle = MAI->compressDebugSections() == DebugCompressionType::Z; if (!maybeWriteCompression(UncompressedData.size(), CompressedContents, ZlibStyle, Sec.getAlignment())) { W.OS << UncompressedData; return; } if (ZlibStyle) // Set the compressed flag. That is zlib style. Section.setFlags(Section.getFlags() | ELF::SHF_COMPRESSED); else // Add "z" prefix to section name. This is zlib-gnu style. MC.renameELFSection(&Section, (".z" + SectionName.drop_front(1)).str()); W.OS << CompressedContents; } void ELFWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags, uint64_t Address, uint64_t Offset, uint64_t Size, uint32_t Link, uint32_t Info, uint64_t Alignment, uint64_t EntrySize) { W.write(Name); // sh_name: index into string table W.write(Type); // sh_type WriteWord(Flags); // sh_flags WriteWord(Address); // sh_addr WriteWord(Offset); // sh_offset WriteWord(Size); // sh_size W.write(Link); // sh_link W.write(Info); // sh_info WriteWord(Alignment); // sh_addralign WriteWord(EntrySize); // sh_entsize } void ELFWriter::writeRelocations(const MCAssembler &Asm, const MCSectionELF &Sec) { std::vector &Relocs = OWriter.Relocations[&Sec]; // We record relocations by pushing to the end of a vector. Reverse the vector // to get the relocations in the order they were created. // In most cases that is not important, but it can be for special sections // (.eh_frame) or specific relocations (TLS optimizations on SystemZ). std::reverse(Relocs.begin(), Relocs.end()); // Sort the relocation entries. MIPS needs this. 
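// (On MIPS, for instance, each HI16 relocation must be immediately followed
// by its matching LO16, so the target writer is given a chance to reorder.)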
OWriter.TargetObjectWriter->sortRelocs(Asm, Relocs); for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { const ELFRelocationEntry &Entry = Relocs[e - i - 1]; unsigned Index = Entry.Symbol ? Entry.Symbol->getIndex() : 0; if (is64Bit()) { write(Entry.Offset); if (OWriter.TargetObjectWriter->getEMachine() == ELF::EM_MIPS) { write(uint32_t(Index)); write(OWriter.TargetObjectWriter->getRSsym(Entry.Type)); write(OWriter.TargetObjectWriter->getRType3(Entry.Type)); write(OWriter.TargetObjectWriter->getRType2(Entry.Type)); write(OWriter.TargetObjectWriter->getRType(Entry.Type)); } else { struct ELF::Elf64_Rela ERE64; ERE64.setSymbolAndType(Index, Entry.Type); write(ERE64.r_info); } if (hasRelocationAddend()) write(Entry.Addend); } else { write(uint32_t(Entry.Offset)); struct ELF::Elf32_Rela ERE32; ERE32.setSymbolAndType(Index, Entry.Type); write(ERE32.r_info); if (hasRelocationAddend()) write(uint32_t(Entry.Addend)); if (OWriter.TargetObjectWriter->getEMachine() == ELF::EM_MIPS) { if (uint32_t RType = OWriter.TargetObjectWriter->getRType2(Entry.Type)) { write(uint32_t(Entry.Offset)); ERE32.setSymbolAndType(0, RType); write(ERE32.r_info); write(uint32_t(0)); } if (uint32_t RType = OWriter.TargetObjectWriter->getRType3(Entry.Type)) { write(uint32_t(Entry.Offset)); ERE32.setSymbolAndType(0, RType); write(ERE32.r_info); write(uint32_t(0)); } } } } } const MCSectionELF *ELFWriter::createStringTable(MCContext &Ctx) { const MCSectionELF *StrtabSection = SectionTable[StringTableIndex - 1]; StrTabBuilder.write(W.OS); return StrtabSection; } void ELFWriter::writeSection(const SectionIndexMapTy &SectionIndexMap, uint32_t GroupSymbolIndex, uint64_t Offset, uint64_t Size, const MCSectionELF &Section) { uint64_t sh_link = 0; uint64_t sh_info = 0; switch(Section.getType()) { default: // Nothing to do. break; case ELF::SHT_DYNAMIC: llvm_unreachable("SHT_DYNAMIC in a relocatable object"); case ELF::SHT_REL: case ELF::SHT_RELA: { sh_link = SymbolTableIndex; assert(sh_link && ".symtab not found"); const MCSection *InfoSection = Section.getAssociatedSection(); sh_info = SectionIndexMap.lookup(cast(InfoSection)); break; } case ELF::SHT_SYMTAB: sh_link = StringTableIndex; sh_info = LastLocalSymbolIndex; break; case ELF::SHT_SYMTAB_SHNDX: case ELF::SHT_LLVM_CALL_GRAPH_PROFILE: case ELF::SHT_LLVM_ADDRSIG: sh_link = SymbolTableIndex; break; case ELF::SHT_GROUP: sh_link = SymbolTableIndex; sh_info = GroupSymbolIndex; break; } if (Section.getFlags() & ELF::SHF_LINK_ORDER) { const MCSymbol *Sym = Section.getAssociatedSymbol(); const MCSectionELF *Sec = cast(&Sym->getSection()); sh_link = SectionIndexMap.lookup(Sec); } WriteSecHdrEntry(StrTabBuilder.getOffset(Section.getSectionName()), Section.getType(), Section.getFlags(), 0, Offset, Size, sh_link, sh_info, Section.getAlignment(), Section.getEntrySize()); } void ELFWriter::writeSectionHeader( const MCAsmLayout &Layout, const SectionIndexMapTy &SectionIndexMap, const SectionOffsetsTy &SectionOffsets) { const unsigned NumSections = SectionTable.size(); // Null section first. uint64_t FirstSectionSize = (NumSections + 1) >= ELF::SHN_LORESERVE ? 
NumSections + 1 : 0; WriteSecHdrEntry(0, 0, 0, 0, 0, FirstSectionSize, 0, 0, 0, 0); for (const MCSectionELF *Section : SectionTable) { uint32_t GroupSymbolIndex; unsigned Type = Section->getType(); if (Type != ELF::SHT_GROUP) GroupSymbolIndex = 0; else GroupSymbolIndex = Section->getGroup()->getIndex(); const std::pair &Offsets = SectionOffsets.find(Section)->second; uint64_t Size; if (Type == ELF::SHT_NOBITS) Size = Layout.getSectionAddressSize(Section); else Size = Offsets.second - Offsets.first; writeSection(SectionIndexMap, GroupSymbolIndex, Offsets.first, Size, *Section); } } uint64_t ELFWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { uint64_t StartOffset = W.OS.tell(); MCContext &Ctx = Asm.getContext(); MCSectionELF *StrtabSection = Ctx.getELFSection(".strtab", ELF::SHT_STRTAB, 0); StringTableIndex = addToSectionTable(StrtabSection); RevGroupMapTy RevGroupMap; SectionIndexMapTy SectionIndexMap; std::map> GroupMembers; // Write out the ELF header ... writeHeader(Asm); // ... then the sections ... SectionOffsetsTy SectionOffsets; std::vector Groups; std::vector Relocations; for (MCSection &Sec : Asm) { MCSectionELF &Section = static_cast(Sec); if (Mode == NonDwoOnly && isDwoSection(Section)) continue; if (Mode == DwoOnly && !isDwoSection(Section)) continue; align(Section.getAlignment()); // Remember the offset into the file for this section. uint64_t SecStart = W.OS.tell(); const MCSymbolELF *SignatureSymbol = Section.getGroup(); writeSectionData(Asm, Section, Layout); uint64_t SecEnd = W.OS.tell(); SectionOffsets[&Section] = std::make_pair(SecStart, SecEnd); MCSectionELF *RelSection = createRelocationSection(Ctx, Section); if (SignatureSymbol) { Asm.registerSymbol(*SignatureSymbol); unsigned &GroupIdx = RevGroupMap[SignatureSymbol]; if (!GroupIdx) { MCSectionELF *Group = Ctx.createELFGroupSection(SignatureSymbol); GroupIdx = addToSectionTable(Group); Group->setAlignment(4); Groups.push_back(Group); } std::vector &Members = GroupMembers[SignatureSymbol]; Members.push_back(&Section); if (RelSection) Members.push_back(RelSection); } SectionIndexMap[&Section] = addToSectionTable(&Section); if (RelSection) { SectionIndexMap[RelSection] = addToSectionTable(RelSection); Relocations.push_back(RelSection); } OWriter.TargetObjectWriter->addTargetSectionFlags(Ctx, Section); } MCSectionELF *CGProfileSection = nullptr; if (!Asm.CGProfile.empty()) { CGProfileSection = Ctx.getELFSection(".llvm.call-graph-profile", ELF::SHT_LLVM_CALL_GRAPH_PROFILE, ELF::SHF_EXCLUDE, 16, ""); SectionIndexMap[CGProfileSection] = addToSectionTable(CGProfileSection); } for (MCSectionELF *Group : Groups) { align(Group->getAlignment()); // Remember the offset into the file for this section. uint64_t SecStart = W.OS.tell(); const MCSymbol *SignatureSymbol = Group->getGroup(); assert(SignatureSymbol); write(uint32_t(ELF::GRP_COMDAT)); for (const MCSectionELF *Member : GroupMembers[SignatureSymbol]) { uint32_t SecIndex = SectionIndexMap.lookup(Member); write(SecIndex); } uint64_t SecEnd = W.OS.tell(); SectionOffsets[Group] = std::make_pair(SecStart, SecEnd); } if (Mode == DwoOnly) { // dwo files don't have symbol tables or relocations, but they do have // string tables. StrTabBuilder.finalize(); } else { MCSectionELF *AddrsigSection; if (OWriter.EmitAddrsigSection) { AddrsigSection = Ctx.getELFSection(".llvm_addrsig", ELF::SHT_LLVM_ADDRSIG, ELF::SHF_EXCLUDE); addToSectionTable(AddrsigSection); } // Compute symbol table information. 
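// Editorial sketch, plain C++ (comdatGroupPayload is an illustrative name, not
// an LLVM API): the body the group loop above writes for an ELF SHT_GROUP
// section is one 32-bit flags word, GRP_COMDAT == 1, followed by the
// section-header indices of the group members. Byte order is left to the
// caller here; the real writer emits each word in the object's endianness.
#include <cstdint>
#include <vector>

static std::vector<uint32_t>
comdatGroupPayload(const std::vector<uint32_t> &MemberSectionIndices) {
  std::vector<uint32_t> Words;
  Words.push_back(1u /* GRP_COMDAT */);
  Words.insert(Words.end(), MemberSectionIndices.begin(),
               MemberSectionIndices.end());
  return Words;
}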
computeSymbolTable(Asm, Layout, SectionIndexMap, RevGroupMap, SectionOffsets); for (MCSectionELF *RelSection : Relocations) { align(RelSection->getAlignment()); // Remember the offset into the file for this section. uint64_t SecStart = W.OS.tell(); writeRelocations(Asm, cast(*RelSection->getAssociatedSection())); uint64_t SecEnd = W.OS.tell(); SectionOffsets[RelSection] = std::make_pair(SecStart, SecEnd); } if (OWriter.EmitAddrsigSection) { uint64_t SecStart = W.OS.tell(); writeAddrsigSection(); uint64_t SecEnd = W.OS.tell(); SectionOffsets[AddrsigSection] = std::make_pair(SecStart, SecEnd); } } if (CGProfileSection) { uint64_t SecStart = W.OS.tell(); for (const MCAssembler::CGProfileEntry &CGPE : Asm.CGProfile) { W.write(CGPE.From->getSymbol().getIndex()); W.write(CGPE.To->getSymbol().getIndex()); W.write(CGPE.Count); } uint64_t SecEnd = W.OS.tell(); SectionOffsets[CGProfileSection] = std::make_pair(SecStart, SecEnd); } { uint64_t SecStart = W.OS.tell(); const MCSectionELF *Sec = createStringTable(Ctx); uint64_t SecEnd = W.OS.tell(); SectionOffsets[Sec] = std::make_pair(SecStart, SecEnd); } uint64_t NaturalAlignment = is64Bit() ? 8 : 4; align(NaturalAlignment); const uint64_t SectionHeaderOffset = W.OS.tell(); // ... then the section header table ... writeSectionHeader(Layout, SectionIndexMap, SectionOffsets); uint16_t NumSections = support::endian::byte_swap( (SectionTable.size() + 1 >= ELF::SHN_LORESERVE) ? (uint16_t)ELF::SHN_UNDEF : SectionTable.size() + 1, W.Endian); unsigned NumSectionsOffset; auto &Stream = static_cast(W.OS); if (is64Bit()) { uint64_t Val = support::endian::byte_swap(SectionHeaderOffset, W.Endian); Stream.pwrite(reinterpret_cast(&Val), sizeof(Val), offsetof(ELF::Elf64_Ehdr, e_shoff)); NumSectionsOffset = offsetof(ELF::Elf64_Ehdr, e_shnum); } else { uint32_t Val = support::endian::byte_swap(SectionHeaderOffset, W.Endian); Stream.pwrite(reinterpret_cast(&Val), sizeof(Val), offsetof(ELF::Elf32_Ehdr, e_shoff)); NumSectionsOffset = offsetof(ELF::Elf32_Ehdr, e_shnum); } Stream.pwrite(reinterpret_cast(&NumSections), sizeof(NumSections), NumSectionsOffset); return W.OS.tell() - StartOffset; } bool ELFObjectWriter::hasRelocationAddend() const { return TargetObjectWriter->hasRelocationAddend(); } void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) { // The presence of symbol versions causes undefined symbols and // versions declared with @@@ to be renamed. for (const std::pair &P : Asm.Symvers) { StringRef AliasName = P.first; const auto &Symbol = cast(*P.second); size_t Pos = AliasName.find('@'); assert(Pos != StringRef::npos); StringRef Prefix = AliasName.substr(0, Pos); StringRef Rest = AliasName.substr(Pos); StringRef Tail = Rest; if (Rest.startswith("@@@")) Tail = Rest.substr(Symbol.isUndefined() ? 2 : 1); auto *Alias = cast(Asm.getContext().getOrCreateSymbol(Prefix + Tail)); Asm.registerSymbol(*Alias); const MCExpr *Value = MCSymbolRefExpr::create(&Symbol, Asm.getContext()); Alias->setVariableValue(Value); // Aliases defined with .symvar copy the binding from the symbol they alias. // This is the first place we are able to copy this information. Alias->setExternal(Symbol.isExternal()); Alias->setBinding(Symbol.getBinding()); + Alias->setOther(Symbol.getOther()); if (!Symbol.isUndefined() && !Rest.startswith("@@@")) continue; // FIXME: Get source locations for these errors or diagnose them earlier. 
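// Editorial sketch, plain C++ (std::string in place of StringRef, helper name
// invented), of the renaming rule applied above for .symver aliases, assuming
// the alias name always contains an '@' as the code asserts: "foo@@@VER"
// becomes the default-version name "foo@@VER" when foo is defined and the
// non-default "foo@VER" when it is undefined; the "foo@VER" and "foo@@VER"
// spellings are kept as written. The code above also copies binding,
// external-ness and (with this change) st_other from the aliased symbol onto
// the alias.
#include <string>

static std::string versionedAliasName(const std::string &AliasName,
                                      bool SymbolIsUndefined) {
  std::string::size_type Pos = AliasName.find('@'); // assumed to be present
  std::string Prefix = AliasName.substr(0, Pos);
  std::string Rest = AliasName.substr(Pos);         // begins with '@'
  if (Rest.rfind("@@@", 0) == 0)                    // starts with "@@@"
    return Prefix + Rest.substr(SymbolIsUndefined ? 2 : 1);
  return Prefix + Rest;                             // '@' or "@@" forms: unchanged
}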
if (Symbol.isUndefined() && Rest.startswith("@@") && !Rest.startswith("@@@")) { Asm.getContext().reportError(SMLoc(), "versioned symbol " + AliasName + " must be defined"); continue; } if (Renames.count(&Symbol) && Renames[&Symbol] != Alias) { Asm.getContext().reportError( SMLoc(), llvm::Twine("multiple symbol versions defined for ") + Symbol.getName()); continue; } Renames.insert(std::make_pair(&Symbol, Alias)); } for (const MCSymbol *&Sym : AddrsigSyms) { if (const MCSymbol *R = Renames.lookup(cast(Sym))) Sym = R; if (Sym->isInSection() && Sym->getName().startswith(".L")) Sym = Sym->getSection().getBeginSymbol(); Sym->setUsedInReloc(); } } // It is always valid to create a relocation with a symbol. It is preferable // to use a relocation with a section if that is possible. Using the section // allows us to omit some local symbols from the symbol table. bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm, const MCSymbolRefExpr *RefA, const MCSymbolELF *Sym, uint64_t C, unsigned Type) const { // A PCRel relocation to an absolute value has no symbol (or section). We // represent that with a relocation to a null section. if (!RefA) return false; MCSymbolRefExpr::VariantKind Kind = RefA->getKind(); switch (Kind) { default: break; // The .odp creation emits a relocation against the symbol ".TOC." which // create a R_PPC64_TOC relocation. However the relocation symbol name // in final object creation should be NULL, since the symbol does not // really exist, it is just the reference to TOC base for the current // object file. Since the symbol is undefined, returning false results // in a relocation with a null section which is the desired result. case MCSymbolRefExpr::VK_PPC_TOCBASE: return false; // These VariantKind cause the relocation to refer to something other than // the symbol itself, like a linker generated table. Since the address of // symbol is not relevant, we cannot replace the symbol with the // section and patch the difference in the addend. case MCSymbolRefExpr::VK_GOT: case MCSymbolRefExpr::VK_PLT: case MCSymbolRefExpr::VK_GOTPCREL: case MCSymbolRefExpr::VK_PPC_GOT_LO: case MCSymbolRefExpr::VK_PPC_GOT_HI: case MCSymbolRefExpr::VK_PPC_GOT_HA: return true; } // An undefined symbol is not in any section, so the relocation has to point // to the symbol itself. assert(Sym && "Expected a symbol"); if (Sym->isUndefined()) return true; unsigned Binding = Sym->getBinding(); switch(Binding) { default: llvm_unreachable("Invalid Binding"); case ELF::STB_LOCAL: break; case ELF::STB_WEAK: // If the symbol is weak, it might be overridden by a symbol in another // file. The relocation has to point to the symbol so that the linker // can update it. return true; case ELF::STB_GLOBAL: // Global ELF symbols can be preempted by the dynamic linker. The relocation // has to point to the symbol for a reason analogous to the STB_WEAK case. return true; } // If a relocation points to a mergeable section, we have to be careful. // If the offset is zero, a relocation with the section will encode the // same information. With a non-zero offset, the situation is different. // For example, a relocation can point 42 bytes past the end of a string. // If we change such a relocation to use the section, the linker would think // that it pointed to another string and subtracting 42 at runtime will // produce the wrong value. 
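// Editorial toy model, standard C++ only (all data here is invented for
// illustration), of the pitfall described in the comment above: once the
// linker has deduplicated and reordered the pieces of an SHF_MERGE section, a
// reference recorded as "section + constant" can land inside a different
// piece, while "symbol + addend" still names the piece that was intended.
#include <cassert>
#include <map>
#include <string>
#include <utility>

int main() {
  // Object file: two 4-byte string pieces, "foo\0" at offset 0, "bar\0" at 4.
  // A reference to "bar" recorded in the two possible ways:
  unsigned SectionPlusOffset = 4;                              // section-relative
  std::pair<std::string, unsigned> SymbolPlusAddend{"bar", 0}; // symbol-relative

  // Linked output: the merger happens to place "bar\0" first, "foo\0" second.
  std::map<std::string, unsigned> OutputOffsetOf = {{"bar", 0}, {"foo", 4}};

  // The section-relative reference now resolves to offset 4, i.e. "foo".
  assert(SectionPlusOffset == OutputOffsetOf["foo"]);
  // The symbol-relative reference still finds "bar" wherever it ended up.
  assert(OutputOffsetOf[SymbolPlusAddend.first] + SymbolPlusAddend.second == 0);
  return 0;
}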
if (Sym->isInSection()) { auto &Sec = cast(Sym->getSection()); unsigned Flags = Sec.getFlags(); if (Flags & ELF::SHF_MERGE) { if (C != 0) return true; // It looks like gold has a bug (http://sourceware.org/PR16794) and can // only handle section relocations to mergeable sections if using RELA. if (!hasRelocationAddend()) return true; } // Most TLS relocations use a got, so they need the symbol. Even those that // are just an offset (@tpoff), require a symbol in gold versions before // 5efeedf61e4fe720fd3e9a08e6c91c10abb66d42 (2014-09-26) which fixed // http://sourceware.org/PR16773. if (Flags & ELF::SHF_TLS) return true; } // If the symbol is a thumb function the final relocation must set the lowest // bit. With a symbol that is done by just having the symbol have that bit // set, so we would lose the bit if we relocated with the section. // FIXME: We could use the section but add the bit to the relocation value. if (Asm.isThumbFunc(Sym)) return true; if (TargetObjectWriter->needsRelocateWithSymbol(*Sym, Type)) return true; return false; } void ELFObjectWriter::recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { MCAsmBackend &Backend = Asm.getBackend(); bool IsPCRel = Backend.getFixupKindInfo(Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel; const MCSectionELF &FixupSection = cast(*Fragment->getParent()); uint64_t C = Target.getConstant(); uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); MCContext &Ctx = Asm.getContext(); if (const MCSymbolRefExpr *RefB = Target.getSymB()) { // Let A, B and C being the components of Target and R be the location of // the fixup. If the fixup is not pcrel, we want to compute (A - B + C). // If it is pcrel, we want to compute (A - B + C - R). // In general, ELF has no relocations for -B. It can only represent (A + C) // or (A + C - R). If B = R + K and the relocation is not pcrel, we can // replace B to implement it: (A - R - K + C) if (IsPCRel) { Ctx.reportError( Fixup.getLoc(), "No relocation available to represent this relative expression"); return; } const auto &SymB = cast(RefB->getSymbol()); if (SymB.isUndefined()) { Ctx.reportError(Fixup.getLoc(), Twine("symbol '") + SymB.getName() + "' can not be undefined in a subtraction expression"); return; } assert(!SymB.isAbsolute() && "Should have been folded"); const MCSection &SecB = SymB.getSection(); if (&SecB != &FixupSection) { Ctx.reportError(Fixup.getLoc(), "Cannot represent a difference across sections"); return; } uint64_t SymBOffset = Layout.getSymbolOffset(SymB); uint64_t K = SymBOffset - FixupOffset; IsPCRel = true; C -= K; } // We either rejected the fixup or folded B into C at this point. const MCSymbolRefExpr *RefA = Target.getSymA(); const auto *SymA = RefA ? 
cast(&RefA->getSymbol()) : nullptr; bool ViaWeakRef = false; if (SymA && SymA->isVariable()) { const MCExpr *Expr = SymA->getVariableValue(); if (const auto *Inner = dyn_cast(Expr)) { if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF) { SymA = cast(&Inner->getSymbol()); ViaWeakRef = true; } } } unsigned Type = TargetObjectWriter->getRelocType(Ctx, Target, Fixup, IsPCRel); uint64_t OriginalC = C; bool RelocateWithSymbol = shouldRelocateWithSymbol(Asm, RefA, SymA, C, Type); if (!RelocateWithSymbol && SymA && !SymA->isUndefined()) C += Layout.getSymbolOffset(*SymA); uint64_t Addend = 0; if (hasRelocationAddend()) { Addend = C; C = 0; } FixedValue = C; const MCSectionELF *SecA = (SymA && SymA->isInSection()) ? cast(&SymA->getSection()) : nullptr; if (!checkRelocation(Ctx, Fixup.getLoc(), &FixupSection, SecA)) return; if (!RelocateWithSymbol) { const auto *SectionSymbol = SecA ? cast(SecA->getBeginSymbol()) : nullptr; if (SectionSymbol) SectionSymbol->setUsedInReloc(); ELFRelocationEntry Rec(FixupOffset, SectionSymbol, Type, Addend, SymA, OriginalC); Relocations[&FixupSection].push_back(Rec); return; } const auto *RenamedSymA = SymA; if (SymA) { if (const MCSymbolELF *R = Renames.lookup(SymA)) RenamedSymA = R; if (ViaWeakRef) RenamedSymA->setIsWeakrefUsedInReloc(); else RenamedSymA->setUsedInReloc(); } ELFRelocationEntry Rec(FixupOffset, RenamedSymA, Type, Addend, SymA, OriginalC); Relocations[&FixupSection].push_back(Rec); } bool ELFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( const MCAssembler &Asm, const MCSymbol &SA, const MCFragment &FB, bool InSet, bool IsPCRel) const { const auto &SymA = cast(SA); if (IsPCRel) { assert(!InSet); if (isWeak(SymA)) return false; } return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB, InSet, IsPCRel); } std::unique_ptr llvm::createELFObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS, bool IsLittleEndian) { return llvm::make_unique(std::move(MOTW), OS, IsLittleEndian); } std::unique_ptr llvm::createELFDwoObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS, raw_pwrite_stream &DwoOS, bool IsLittleEndian) { return llvm::make_unique(std::move(MOTW), OS, DwoOS, IsLittleEndian); } Index: vendor/llvm/dist-release_80/lib/MC/MCWin64EH.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/MC/MCWin64EH.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/MC/MCWin64EH.cpp (revision 348932) @@ -1,644 +1,644 @@ //===- lib/MC/MCWin64EH.cpp - MCWin64EH implementation --------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "llvm/MC/MCWin64EH.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Win64EH.h" using namespace llvm; // NOTE: All relocations generated here are 4-byte image-relative. 
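// Editorial sketch of the x64 .pdata record that EmitRuntimeFunction below
// assembles from three 4-byte image-relative fixups. The struct mirrors the
// documented RUNTIME_FUNCTION layout and is only illustrative; it is not a
// type the MC layer itself uses.
#include <cstdint>

struct RuntimeFunctionX64 {
  uint32_t BeginAddress; // RVA of the first byte of the function
  uint32_t EndAddress;   // RVA one past the last byte of the function
  uint32_t UnwindData;   // RVA of the UNWIND_INFO record in .xdata
};
static_assert(sizeof(RuntimeFunctionX64) == 12, "three 32-bit RVAs");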
static uint8_t CountOfUnwindCodes(std::vector &Insns) { uint8_t Count = 0; for (const auto &I : Insns) { switch (static_cast(I.Operation)) { default: llvm_unreachable("Unsupported unwind code"); case Win64EH::UOP_PushNonVol: case Win64EH::UOP_AllocSmall: case Win64EH::UOP_SetFPReg: case Win64EH::UOP_PushMachFrame: Count += 1; break; case Win64EH::UOP_SaveNonVol: case Win64EH::UOP_SaveXMM128: Count += 2; break; case Win64EH::UOP_SaveNonVolBig: case Win64EH::UOP_SaveXMM128Big: Count += 3; break; case Win64EH::UOP_AllocLarge: Count += (I.Offset > 512 * 1024 - 8) ? 3 : 2; break; } } return Count; } static void EmitAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS, const MCSymbol *RHS) { MCContext &Context = Streamer.getContext(); const MCExpr *Diff = MCBinaryExpr::createSub(MCSymbolRefExpr::create(LHS, Context), MCSymbolRefExpr::create(RHS, Context), Context); Streamer.EmitValue(Diff, 1); } static void EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin, WinEH::Instruction &inst) { uint8_t b2; uint16_t w; b2 = (inst.Operation & 0x0F); switch (static_cast(inst.Operation)) { default: llvm_unreachable("Unsupported unwind code"); case Win64EH::UOP_PushNonVol: EmitAbsDifference(streamer, inst.Label, begin); b2 |= (inst.Register & 0x0F) << 4; streamer.EmitIntValue(b2, 1); break; case Win64EH::UOP_AllocLarge: EmitAbsDifference(streamer, inst.Label, begin); if (inst.Offset > 512 * 1024 - 8) { b2 |= 0x10; streamer.EmitIntValue(b2, 1); w = inst.Offset & 0xFFF8; streamer.EmitIntValue(w, 2); w = inst.Offset >> 16; } else { streamer.EmitIntValue(b2, 1); w = inst.Offset >> 3; } streamer.EmitIntValue(w, 2); break; case Win64EH::UOP_AllocSmall: b2 |= (((inst.Offset - 8) >> 3) & 0x0F) << 4; EmitAbsDifference(streamer, inst.Label, begin); streamer.EmitIntValue(b2, 1); break; case Win64EH::UOP_SetFPReg: EmitAbsDifference(streamer, inst.Label, begin); streamer.EmitIntValue(b2, 1); break; case Win64EH::UOP_SaveNonVol: case Win64EH::UOP_SaveXMM128: b2 |= (inst.Register & 0x0F) << 4; EmitAbsDifference(streamer, inst.Label, begin); streamer.EmitIntValue(b2, 1); w = inst.Offset >> 3; if (inst.Operation == Win64EH::UOP_SaveXMM128) w >>= 1; streamer.EmitIntValue(w, 2); break; case Win64EH::UOP_SaveNonVolBig: case Win64EH::UOP_SaveXMM128Big: b2 |= (inst.Register & 0x0F) << 4; EmitAbsDifference(streamer, inst.Label, begin); streamer.EmitIntValue(b2, 1); if (inst.Operation == Win64EH::UOP_SaveXMM128Big) w = inst.Offset & 0xFFF0; else w = inst.Offset & 0xFFF8; streamer.EmitIntValue(w, 2); w = inst.Offset >> 16; streamer.EmitIntValue(w, 2); break; case Win64EH::UOP_PushMachFrame: if (inst.Offset == 1) b2 |= 0x10; EmitAbsDifference(streamer, inst.Label, begin); streamer.EmitIntValue(b2, 1); break; } } static void EmitSymbolRefWithOfs(MCStreamer &streamer, const MCSymbol *Base, const MCSymbol *Other) { MCContext &Context = streamer.getContext(); const MCSymbolRefExpr *BaseRef = MCSymbolRefExpr::create(Base, Context); const MCSymbolRefExpr *OtherRef = MCSymbolRefExpr::create(Other, Context); const MCExpr *Ofs = MCBinaryExpr::createSub(OtherRef, BaseRef, Context); const MCSymbolRefExpr *BaseRefRel = MCSymbolRefExpr::create(Base, MCSymbolRefExpr::VK_COFF_IMGREL32, Context); streamer.EmitValue(MCBinaryExpr::createAdd(BaseRefRel, Ofs, Context), 4); } static void EmitRuntimeFunction(MCStreamer &streamer, const WinEH::FrameInfo *info) { MCContext &context = streamer.getContext(); streamer.EmitValueToAlignment(4); EmitSymbolRefWithOfs(streamer, info->Function, info->Begin); EmitSymbolRefWithOfs(streamer, info->Function, 
info->End); streamer.EmitValue(MCSymbolRefExpr::create(info->Symbol, MCSymbolRefExpr::VK_COFF_IMGREL32, context), 4); } static void EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) { // If this UNWIND_INFO already has a symbol, it's already been emitted. if (info->Symbol) return; MCContext &context = streamer.getContext(); MCSymbol *Label = context.createTempSymbol(); streamer.EmitValueToAlignment(4); streamer.EmitLabel(Label); info->Symbol = Label; // Upper 3 bits are the version number (currently 1). uint8_t flags = 0x01; if (info->ChainedParent) flags |= Win64EH::UNW_ChainInfo << 3; else { if (info->HandlesUnwind) flags |= Win64EH::UNW_TerminateHandler << 3; if (info->HandlesExceptions) flags |= Win64EH::UNW_ExceptionHandler << 3; } streamer.EmitIntValue(flags, 1); if (info->PrologEnd) EmitAbsDifference(streamer, info->PrologEnd, info->Begin); else streamer.EmitIntValue(0, 1); uint8_t numCodes = CountOfUnwindCodes(info->Instructions); streamer.EmitIntValue(numCodes, 1); uint8_t frame = 0; if (info->LastFrameInst >= 0) { WinEH::Instruction &frameInst = info->Instructions[info->LastFrameInst]; assert(frameInst.Operation == Win64EH::UOP_SetFPReg); frame = (frameInst.Register & 0x0F) | (frameInst.Offset & 0xF0); } streamer.EmitIntValue(frame, 1); // Emit unwind instructions (in reverse order). uint8_t numInst = info->Instructions.size(); for (uint8_t c = 0; c < numInst; ++c) { WinEH::Instruction inst = info->Instructions.back(); info->Instructions.pop_back(); EmitUnwindCode(streamer, info->Begin, inst); } // For alignment purposes, the instruction array will always have an even // number of entries, with the final entry potentially unused (in which case // the array will be one longer than indicated by the count of unwind codes // field). if (numCodes & 1) { streamer.EmitIntValue(0, 2); } if (flags & (Win64EH::UNW_ChainInfo << 3)) EmitRuntimeFunction(streamer, info->ChainedParent); else if (flags & ((Win64EH::UNW_TerminateHandler|Win64EH::UNW_ExceptionHandler) << 3)) streamer.EmitValue(MCSymbolRefExpr::create(info->ExceptionHandler, MCSymbolRefExpr::VK_COFF_IMGREL32, context), 4); else if (numCodes == 0) { // The minimum size of an UNWIND_INFO struct is 8 bytes. If we're not // a chained unwind info, if there is no handler, and if there are fewer // than 2 slots used in the unwind code array, we have to pad to 8 bytes. streamer.EmitIntValue(0, 4); } } void llvm::Win64EH::UnwindEmitter::Emit(MCStreamer &Streamer) const { // Emit the unwind info structs first. for (const auto &CFI : Streamer.getWinFrameInfos()) { MCSection *XData = Streamer.getAssociatedXDataSection(CFI->TextSection); Streamer.SwitchSection(XData); ::EmitUnwindInfo(Streamer, CFI.get()); } // Now emit RUNTIME_FUNCTION entries. for (const auto &CFI : Streamer.getWinFrameInfos()) { MCSection *PData = Streamer.getAssociatedPDataSection(CFI->TextSection); Streamer.SwitchSection(PData); EmitRuntimeFunction(Streamer, CFI.get()); } } void llvm::Win64EH::UnwindEmitter::EmitUnwindInfo( MCStreamer &Streamer, WinEH::FrameInfo *info) const { // Switch sections (the static function above is meant to be called from // here and from Emit(). 
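// Editorial sketch, standard C++ (unwindInfoHeader is an invented helper), of
// the four header bytes that EmitUnwindInfo assembles above: version 1 in the
// low three bits with the UNW_* flags above them, the prolog size in bytes,
// the number of unwind-code slots, and a frame byte carrying the frame
// register in the low nibble and the frame offset scaled by 16 in the high
// nibble.
#include <array>
#include <cstdint>

static std::array<uint8_t, 4> unwindInfoHeader(uint8_t Flags, uint8_t PrologSize,
                                               uint8_t CountOfCodes,
                                               uint8_t FrameReg,
                                               uint32_t FrameOffset) {
  std::array<uint8_t, 4> Hdr = {
      uint8_t((Flags << 3) | 0x01), PrologSize, CountOfCodes,
      uint8_t((FrameReg & 0x0F) | (((FrameOffset / 16) & 0x0F) << 4))};
  return Hdr;
}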
MCSection *XData = Streamer.getAssociatedXDataSection(info->TextSection); Streamer.SwitchSection(XData); ::EmitUnwindInfo(Streamer, info); } static int64_t GetAbsDifference(MCStreamer &Streamer, const MCSymbol *LHS, const MCSymbol *RHS) { MCContext &Context = Streamer.getContext(); const MCExpr *Diff = MCBinaryExpr::createSub(MCSymbolRefExpr::create(LHS, Context), MCSymbolRefExpr::create(RHS, Context), Context); MCObjectStreamer *OS = (MCObjectStreamer *)(&Streamer); int64_t value; Diff->evaluateAsAbsolute(value, OS->getAssembler()); return value; } static uint32_t ARM64CountOfUnwindCodes(const std::vector &Insns) { uint32_t Count = 0; for (const auto &I : Insns) { switch (static_cast(I.Operation)) { default: llvm_unreachable("Unsupported ARM64 unwind code"); case Win64EH::UOP_AllocSmall: Count += 1; break; case Win64EH::UOP_AllocMedium: Count += 2; break; case Win64EH::UOP_AllocLarge: Count += 4; break; case Win64EH::UOP_SaveFPLRX: Count += 1; break; case Win64EH::UOP_SaveFPLR: Count += 1; break; case Win64EH::UOP_SaveReg: Count += 2; break; case Win64EH::UOP_SaveRegP: Count += 2; break; case Win64EH::UOP_SaveRegPX: Count += 2; break; case Win64EH::UOP_SaveRegX: Count += 2; break; case Win64EH::UOP_SaveFReg: Count += 2; break; case Win64EH::UOP_SaveFRegP: Count += 2; break; case Win64EH::UOP_SaveFRegX: Count += 2; break; case Win64EH::UOP_SaveFRegPX: Count += 2; break; case Win64EH::UOP_SetFP: Count += 1; break; case Win64EH::UOP_AddFP: Count += 2; break; case Win64EH::UOP_Nop: Count += 1; break; case Win64EH::UOP_End: Count += 1; break; } } return Count; } // Unwind opcode encodings and restrictions are documented at // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling static void ARM64EmitUnwindCode(MCStreamer &streamer, const MCSymbol *begin, WinEH::Instruction &inst) { uint8_t b, reg; switch (static_cast(inst.Operation)) { default: llvm_unreachable("Unsupported ARM64 unwind code"); case Win64EH::UOP_AllocSmall: b = (inst.Offset >> 4) & 0x1F; streamer.EmitIntValue(b, 1); break; case Win64EH::UOP_AllocMedium: { uint16_t hw = (inst.Offset >> 4) & 0x7FF; b = 0xC0; b |= (hw >> 8); streamer.EmitIntValue(b, 1); b = hw & 0xFF; streamer.EmitIntValue(b, 1); break; } case Win64EH::UOP_AllocLarge: { uint32_t w; b = 0xE0; streamer.EmitIntValue(b, 1); w = inst.Offset >> 4; b = (w & 0x00FF0000) >> 16; streamer.EmitIntValue(b, 1); b = (w & 0x0000FF00) >> 8; streamer.EmitIntValue(b, 1); b = w & 0x000000FF; streamer.EmitIntValue(b, 1); break; } case Win64EH::UOP_SetFP: b = 0xE1; streamer.EmitIntValue(b, 1); break; case Win64EH::UOP_AddFP: b = 0xE2; streamer.EmitIntValue(b, 1); b = (inst.Offset >> 3); streamer.EmitIntValue(b, 1); break; case Win64EH::UOP_Nop: b = 0xE3; streamer.EmitIntValue(b, 1); break; case Win64EH::UOP_SaveFPLRX: b = 0x80; b |= ((inst.Offset - 1) >> 3) & 0x3F; streamer.EmitIntValue(b, 1); break; case Win64EH::UOP_SaveFPLR: b = 0x40; b |= (inst.Offset >> 3) & 0x3F; streamer.EmitIntValue(b, 1); break; case Win64EH::UOP_SaveReg: assert(inst.Register >= 19 && "Saved reg must be >= 19"); reg = inst.Register - 19; b = 0xD0 | ((reg & 0xC) >> 2); streamer.EmitIntValue(b, 1); b = ((reg & 0x3) << 6) | (inst.Offset >> 3); streamer.EmitIntValue(b, 1); break; case Win64EH::UOP_SaveRegX: assert(inst.Register >= 19 && "Saved reg must be >= 19"); reg = inst.Register - 19; b = 0xD4 | ((reg & 0x8) >> 3); streamer.EmitIntValue(b, 1); b = ((reg & 0x7) << 5) | ((inst.Offset >> 3) - 1); streamer.EmitIntValue(b, 1); break; case Win64EH::UOP_SaveRegP: assert(inst.Register >= 19 && "Saved 
registers must be >= 19"); reg = inst.Register - 19; b = 0xC8 | ((reg & 0xC) >> 2); streamer.EmitIntValue(b, 1); b = ((reg & 0x3) << 6) | (inst.Offset >> 3); streamer.EmitIntValue(b, 1); break; case Win64EH::UOP_SaveRegPX: assert(inst.Register >= 19 && "Saved registers must be >= 19"); reg = inst.Register - 19; b = 0xCC | ((reg & 0xC) >> 2); streamer.EmitIntValue(b, 1); b = ((reg & 0x3) << 6) | ((inst.Offset >> 3) - 1); streamer.EmitIntValue(b, 1); break; case Win64EH::UOP_SaveFReg: assert(inst.Register >= 8 && "Saved dreg must be >= 8"); reg = inst.Register - 8; b = 0xDC | ((reg & 0x4) >> 2); streamer.EmitIntValue(b, 1); b = ((reg & 0x3) << 6) | (inst.Offset >> 3); streamer.EmitIntValue(b, 1); break; case Win64EH::UOP_SaveFRegX: assert(inst.Register >= 8 && "Saved dreg must be >= 8"); reg = inst.Register - 8; b = 0xDE; streamer.EmitIntValue(b, 1); b = ((reg & 0x7) << 5) | ((inst.Offset >> 3) - 1); streamer.EmitIntValue(b, 1); break; case Win64EH::UOP_SaveFRegP: assert(inst.Register >= 8 && "Saved dregs must be >= 8"); reg = inst.Register - 8; b = 0xD8 | ((reg & 0x4) >> 2); streamer.EmitIntValue(b, 1); b = ((reg & 0x3) << 6) | (inst.Offset >> 3); streamer.EmitIntValue(b, 1); break; case Win64EH::UOP_SaveFRegPX: assert(inst.Register >= 8 && "Saved dregs must be >= 8"); reg = inst.Register - 8; b = 0xDA | ((reg & 0x4) >> 2); streamer.EmitIntValue(b, 1); b = ((reg & 0x3) << 6) | ((inst.Offset >> 3) - 1); streamer.EmitIntValue(b, 1); break; case Win64EH::UOP_End: b = 0xE4; streamer.EmitIntValue(b, 1); break; } } // Returns the epilog symbol of an epilog with the exact same unwind code // sequence, if it exists. Otherwise, returns nulltpr. // EpilogInstrs - Unwind codes for the current epilog. // Epilogs - Epilogs that potentialy match the current epilog. static MCSymbol* FindMatchingEpilog(const std::vector& EpilogInstrs, const std::vector& Epilogs, const WinEH::FrameInfo *info) { for (auto *EpilogStart : Epilogs) { auto InstrsIter = info->EpilogMap.find(EpilogStart); assert(InstrsIter != info->EpilogMap.end() && "Epilog not found in EpilogMap"); const auto &Instrs = InstrsIter->second; if (Instrs.size() != EpilogInstrs.size()) continue; bool Match = true; for (unsigned i = 0; i < Instrs.size(); ++i) if (Instrs[i].Operation != EpilogInstrs[i].Operation || Instrs[i].Offset != EpilogInstrs[i].Offset || Instrs[i].Register != EpilogInstrs[i].Register) { Match = false; break; } if (Match) return EpilogStart; } return nullptr; } // Populate the .xdata section. The format of .xdata on ARM64 is documented at // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) { // If this UNWIND_INFO already has a symbol, it's already been emitted. if (info->Symbol) return; MCContext &context = streamer.getContext(); MCSymbol *Label = context.createTempSymbol(); streamer.EmitValueToAlignment(4); streamer.EmitLabel(Label); info->Symbol = Label; uint32_t FuncLength = 0x0; if (info->FuncletOrFuncEnd) FuncLength = (uint32_t)GetAbsDifference(streamer, info->FuncletOrFuncEnd, info->Begin); FuncLength /= 4; uint32_t PrologCodeBytes = ARM64CountOfUnwindCodes(info->Instructions); uint32_t TotalCodeBytes = PrologCodeBytes; // Process epilogs. MapVector EpilogInfo; // Epilogs processed so far. 
std::vector AddedEpilogs; for (auto &I : info->EpilogMap) { MCSymbol *EpilogStart = I.first; auto &EpilogInstrs = I.second; uint32_t CodeBytes = ARM64CountOfUnwindCodes(EpilogInstrs); MCSymbol* MatchingEpilog = FindMatchingEpilog(EpilogInstrs, AddedEpilogs, info); if (MatchingEpilog) { assert(EpilogInfo.find(MatchingEpilog) != EpilogInfo.end() && "Duplicate epilog not found"); - EpilogInfo[EpilogStart] = EpilogInfo[MatchingEpilog]; + EpilogInfo[EpilogStart] = EpilogInfo.lookup(MatchingEpilog); // Clear the unwind codes in the EpilogMap, so that they don't get output // in the logic below. EpilogInstrs.clear(); } else { EpilogInfo[EpilogStart] = TotalCodeBytes; TotalCodeBytes += CodeBytes; AddedEpilogs.push_back(EpilogStart); } } // Code Words, Epilog count, E, X, Vers, Function Length uint32_t row1 = 0x0; uint32_t CodeWords = TotalCodeBytes / 4; uint32_t CodeWordsMod = TotalCodeBytes % 4; if (CodeWordsMod) CodeWords++; uint32_t EpilogCount = info->EpilogMap.size(); bool ExtensionWord = EpilogCount > 31 || TotalCodeBytes > 124; if (!ExtensionWord) { row1 |= (EpilogCount & 0x1F) << 22; row1 |= (CodeWords & 0x1F) << 27; } // E is always 0 right now, TODO: packed epilog setup if (info->HandlesExceptions) // X row1 |= 1 << 20; row1 |= FuncLength & 0x3FFFF; streamer.EmitIntValue(row1, 4); // Extended Code Words, Extended Epilog Count if (ExtensionWord) { // FIXME: We should be able to split unwind info into multiple sections. // FIXME: We should share epilog codes across epilogs, where possible, // which would make this issue show up less frequently. if (CodeWords > 0xFF || EpilogCount > 0xFFFF) report_fatal_error("SEH unwind data splitting not yet implemented"); uint32_t row2 = 0x0; row2 |= (CodeWords & 0xFF) << 16; row2 |= (EpilogCount & 0xFFFF); streamer.EmitIntValue(row2, 4); } // Epilog Start Index, Epilog Start Offset for (auto &I : EpilogInfo) { MCSymbol *EpilogStart = I.first; uint32_t EpilogIndex = I.second; uint32_t EpilogOffset = (uint32_t)GetAbsDifference(streamer, EpilogStart, info->Begin); if (EpilogOffset) EpilogOffset /= 4; uint32_t row3 = EpilogOffset; row3 |= (EpilogIndex & 0x3FF) << 22; streamer.EmitIntValue(row3, 4); } // Emit prolog unwind instructions (in reverse order). uint8_t numInst = info->Instructions.size(); for (uint8_t c = 0; c < numInst; ++c) { WinEH::Instruction inst = info->Instructions.back(); info->Instructions.pop_back(); ARM64EmitUnwindCode(streamer, info->Begin, inst); } // Emit epilog unwind instructions for (auto &I : info->EpilogMap) { auto &EpilogInstrs = I.second; for (uint32_t i = 0; i < EpilogInstrs.size(); i++) { WinEH::Instruction inst = EpilogInstrs[i]; ARM64EmitUnwindCode(streamer, info->Begin, inst); } } int32_t BytesMod = CodeWords * 4 - TotalCodeBytes; assert(BytesMod >= 0); for (int i = 0; i < BytesMod; i++) streamer.EmitIntValue(0xE3, 1); if (info->HandlesExceptions) streamer.EmitValue( MCSymbolRefExpr::create(info->ExceptionHandler, MCSymbolRefExpr::VK_COFF_IMGREL32, context), 4); } static void ARM64EmitRuntimeFunction(MCStreamer &streamer, const WinEH::FrameInfo *info) { MCContext &context = streamer.getContext(); streamer.EmitValueToAlignment(4); EmitSymbolRefWithOfs(streamer, info->Function, info->Begin); streamer.EmitValue(MCSymbolRefExpr::create(info->Symbol, MCSymbolRefExpr::VK_COFF_IMGREL32, context), 4); } void llvm::Win64EH::ARM64UnwindEmitter::Emit(MCStreamer &Streamer) const { // Emit the unwind info structs first. 
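// Editorial sketch of the hazard behind the one-line change above. EpilogInfo
// is an llvm::MapVector, which keeps its entries in a vector: the insertion
// done by operator[] for a new key can reallocate that vector and invalidate a
// reference obtained for an existing key, so "EpilogInfo[EpilogStart] =
// EpilogInfo[MatchingEpilog]" may assign through a dangling reference, whereas
// lookup() returns the mapped value by value. The miniature container below
// (standard C++, invented names) shows the same shape and the safe pattern.
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

struct TinyMapVector {
  std::vector<std::pair<std::string, uint32_t>> Entries;

  uint32_t &operator[](const std::string &Key) {
    for (auto &E : Entries)
      if (E.first == Key)
        return E.second;
    Entries.push_back({Key, 0}); // may reallocate: earlier references dangle
    return Entries.back().second;
  }

  uint32_t lookup(const std::string &Key) const { // returns by value
    for (const auto &E : Entries)
      if (E.first == Key)
        return E.second;
    return 0;
  }
};

// Safe: copy the old value out first, then insert the new key.
static void copyEpilogOffset(TinyMapVector &M, const std::string &From,
                             const std::string &To) {
  uint32_t Value = M.lookup(From);
  M[To] = Value;
}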
for (const auto &CFI : Streamer.getWinFrameInfos()) { MCSection *XData = Streamer.getAssociatedXDataSection(CFI->TextSection); Streamer.SwitchSection(XData); ARM64EmitUnwindInfo(Streamer, CFI.get()); } // Now emit RUNTIME_FUNCTION entries. for (const auto &CFI : Streamer.getWinFrameInfos()) { MCSection *PData = Streamer.getAssociatedPDataSection(CFI->TextSection); Streamer.SwitchSection(PData); ARM64EmitRuntimeFunction(Streamer, CFI.get()); } } void llvm::Win64EH::ARM64UnwindEmitter::EmitUnwindInfo( MCStreamer &Streamer, WinEH::FrameInfo *info) const { // Switch sections (the static function above is meant to be called from // here and from Emit(). MCSection *XData = Streamer.getAssociatedXDataSection(info->TextSection); Streamer.SwitchSection(XData); ARM64EmitUnwindInfo(Streamer, info); } Index: vendor/llvm/dist-release_80/lib/MC/WasmObjectWriter.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/MC/WasmObjectWriter.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/MC/WasmObjectWriter.cpp (revision 348932) @@ -1,1590 +1,1596 @@ //===- lib/MC/WasmObjectWriter.cpp - Wasm File Writer ---------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements Wasm object file writer information. // //===----------------------------------------------------------------------===// #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/BinaryFormat/Wasm.h" #include "llvm/Config/llvm-config.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionWasm.h" #include "llvm/MC/MCSymbolWasm.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWasmObjectWriter.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/StringSaver.h" #include using namespace llvm; #define DEBUG_TYPE "mc" namespace { // Went we ceate the indirect function table we start at 1, so that there is // and emtpy slot at 0 and therefore calling a null function pointer will trap. static const uint32_t kInitialTableOffset = 1; // For patching purposes, we need to remember where each section starts, both // for patching up the section size field, and for patching up references to // locations within the section. struct SectionBookkeeping { // Where the size of the section is written. uint64_t SizeOffset; // Where the section header ends (without custom section name). uint64_t PayloadOffset; // Where the contents of the section starts. uint64_t ContentsOffset; uint32_t Index; }; // The signature of a wasm function or event, in a struct capable of being used // as a DenseMap key. // TODO: Consider using wasm::WasmSignature directly instead. struct WasmSignature { // Support empty and tombstone instances, needed by DenseMap. enum { Plain, Empty, Tombstone } State; // The return types of the function. SmallVector Returns; // The parameter types of the function. 
SmallVector Params; WasmSignature() : State(Plain) {} bool operator==(const WasmSignature &Other) const { return State == Other.State && Returns == Other.Returns && Params == Other.Params; } }; // Traits for using WasmSignature in a DenseMap. struct WasmSignatureDenseMapInfo { static WasmSignature getEmptyKey() { WasmSignature Sig; Sig.State = WasmSignature::Empty; return Sig; } static WasmSignature getTombstoneKey() { WasmSignature Sig; Sig.State = WasmSignature::Tombstone; return Sig; } static unsigned getHashValue(const WasmSignature &Sig) { uintptr_t Value = Sig.State; for (wasm::ValType Ret : Sig.Returns) Value += DenseMapInfo::getHashValue(uint32_t(Ret)); for (wasm::ValType Param : Sig.Params) Value += DenseMapInfo::getHashValue(uint32_t(Param)); return Value; } static bool isEqual(const WasmSignature &LHS, const WasmSignature &RHS) { return LHS == RHS; } }; // A wasm data segment. A wasm binary contains only a single data section // but that can contain many segments, each with their own virtual location // in memory. Each MCSection data created by llvm is modeled as its own // wasm data segment. struct WasmDataSegment { MCSectionWasm *Section; StringRef Name; uint32_t Offset; uint32_t Alignment; uint32_t Flags; SmallVector Data; }; // A wasm function to be written into the function section. struct WasmFunction { uint32_t SigIndex; const MCSymbolWasm *Sym; }; // A wasm global to be written into the global section. struct WasmGlobal { wasm::WasmGlobalType Type; uint64_t InitialValue; }; // Information about a single item which is part of a COMDAT. For each data // segment or function which is in the COMDAT, there is a corresponding // WasmComdatEntry. struct WasmComdatEntry { unsigned Kind; uint32_t Index; }; // Information about a single relocation. struct WasmRelocationEntry { uint64_t Offset; // Where is the relocation. const MCSymbolWasm *Symbol; // The symbol to relocate with. int64_t Addend; // A value to add to the symbol. unsigned Type; // The type of the relocation. const MCSectionWasm *FixupSection; // The section the relocation is targeting. WasmRelocationEntry(uint64_t Offset, const MCSymbolWasm *Symbol, int64_t Addend, unsigned Type, const MCSectionWasm *FixupSection) : Offset(Offset), Symbol(Symbol), Addend(Addend), Type(Type), FixupSection(FixupSection) {} bool hasAddend() const { switch (Type) { case wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB: case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB: case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32: case wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32: case wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32: return true; default: return false; } } void print(raw_ostream &Out) const { Out << wasm::relocTypetoString(Type) << " Off=" << Offset << ", Sym=" << *Symbol << ", Addend=" << Addend << ", FixupSection=" << FixupSection->getSectionName(); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void dump() const { print(dbgs()); } #endif }; static const uint32_t INVALID_INDEX = -1; struct WasmCustomSection { StringRef Name; MCSectionWasm *Section; uint32_t OutputContentsOffset; uint32_t OutputIndex; WasmCustomSection(StringRef Name, MCSectionWasm *Section) : Name(Name), Section(Section), OutputContentsOffset(0), OutputIndex(INVALID_INDEX) {} }; #if !defined(NDEBUG) raw_ostream &operator<<(raw_ostream &OS, const WasmRelocationEntry &Rel) { Rel.print(OS); return OS; } #endif class WasmObjectWriter : public MCObjectWriter { support::endian::Writer W; /// The target specific Wasm writer instance. 
std::unique_ptr TargetObjectWriter; // Relocations for fixing up references in the code section. std::vector CodeRelocations; uint32_t CodeSectionIndex; // Relocations for fixing up references in the data section. std::vector DataRelocations; uint32_t DataSectionIndex; // Index values to use for fixing up call_indirect type indices. // Maps function symbols to the index of the type of the function DenseMap TypeIndices; // Maps function symbols to the table element index space. Used // for TABLE_INDEX relocation types (i.e. address taken functions). DenseMap TableIndices; // Maps function/global symbols to the function/global/event/section index // space. DenseMap WasmIndices; // Maps data symbols to the Wasm segment and offset/size with the segment. DenseMap DataLocations; // Stores output data (index, relocations, content offset) for custom // section. std::vector CustomSections; // Relocations for fixing up references in the custom sections. DenseMap> CustomSectionsRelocations; // Map from section to defining function symbol. DenseMap SectionFunctions; DenseMap SignatureIndices; SmallVector Signatures; SmallVector Globals; SmallVector DataSegments; unsigned NumFunctionImports = 0; unsigned NumGlobalImports = 0; unsigned NumEventImports = 0; uint32_t SectionCount = 0; // TargetObjectWriter wrappers. bool is64Bit() const { return TargetObjectWriter->is64Bit(); } unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup) const { return TargetObjectWriter->getRelocType(Target, Fixup); } void startSection(SectionBookkeeping &Section, unsigned SectionId); void startCustomSection(SectionBookkeeping &Section, StringRef Name); void endSection(SectionBookkeeping &Section); public: WasmObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS) : W(OS, support::little), TargetObjectWriter(std::move(MOTW)) {} ~WasmObjectWriter() override; private: void reset() override { CodeRelocations.clear(); DataRelocations.clear(); TypeIndices.clear(); WasmIndices.clear(); TableIndices.clear(); DataLocations.clear(); CustomSectionsRelocations.clear(); SignatureIndices.clear(); Signatures.clear(); Globals.clear(); DataSegments.clear(); SectionFunctions.clear(); NumFunctionImports = 0; NumGlobalImports = 0; MCObjectWriter::reset(); } void writeHeader(const MCAssembler &Asm); void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) override; void executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) override; uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; void writeString(const StringRef Str) { encodeULEB128(Str.size(), W.OS); W.OS << Str; } void writeValueType(wasm::ValType Ty) { W.OS << static_cast(Ty); } void writeTypeSection(ArrayRef Signatures); void writeImportSection(ArrayRef Imports, uint32_t DataSize, uint32_t NumElements); void writeFunctionSection(ArrayRef Functions); void writeGlobalSection(); void writeExportSection(ArrayRef Exports); void writeElemSection(ArrayRef TableElems); void writeCodeSection(const MCAssembler &Asm, const MCAsmLayout &Layout, ArrayRef Functions); void writeDataSection(); void writeEventSection(ArrayRef Events); void writeRelocSection(uint32_t SectionIndex, StringRef Name, std::vector &Relocations); void writeLinkingMetaDataSection( ArrayRef SymbolInfos, ArrayRef> InitFuncs, const std::map> &Comdats); void writeCustomSections(const MCAssembler &Asm, const MCAsmLayout &Layout); void writeCustomRelocSections(); void 
updateCustomSectionRelocations(const SmallVector &Functions, const MCAsmLayout &Layout); uint32_t getProvisionalValue(const WasmRelocationEntry &RelEntry); void applyRelocations(ArrayRef Relocations, uint64_t ContentsOffset); uint32_t getRelocationIndexValue(const WasmRelocationEntry &RelEntry); uint32_t getFunctionType(const MCSymbolWasm &Symbol); uint32_t getEventType(const MCSymbolWasm &Symbol); void registerFunctionType(const MCSymbolWasm &Symbol); void registerEventType(const MCSymbolWasm &Symbol); }; } // end anonymous namespace WasmObjectWriter::~WasmObjectWriter() {} // Write out a section header and a patchable section size field. void WasmObjectWriter::startSection(SectionBookkeeping &Section, unsigned SectionId) { LLVM_DEBUG(dbgs() << "startSection " << SectionId << "\n"); W.OS << char(SectionId); Section.SizeOffset = W.OS.tell(); // The section size. We don't know the size yet, so reserve enough space // for any 32-bit value; we'll patch it later. encodeULEB128(UINT32_MAX, W.OS); // The position where the section starts, for measuring its size. Section.ContentsOffset = W.OS.tell(); Section.PayloadOffset = W.OS.tell(); Section.Index = SectionCount++; } void WasmObjectWriter::startCustomSection(SectionBookkeeping &Section, StringRef Name) { LLVM_DEBUG(dbgs() << "startCustomSection " << Name << "\n"); startSection(Section, wasm::WASM_SEC_CUSTOM); // The position where the section header ends, for measuring its size. Section.PayloadOffset = W.OS.tell(); // Custom sections in wasm also have a string identifier. writeString(Name); // The position where the custom section starts. Section.ContentsOffset = W.OS.tell(); } // Now that the section is complete and we know how big it is, patch up the // section size field at the start of the section. void WasmObjectWriter::endSection(SectionBookkeeping &Section) { - uint64_t Size = W.OS.tell() - Section.PayloadOffset; + uint64_t Size = W.OS.tell(); + // /dev/null doesn't support seek/tell and can report offset of 0. + // Simply skip this patching in that case. + if (!Size) + return; + + Size -= Section.PayloadOffset; if (uint32_t(Size) != Size) report_fatal_error("section size does not fit in a uint32_t"); LLVM_DEBUG(dbgs() << "endSection size=" << Size << "\n"); // Write the final section size to the payload_len field, which follows // the section id byte. uint8_t Buffer[16]; unsigned SizeLen = encodeULEB128(Size, Buffer, 5); assert(SizeLen == 5); static_cast(W.OS).pwrite((char *)Buffer, SizeLen, Section.SizeOffset); } // Emit the Wasm header. void WasmObjectWriter::writeHeader(const MCAssembler &Asm) { W.OS.write(wasm::WasmMagic, sizeof(wasm::WasmMagic)); W.write(wasm::WasmVersion); } void WasmObjectWriter::executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) { // Build a map of sections to the function that defines them, for use // in recordRelocation. 
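// Editorial sketch, standard C++ with a std::vector standing in for the output
// stream (helper names invented), of the reserve-then-backpatch pattern used
// by startSection/endSection above: a fixed-width five-byte ULEB128
// placeholder is emitted where the section size belongs, and once the body has
// been written the real size is re-encoded, padded to the same five bytes,
// over the placeholder. The new guard above simply skips the patch when the
// stream cannot seek and reports an offset of 0 (e.g. /dev/null).
#include <cstddef>
#include <cstdint>
#include <vector>

static void writePaddedULEB32(std::vector<uint8_t> &Out, std::size_t Pos,
                              uint32_t X) {
  for (int I = 0; I < 5; ++I) {
    uint8_t Byte = X & 0x7F;
    X >>= 7;
    if (I != 4)
      Byte |= 0x80; // continuation bit on every byte except the last
    Out[Pos + I] = Byte;
  }
}

static std::size_t beginSection(std::vector<uint8_t> &Out, uint8_t SectionId) {
  Out.push_back(SectionId);
  std::size_t SizeOffset = Out.size();
  Out.insert(Out.end(), 4, 0x80); // five-byte placeholder, patched later
  Out.push_back(0x00);
  return SizeOffset;
}

static void endSectionAt(std::vector<uint8_t> &Out, std::size_t SizeOffset) {
  uint32_t Size = uint32_t(Out.size() - (SizeOffset + 5));
  writePaddedULEB32(Out, SizeOffset, Size);
}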
for (const MCSymbol &S : Asm.symbols()) { const auto &WS = static_cast(S); if (WS.isDefined() && WS.isFunction() && !WS.isVariable()) { const auto &Sec = static_cast(S.getSection()); auto Pair = SectionFunctions.insert(std::make_pair(&Sec, &S)); if (!Pair.second) report_fatal_error("section already has a defining function: " + Sec.getSectionName()); } } } void WasmObjectWriter::recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { MCAsmBackend &Backend = Asm.getBackend(); bool IsPCRel = Backend.getFixupKindInfo(Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel; const auto &FixupSection = cast(*Fragment->getParent()); uint64_t C = Target.getConstant(); uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); MCContext &Ctx = Asm.getContext(); // The .init_array isn't translated as data, so don't do relocations in it. if (FixupSection.getSectionName().startswith(".init_array")) return; if (const MCSymbolRefExpr *RefB = Target.getSymB()) { assert(RefB->getKind() == MCSymbolRefExpr::VK_None && "Should not have constructed this"); // Let A, B and C being the components of Target and R be the location of // the fixup. If the fixup is not pcrel, we want to compute (A - B + C). // If it is pcrel, we want to compute (A - B + C - R). // In general, Wasm has no relocations for -B. It can only represent (A + C) // or (A + C - R). If B = R + K and the relocation is not pcrel, we can // replace B to implement it: (A - R - K + C) if (IsPCRel) { Ctx.reportError( Fixup.getLoc(), "No relocation available to represent this relative expression"); return; } const auto &SymB = cast(RefB->getSymbol()); if (SymB.isUndefined()) { Ctx.reportError(Fixup.getLoc(), Twine("symbol '") + SymB.getName() + "' can not be undefined in a subtraction expression"); return; } assert(!SymB.isAbsolute() && "Should have been folded"); const MCSection &SecB = SymB.getSection(); if (&SecB != &FixupSection) { Ctx.reportError(Fixup.getLoc(), "Cannot represent a difference across sections"); return; } uint64_t SymBOffset = Layout.getSymbolOffset(SymB); uint64_t K = SymBOffset - FixupOffset; IsPCRel = true; C -= K; } // We either rejected the fixup or folded B into C at this point. const MCSymbolRefExpr *RefA = Target.getSymA(); const auto *SymA = RefA ? cast(&RefA->getSymbol()) : nullptr; if (SymA && SymA->isVariable()) { const MCExpr *Expr = SymA->getVariableValue(); const auto *Inner = cast(Expr); if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF) llvm_unreachable("weakref used in reloc not yet implemented"); } // Put any constant offset in an addend. Offsets can be negative, and // LLVM expects wrapping, in contrast to wasm's immediates which can't // be negative and don't wrap. FixedValue = 0; unsigned Type = getRelocType(Target, Fixup); assert(!IsPCRel); assert(SymA); // Absolute offset within a section or a function. // Currently only supported for for metadata sections. 
// See: test/MC/WebAssembly/blockaddress.ll if (Type == wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32 || Type == wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32) { if (!FixupSection.getKind().isMetadata()) report_fatal_error("relocations for function or section offsets are " "only supported in metadata sections"); const MCSymbol *SectionSymbol = nullptr; const MCSection &SecA = SymA->getSection(); if (SecA.getKind().isText()) SectionSymbol = SectionFunctions.find(&SecA)->second; else SectionSymbol = SecA.getBeginSymbol(); if (!SectionSymbol) report_fatal_error("section symbol is required for relocation"); C += Layout.getSymbolOffset(*SymA); SymA = cast(SectionSymbol); } // Relocation other than R_WEBASSEMBLY_TYPE_INDEX_LEB are required to be // against a named symbol. if (Type != wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB) { if (SymA->getName().empty()) report_fatal_error("relocations against un-named temporaries are not yet " "supported by wasm"); SymA->setUsedInReloc(); } WasmRelocationEntry Rec(FixupOffset, SymA, C, Type, &FixupSection); LLVM_DEBUG(dbgs() << "WasmReloc: " << Rec << "\n"); if (FixupSection.isWasmData()) { DataRelocations.push_back(Rec); } else if (FixupSection.getKind().isText()) { CodeRelocations.push_back(Rec); } else if (FixupSection.getKind().isMetadata()) { CustomSectionsRelocations[&FixupSection].push_back(Rec); } else { llvm_unreachable("unexpected section type"); } } // Write X as an (unsigned) LEB value at offset Offset in Stream, padded // to allow patching. static void WritePatchableLEB(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) { uint8_t Buffer[5]; unsigned SizeLen = encodeULEB128(X, Buffer, 5); assert(SizeLen == 5); Stream.pwrite((char *)Buffer, SizeLen, Offset); } // Write X as an signed LEB value at offset Offset in Stream, padded // to allow patching. static void WritePatchableSLEB(raw_pwrite_stream &Stream, int32_t X, uint64_t Offset) { uint8_t Buffer[5]; unsigned SizeLen = encodeSLEB128(X, Buffer, 5); assert(SizeLen == 5); Stream.pwrite((char *)Buffer, SizeLen, Offset); } // Write X as a plain integer value at offset Offset in Stream. static void WriteI32(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) { uint8_t Buffer[4]; support::endian::write32le(Buffer, X); Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset); } static const MCSymbolWasm *ResolveSymbol(const MCSymbolWasm &Symbol) { if (Symbol.isVariable()) { const MCExpr *Expr = Symbol.getVariableValue(); auto *Inner = cast(Expr); return cast(&Inner->getSymbol()); } return &Symbol; } // Compute a value to write into the code at the location covered // by RelEntry. This value isn't used by the static linker; it just serves // to make the object format more readable and more likely to be directly // useable. 
uint32_t WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry) { switch (RelEntry.Type) { case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: { // Provisional value is table address of the resolved symbol itself const MCSymbolWasm *Sym = ResolveSymbol(*RelEntry.Symbol); assert(Sym->isFunction()); return TableIndices[Sym]; } case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: // Provisional value is same as the index return getRelocationIndexValue(RelEntry); case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB: case wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB: // Provisional value is function/global/event Wasm index if (!WasmIndices.count(RelEntry.Symbol)) report_fatal_error("symbol not found in wasm index space: " + RelEntry.Symbol->getName()); return WasmIndices[RelEntry.Symbol]; case wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32: case wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32: { const auto &Section = static_cast(RelEntry.Symbol->getSection()); return Section.getSectionOffset() + RelEntry.Addend; } case wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB: case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32: case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB: { // Provisional value is address of the global const MCSymbolWasm *Sym = ResolveSymbol(*RelEntry.Symbol); // For undefined symbols, use zero if (!Sym->isDefined()) return 0; const wasm::WasmDataReference &Ref = DataLocations[Sym]; const WasmDataSegment &Segment = DataSegments[Ref.Segment]; // Ignore overflow. LLVM allows address arithmetic to silently wrap. return Segment.Offset + Ref.Offset + RelEntry.Addend; } default: llvm_unreachable("invalid relocation type"); } } static void addData(SmallVectorImpl &DataBytes, MCSectionWasm &DataSection) { LLVM_DEBUG(errs() << "addData: " << DataSection.getSectionName() << "\n"); DataBytes.resize(alignTo(DataBytes.size(), DataSection.getAlignment())); for (const MCFragment &Frag : DataSection) { if (Frag.hasInstructions()) report_fatal_error("only data supported in data sections"); if (auto *Align = dyn_cast(&Frag)) { if (Align->getValueSize() != 1) report_fatal_error("only byte values supported for alignment"); // If nops are requested, use zeros, as this is the data section. uint8_t Value = Align->hasEmitNops() ? 
0 : Align->getValue(); uint64_t Size = std::min(alignTo(DataBytes.size(), Align->getAlignment()), DataBytes.size() + Align->getMaxBytesToEmit()); DataBytes.resize(Size, Value); } else if (auto *Fill = dyn_cast(&Frag)) { int64_t NumValues; if (!Fill->getNumValues().evaluateAsAbsolute(NumValues)) llvm_unreachable("The fill should be an assembler constant"); DataBytes.insert(DataBytes.end(), Fill->getValueSize() * NumValues, Fill->getValue()); } else if (auto *LEB = dyn_cast(&Frag)) { const SmallVectorImpl &Contents = LEB->getContents(); DataBytes.insert(DataBytes.end(), Contents.begin(), Contents.end()); } else { const auto &DataFrag = cast(Frag); const SmallVectorImpl &Contents = DataFrag.getContents(); DataBytes.insert(DataBytes.end(), Contents.begin(), Contents.end()); } } LLVM_DEBUG(dbgs() << "addData -> " << DataBytes.size() << "\n"); } uint32_t WasmObjectWriter::getRelocationIndexValue(const WasmRelocationEntry &RelEntry) { if (RelEntry.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB) { if (!TypeIndices.count(RelEntry.Symbol)) report_fatal_error("symbol not found in type index space: " + RelEntry.Symbol->getName()); return TypeIndices[RelEntry.Symbol]; } return RelEntry.Symbol->getIndex(); } // Apply the portions of the relocation records that we can handle ourselves // directly. void WasmObjectWriter::applyRelocations( ArrayRef Relocations, uint64_t ContentsOffset) { auto &Stream = static_cast(W.OS); for (const WasmRelocationEntry &RelEntry : Relocations) { uint64_t Offset = ContentsOffset + RelEntry.FixupSection->getSectionOffset() + RelEntry.Offset; LLVM_DEBUG(dbgs() << "applyRelocation: " << RelEntry << "\n"); uint32_t Value = getProvisionalValue(RelEntry); switch (RelEntry.Type) { case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB: case wasm::R_WEBASSEMBLY_MEMORY_ADDR_LEB: case wasm::R_WEBASSEMBLY_EVENT_INDEX_LEB: WritePatchableLEB(Stream, Value, Offset); break; case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: case wasm::R_WEBASSEMBLY_MEMORY_ADDR_I32: case wasm::R_WEBASSEMBLY_FUNCTION_OFFSET_I32: case wasm::R_WEBASSEMBLY_SECTION_OFFSET_I32: WriteI32(Stream, Value, Offset); break; case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: case wasm::R_WEBASSEMBLY_MEMORY_ADDR_SLEB: WritePatchableSLEB(Stream, Value, Offset); break; default: llvm_unreachable("invalid relocation type"); } } } void WasmObjectWriter::writeTypeSection(ArrayRef Signatures) { if (Signatures.empty()) return; SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_TYPE); encodeULEB128(Signatures.size(), W.OS); for (const WasmSignature &Sig : Signatures) { W.OS << char(wasm::WASM_TYPE_FUNC); encodeULEB128(Sig.Params.size(), W.OS); for (wasm::ValType Ty : Sig.Params) writeValueType(Ty); encodeULEB128(Sig.Returns.size(), W.OS); for (wasm::ValType Ty : Sig.Returns) writeValueType(Ty); } endSection(Section); } void WasmObjectWriter::writeImportSection(ArrayRef Imports, uint32_t DataSize, uint32_t NumElements) { if (Imports.empty()) return; uint32_t NumPages = (DataSize + wasm::WasmPageSize - 1) / wasm::WasmPageSize; SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_IMPORT); encodeULEB128(Imports.size(), W.OS); for (const wasm::WasmImport &Import : Imports) { writeString(Import.Module); writeString(Import.Field); W.OS << char(Import.Kind); switch (Import.Kind) { case wasm::WASM_EXTERNAL_FUNCTION: encodeULEB128(Import.SigIndex, W.OS); break; case wasm::WASM_EXTERNAL_GLOBAL: W.OS << char(Import.Global.Type); W.OS << char(Import.Global.Mutable ? 
1 : 0); break; case wasm::WASM_EXTERNAL_MEMORY: encodeULEB128(0, W.OS); // flags encodeULEB128(NumPages, W.OS); // initial break; case wasm::WASM_EXTERNAL_TABLE: W.OS << char(Import.Table.ElemType); encodeULEB128(0, W.OS); // flags encodeULEB128(NumElements, W.OS); // initial break; case wasm::WASM_EXTERNAL_EVENT: encodeULEB128(Import.Event.Attribute, W.OS); encodeULEB128(Import.Event.SigIndex, W.OS); break; default: llvm_unreachable("unsupported import kind"); } } endSection(Section); } void WasmObjectWriter::writeFunctionSection(ArrayRef Functions) { if (Functions.empty()) return; SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_FUNCTION); encodeULEB128(Functions.size(), W.OS); for (const WasmFunction &Func : Functions) encodeULEB128(Func.SigIndex, W.OS); endSection(Section); } void WasmObjectWriter::writeGlobalSection() { if (Globals.empty()) return; SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_GLOBAL); encodeULEB128(Globals.size(), W.OS); for (const WasmGlobal &Global : Globals) { writeValueType(static_cast(Global.Type.Type)); W.OS << char(Global.Type.Mutable); W.OS << char(wasm::WASM_OPCODE_I32_CONST); encodeSLEB128(Global.InitialValue, W.OS); W.OS << char(wasm::WASM_OPCODE_END); } endSection(Section); } void WasmObjectWriter::writeEventSection(ArrayRef Events) { if (Events.empty()) return; SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_EVENT); encodeULEB128(Events.size(), W.OS); for (const wasm::WasmEventType &Event : Events) { encodeULEB128(Event.Attribute, W.OS); encodeULEB128(Event.SigIndex, W.OS); } endSection(Section); } void WasmObjectWriter::writeExportSection(ArrayRef Exports) { if (Exports.empty()) return; SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_EXPORT); encodeULEB128(Exports.size(), W.OS); for (const wasm::WasmExport &Export : Exports) { writeString(Export.Name); W.OS << char(Export.Kind); encodeULEB128(Export.Index, W.OS); } endSection(Section); } void WasmObjectWriter::writeElemSection(ArrayRef TableElems) { if (TableElems.empty()) return; SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_ELEM); encodeULEB128(1, W.OS); // number of "segments" encodeULEB128(0, W.OS); // the table index // init expr for starting offset W.OS << char(wasm::WASM_OPCODE_I32_CONST); encodeSLEB128(kInitialTableOffset, W.OS); W.OS << char(wasm::WASM_OPCODE_END); encodeULEB128(TableElems.size(), W.OS); for (uint32_t Elem : TableElems) encodeULEB128(Elem, W.OS); endSection(Section); } void WasmObjectWriter::writeCodeSection(const MCAssembler &Asm, const MCAsmLayout &Layout, ArrayRef Functions) { if (Functions.empty()) return; SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_CODE); CodeSectionIndex = Section.Index; encodeULEB128(Functions.size(), W.OS); for (const WasmFunction &Func : Functions) { auto &FuncSection = static_cast(Func.Sym->getSection()); int64_t Size = 0; if (!Func.Sym->getSize()->evaluateAsAbsolute(Size, Layout)) report_fatal_error(".size expression must be evaluatable"); encodeULEB128(Size, W.OS); FuncSection.setSectionOffset(W.OS.tell() - Section.ContentsOffset); Asm.writeSectionData(W.OS, &FuncSection, Layout); } // Apply fixups. 
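Every count, size and index emitted by the section writers above goes through encodeULEB128/encodeSLEB128. As a reminder of the format (a minimal sketch, not LLVM's implementation in llvm/Support/LEB128.h), ULEB128 stores seven payload bits per byte and uses the high bit as a continuation marker:

#include <cstdint>
#include <vector>

// Minimal sketch of unsigned LEB128 encoding (assumption: output into a plain
// byte vector rather than the raw_ostream used by the writer above).
static void encodeULEB128Sketch(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f; // low seven bits of the value
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;              // set the continuation bit
    Out.push_back(Byte);
  } while (Value != 0);
}

For example, 624485 encodes as the three bytes 0xE5 0x8E 0x26.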
applyRelocations(CodeRelocations, Section.ContentsOffset); endSection(Section); } void WasmObjectWriter::writeDataSection() { if (DataSegments.empty()) return; SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_DATA); DataSectionIndex = Section.Index; encodeULEB128(DataSegments.size(), W.OS); // count for (const WasmDataSegment &Segment : DataSegments) { encodeULEB128(0, W.OS); // memory index W.OS << char(wasm::WASM_OPCODE_I32_CONST); encodeSLEB128(Segment.Offset, W.OS); // offset W.OS << char(wasm::WASM_OPCODE_END); encodeULEB128(Segment.Data.size(), W.OS); // size Segment.Section->setSectionOffset(W.OS.tell() - Section.ContentsOffset); W.OS << Segment.Data; // data } // Apply fixups. applyRelocations(DataRelocations, Section.ContentsOffset); endSection(Section); } void WasmObjectWriter::writeRelocSection( uint32_t SectionIndex, StringRef Name, std::vector &Relocs) { // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md // for descriptions of the reloc sections. if (Relocs.empty()) return; // First, ensure the relocations are sorted in offset order. In general they // should already be sorted since `recordRelocation` is called in offset // order, but for the code section we combine many MC sections into single // wasm section, and this order is determined by the order of Asm.Symbols() // not the sections order. std::stable_sort( Relocs.begin(), Relocs.end(), [](const WasmRelocationEntry &A, const WasmRelocationEntry &B) { return (A.Offset + A.FixupSection->getSectionOffset()) < (B.Offset + B.FixupSection->getSectionOffset()); }); SectionBookkeeping Section; startCustomSection(Section, std::string("reloc.") + Name.str()); encodeULEB128(SectionIndex, W.OS); encodeULEB128(Relocs.size(), W.OS); for (const WasmRelocationEntry &RelEntry : Relocs) { uint64_t Offset = RelEntry.Offset + RelEntry.FixupSection->getSectionOffset(); uint32_t Index = getRelocationIndexValue(RelEntry); W.OS << char(RelEntry.Type); encodeULEB128(Offset, W.OS); encodeULEB128(Index, W.OS); if (RelEntry.hasAddend()) encodeSLEB128(RelEntry.Addend, W.OS); } endSection(Section); } void WasmObjectWriter::writeCustomRelocSections() { for (const auto &Sec : CustomSections) { auto &Relocations = CustomSectionsRelocations[Sec.Section]; writeRelocSection(Sec.OutputIndex, Sec.Name, Relocations); } } void WasmObjectWriter::writeLinkingMetaDataSection( ArrayRef SymbolInfos, ArrayRef> InitFuncs, const std::map> &Comdats) { SectionBookkeeping Section; startCustomSection(Section, "linking"); encodeULEB128(wasm::WasmMetadataVersion, W.OS); SectionBookkeeping SubSection; if (SymbolInfos.size() != 0) { startSection(SubSection, wasm::WASM_SYMBOL_TABLE); encodeULEB128(SymbolInfos.size(), W.OS); for (const wasm::WasmSymbolInfo &Sym : SymbolInfos) { encodeULEB128(Sym.Kind, W.OS); encodeULEB128(Sym.Flags, W.OS); switch (Sym.Kind) { case wasm::WASM_SYMBOL_TYPE_FUNCTION: case wasm::WASM_SYMBOL_TYPE_GLOBAL: case wasm::WASM_SYMBOL_TYPE_EVENT: encodeULEB128(Sym.ElementIndex, W.OS); if ((Sym.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0 || (Sym.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0) writeString(Sym.Name); break; case wasm::WASM_SYMBOL_TYPE_DATA: writeString(Sym.Name); if ((Sym.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0) { encodeULEB128(Sym.DataRef.Segment, W.OS); encodeULEB128(Sym.DataRef.Offset, W.OS); encodeULEB128(Sym.DataRef.Size, W.OS); } break; case wasm::WASM_SYMBOL_TYPE_SECTION: { const uint32_t SectionIndex = CustomSections[Sym.ElementIndex].OutputIndex; encodeULEB128(SectionIndex, W.OS); break; } 
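The applyRelocations calls above rewrite LEB-encoded operands in place, which only works if relocatable LEB fields were emitted in a fixed-width, padded form (five bytes for a 32-bit value). A sketch of that padding idea, assuming a raw byte buffer and a hypothetical helper name rather than the writer's own WritePatchableLEB:

#include <cstdint>

// Hypothetical helper: encode Value as exactly five LEB128 bytes so it can
// later be overwritten at a fixed offset without resizing the section.
static void writePaddedULEB128(uint8_t *Dest, uint32_t Value) {
  for (int I = 0; I < 5; ++I) {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (I != 4)
      Byte |= 0x80; // keep the continuation bit set to force the 5-byte form
    Dest[I] = Byte;
  }
}

Patching then amounts to re-encoding the provisional value at ContentsOffset plus the fixup section's offset plus the relocation offset, as computed in applyRelocations.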
default: llvm_unreachable("unexpected kind"); } } endSection(SubSection); } if (DataSegments.size()) { startSection(SubSection, wasm::WASM_SEGMENT_INFO); encodeULEB128(DataSegments.size(), W.OS); for (const WasmDataSegment &Segment : DataSegments) { writeString(Segment.Name); encodeULEB128(Segment.Alignment, W.OS); encodeULEB128(Segment.Flags, W.OS); } endSection(SubSection); } if (!InitFuncs.empty()) { startSection(SubSection, wasm::WASM_INIT_FUNCS); encodeULEB128(InitFuncs.size(), W.OS); for (auto &StartFunc : InitFuncs) { encodeULEB128(StartFunc.first, W.OS); // priority encodeULEB128(StartFunc.second, W.OS); // function index } endSection(SubSection); } if (Comdats.size()) { startSection(SubSection, wasm::WASM_COMDAT_INFO); encodeULEB128(Comdats.size(), W.OS); for (const auto &C : Comdats) { writeString(C.first); encodeULEB128(0, W.OS); // flags for future use encodeULEB128(C.second.size(), W.OS); for (const WasmComdatEntry &Entry : C.second) { encodeULEB128(Entry.Kind, W.OS); encodeULEB128(Entry.Index, W.OS); } } endSection(SubSection); } endSection(Section); } void WasmObjectWriter::writeCustomSections(const MCAssembler &Asm, const MCAsmLayout &Layout) { for (auto &CustomSection : CustomSections) { SectionBookkeeping Section; auto *Sec = CustomSection.Section; startCustomSection(Section, CustomSection.Name); Sec->setSectionOffset(W.OS.tell() - Section.ContentsOffset); Asm.writeSectionData(W.OS, Sec, Layout); CustomSection.OutputContentsOffset = Section.ContentsOffset; CustomSection.OutputIndex = Section.Index; endSection(Section); // Apply fixups. auto &Relocations = CustomSectionsRelocations[CustomSection.Section]; applyRelocations(Relocations, CustomSection.OutputContentsOffset); } } uint32_t WasmObjectWriter::getFunctionType(const MCSymbolWasm &Symbol) { assert(Symbol.isFunction()); assert(TypeIndices.count(&Symbol)); return TypeIndices[&Symbol]; } uint32_t WasmObjectWriter::getEventType(const MCSymbolWasm &Symbol) { assert(Symbol.isEvent()); assert(TypeIndices.count(&Symbol)); return TypeIndices[&Symbol]; } void WasmObjectWriter::registerFunctionType(const MCSymbolWasm &Symbol) { assert(Symbol.isFunction()); WasmSignature S; const MCSymbolWasm *ResolvedSym = ResolveSymbol(Symbol); if (auto *Sig = ResolvedSym->getSignature()) { S.Returns = Sig->Returns; S.Params = Sig->Params; } auto Pair = SignatureIndices.insert(std::make_pair(S, Signatures.size())); if (Pair.second) Signatures.push_back(S); TypeIndices[&Symbol] = Pair.first->second; LLVM_DEBUG(dbgs() << "registerFunctionType: " << Symbol << " new:" << Pair.second << "\n"); LLVM_DEBUG(dbgs() << " -> type index: " << Pair.first->second << "\n"); } void WasmObjectWriter::registerEventType(const MCSymbolWasm &Symbol) { assert(Symbol.isEvent()); // TODO Currently we don't generate imported exceptions, but if we do, we // should have a way of infering types of imported exceptions. 
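Both registerFunctionType and registerEventType rely on the same interning idiom: insert into SignatureIndices with the would-be index, and append to Signatures only when the insertion actually happened. A compact sketch of that idiom with stand-in types (Sig here is hypothetical, not WasmSignature):

#include <cstdint>
#include <map>
#include <utility>
#include <vector>

// Stand-in for WasmSignature: ordered containers so std::map can compare keys.
using Sig = std::pair<std::vector<int>, std::vector<int>>; // (params, returns)

// Returns the type index for S, adding it to Table only on first sight.
static uint32_t internSignature(const Sig &S, std::map<Sig, uint32_t> &Indices,
                                std::vector<Sig> &Table) {
  auto Pair = Indices.insert({S, static_cast<uint32_t>(Table.size())});
  if (Pair.second)            // key was not present: this signature is new
    Table.push_back(S);
  return Pair.first->second;  // canonical index, new or pre-existing
}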
WasmSignature S; if (auto *Sig = Symbol.getSignature()) { S.Returns = Sig->Returns; S.Params = Sig->Params; } auto Pair = SignatureIndices.insert(std::make_pair(S, Signatures.size())); if (Pair.second) Signatures.push_back(S); TypeIndices[&Symbol] = Pair.first->second; LLVM_DEBUG(dbgs() << "registerEventType: " << Symbol << " new:" << Pair.second << "\n"); LLVM_DEBUG(dbgs() << " -> type index: " << Pair.first->second << "\n"); } static bool isInSymtab(const MCSymbolWasm &Sym) { if (Sym.isUsedInReloc()) return true; if (Sym.isComdat() && !Sym.isDefined()) return false; if (Sym.isTemporary() && Sym.getName().empty()) return false; if (Sym.isTemporary() && Sym.isData() && !Sym.getSize()) return false; if (Sym.isSection()) return false; return true; } uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { uint64_t StartOffset = W.OS.tell(); LLVM_DEBUG(dbgs() << "WasmObjectWriter::writeObject\n"); MCContext &Ctx = Asm.getContext(); // Collect information from the available symbols. SmallVector Functions; SmallVector TableElems; SmallVector Imports; SmallVector Exports; SmallVector Events; SmallVector SymbolInfos; SmallVector, 2> InitFuncs; std::map> Comdats; uint32_t DataSize = 0; // For now, always emit the memory import, since loads and stores are not // valid without it. In the future, we could perhaps be more clever and omit // it if there are no loads or stores. MCSymbolWasm *MemorySym = cast(Ctx.getOrCreateSymbol("__linear_memory")); wasm::WasmImport MemImport; MemImport.Module = MemorySym->getImportModule(); MemImport.Field = MemorySym->getImportName(); MemImport.Kind = wasm::WASM_EXTERNAL_MEMORY; Imports.push_back(MemImport); // For now, always emit the table section, since indirect calls are not // valid without it. In the future, we could perhaps be more clever and omit // it if there are no indirect calls. MCSymbolWasm *TableSym = cast(Ctx.getOrCreateSymbol("__indirect_function_table")); wasm::WasmImport TableImport; TableImport.Module = TableSym->getImportModule(); TableImport.Field = TableSym->getImportName(); TableImport.Kind = wasm::WASM_EXTERNAL_TABLE; TableImport.Table.ElemType = wasm::WASM_TYPE_FUNCREF; Imports.push_back(TableImport); // Populate SignatureIndices, and Imports and WasmIndices for undefined // symbols. This must be done before populating WasmIndices for defined // symbols. for (const MCSymbol &S : Asm.symbols()) { const auto &WS = static_cast(S); // Register types for all functions, including those with private linkage // (because wasm always needs a type signature). if (WS.isFunction()) registerFunctionType(WS); if (WS.isEvent()) registerEventType(WS); if (WS.isTemporary()) continue; // If the symbol is not defined in this translation unit, import it. 
if (!WS.isDefined() && !WS.isComdat()) { if (WS.isFunction()) { wasm::WasmImport Import; Import.Module = WS.getImportModule(); Import.Field = WS.getImportName(); Import.Kind = wasm::WASM_EXTERNAL_FUNCTION; Import.SigIndex = getFunctionType(WS); Imports.push_back(Import); WasmIndices[&WS] = NumFunctionImports++; } else if (WS.isGlobal()) { if (WS.isWeak()) report_fatal_error("undefined global symbol cannot be weak"); wasm::WasmImport Import; Import.Module = WS.getImportModule(); Import.Field = WS.getImportName(); Import.Kind = wasm::WASM_EXTERNAL_GLOBAL; Import.Global = WS.getGlobalType(); Imports.push_back(Import); WasmIndices[&WS] = NumGlobalImports++; } else if (WS.isEvent()) { if (WS.isWeak()) report_fatal_error("undefined event symbol cannot be weak"); wasm::WasmImport Import; Import.Module = WS.getImportModule(); Import.Field = WS.getImportName(); Import.Kind = wasm::WASM_EXTERNAL_EVENT; Import.Event.Attribute = wasm::WASM_EVENT_ATTRIBUTE_EXCEPTION; Import.Event.SigIndex = getEventType(WS); Imports.push_back(Import); WasmIndices[&WS] = NumEventImports++; } } } // Populate DataSegments and CustomSections, which must be done before // populating DataLocations. for (MCSection &Sec : Asm) { auto &Section = static_cast(Sec); StringRef SectionName = Section.getSectionName(); // .init_array sections are handled specially elsewhere. if (SectionName.startswith(".init_array")) continue; // Code is handled separately if (Section.getKind().isText()) continue; if (Section.isWasmData()) { uint32_t SegmentIndex = DataSegments.size(); DataSize = alignTo(DataSize, Section.getAlignment()); DataSegments.emplace_back(); WasmDataSegment &Segment = DataSegments.back(); Segment.Name = SectionName; Segment.Offset = DataSize; Segment.Section = &Section; addData(Segment.Data, Section); Segment.Alignment = Log2_32(Section.getAlignment()); Segment.Flags = 0; DataSize += Segment.Data.size(); Section.setSegmentIndex(SegmentIndex); if (const MCSymbolWasm *C = Section.getGroup()) { Comdats[C->getName()].emplace_back( WasmComdatEntry{wasm::WASM_COMDAT_DATA, SegmentIndex}); } } else { // Create custom sections assert(Sec.getKind().isMetadata()); StringRef Name = SectionName; // For user-defined custom sections, strip the prefix if (Name.startswith(".custom_section.")) Name = Name.substr(strlen(".custom_section.")); MCSymbol *Begin = Sec.getBeginSymbol(); if (Begin) { WasmIndices[cast(Begin)] = CustomSections.size(); if (SectionName != Begin->getName()) report_fatal_error("section name and begin symbol should match: " + Twine(SectionName)); } CustomSections.emplace_back(Name, &Section); } } // Populate WasmIndices and DataLocations for defined symbols. for (const MCSymbol &S : Asm.symbols()) { // Ignore unnamed temporary symbols, which aren't ever exported, imported, // or used in relocations. if (S.isTemporary() && S.getName().empty()) continue; const auto &WS = static_cast(S); LLVM_DEBUG( dbgs() << "MCSymbol: " << toString(WS.getType()) << " '" << S << "'" << " isDefined=" << S.isDefined() << " isExternal=" << S.isExternal() << " isTemporary=" << S.isTemporary() << " isWeak=" << WS.isWeak() << " isHidden=" << WS.isHidden() << " isVariable=" << WS.isVariable() << "\n"); if (WS.isVariable()) continue; if (WS.isComdat() && !WS.isDefined()) continue; if (WS.isFunction()) { unsigned Index; if (WS.isDefined()) { if (WS.getOffset() != 0) report_fatal_error( "function sections must contain one function each"); if (WS.getSize() == 0) report_fatal_error( "function symbols must have a size set with .size"); // A definition. 
Write out the function body. Index = NumFunctionImports + Functions.size(); WasmFunction Func; Func.SigIndex = getFunctionType(WS); Func.Sym = &WS; WasmIndices[&WS] = Index; Functions.push_back(Func); auto &Section = static_cast(WS.getSection()); if (const MCSymbolWasm *C = Section.getGroup()) { Comdats[C->getName()].emplace_back( WasmComdatEntry{wasm::WASM_COMDAT_FUNCTION, Index}); } } else { // An import; the index was assigned above. Index = WasmIndices.find(&WS)->second; } LLVM_DEBUG(dbgs() << " -> function index: " << Index << "\n"); } else if (WS.isData()) { if (WS.isTemporary() && !WS.getSize()) continue; if (!WS.isDefined()) { LLVM_DEBUG(dbgs() << " -> segment index: -1" << "\n"); continue; } if (!WS.getSize()) report_fatal_error("data symbols must have a size set with .size: " + WS.getName()); int64_t Size = 0; if (!WS.getSize()->evaluateAsAbsolute(Size, Layout)) report_fatal_error(".size expression must be evaluatable"); auto &DataSection = static_cast(WS.getSection()); assert(DataSection.isWasmData()); // For each data symbol, export it in the symtab as a reference to the // corresponding Wasm data segment. wasm::WasmDataReference Ref = wasm::WasmDataReference{ DataSection.getSegmentIndex(), static_cast(Layout.getSymbolOffset(WS)), static_cast(Size)}; DataLocations[&WS] = Ref; LLVM_DEBUG(dbgs() << " -> segment index: " << Ref.Segment << "\n"); } else if (WS.isGlobal()) { // A "true" Wasm global (currently just __stack_pointer) if (WS.isDefined()) report_fatal_error("don't yet support defined globals"); // An import; the index was assigned above LLVM_DEBUG(dbgs() << " -> global index: " << WasmIndices.find(&WS)->second << "\n"); } else if (WS.isEvent()) { // C++ exception symbol (__cpp_exception) unsigned Index; if (WS.isDefined()) { Index = NumEventImports + Events.size(); wasm::WasmEventType Event; Event.SigIndex = getEventType(WS); Event.Attribute = wasm::WASM_EVENT_ATTRIBUTE_EXCEPTION; WasmIndices[&WS] = Index; Events.push_back(Event); } else { // An import; the index was assigned above. Index = WasmIndices.find(&WS)->second; } LLVM_DEBUG(dbgs() << " -> event index: " << WasmIndices.find(&WS)->second << "\n"); } else { assert(WS.isSection()); } } // Populate WasmIndices and DataLocations for aliased symbols. We need to // process these in a separate pass because we need to have processed the // target of the alias before the alias itself and the symbols are not // necessarily ordered in this way. for (const MCSymbol &S : Asm.symbols()) { if (!S.isVariable()) continue; assert(S.isDefined()); // Find the target symbol of this weak alias and export that index const auto &WS = static_cast(S); const MCSymbolWasm *ResolvedSym = ResolveSymbol(WS); LLVM_DEBUG(dbgs() << WS.getName() << ": weak alias of '" << *ResolvedSym << "'\n"); if (WS.isFunction()) { assert(WasmIndices.count(ResolvedSym) > 0); uint32_t WasmIndex = WasmIndices.find(ResolvedSym)->second; WasmIndices[&WS] = WasmIndex; LLVM_DEBUG(dbgs() << " -> index:" << WasmIndex << "\n"); } else if (WS.isData()) { assert(DataLocations.count(ResolvedSym) > 0); const wasm::WasmDataReference &Ref = DataLocations.find(ResolvedSym)->second; DataLocations[&WS] = Ref; LLVM_DEBUG(dbgs() << " -> index:" << Ref.Segment << "\n"); } else { report_fatal_error("don't yet support global/event aliases"); } } // Finally, populate the symbol table itself, in its "natural" order. 
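For defined data symbols the writer does not record a flat address; DataLocations above stores a (segment, offset, size) triple that the linking section later emits, leaving final memory layout to the linker. A minimal sketch of that reference shape (DataRefSketch is a hypothetical stand-in for wasm::WasmDataReference):

#include <cstdint>

// Hypothetical stand-in for wasm::WasmDataReference.
struct DataRefSketch {
  uint32_t Segment; // index of the containing data segment
  uint32_t Offset;  // symbol's byte offset inside that segment
  uint32_t Size;    // value of the symbol's .size expression
};

// Mirrors the DataLocations entry assembled above for a defined data symbol.
static DataRefSketch makeDataRef(uint32_t SegmentIndex, uint64_t SymbolOffset,
                                 uint64_t SymbolSize) {
  return {SegmentIndex, static_cast<uint32_t>(SymbolOffset),
          static_cast<uint32_t>(SymbolSize)};
}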
for (const MCSymbol &S : Asm.symbols()) { const auto &WS = static_cast(S); if (!isInSymtab(WS)) { WS.setIndex(INVALID_INDEX); continue; } LLVM_DEBUG(dbgs() << "adding to symtab: " << WS << "\n"); uint32_t Flags = 0; if (WS.isWeak()) Flags |= wasm::WASM_SYMBOL_BINDING_WEAK; if (WS.isHidden()) Flags |= wasm::WASM_SYMBOL_VISIBILITY_HIDDEN; if (!WS.isExternal() && WS.isDefined()) Flags |= wasm::WASM_SYMBOL_BINDING_LOCAL; if (WS.isUndefined()) Flags |= wasm::WASM_SYMBOL_UNDEFINED; if (WS.getName() != WS.getImportName()) Flags |= wasm::WASM_SYMBOL_EXPLICIT_NAME; wasm::WasmSymbolInfo Info; Info.Name = WS.getName(); Info.Kind = WS.getType(); Info.Flags = Flags; if (!WS.isData()) { assert(WasmIndices.count(&WS) > 0); Info.ElementIndex = WasmIndices.find(&WS)->second; } else if (WS.isDefined()) { assert(DataLocations.count(&WS) > 0); Info.DataRef = DataLocations.find(&WS)->second; } WS.setIndex(SymbolInfos.size()); SymbolInfos.emplace_back(Info); } { auto HandleReloc = [&](const WasmRelocationEntry &Rel) { // Functions referenced by a relocation need to put in the table. This is // purely to make the object file's provisional values readable, and is // ignored by the linker, which re-calculates the relocations itself. if (Rel.Type != wasm::R_WEBASSEMBLY_TABLE_INDEX_I32 && Rel.Type != wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB) return; assert(Rel.Symbol->isFunction()); const MCSymbolWasm &WS = *ResolveSymbol(*Rel.Symbol); uint32_t FunctionIndex = WasmIndices.find(&WS)->second; uint32_t TableIndex = TableElems.size() + kInitialTableOffset; if (TableIndices.try_emplace(&WS, TableIndex).second) { LLVM_DEBUG(dbgs() << " -> adding " << WS.getName() << " to table: " << TableIndex << "\n"); TableElems.push_back(FunctionIndex); registerFunctionType(WS); } }; for (const WasmRelocationEntry &RelEntry : CodeRelocations) HandleReloc(RelEntry); for (const WasmRelocationEntry &RelEntry : DataRelocations) HandleReloc(RelEntry); } // Translate .init_array section contents into start functions. for (const MCSection &S : Asm) { const auto &WS = static_cast(S); if (WS.getSectionName().startswith(".fini_array")) report_fatal_error(".fini_array sections are unsupported"); if (!WS.getSectionName().startswith(".init_array")) continue; if (WS.getFragmentList().empty()) continue; // init_array is expected to contain a single non-empty data fragment if (WS.getFragmentList().size() != 3) report_fatal_error("only one .init_array section fragment supported"); auto IT = WS.begin(); const MCFragment &EmptyFrag = *IT; if (EmptyFrag.getKind() != MCFragment::FT_Data) report_fatal_error(".init_array section should be aligned"); IT = std::next(IT); const MCFragment &AlignFrag = *IT; if (AlignFrag.getKind() != MCFragment::FT_Align) report_fatal_error(".init_array section should be aligned"); if (cast(AlignFrag).getAlignment() != (is64Bit() ? 
8 : 4)) report_fatal_error(".init_array section should be aligned for pointers"); const MCFragment &Frag = *std::next(IT); if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data) report_fatal_error("only data supported in .init_array section"); uint16_t Priority = UINT16_MAX; unsigned PrefixLength = strlen(".init_array"); if (WS.getSectionName().size() > PrefixLength) { if (WS.getSectionName()[PrefixLength] != '.') report_fatal_error( ".init_array section priority should start with '.'"); if (WS.getSectionName() .substr(PrefixLength + 1) .getAsInteger(10, Priority)) report_fatal_error("invalid .init_array section priority"); } const auto &DataFrag = cast(Frag); const SmallVectorImpl &Contents = DataFrag.getContents(); for (const uint8_t * p = (const uint8_t *)Contents.data(), *end = (const uint8_t *)Contents.data() + Contents.size(); p != end; ++p) { if (*p != 0) report_fatal_error("non-symbolic data in .init_array section"); } for (const MCFixup &Fixup : DataFrag.getFixups()) { assert(Fixup.getKind() == MCFixup::getKindForSize(is64Bit() ? 8 : 4, false)); const MCExpr *Expr = Fixup.getValue(); auto *Sym = dyn_cast(Expr); if (!Sym) report_fatal_error("fixups in .init_array should be symbol references"); if (Sym->getKind() != MCSymbolRefExpr::VK_WebAssembly_FUNCTION) report_fatal_error("symbols in .init_array should be for functions"); if (Sym->getSymbol().getIndex() == INVALID_INDEX) report_fatal_error("symbols in .init_array should exist in symbtab"); InitFuncs.push_back( std::make_pair(Priority, Sym->getSymbol().getIndex())); } } // Write out the Wasm header. writeHeader(Asm); writeTypeSection(Signatures); writeImportSection(Imports, DataSize, TableElems.size()); writeFunctionSection(Functions); // Skip the "table" section; we import the table instead. // Skip the "memory" section; we import the memory instead. writeGlobalSection(); writeEventSection(Events); writeExportSection(Exports); writeElemSection(TableElems); writeCodeSection(Asm, Layout, Functions); writeDataSection(); writeCustomSections(Asm, Layout); writeLinkingMetaDataSection(SymbolInfos, InitFuncs, Comdats); writeRelocSection(CodeSectionIndex, "CODE", CodeRelocations); writeRelocSection(DataSectionIndex, "DATA", DataRelocations); writeCustomRelocSections(); // TODO: Translate the .comment section to the output. return W.OS.tell() - StartOffset; } std::unique_ptr llvm::createWasmObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS) { return llvm::make_unique(std::move(MOTW), OS); } Index: vendor/llvm/dist-release_80/lib/Object/COFFImportFile.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/Object/COFFImportFile.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/Object/COFFImportFile.cpp (revision 348932) @@ -1,623 +1,623 @@ //===- COFFImportFile.cpp - COFF short import file implementation ---------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines the writeImportLibrary function. 
// //===----------------------------------------------------------------------===// #include "llvm/Object/COFFImportFile.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ArchiveWriter.h" #include "llvm/Object/COFF.h" #include "llvm/Support/Error.h" #include "llvm/Support/Path.h" #include #include #include using namespace llvm::COFF; using namespace llvm::object; using namespace llvm; namespace llvm { namespace object { static bool is32bit(MachineTypes Machine) { switch (Machine) { default: llvm_unreachable("unsupported machine"); case IMAGE_FILE_MACHINE_ARM64: case IMAGE_FILE_MACHINE_AMD64: return false; case IMAGE_FILE_MACHINE_ARMNT: case IMAGE_FILE_MACHINE_I386: return true; } } static uint16_t getImgRelRelocation(MachineTypes Machine) { switch (Machine) { default: llvm_unreachable("unsupported machine"); case IMAGE_FILE_MACHINE_AMD64: return IMAGE_REL_AMD64_ADDR32NB; case IMAGE_FILE_MACHINE_ARMNT: return IMAGE_REL_ARM_ADDR32NB; case IMAGE_FILE_MACHINE_ARM64: return IMAGE_REL_ARM64_ADDR32NB; case IMAGE_FILE_MACHINE_I386: return IMAGE_REL_I386_DIR32NB; } } template static void append(std::vector &B, const T &Data) { size_t S = B.size(); B.resize(S + sizeof(T)); memcpy(&B[S], &Data, sizeof(T)); } static void writeStringTable(std::vector &B, ArrayRef Strings) { // The COFF string table consists of a 4-byte value which is the size of the // table, including the length field itself. This value is followed by the // string content itself, which is an array of null-terminated C-style // strings. The termination is important as they are referenced to by offset // by the symbol entity in the file format. size_t Pos = B.size(); size_t Offset = B.size(); // Skip over the length field, we will fill it in later as we will have // computed the length while emitting the string content itself. Pos += sizeof(uint32_t); for (const auto &S : Strings) { B.resize(Pos + S.length() + 1); strcpy(reinterpret_cast(&B[Pos]), S.c_str()); Pos += S.length() + 1; } // Backfill the length of the table now that it has been computed. support::ulittle32_t Length(B.size() - Offset); support::endian::write32le(&B[Offset], Length); } static ImportNameType getNameType(StringRef Sym, StringRef ExtName, MachineTypes Machine, bool MinGW) { // A decorated stdcall function in MSVC is exported with the // type IMPORT_NAME, and the exported function name includes the // the leading underscore. In MinGW on the other hand, a decorated // stdcall function still omits the underscore (IMPORT_NAME_NOPREFIX). // See the comment in isDecorated in COFFModuleDefinition.cpp for more // details. if (ExtName.startswith("_") && ExtName.contains('@') && !MinGW) return IMPORT_NAME; if (Sym != ExtName) return IMPORT_NAME_UNDECORATE; if (Machine == IMAGE_FILE_MACHINE_I386 && Sym.startswith("_")) return IMPORT_NAME_NOPREFIX; return IMPORT_NAME; } static Expected replace(StringRef S, StringRef From, StringRef To) { size_t Pos = S.find(From); // From and To may be mangled, but substrings in S may not. 
if (Pos == StringRef::npos && From.startswith("_") && To.startswith("_")) { From = From.substr(1); To = To.substr(1); Pos = S.find(From); } if (Pos == StringRef::npos) { return make_error( StringRef(Twine(S + ": replacing '" + From + "' with '" + To + "' failed").str()), object_error::parse_failed); } return (Twine(S.substr(0, Pos)) + To + S.substr(Pos + From.size())).str(); } static const std::string NullImportDescriptorSymbolName = "__NULL_IMPORT_DESCRIPTOR"; namespace { // This class constructs various small object files necessary to support linking // symbols imported from a DLL. The contents are pretty strictly defined and // nearly entirely static. The details of the structures files are defined in // WINNT.h and the PE/COFF specification. class ObjectFactory { using u16 = support::ulittle16_t; using u32 = support::ulittle32_t; MachineTypes Machine; BumpPtrAllocator Alloc; StringRef ImportName; StringRef Library; std::string ImportDescriptorSymbolName; std::string NullThunkSymbolName; public: ObjectFactory(StringRef S, MachineTypes M) : Machine(M), ImportName(S), Library(S.drop_back(4)), ImportDescriptorSymbolName(("__IMPORT_DESCRIPTOR_" + Library).str()), NullThunkSymbolName(("\x7f" + Library + "_NULL_THUNK_DATA").str()) {} // Creates an Import Descriptor. This is a small object file which contains a // reference to the terminators and contains the library name (entry) for the // import name table. It will force the linker to construct the necessary // structure to import symbols from the DLL. NewArchiveMember createImportDescriptor(std::vector &Buffer); // Creates a NULL import descriptor. This is a small object file whcih // contains a NULL import descriptor. It is used to terminate the imports // from a specific DLL. NewArchiveMember createNullImportDescriptor(std::vector &Buffer); // Create a NULL Thunk Entry. This is a small object file which contains a // NULL Import Address Table entry and a NULL Import Lookup Table Entry. It // is used to terminate the IAT and ILT. NewArchiveMember createNullThunk(std::vector &Buffer); // Create a short import file which is described in PE/COFF spec 7. Import // Library Format. NewArchiveMember createShortImport(StringRef Sym, uint16_t Ordinal, ImportType Type, ImportNameType NameType); // Create a weak external file which is described in PE/COFF Aux Format 3. NewArchiveMember createWeakExternal(StringRef Sym, StringRef Weak, bool Imp); }; } // namespace NewArchiveMember ObjectFactory::createImportDescriptor(std::vector &Buffer) { const uint32_t NumberOfSections = 2; const uint32_t NumberOfSymbols = 7; const uint32_t NumberOfRelocations = 3; // COFF Header coff_file_header Header{ u16(Machine), u16(NumberOfSections), u32(0), u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + // .idata$2 sizeof(coff_import_directory_table_entry) + NumberOfRelocations * sizeof(coff_relocation) + // .idata$4 (ImportName.size() + 1)), u32(NumberOfSymbols), u16(0), u16(is32bit(Machine) ? 
IMAGE_FILE_32BIT_MACHINE : C_Invalid), }; append(Buffer, Header); // Section Header Table const coff_section SectionTable[NumberOfSections] = { {{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}, u32(0), u32(0), u32(sizeof(coff_import_directory_table_entry)), u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)), u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + sizeof(coff_import_directory_table_entry)), u32(0), u16(NumberOfRelocations), u16(0), u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, {{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}, u32(0), u32(0), u32(ImportName.size() + 1), u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + sizeof(coff_import_directory_table_entry) + NumberOfRelocations * sizeof(coff_relocation)), u32(0), u32(0), u16(0), u16(0), u32(IMAGE_SCN_ALIGN_2BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, }; append(Buffer, SectionTable); // .idata$2 const coff_import_directory_table_entry ImportDescriptor{ u32(0), u32(0), u32(0), u32(0), u32(0), }; append(Buffer, ImportDescriptor); const coff_relocation RelocationTable[NumberOfRelocations] = { {u32(offsetof(coff_import_directory_table_entry, NameRVA)), u32(2), u16(getImgRelRelocation(Machine))}, {u32(offsetof(coff_import_directory_table_entry, ImportLookupTableRVA)), u32(3), u16(getImgRelRelocation(Machine))}, {u32(offsetof(coff_import_directory_table_entry, ImportAddressTableRVA)), u32(4), u16(getImgRelRelocation(Machine))}, }; append(Buffer, RelocationTable); // .idata$6 auto S = Buffer.size(); Buffer.resize(S + ImportName.size() + 1); memcpy(&Buffer[S], ImportName.data(), ImportName.size()); Buffer[S + ImportName.size()] = '\0'; // Symbol Table coff_symbol16 SymbolTable[NumberOfSymbols] = { {{{0, 0, 0, 0, 0, 0, 0, 0}}, u32(0), u16(1), u16(0), IMAGE_SYM_CLASS_EXTERNAL, 0}, {{{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}}, u32(0), u16(1), u16(0), IMAGE_SYM_CLASS_SECTION, 0}, {{{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}}, u32(0), u16(2), u16(0), IMAGE_SYM_CLASS_STATIC, 0}, {{{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}}, u32(0), u16(0), u16(0), IMAGE_SYM_CLASS_SECTION, 0}, {{{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}}, u32(0), u16(0), u16(0), IMAGE_SYM_CLASS_SECTION, 0}, {{{0, 0, 0, 0, 0, 0, 0, 0}}, u32(0), u16(0), u16(0), IMAGE_SYM_CLASS_EXTERNAL, 0}, {{{0, 0, 0, 0, 0, 0, 0, 0}}, u32(0), u16(0), u16(0), IMAGE_SYM_CLASS_EXTERNAL, 0}, }; // TODO: Name.Offset.Offset here and in the all similar places below // suggests a names refactoring. Maybe StringTableOffset.Value? 
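The Name.Offset.Offset assignments that follow are just cumulative offsets into the string table produced by writeStringTable: the table begins with its 4-byte length field, and each name then occupies its length plus a NUL terminator. A small sketch of that bookkeeping (stringTableOffsets is a hypothetical helper, not part of the file):

#include <cstdint>
#include <string>
#include <vector>

// Offsets at which a writeStringTable-style layout places each name: entry 0
// starts right after the 4-byte length field, entry i after all earlier
// NUL-terminated names.
static std::vector<uint32_t>
stringTableOffsets(const std::vector<std::string> &Names) {
  std::vector<uint32_t> Offsets;
  uint32_t Pos = sizeof(uint32_t); // skip the table's length field
  for (const std::string &N : Names) {
    Offsets.push_back(Pos);
    Pos += static_cast<uint32_t>(N.size()) + 1; // name bytes plus NUL
  }
  return Offsets;
}

This matches the pattern below: the first symbol name sits at sizeof(uint32_t), the next at sizeof(uint32_t) plus the previous name's length plus one, and so on.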
SymbolTable[0].Name.Offset.Offset = sizeof(uint32_t); SymbolTable[5].Name.Offset.Offset = sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1; SymbolTable[6].Name.Offset.Offset = sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1 + NullImportDescriptorSymbolName.length() + 1; append(Buffer, SymbolTable); // String Table writeStringTable(Buffer, {ImportDescriptorSymbolName, NullImportDescriptorSymbolName, NullThunkSymbolName}); StringRef F{reinterpret_cast(Buffer.data()), Buffer.size()}; return {MemoryBufferRef(F, ImportName)}; } NewArchiveMember ObjectFactory::createNullImportDescriptor(std::vector &Buffer) { const uint32_t NumberOfSections = 1; const uint32_t NumberOfSymbols = 1; // COFF Header coff_file_header Header{ u16(Machine), u16(NumberOfSections), u32(0), u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + // .idata$3 sizeof(coff_import_directory_table_entry)), u32(NumberOfSymbols), u16(0), u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : C_Invalid), }; append(Buffer, Header); // Section Header Table const coff_section SectionTable[NumberOfSections] = { {{'.', 'i', 'd', 'a', 't', 'a', '$', '3'}, u32(0), u32(0), u32(sizeof(coff_import_directory_table_entry)), u32(sizeof(coff_file_header) + (NumberOfSections * sizeof(coff_section))), u32(0), u32(0), u16(0), u16(0), u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, }; append(Buffer, SectionTable); // .idata$3 const coff_import_directory_table_entry ImportDescriptor{ u32(0), u32(0), u32(0), u32(0), u32(0), }; append(Buffer, ImportDescriptor); // Symbol Table coff_symbol16 SymbolTable[NumberOfSymbols] = { {{{0, 0, 0, 0, 0, 0, 0, 0}}, u32(0), u16(1), u16(0), IMAGE_SYM_CLASS_EXTERNAL, 0}, }; SymbolTable[0].Name.Offset.Offset = sizeof(uint32_t); append(Buffer, SymbolTable); // String Table writeStringTable(Buffer, {NullImportDescriptorSymbolName}); StringRef F{reinterpret_cast(Buffer.data()), Buffer.size()}; return {MemoryBufferRef(F, ImportName)}; } NewArchiveMember ObjectFactory::createNullThunk(std::vector &Buffer) { const uint32_t NumberOfSections = 2; const uint32_t NumberOfSymbols = 1; uint32_t VASize = is32bit(Machine) ? 4 : 8; // COFF Header coff_file_header Header{ u16(Machine), u16(NumberOfSections), u32(0), u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + // .idata$5 VASize + // .idata$4 VASize), u32(NumberOfSymbols), u16(0), u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : C_Invalid), }; append(Buffer, Header); // Section Header Table const coff_section SectionTable[NumberOfSections] = { {{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}, u32(0), u32(0), u32(VASize), u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)), u32(0), u32(0), u16(0), u16(0), u32((is32bit(Machine) ? IMAGE_SCN_ALIGN_4BYTES : IMAGE_SCN_ALIGN_8BYTES) | IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, {{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}, u32(0), u32(0), u32(VASize), u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + VASize), u32(0), u32(0), u16(0), u16(0), u32((is32bit(Machine) ? 
IMAGE_SCN_ALIGN_4BYTES : IMAGE_SCN_ALIGN_8BYTES) | IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, }; append(Buffer, SectionTable); // .idata$5, ILT append(Buffer, u32(0)); if (!is32bit(Machine)) append(Buffer, u32(0)); // .idata$4, IAT append(Buffer, u32(0)); if (!is32bit(Machine)) append(Buffer, u32(0)); // Symbol Table coff_symbol16 SymbolTable[NumberOfSymbols] = { {{{0, 0, 0, 0, 0, 0, 0, 0}}, u32(0), u16(1), u16(0), IMAGE_SYM_CLASS_EXTERNAL, 0}, }; SymbolTable[0].Name.Offset.Offset = sizeof(uint32_t); append(Buffer, SymbolTable); // String Table writeStringTable(Buffer, {NullThunkSymbolName}); StringRef F{reinterpret_cast(Buffer.data()), Buffer.size()}; return {MemoryBufferRef{F, ImportName}}; } NewArchiveMember ObjectFactory::createShortImport(StringRef Sym, uint16_t Ordinal, ImportType ImportType, ImportNameType NameType) { size_t ImpSize = ImportName.size() + Sym.size() + 2; // +2 for NULs size_t Size = sizeof(coff_import_header) + ImpSize; char *Buf = Alloc.Allocate(Size); memset(Buf, 0, Size); char *P = Buf; // Write short import library. auto *Imp = reinterpret_cast(P); P += sizeof(*Imp); Imp->Sig2 = 0xFFFF; Imp->Machine = Machine; Imp->SizeOfData = ImpSize; if (Ordinal > 0) Imp->OrdinalHint = Ordinal; Imp->TypeInfo = (NameType << 2) | ImportType; // Write symbol name and DLL name. memcpy(P, Sym.data(), Sym.size()); P += Sym.size() + 1; memcpy(P, ImportName.data(), ImportName.size()); return {MemoryBufferRef(StringRef(Buf, Size), ImportName)}; } NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym, StringRef Weak, bool Imp) { std::vector Buffer; const uint32_t NumberOfSections = 1; const uint32_t NumberOfSymbols = 5; // COFF Header coff_file_header Header{ - u16(0), + u16(Machine), u16(NumberOfSections), u32(0), u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section))), u32(NumberOfSymbols), u16(0), u16(0), }; append(Buffer, Header); // Section Header Table const coff_section SectionTable[NumberOfSections] = { {{'.', 'd', 'r', 'e', 'c', 't', 'v', 'e'}, u32(0), u32(0), u32(0), u32(0), u32(0), u32(0), u16(0), u16(0), u32(IMAGE_SCN_LNK_INFO | IMAGE_SCN_LNK_REMOVE)}}; append(Buffer, SectionTable); // Symbol Table coff_symbol16 SymbolTable[NumberOfSymbols] = { {{{'@', 'c', 'o', 'm', 'p', '.', 'i', 'd'}}, u32(0), u16(0xFFFF), u16(0), IMAGE_SYM_CLASS_STATIC, 0}, {{{'@', 'f', 'e', 'a', 't', '.', '0', '0'}}, u32(0), u16(0xFFFF), u16(0), IMAGE_SYM_CLASS_STATIC, 0}, {{{0, 0, 0, 0, 0, 0, 0, 0}}, u32(0), u16(0), u16(0), IMAGE_SYM_CLASS_EXTERNAL, 0}, {{{0, 0, 0, 0, 0, 0, 0, 0}}, u32(0), u16(0), u16(0), IMAGE_SYM_CLASS_WEAK_EXTERNAL, 1}, {{{2, 0, 0, 0, IMAGE_WEAK_EXTERN_SEARCH_ALIAS, 0, 0, 0}}, u32(0), u16(0), u16(0), IMAGE_SYM_CLASS_NULL, 0}, }; SymbolTable[2].Name.Offset.Offset = sizeof(uint32_t); //__imp_ String Table StringRef Prefix = Imp ? 
"__imp_" : ""; SymbolTable[3].Name.Offset.Offset = sizeof(uint32_t) + Sym.size() + Prefix.size() + 1; append(Buffer, SymbolTable); writeStringTable(Buffer, {(Prefix + Sym).str(), (Prefix + Weak).str()}); // Copied here so we can still use writeStringTable char *Buf = Alloc.Allocate(Buffer.size()); memcpy(Buf, Buffer.data(), Buffer.size()); return {MemoryBufferRef(StringRef(Buf, Buffer.size()), ImportName)}; } Error writeImportLibrary(StringRef ImportName, StringRef Path, ArrayRef Exports, MachineTypes Machine, bool MinGW) { std::vector Members; ObjectFactory OF(llvm::sys::path::filename(ImportName), Machine); std::vector ImportDescriptor; Members.push_back(OF.createImportDescriptor(ImportDescriptor)); std::vector NullImportDescriptor; Members.push_back(OF.createNullImportDescriptor(NullImportDescriptor)); std::vector NullThunk; Members.push_back(OF.createNullThunk(NullThunk)); for (COFFShortExport E : Exports) { if (E.Private) continue; ImportType ImportType = IMPORT_CODE; if (E.Data) ImportType = IMPORT_DATA; if (E.Constant) ImportType = IMPORT_CONST; StringRef SymbolName = E.SymbolName.empty() ? E.Name : E.SymbolName; ImportNameType NameType = getNameType(SymbolName, E.Name, Machine, MinGW); Expected Name = E.ExtName.empty() ? SymbolName : replace(SymbolName, E.Name, E.ExtName); if (!Name) return Name.takeError(); if (!E.AliasTarget.empty() && *Name != E.AliasTarget) { Members.push_back(OF.createWeakExternal(E.AliasTarget, *Name, false)); Members.push_back(OF.createWeakExternal(E.AliasTarget, *Name, true)); continue; } Members.push_back( OF.createShortImport(*Name, E.Ordinal, ImportType, NameType)); } return writeArchive(Path, Members, /*WriteSymtab*/ true, object::Archive::K_GNU, /*Deterministic*/ true, /*Thin*/ false); } } // namespace object } // namespace llvm Index: vendor/llvm/dist-release_80/lib/Target/AArch64/AArch64SchedExynosM4.td =================================================================== --- vendor/llvm/dist-release_80/lib/Target/AArch64/AArch64SchedExynosM4.td (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/AArch64/AArch64SchedExynosM4.td (revision 348932) @@ -1,1004 +1,1004 @@ //=- AArch64SchedExynosM4.td - Samsung Exynos M4 Sched Defs --*- tablegen -*-=// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines the machine model for the Samsung Exynos M4 to support // instruction scheduling and other instruction cost heuristics. // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // The Exynos-M4 is an advanced superscalar microprocessor with a 6-wide // in-order stage for decode and dispatch and a wider issue stage. // The execution units and loads and stores are out-of-order. def ExynosM4Model : SchedMachineModel { let IssueWidth = 6; // Up to 6 uops per cycle. let MicroOpBufferSize = 228; // ROB size. let LoopMicroOpBufferSize = 48; // Based on the instruction queue size. let LoadLatency = 4; // Optimistic load cases. let MispredictPenalty = 16; // Minimum branch misprediction penalty. let CompleteModel = 1; // Use the default model otherwise. 
list UnsupportedFeatures = [HasSVE]; } //===----------------------------------------------------------------------===// // Define each kind of processor resource and number available on the Exynos-M4. let SchedModel = ExynosM4Model in { def M4UnitA : ProcResource<2>; // Simple integer def M4UnitC : ProcResource<2>; // Simple and complex integer let Super = M4UnitC, BufferSize = 1 in def M4UnitD : ProcResource<1>; // Integer division (inside C0, serialized) let Super = M4UnitC in def M4UnitE : ProcResource<1>; // CRC (inside C0) def M4UnitB : ProcResource<2>; // Branch def M4UnitL0 : ProcResource<1>; // Load def M4UnitS0 : ProcResource<1>; // Store def M4PipeLS : ProcResource<1>; // Load/Store let Super = M4PipeLS in { def M4UnitL1 : ProcResource<1>; def M4UnitS1 : ProcResource<1>; } def M4PipeF0 : ProcResource<1>; // FP #0 let Super = M4PipeF0 in { def M4UnitFMAC0 : ProcResource<1>; // FP multiplication def M4UnitFADD0 : ProcResource<1>; // Simple FP def M4UnitFCVT0 : ProcResource<1>; // FP conversion def M4UnitNALU0 : ProcResource<1>; // Simple vector def M4UnitNHAD : ProcResource<1>; // Horizontal vector def M4UnitNMSC : ProcResource<1>; // FP and vector miscellanea def M4UnitNMUL0 : ProcResource<1>; // Vector multiplication def M4UnitNSHT0 : ProcResource<1>; // Vector shifting def M4UnitNSHF0 : ProcResource<1>; // Vector shuffling def M4UnitNCRY0 : ProcResource<1>; // Cryptographic } def M4PipeF1 : ProcResource<1>; // FP #1 let Super = M4PipeF1 in { def M4UnitFMAC1 : ProcResource<1>; // FP multiplication def M4UnitFADD1 : ProcResource<1>; // Simple FP def M4UnitFDIV0 : ProcResource<2>; // FP division (serialized) def M4UnitFSQR0 : ProcResource<2>; // FP square root (serialized) def M4UnitFST0 : ProcResource<1>; // FP store def M4UnitNALU1 : ProcResource<1>; // Simple vector def M4UnitNSHT1 : ProcResource<1>; // Vector shifting def M4UnitNSHF1 : ProcResource<1>; // Vector shuffling } def M4PipeF2 : ProcResource<1>; // FP #2 let Super = M4PipeF2 in { def M4UnitFMAC2 : ProcResource<1>; // FP multiplication def M4UnitFADD2 : ProcResource<1>; // Simple FP def M4UnitFCVT1 : ProcResource<1>; // FP conversion def M4UnitFDIV1 : ProcResource<2>; // FP division (serialized) def M4UnitFSQR1 : ProcResource<2>; // FP square root (serialized) def M4UnitFST1 : ProcResource<1>; // FP store def M4UnitNALU2 : ProcResource<1>; // Simple vector def M4UnitNMUL1 : ProcResource<1>; // Vector multiplication def M4UnitNSHT2 : ProcResource<1>; // Vector shifting def M4UnitNCRY1 : ProcResource<1>; // Cryptographic } def M4UnitALU : ProcResGroup<[M4UnitA, M4UnitC]>; def M4UnitL : ProcResGroup<[M4UnitL0, M4UnitL1]>; def M4UnitS : ProcResGroup<[M4UnitS0, M4UnitS1]>; def M4UnitFMAC : ProcResGroup<[M4UnitFMAC0, M4UnitFMAC1, M4UnitFMAC2]>; def M4UnitFMACH : ProcResGroup<[M4UnitFMAC0, M4UnitFMAC1]>; def M4UnitFADD : ProcResGroup<[M4UnitFADD0, M4UnitFADD1, M4UnitFADD2]>; def M4UnitFADDH : ProcResGroup<[M4UnitFADD0, M4UnitFADD1]>; def M4UnitFCVT : ProcResGroup<[M4UnitFCVT0, M4UnitFCVT1]>; def M4UnitFCVTH : ProcResGroup<[M4UnitFCVT0]>; def M4UnitFDIV : ProcResGroup<[M4UnitFDIV0, M4UnitFDIV1]>; def M4UnitFDIVH : ProcResGroup<[M4UnitFDIV0]>; def M4UnitFSQR : ProcResGroup<[M4UnitFSQR0, M4UnitFSQR1]>; def M4UnitFSQRH : ProcResGroup<[M4UnitFSQR0]>; def M4UnitFST : ProcResGroup<[M4UnitFST0, M4UnitFST1]>; def M4UnitNALU : ProcResGroup<[M4UnitNALU0, M4UnitNALU1, M4UnitNALU2]>; def M4UnitNALUH : ProcResGroup<[M4UnitNALU0, M4UnitNALU1]>; def M4UnitNMUL : ProcResGroup<[M4UnitNMUL0, M4UnitNMUL1]>; def M4UnitNSHT : 
ProcResGroup<[M4UnitNSHT0, M4UnitNSHT1, M4UnitNSHT2]>; def M4UnitNSHF : ProcResGroup<[M4UnitNSHF0, M4UnitNSHF1]>; def M4UnitNSHFH : ProcResGroup<[M4UnitNSHF0]>; def M4UnitNCRY : ProcResGroup<[M4UnitNCRY0, M4UnitNCRY1]>; //===----------------------------------------------------------------------===// // Resources details. def M4WriteZ0 : SchedWriteRes<[]> { let Latency = 0; } def M4WriteZ1 : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; } def M4WriteZ4 : SchedWriteRes<[]> { let Latency = 4; let NumMicroOps = 0; } def M4WriteA1 : SchedWriteRes<[M4UnitALU]> { let Latency = 1; } def M4WriteA2 : SchedWriteRes<[M4UnitALU]> { let Latency = 2; } def M4WriteAA : SchedWriteRes<[M4UnitALU]> { let Latency = 2; let ResourceCycles = [2]; } def M4WriteAB : SchedWriteRes<[M4UnitALU, M4UnitC]> { let Latency = 2; let NumMicroOps = 2; } def M4WriteAC : SchedWriteRes<[M4UnitALU, M4UnitALU, M4UnitC]> { let Latency = 3; let NumMicroOps = 3; } def M4WriteAD : SchedWriteRes<[M4UnitALU, M4UnitC]> { let Latency = 2; let NumMicroOps = 2; } def M4WriteAF : SchedWriteRes<[M4UnitALU]> { let Latency = 2; let NumMicroOps = 2; } def M4WriteAU : SchedWriteVariant<[SchedVar, SchedVar, SchedVar, SchedVar]>; def M4WriteAV : SchedWriteVariant<[SchedVar, SchedVar]>; def M4WriteAX : SchedWriteVariant<[SchedVar, SchedVar, SchedVar]>; def M4WriteAY : SchedWriteVariant<[SchedVar, SchedVar]>; def M4WriteB1 : SchedWriteRes<[M4UnitB]> { let Latency = 1; } def M4WriteBX : SchedWriteVariant<[SchedVar, SchedVar]>; def M4WriteC1 : SchedWriteRes<[M4UnitC]> { let Latency = 1; } def M4WriteC3 : SchedWriteRes<[M4UnitC]> { let Latency = 3; } def M4WriteCA : SchedWriteRes<[M4UnitC]> { let Latency = 4; let ResourceCycles = [2]; } def M4WriteD12 : SchedWriteRes<[M4UnitD]> { let Latency = 12; } def M4WriteD21 : SchedWriteRes<[M4UnitD]> { let Latency = 21; } def M4WriteE2 : SchedWriteRes<[M4UnitE]> { let Latency = 2; } def M4WriteL4 : SchedWriteRes<[M4UnitL]> { let Latency = 4; } def M4WriteL5 : SchedWriteRes<[M4UnitL]> { let Latency = 5; } def M4WriteLA : SchedWriteRes<[M4UnitL, M4UnitL]> { let Latency = 5; let NumMicroOps = 1; } def M4WriteLB : SchedWriteRes<[M4UnitA, M4UnitL]> { let Latency = 5; let NumMicroOps = 2; } def M4WriteLC : SchedWriteRes<[M4UnitA, M4UnitL, M4UnitL]> { let Latency = 5; let NumMicroOps = 2; } def M4WriteLD : SchedWriteRes<[M4UnitA, M4UnitL]> { let Latency = 4; let NumMicroOps = 2; } def M4WriteLE : SchedWriteRes<[M4UnitA, M4UnitL]> { let Latency = 6; let NumMicroOps = 2; } def M4WriteLH : SchedWriteRes<[]> { let Latency = 5; let NumMicroOps = 0; } def M4WriteLX : SchedWriteVariant<[SchedVar, SchedVar]>; def M4WriteS1 : SchedWriteRes<[M4UnitS]> { let Latency = 1; } def M4WriteSA : SchedWriteRes<[M4UnitS0]> { let Latency = 3; } def M4WriteSB : SchedWriteRes<[M4UnitA, M4UnitS]> { let Latency = 2; let NumMicroOps = 1; } def M4WriteSX : SchedWriteVariant<[SchedVar, SchedVar]>; def M4ReadAdrBase : SchedReadVariant<[SchedVar< MCSchedPredicate< CheckAny< [ScaledIdxFn, ExynosScaledIdxFn]>>, [ReadDefault]>, SchedVar]>; def M4WriteNEONA : SchedWriteRes<[M4UnitNSHF, M4UnitFADD]> { let Latency = 3; let NumMicroOps = 2; } def M4WriteNEONB : SchedWriteRes<[M4UnitNALU, M4UnitS0]> { let Latency = 5; let NumMicroOps = 2; } def M4WriteNEOND : SchedWriteRes<[M4UnitNSHF, M4UnitFST]> { let Latency = 6; let NumMicroOps = 2; } def M4WriteNEONH : SchedWriteRes<[M4UnitNALU, M4UnitS0]> { let Latency = 5; let NumMicroOps = 2; } def M4WriteNEONI : SchedWriteRes<[M4UnitNSHF, M4UnitS0]> { let Latency = 2; let NumMicroOps = 2; } def 
M4WriteNEONJ : SchedWriteRes<[M4UnitNMSC, M4UnitS0]> { let Latency = 4; } def M4WriteNEONK : SchedWriteRes<[M4UnitNSHF, M4UnitNMSC, M4UnitS0]> { let Latency = 5; let NumMicroOps = 2; } def M4WriteNEONL : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; } -def M4WriteNEONM : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; } def M4WriteNEONN : SchedWriteRes<[M4UnitNMSC, M4UnitNMSC]> { let Latency = 5; let NumMicroOps = 2; } def M4WriteNEONO : SchedWriteRes<[M4UnitNMSC, M4UnitNMSC, M4UnitNMSC]> { let Latency = 8; let NumMicroOps = 3; } def M4WriteNEONP : SchedWriteRes<[M4UnitNSHF, M4UnitNMSC]> { let Latency = 4; let NumMicroOps = 2; } def M4WriteNEONQ : SchedWriteRes<[M4UnitNMSC, M4UnitC]> { let Latency = 3; let NumMicroOps = 1; } def M4WriteNEONR : SchedWriteRes<[M4UnitFCVT0, M4UnitS0]> { let Latency = 4; let NumMicroOps = 1; } def M4WriteNEONV : SchedWriteRes<[M4UnitFDIV, M4UnitFDIV]> { let Latency = 7; let ResourceCycles = [6, 6]; } def M4WriteNEONVH : SchedWriteRes<[M4UnitFDIVH, M4UnitFDIVH]> { let Latency = 7; let ResourceCycles = [6, 6]; } def M4WriteNEONW : SchedWriteRes<[M4UnitFDIV, M4UnitFDIV]> { let Latency = 12; let ResourceCycles = [9, 9]; } def M4WriteNEONX : SchedWriteRes<[M4UnitFSQR, M4UnitFSQR]> { let Latency = 8; let ResourceCycles = [7, 7]; } def M4WriteNEONXH : SchedWriteRes<[M4UnitFSQRH, M4UnitFSQRH]> { let Latency = 7; let ResourceCycles = [6, 6]; } def M4WriteNEONY : SchedWriteRes<[M4UnitFSQR, M4UnitFSQR]> { let Latency = 12; let ResourceCycles = [9, 9]; } def M4WriteNEONZ : SchedWriteVariant<[SchedVar, SchedVar]>; def M4WriteFADD2 : SchedWriteRes<[M4UnitFADD]> { let Latency = 2; } def M4WriteFADD2H : SchedWriteRes<[M4UnitFADDH]> { let Latency = 2; } def M4WriteFCVT2 : SchedWriteRes<[M4UnitFCVT]> { let Latency = 2; } def M4WriteFCVT2A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 2; } def M4WriteFCVT2H : SchedWriteRes<[M4UnitFCVTH]> { let Latency = 2; } def M4WriteFCVT3 : SchedWriteRes<[M4UnitFCVT]> { let Latency = 3; } def M4WriteFCVT3A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 3; } def M4WriteFCVT3H : SchedWriteRes<[M4UnitFCVTH]> { let Latency = 3; } def M4WriteFCVT4 : SchedWriteRes<[M4UnitFCVT]> { let Latency = 4; } def M4WriteFCVT4A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 4; } def M4WriteFCVT6A : SchedWriteRes<[M4UnitFCVT0]> { let Latency = 6; } def M4WriteFDIV7 : SchedWriteRes<[M4UnitFDIV]> { let Latency = 7; let ResourceCycles = [6]; } def M4WriteFDIV7H : SchedWriteRes<[M4UnitFDIVH]> { let Latency = 7; let ResourceCycles = [6]; } def M4WriteFDIV12 : SchedWriteRes<[M4UnitFDIV]> { let Latency = 12; let ResourceCycles = [9]; } def M4WriteFMAC2H : SchedWriteRes<[M4UnitFMACH]> { let Latency = 2; } def M4WriteFMAC3H : SchedWriteRes<[M4UnitFMACH]> { let Latency = 3; } def M4WriteFMAC3 : SchedWriteRes<[M4UnitFMAC]> { let Latency = 3; } def M4WriteFMAC4 : SchedWriteRes<[M4UnitFMAC]> { let Latency = 4; } def M4WriteFMAC4H : SchedWriteRes<[M4UnitFMACH]> { let Latency = 4; } def M4WriteFMAC5 : SchedWriteRes<[M4UnitFMAC]> { let Latency = 5; } def M4WriteFSQR7H : SchedWriteRes<[M4UnitFSQRH]> { let Latency = 7; let ResourceCycles = [6]; } def M4WriteFSQR8 : SchedWriteRes<[M4UnitFSQR]> { let Latency = 8; let ResourceCycles = [7]; } def M4WriteFSQR12 : SchedWriteRes<[M4UnitFSQR]> { let Latency = 12; let ResourceCycles = [9]; } def M4WriteNALU1 : SchedWriteRes<[M4UnitNALU]> { let Latency = 1; } def M4WriteNALU1H : SchedWriteRes<[M4UnitNALUH]> { let Latency = 1; } def M4WriteNCRY1 : SchedWriteRes<[M4UnitNCRY]> { let Latency = 1; } def M4WriteNCRY1A : 
SchedWriteRes<[M4UnitNCRY0]> { let Latency = 1; } def M4WriteNCRY3A : SchedWriteRes<[M4UnitNCRY0]> { let Latency = 3; } def M4WriteNCRY5A : SchedWriteRes<[M4UnitNCRY]> { let Latency = 5; } def M4WriteNHAD1 : SchedWriteRes<[M4UnitNHAD]> { let Latency = 1; } def M4WriteNHAD3 : SchedWriteRes<[M4UnitNHAD]> { let Latency = 3; } def M4WriteNMSC1 : SchedWriteRes<[M4UnitNMSC]> { let Latency = 1; } def M4WriteNMSC2 : SchedWriteRes<[M4UnitNMSC]> { let Latency = 2; } def M4WriteNMSC3 : SchedWriteRes<[M4UnitNMSC]> { let Latency = 3; } def M4WriteNMUL3 : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; } def M4WriteNSHF1 : SchedWriteRes<[M4UnitNSHF]> { let Latency = 1; } def M4WriteNSHF1H : SchedWriteRes<[M4UnitNSHFH]> { let Latency = 1; } def M4WriteNSHF3 : SchedWriteRes<[M4UnitNSHF]> { let Latency = 3; } def M4WriteNSHFA : SchedWriteRes<[M4UnitNSHF]> { let Latency = 1; let ResourceCycles = [2]; } def M4WriteNSHFB : SchedWriteRes<[M4UnitNSHF]> { let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [2]; } def M4WriteNSHFC : SchedWriteRes<[M4UnitNSHF]> { let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [4]; } def M4WriteNSHFD : SchedWriteRes<[M4UnitNSHF]> { let Latency = 4; let NumMicroOps = 4; let ResourceCycles = [4]; } def M4WriteNSHT1 : SchedWriteRes<[M4UnitNSHT]> { let Latency = 1; } def M4WriteNSHT2 : SchedWriteRes<[M4UnitNSHT]> { let Latency = 2; } def M4WriteNSHT3 : SchedWriteRes<[M4UnitNSHT]> { let Latency = 3; } def M4WriteNSHT4A : SchedWriteRes<[M4UnitNSHT1]> { let Latency = 4; } def M4WriteVLDA : SchedWriteRes<[M4UnitL, M4UnitL]> { let Latency = 5; let NumMicroOps = 2; } def M4WriteVLDB : SchedWriteRes<[M4UnitL, M4UnitL, M4UnitL]> { let Latency = 6; let NumMicroOps = 3; } def M4WriteVLDC : SchedWriteRes<[M4UnitL, M4UnitL, M4UnitL, M4UnitL]> { let Latency = 6; let NumMicroOps = 4; } def M4WriteVLDD : SchedWriteRes<[M4UnitL, M4UnitNSHF]> { let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [2, 1]; } def M4WriteVLDF : SchedWriteRes<[M4UnitL, M4UnitL]> { let Latency = 10; let NumMicroOps = 2; let ResourceCycles = [3, 3]; } def M4WriteVLDG : SchedWriteRes<[M4UnitL, M4UnitNSHF, M4UnitNSHF]> { let Latency = 6; let NumMicroOps = 3; let ResourceCycles = [2, 1, 1]; } def M4WriteVLDI : SchedWriteRes<[M4UnitL, M4UnitL, M4UnitL]> { let Latency = 12; let NumMicroOps = 3; let ResourceCycles = [3, 3, 3]; } def M4WriteVLDJ : SchedWriteRes<[M4UnitL, M4UnitNSHF, M4UnitNSHF, M4UnitNSHF]> { let Latency = 7; let NumMicroOps = 4; let ResourceCycles = [3, 1, 1, 1]; } def M4WriteVLDK : SchedWriteRes<[M4UnitL, M4UnitNSHF, M4UnitNSHF, M4UnitNSHF, M4UnitNSHF]> { let Latency = 7; let NumMicroOps = 5; let ResourceCycles = [3, 1, 1, 1, 1]; } def M4WriteVLDL : SchedWriteRes<[M4UnitL, M4UnitNSHF, M4UnitNSHF, M4UnitL, M4UnitNSHF]> { let Latency = 7; let NumMicroOps = 5; let ResourceCycles = [3, 1, 1, 6, 1]; } def M4WriteVLDM : SchedWriteRes<[M4UnitL, M4UnitNSHF, M4UnitNSHF, M4UnitL, M4UnitNSHF, M4UnitNSHF]> { let Latency = 7; let NumMicroOps = 6; let ResourceCycles = [3, 1, 1, 3, 1, 1]; } def M4WriteVLDN : SchedWriteRes<[M4UnitL, M4UnitL, M4UnitL, M4UnitL]> { let Latency = 14; let NumMicroOps = 4; let ResourceCycles = [3, 3, 3, 3]; } def M4WriteVST1 : SchedWriteRes<[M4UnitS, M4UnitFST]> { let Latency = 1; let NumMicroOps = 1; } def M4WriteVSTA : WriteSequence<[WriteVST], 2>; def M4WriteVSTB : WriteSequence<[WriteVST], 3>; def M4WriteVSTC : WriteSequence<[WriteVST], 4>; def M4WriteVSTD : SchedWriteRes<[M4UnitS, M4UnitFST]> { let Latency = 2; } def M4WriteVSTE : SchedWriteRes<[M4UnitS, M4UnitFST, M4UnitS, 
M4UnitFST]> { let Latency = 2; let NumMicroOps = 2; }
def M4WriteVSTF : SchedWriteRes<[M4UnitNSHF, M4UnitS, M4UnitFST, M4UnitS, M4UnitFST]> { let Latency = 4; let NumMicroOps = 4; let ResourceCycles = [1, 2, 1, 2, 1]; }
def M4WriteVSTG : SchedWriteRes<[M4UnitNSHF, M4UnitNSHF, M4UnitNSHF, M4UnitS, M4UnitFST, M4UnitS, M4UnitFST, M4UnitS, M4UnitFST]> { let Latency = 5; let NumMicroOps = 6; let ResourceCycles = [1, 1, 1, 2, 1, 2, 1, 2, 1]; }
def M4WriteVSTI : SchedWriteRes<[M4UnitNSHF, M4UnitNSHF, M4UnitNSHF, M4UnitNSHF, M4UnitS, M4UnitFST, M4UnitS, M4UnitFST, M4UnitS, M4UnitFST, M4UnitS, M4UnitFST]> { let Latency = 8; let NumMicroOps = 5; let ResourceCycles = [1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1]; }
def M4WriteVSTJ : SchedWriteRes<[M4UnitA, M4UnitS, M4UnitFST]> { let Latency = 1; let NumMicroOps = 2; }
def M4WriteVSTK : SchedWriteRes<[M4UnitA, M4UnitS, M4UnitFST]> { let Latency = 3; let NumMicroOps = 2; }
def M4WriteVSTL : SchedWriteRes<[M4UnitNSHF, M4UnitNSHF, M4UnitS, M4UnitFST, M4UnitS, M4UnitFST]> { let Latency = 4; let NumMicroOps = 4; let ResourceCycles = [1, 1, 2, 1, 2, 1]; }
// Special cases.
def M4WriteCOPY : SchedWriteVariant<[SchedVar, SchedVar]>;
def M4WriteMOVI : SchedWriteVariant<[SchedVar, SchedVar]>;
-def M4WriteMULL : SchedWriteVariant<[SchedVar,
-                                     SchedVar]>;
// Fast forwarding.
def M4ReadAESM1 : SchedReadAdvance<+1, [M4WriteNCRY1]>;
def M4ReadFMACM1 : SchedReadAdvance<+1, [M4WriteFMAC4, M4WriteFMAC4H, M4WriteFMAC5]>;
def M4ReadNMULM1 : SchedReadAdvance<+1, [M4WriteNMUL3]>;
-def M4ReadMULLP2 : SchedReadAdvance<-2, [M4WriteNEONM]>;
+def M4ReadNMULP2 : SchedReadAdvance<-2, [M4WriteNMUL3]>;
+
//===----------------------------------------------------------------------===//
// Coarse scheduling model.
// Branch instructions.
def : SchedAlias;
def : SchedAlias;
// Arithmetic and logical integer instructions.
def : SchedAlias;
def : SchedAlias;
// FIXME: M4WriteAX crashes TableGen.
def : SchedAlias;
// FIXME: M4WriteAX crashes TableGen.
def : SchedAlias;
// Move instructions.
def : SchedAlias;
// Divide and multiply instructions.
def : SchedAlias;
def : SchedAlias;
def : SchedAlias;
def : SchedAlias;
// Miscellaneous instructions.
def : SchedAlias;
// Addressing modes.
def : SchedAlias;
def : SchedAlias;
// Load instructions.
def : SchedAlias;
def : SchedAlias;
def : SchedAlias;
// Store instructions.
def : SchedAlias;
def : SchedAlias;
def : SchedAlias;
def : SchedAlias;
// FP data instructions.
def : SchedAlias;
def : SchedAlias;
def : SchedAlias;
def : SchedAlias;
// FP miscellaneous instructions.
def : SchedAlias;
def : SchedAlias;
def : SchedAlias;
// FP load instructions.
def : SchedAlias;
// FP store instructions.
def : SchedAlias;
// ASIMD FP instructions.
def : SchedAlias;
// Other miscellaneous instructions.
def : WriteRes { let Unsupported = 1; }
def : WriteRes { let Latency = 1; }
def : WriteRes { let Latency = 1; }
def : WriteRes { let Latency = 1; }
//===----------------------------------------------------------------------===//
// Generic fast forwarding.
// TODO: Add FP register forwarding rules.
def : ReadAdvance;
def : ReadAdvance;
def : ReadAdvance;
def : ReadAdvance;
// TODO: The forwarding for 32 bits actually saves 2 cycles.
def : ReadAdvance;
def : ReadAdvance;
def : ReadAdvance;
def : ReadAdvance;
def : ReadAdvance;
//===----------------------------------------------------------------------===//
// Finer scheduling model.
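An illustrative aside, not part of the patch: the SchedReadAdvance records above model operand forwarding, where a positive advance is subtracted from the producer's latency, while the negative advance in the renamed M4ReadNMULP2 charges a two-cycle penalty when a NEON multiply result feeds the 128-bit long-multiply forms remapped further below. The toy C++ sketch that follows only assumes that general "latency minus advance" reading; it is not LLVM's scheduler code, and the helper name effectiveLatency is made up for the example.

// Illustrative only: a toy model of how a SchedReadAdvance entry shifts the
// cycle at which a dependent instruction can consume a producer's result.
// A sketch under the assumption "effective latency = producer latency minus
// read advance, clamped at zero", not the actual MCSchedModel code.
#include <algorithm>
#include <cstdio>

static int effectiveLatency(int producerLatency, int readAdvance) {
  return std::max(0, producerLatency - readAdvance);
}

int main() {
  // M4WriteNMUL3 has Latency = 3.
  std::printf("no forwarding entry: %d cycles\n", effectiveLatency(3, 0));  // 3
  std::printf("M4ReadNMULM1 (+1):   %d cycles\n", effectiveLatency(3, +1)); // 2
  std::printf("M4ReadNMULP2 (-2):   %d cycles\n", effectiveLatency(3, -2)); // 5
  return 0;
}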
// Branch instructions def : InstRW<[M4WriteB1], (instrs Bcc)>; def : InstRW<[M4WriteAF], (instrs BL)>; def : InstRW<[M4WriteBX], (instrs BLR)>; def : InstRW<[M4WriteC1], (instregex "^CBN?Z[WX]")>; def : InstRW<[M4WriteAD], (instregex "^TBN?Z[WX]")>; // Arithmetic and logical integer instructions. def : InstRW<[M4WriteAX], (instregex "^(ADD|AND|BIC|EON|EOR|ORN|SUB)[WX]rs$")>; def : InstRW<[M4WriteAU], (instrs ORRWrs, ORRXrs)>; def : InstRW<[M4WriteAX], (instregex "^(ADD|AND|BIC|SUB)S[WX]rs$")>; def : InstRW<[M4WriteAX], (instregex "^(ADD|SUB)S?[WX]rx(64)?$")>; def : InstRW<[M4WriteAV], (instrs ADDWri, ADDXri, ORRWri, ORRXri)>; // Move instructions. def : InstRW<[M4WriteCOPY], (instrs COPY)>; def : InstRW<[M4WriteZ0], (instrs ADR, ADRP)>; def : InstRW<[M4WriteZ0], (instregex "^MOV[NZ][WX]i")>; // Divide and multiply instructions. // Miscellaneous instructions. // Load instructions. def : InstRW<[M4WriteLD, WriteLDHi, WriteAdr], (instregex "^LDP(SW|W|X)(post|pre)")>; def : InstRW<[M4WriteL5, ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roW")>; def : InstRW<[WriteLDIdx, ReadAdrBase], (instregex "^LDR(BB|SBW|SBX|HH|SHW|SHX|SW|W|X)roX")>; def : InstRW<[M4WriteL5, ReadAdrBase], (instrs PRFMroW)>; def : InstRW<[WriteLDIdx, ReadAdrBase], (instrs PRFMroX)>; // Store instructions. def : InstRW<[M4WriteSB, ReadAdrBase], (instregex "^STR(BB|HH|W|X)roW")>; def : InstRW<[WriteST, ReadAdrBase], (instregex "^STR(BB|HH|W|X)roX")>; // FP data instructions. def : InstRW<[M4WriteNSHF1H], (instrs FABSHr)>; def : InstRW<[M4WriteNSHF1], (instregex "^FABS[SD]r")>; def : InstRW<[M4WriteFADD2H], (instregex "^F(ADD|SUB)Hrr")>; def : InstRW<[M4WriteFADD2], (instregex "^F(ADD|SUB)[SD]rr")>; def : InstRW<[M4WriteFADD2H], (instregex "^FADDPv.i16")>; def : InstRW<[M4WriteFADD2], (instregex "^FADDPv.i(32|64)")>; def : InstRW<[M4WriteNEONQ], (instregex "^FCCMPE?[HSD]rr")>; def : InstRW<[M4WriteNMSC2], (instregex "^FCMPE?[HSD]r[ir]")>; def : InstRW<[M4WriteNMSC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(16|32|64|v1)")>; def : InstRW<[M4WriteFDIV7H], (instrs FDIVHrr)>; def : InstRW<[M4WriteFDIV7], (instrs FDIVSrr)>; def : InstRW<[M4WriteFDIV12], (instrs FDIVDrr)>; def : InstRW<[M4WriteNMSC1], (instregex "^F(MAX|MIN)(NM)?[HSD]rr")>; def : InstRW<[M4WriteFMAC3H], (instregex "^FN?MULHrr")>; def : InstRW<[M4WriteFMAC3], (instregex "^FN?MUL[SD]rr")>; def : InstRW<[M4WriteFMAC3H], (instrs FMULX16)>; def : InstRW<[M4WriteFMAC3], (instregex "^FMULX(32|64)")>; def : InstRW<[M4WriteFMAC4H, M4ReadFMACM1], (instregex "^FN?M(ADD|SUB)Hrrr")>; def : InstRW<[M4WriteFMAC4, M4ReadFMACM1], (instregex "^FN?M(ADD|SUB)[SD]rrr")>; def : InstRW<[M4WriteNALU1H], (instrs FNEGHr)>; def : InstRW<[M4WriteNALU1], (instregex "^FNEG[SD]r")>; def : InstRW<[M4WriteFCVT3A], (instregex "^FRINT.+r")>; def : InstRW<[M4WriteNEONH], (instregex "^FCSEL[HSD]rrr")>; def : InstRW<[M4WriteFSQR7H], (instrs FSQRTHr)>; def : InstRW<[M4WriteFSQR8], (instrs FSQRTSr)>; def : InstRW<[M4WriteFSQR12], (instrs FSQRTDr)>; // FP miscellaneous instructions. 
def : InstRW<[M4WriteFCVT2H], (instregex "^FCVTH[SD]r")>;
def : InstRW<[M4WriteFCVT2H], (instregex "^FCVT[SD]Hr")>;
def : InstRW<[M4WriteFCVT2], (instregex "^FCVT[SD][SD]r")>;
def : InstRW<[M4WriteFCVT6A], (instregex "^[SU]CVTF[SU][XW][HSD]ri")>;
def : InstRW<[M4WriteNEONR], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>;
def : InstRW<[M4WriteNALU1], (instregex "^FMOV[HSD][ir]")>;
def : InstRW<[M4WriteSA], (instregex "^FMOV[WX][HSD]r")>;
def : InstRW<[M4WriteNEONJ], (instregex "^FMOV[HSD][WX]r")>;
def : InstRW<[M4WriteNEONI], (instregex "^FMOVXDHighr")>;
def : InstRW<[M4WriteNEONK], (instregex "^FMOVDXHighr")>;
def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev1f16")>;
def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev1i(32|64)")>;
def : InstRW<[M4WriteNMSC1], (instregex "^FRECPXv1")>;
-def : InstRW<[M4WriteFMAC4H,
-              M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S16")>;
-def : InstRW<[M4WriteFMAC4,
-              M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S(32|64)")>;
+def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)S16")>;
+def : InstRW<[M4WriteFMAC4], (instregex "^F(RECP|RSQRT)S(32|64)")>;
// FP load instructions.
def : InstRW<[WriteVLD], (instregex "^LDR[SDQ]l")>;
def : InstRW<[WriteVLD], (instregex "^LDUR[BHSDQ]i")>;
def : InstRW<[WriteVLD, WriteAdr], (instregex "^LDR[BHSDQ](post|pre)")>;
def : InstRW<[WriteVLD], (instregex "^LDR[BHSDQ]ui")>;
def : InstRW<[M4WriteLE, ReadAdrBase], (instregex "^LDR[BHSDQ]roW")>;
def : InstRW<[WriteVLD, ReadAdrBase], (instregex "^LDR[BHSD]roX")>;
def : InstRW<[M4WriteLE, ReadAdrBase], (instrs LDRQroX)>;
def : InstRW<[WriteVLD, M4WriteLH], (instregex "^LDN?P[SD]i")>;
def : InstRW<[M4WriteLA, M4WriteLH], (instregex "^LDN?PQi")>;
def : InstRW<[M4WriteL5, M4WriteLH, WriteAdr], (instregex "^LDP[SD]post")>;
def : InstRW<[M4WriteLB, M4WriteLH, WriteAdr], (instrs LDPQpost)>;
def : InstRW<[M4WriteLB, M4WriteLH, WriteAdr], (instregex "^LDP[SD]pre")>;
def : InstRW<[M4WriteLC, M4WriteLH, WriteAdr], (instrs LDPQpre)>;
// FP store instructions.
def : InstRW<[WriteVST], (instregex "^STUR[BHSDQ]i")>;
def : InstRW<[WriteVST, WriteAdr], (instregex "^STR[BHSDQ](post|pre)")>;
def : InstRW<[WriteVST], (instregex "^STR[BHSDQ]ui")>;
def : InstRW<[M4WriteVSTJ, ReadAdrBase], (instregex "^STR[BHSD]roW")>;
def : InstRW<[M4WriteVSTK, ReadAdrBase], (instrs STRQroW)>;
def : InstRW<[WriteVST, ReadAdrBase], (instregex "^STR[BHSD]roX")>;
def : InstRW<[M4WriteVSTK, ReadAdrBase], (instrs STRQroX)>;
def : InstRW<[WriteVST], (instregex "^STN?P[SD]i")>;
def : InstRW<[M4WriteVSTA], (instregex "^STN?PQi")>;
def : InstRW<[WriteVST, WriteAdr], (instregex "^STP[SD](post|pre)")>;
def : InstRW<[M4WriteVSTJ, WriteAdr], (instregex "^STPQ(post|pre)")>;
// ASIMD instructions.
def : InstRW<[M4WriteNHAD1], (instregex "^[SU]ABDL?v")>;
def : InstRW<[M4WriteNHAD3], (instregex "^[SU]ABAL?v")>;
def : InstRW<[M4WriteNMSC1], (instregex "^ABSv")>;
def : InstRW<[M4WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>;
def : InstRW<[M4WriteNHAD3], (instregex "^[SU]?ADDL?Pv")>;
def : InstRW<[M4WriteNHAD3], (instregex "^[SU]H(ADD|SUB)v")>;
def : InstRW<[M4WriteNHAD3], (instregex "^[SU](ADD|SUB)[LW]v")>;
def : InstRW<[M4WriteNHAD3], (instregex "^R?(ADD|SUB)HN2?v")>;
def : InstRW<[M4WriteNHAD3], (instregex "^[SU]Q(ADD|SUB)v")>;
def : InstRW<[M4WriteNHAD3], (instregex "^(SU|US)QADDv")>;
def : InstRW<[M4WriteNHAD3], (instregex "^[SU]RHADDv")>;
def : InstRW<[M4WriteNMSC1], (instregex "^SQ(ABS|NEG)v")>;
def : InstRW<[M4WriteNHAD3], (instregex "^[SU]?ADDL?Vv")>;
def : InstRW<[M4WriteNMSC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
def : InstRW<[M4WriteNALU1], (instregex "^CMTSTv")>;
def : InstRW<[M4WriteNALU1], (instregex "^(AND|BIC|EOR|NOT|ORN|ORR)v")>;
def : InstRW<[M4WriteNMSC1], (instregex "^[SU](MIN|MAX)v")>;
def : InstRW<[M4WriteNMSC2], (instregex "^[SU](MIN|MAX)Pv")>;
def : InstRW<[M4WriteNHAD3], (instregex "^[SU](MIN|MAX)Vv")>;
-def : InstRW<[M4WriteNMUL3], (instregex "^(SQR?D)?MULH?v")>;
def : InstRW<[M4WriteNMUL3, M4ReadNMULM1], (instregex "^ML[AS]v")>;
-def : InstRW<[M4WriteNMUL3], (instregex "^SQRDML[AS]H")>;
-def : InstRW<[M4WriteMULL,
-              M4ReadMULLP2], (instregex "^(S|U|SQD)ML[AS]Lv")>;
-def : InstRW<[M4WriteMULL,
-              M4ReadMULLP2], (instregex "^(S|U|SQD)MULLv")>;
+def : InstRW<[M4WriteNMUL3,
+              M4ReadNMULM1], (instregex "^(SQR?D)?MULH?v")>;
+def : InstRW<[M4WriteNMUL3,
+              M4ReadNMULM1], (instregex "^SQRDML[AS]H")>;
+def : InstRW<[M4WriteNMUL3,
+              M4ReadNMULM1], (instregex "^(S|U|SQD)ML[AS]L(v1(i32|i64)|v2i32|v4i16|v8i8)")>;
+def : InstRW<[M4WriteNMUL3,
+              M4ReadNMULP2], (instregex "^(S|U|SQD)ML[AS]L(v4i32|v8i16|v16i8)")>;
+def : InstRW<[M4WriteNMUL3,
+              M4ReadNMULM1], (instregex "^(S|U|SQD)MULL(v1(i32|i64)|v2i32|v4i16|v8i8)")>;
+def : InstRW<[M4WriteNMUL3,
+              M4ReadNMULP2], (instregex "^(S|U|SQD)MULL(v4i32|v8i16|v16i8)")>;
def : InstRW<[M4WriteNMUL3], (instregex "^[SU]DOT(lane)?v")>;
def : InstRW<[M4WriteNHAD3], (instregex "^[SU]ADALPv")>;
def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]R?SRA[dv]")>;
def : InstRW<[M4WriteNSHT1], (instregex "^SHL[dv]")>;
def : InstRW<[M4WriteNSHT1], (instregex "^S[LR]I[dv]")>;
def : InstRW<[M4WriteNSHT1], (instregex "^[SU]SH[LR][dv]")>;
def : InstRW<[M4WriteNSHT2], (instregex "^[SU]?SHLLv")>;
def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]?Q?R?SHRU?N[bhsv]")>;
def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]RSH[LR][dv]")>;
def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]QR?SHLU?[bhsdv]")>;
// ASIMD FP instructions.
def : InstRW<[M4WriteNSHF1H], (instregex "^FABSv.f16")>; def : InstRW<[M4WriteNSHF1], (instregex "^FABSv.f(32|64)")>; def : InstRW<[M4WriteFADD2H], (instregex "^F(ABD|ADD|SUB)v.f16")>; def : InstRW<[M4WriteFADD2], (instregex "^F(ABD|ADD|SUB)v.f(32|64)")>; def : InstRW<[M4WriteFADD2H], (instregex "^FADDPv.f16")>; def : InstRW<[M4WriteFADD2], (instregex "^FADDPv.f(32|64)")>; def : InstRW<[M4WriteNMSC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>; def : InstRW<[M4WriteFCVT2], (instregex "^FCVT(L|N|XN)v")>; def : InstRW<[M4WriteFCVT2A], (instregex "^FCVT[AMNPZ][SU]v")>; def : InstRW<[M4WriteFCVT2H], (instregex "^[SU]CVTFv.[fi]16")>; def : InstRW<[M4WriteFCVT2], (instregex "^[SU]CVTFv.[fi](32|64)")>; def : InstRW<[M4WriteFDIV7H], (instrs FDIVv4f16)>; def : InstRW<[M4WriteNEONVH], (instrs FDIVv8f16)>; def : InstRW<[M4WriteFDIV7], (instrs FDIVv2f32)>; def : InstRW<[M4WriteNEONV], (instrs FDIVv4f32)>; def : InstRW<[M4WriteNEONW], (instrs FDIVv2f64)>; def : InstRW<[M4WriteNMSC1], (instregex "^F(MAX|MIN)(NM)?v")>; def : InstRW<[M4WriteNMSC2], (instregex "^F(MAX|MIN)(NM)?Pv")>; def : InstRW<[M4WriteNEONZ], (instregex "^F(MAX|MIN)(NM)?Vv")>; def : InstRW<[M4WriteFMAC2H], (instregex "^FMULX?v.[fi]16")>; def : InstRW<[M4WriteFMAC3], (instregex "^FMULX?v.[fi](32|64)")>; def : InstRW<[M4WriteFMAC4H, M4ReadFMACM1], (instregex "^FML[AS]v.[fi]16")>; def : InstRW<[M4WriteFMAC4, M4ReadFMACM1], (instregex "^FML[AS]v.[fi](32|64)")>; def : InstRW<[M4WriteNALU1H], (instregex "^FNEGv.f16")>; def : InstRW<[M4WriteNALU1], (instregex "^FNEGv.f(32|64)")>; def : InstRW<[M4WriteFCVT3A], (instregex "^FRINT[AIMNPXZ]v")>; def : InstRW<[M4WriteFSQR7H], (instrs FSQRTv4f16)>; def : InstRW<[M4WriteNEONXH], (instrs FSQRTv8f16)>; def : InstRW<[M4WriteFSQR8], (instrs FSQRTv2f32)>; def : InstRW<[M4WriteNEONX], (instrs FSQRTv4f32)>; def : InstRW<[M4WriteNEONY], (instrs FSQRTv2f64)>; // ASIMD miscellaneous instructions. 
def : InstRW<[M4WriteNALU1], (instregex "^RBITv")>;
def : InstRW<[M4WriteNALU1], (instregex "^(BIF|BIT|BSL)v")>;
def : InstRW<[M4WriteNALU1], (instregex "^CL[STZ]v")>;
def : InstRW<[M4WriteNEONB], (instregex "^DUPv.+gpr")>;
def : InstRW<[M4WriteNSHF1], (instregex "^CPY")>;
def : InstRW<[M4WriteNSHF1], (instregex "^DUPv.+lane")>;
def : InstRW<[M4WriteNSHF1], (instregex "^EXTv")>;
def : InstRW<[M4WriteNSHT4A], (instregex "^XTNv")>;
def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]?QXTU?Nv")>;
def : InstRW<[M4WriteNEONB], (instregex "^INSv.+gpr")>;
def : InstRW<[M4WriteNSHF1], (instregex "^INSv.+lane")>;
def : InstRW<[M4WriteMOVI], (instregex "^(MOV|MVN)I")>;
def : InstRW<[M4WriteNALU1H], (instregex "^FMOVv.f16")>;
def : InstRW<[M4WriteNALU1], (instregex "^FMOVv.f(32|64)")>;
def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev[248]f16")>;
def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev[248]f(32|64)")>;
def : InstRW<[M4WriteFCVT3], (instregex "^U(RECP|RSQRT)Ev[24]i32")>;
-def : InstRW<[M4WriteFMAC4H,
-              M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f16")>;
-def : InstRW<[M4WriteFMAC4,
-              M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>;
+def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)Sv.f16")>;
+def : InstRW<[M4WriteFMAC4], (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>;
def : InstRW<[M4WriteNSHF1], (instregex "^REV(16|32|64)v")>;
def : InstRW<[M4WriteNSHFA], (instregex "^TB[LX]v(8|16)i8One")>;
def : InstRW<[M4WriteNSHFB], (instregex "^TB[LX]v(8|16)i8Two")>;
def : InstRW<[M4WriteNSHFC], (instregex "^TB[LX]v(8|16)i8Three")>;
def : InstRW<[M4WriteNSHFD], (instregex "^TB[LX]v(8|16)i8Four")>;
def : InstRW<[M4WriteNEONP], (instregex "^[SU]MOVv")>;
def : InstRW<[M4WriteNSHF1], (instregex "^(TRN|UZP|ZIP)[12]v")>;
// ASIMD load instructions.
def : InstRW<[WriteVLD], (instregex "LD1Onev(8b|4h|2s|1d)$")>; def : InstRW<[WriteVLD, M4WriteA1], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>; def : InstRW<[WriteVLD], (instregex "LD1Onev(16b|8h|4s|2d)$")>; def : InstRW<[WriteVLD, M4WriteA1], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>; def : InstRW<[M4WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>; def : InstRW<[M4WriteVLDA, M4WriteA1], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>; def : InstRW<[M4WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>; def : InstRW<[M4WriteVLDA, M4WriteA1], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>; def : InstRW<[M4WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>; def : InstRW<[M4WriteVLDB, M4WriteA1], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>; def : InstRW<[M4WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>; def : InstRW<[M4WriteVLDB, M4WriteA1], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>; def : InstRW<[M4WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>; def : InstRW<[M4WriteVLDC, M4WriteA1], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>; def : InstRW<[M4WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>; def : InstRW<[M4WriteVLDC, M4WriteA1], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>; def : InstRW<[M4WriteVLDD], (instregex "LD1i(8|16|32|64)$")>; def : InstRW<[M4WriteVLDD, M4WriteA1], (instregex "LD1i(8|16|32|64)_POST$")>; def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s|1d)$")>; def : InstRW<[WriteVLD, M4WriteA1], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>; def : InstRW<[WriteVLD], (instregex "LD1Rv(16b|8h|4s|2d)$")>; def : InstRW<[WriteVLD, M4WriteA1], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>; def : InstRW<[M4WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>; def : InstRW<[M4WriteVLDF, M4WriteA1], (instregex "LD2Twov(8b|4h|2s)_POST$")>; def : InstRW<[M4WriteVLDF], (instregex "LD2Twov(16b|8h|4s|2d)$")>; def : InstRW<[M4WriteVLDF, M4WriteA1], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>; def : InstRW<[M4WriteVLDG], (instregex "LD2i(8|16|32|64)$")>; def : InstRW<[M4WriteVLDG, M4WriteA1], (instregex "LD2i(8|16|32|64)_POST$")>; def : InstRW<[M4WriteVLDA], (instregex "LD2Rv(8b|4h|2s|1d)$")>; def : InstRW<[M4WriteVLDA, M4WriteA1], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>; def : InstRW<[M4WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>; def : InstRW<[M4WriteVLDA, M4WriteA1], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>; def : InstRW<[M4WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>; def : InstRW<[M4WriteVLDI, M4WriteA1], (instregex "LD3Threev(8b|4h|2s)_POST$")>; def : InstRW<[M4WriteVLDI], (instregex "LD3Threev(16b|8h|4s|2d)$")>; def : InstRW<[M4WriteVLDI, M4WriteA1], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>; def : InstRW<[M4WriteVLDJ], (instregex "LD3i(8|16|32)$")>; def : InstRW<[M4WriteVLDJ, M4WriteA1], (instregex "LD3i(8|16|32)_POST$")>; def : InstRW<[M4WriteVLDL], (instregex "LD3i64$")>; def : InstRW<[M4WriteVLDL, M4WriteA1], (instregex "LD3i64_POST$")>; def : InstRW<[M4WriteVLDB], (instregex "LD3Rv(8b|4h|2s|1d)$")>; def : InstRW<[M4WriteVLDB, M4WriteA1], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>; def : InstRW<[M4WriteVLDB], (instregex "LD3Rv(16b|8h|4s|2d)$")>; def : InstRW<[M4WriteVLDB, M4WriteA1], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>; def : InstRW<[M4WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>; def : InstRW<[M4WriteVLDN, M4WriteA1], (instregex "LD4Fourv(8b|4h|2s)_POST$")>; def : InstRW<[M4WriteVLDN], (instregex "LD4Fourv(16b|8h|4s|2d)$")>; def : InstRW<[M4WriteVLDN, M4WriteA1], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>; def : InstRW<[M4WriteVLDK], (instregex "LD4i(8|16|32)$")>; def : 
InstRW<[M4WriteVLDK, M4WriteA1], (instregex "LD4i(8|16|32)_POST$")>; def : InstRW<[M4WriteVLDM], (instregex "LD4i64$")>; def : InstRW<[M4WriteVLDM, M4WriteA1], (instregex "LD4i64_POST$")>; def : InstRW<[M4WriteVLDC], (instregex "LD4Rv(8b|4h|2s|1d)$")>; def : InstRW<[M4WriteVLDC, M4WriteA1], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>; def : InstRW<[M4WriteVLDC], (instregex "LD4Rv(16b|8h|4s|2d)$")>; def : InstRW<[M4WriteVLDC, M4WriteA1], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>; // ASIMD store instructions. def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>; def : InstRW<[WriteVST, M4WriteA1], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; def : InstRW<[WriteVST], (instregex "ST1Onev(16b|8h|4s|2d)$")>; def : InstRW<[WriteVST, M4WriteA1], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; def : InstRW<[M4WriteVSTA], (instregex "ST1Twov(8b|4h|2s|1d)$")>; def : InstRW<[M4WriteVSTA, M4WriteA1], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; def : InstRW<[M4WriteVSTA], (instregex "ST1Twov(16b|8h|4s|2d)$")>; def : InstRW<[M4WriteVSTA, M4WriteA1], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; def : InstRW<[M4WriteVSTB], (instregex "ST1Threev(8b|4h|2s|1d)$")>; def : InstRW<[M4WriteVSTB, M4WriteA1], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>; def : InstRW<[M4WriteVSTB], (instregex "ST1Threev(16b|8h|4s|2d)$")>; def : InstRW<[M4WriteVSTB, M4WriteA1], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; def : InstRW<[M4WriteVSTC], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; def : InstRW<[M4WriteVSTC, M4WriteA1], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; def : InstRW<[M4WriteVSTC], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; def : InstRW<[M4WriteVSTC, M4WriteA1], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; def : InstRW<[WriteVST], (instregex "ST1i(8|16|32|64)$")>; def : InstRW<[WriteVST, M4WriteA1], (instregex "ST1i(8|16|32|64)_POST$")>; def : InstRW<[M4WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>; def : InstRW<[M4WriteVSTD, M4WriteA1], (instregex "ST2Twov(8b|4h|2s)_POST$")>; def : InstRW<[M4WriteVSTE], (instregex "ST2Twov(16b|8h|4s|2d)$")>; def : InstRW<[M4WriteVSTE, M4WriteA1], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; def : InstRW<[M4WriteVSTD], (instregex "ST2i(8|16|32|64)$")>; def : InstRW<[M4WriteVSTD, M4WriteA1], (instregex "ST2i(8|16|32|64)_POST$")>; def : InstRW<[M4WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>; def : InstRW<[M4WriteVSTF, M4WriteA1], (instregex "ST3Threev(8b|4h|2s)_POST$")>; def : InstRW<[M4WriteVSTG], (instregex "ST3Threev(16b|8h|4s|2d)$")>; def : InstRW<[M4WriteVSTG, M4WriteA1], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>; def : InstRW<[M4WriteVSTE], (instregex "ST3i(8|16|32|64)$")>; def : InstRW<[M4WriteVSTE, M4WriteA1], (instregex "ST3i(8|16|32|64)_POST$")>; def : InstRW<[M4WriteVSTL], (instregex "ST4Fourv(8b|4h|2s)$")>; def : InstRW<[M4WriteVSTL, M4WriteA1], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; def : InstRW<[M4WriteVSTI], (instregex "ST4Fourv(16b|8h|4s|2d)$")>; def : InstRW<[M4WriteVSTI, M4WriteA1], (instregex "ST4Fourv(16b|8h|4s|2d)_POST$")>; def : InstRW<[M4WriteVSTE], (instregex "ST4i(8|16|32|64)$")>; def : InstRW<[M4WriteVSTE, M4WriteA1], (instregex "ST4i(8|16|32|64)_POST$")>; // Cryptography instructions. 
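An illustrative aside before the cryptography entries that follow, not part of the patch: the multi-micro-op vector load/store writes used above (for example M4WriteVLDF with ResourceCycles = [3, 3] on the load units) describe how long each pipeline unit stays busy per instruction. The rough C++ sketch below assumes a single copy of each named unit and simply accumulates busy cycles per unit to find the bottleneck; it is not the formula MCSchedModel uses, and the WriteRes/bottleneckCycles names are invented for the example.

// Illustrative only: accumulate the busy cycles one write charges to each
// named unit and report the most loaded one, assuming one copy of every unit.
#include <algorithm>
#include <cstdio>
#include <map>
#include <string>
#include <utility>
#include <vector>

struct WriteRes {
  std::vector<std::pair<std::string, int>> ResourceCycles; // unit name -> busy cycles
};

static int bottleneckCycles(const WriteRes &W) {
  std::map<std::string, int> Busy;
  for (const auto &RC : W.ResourceCycles)
    Busy[RC.first] += RC.second;
  int Max = 0;
  for (const auto &B : Busy)
    Max = std::max(Max, B.second);
  return Max;
}

int main() {
  // M4WriteVLDF: [M4UnitL, M4UnitL] with ResourceCycles = [3, 3].
  WriteRes VLDF{{{"M4UnitL", 3}, {"M4UnitL", 3}}};
  std::printf("M4WriteVLDF keeps M4UnitL busy for %d cycles\n", bottleneckCycles(VLDF)); // 6
  return 0;
}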
def : InstRW<[M4WriteNCRY1], (instregex "^AES[DE]")>; def : InstRW<[M4WriteNCRY1, M4ReadAESM1], (instregex "^AESI?MC")>; def : InstRW<[M4WriteNCRY1A], (instregex "^PMULv")>; def : InstRW<[M4WriteNCRY1A], (instregex "^PMULLv(1|8)i")>; def : InstRW<[M4WriteNCRY3A], (instregex "^PMULLv(2|16)i")>; def : InstRW<[M4WriteNCRY1A], (instregex "^SHA1([CHMP]|SU[01])")>; def : InstRW<[M4WriteNCRY1A], (instrs SHA256SU0rr)>; def : InstRW<[M4WriteNCRY5A], (instrs SHA256SU1rrr)>; def : InstRW<[M4WriteNCRY5A], (instrs SHA256H2rrr)>; // CRC instructions. def : InstRW<[M4WriteE2], (instregex "^CRC32C?[BHWX]rr$")>; } // SchedModel = ExynosM4Model Index: vendor/llvm/dist-release_80/lib/Target/AArch64/AArch64SchedPredExynos.td =================================================================== --- vendor/llvm/dist-release_80/lib/Target/AArch64/AArch64SchedPredExynos.td (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/AArch64/AArch64SchedPredExynos.td (revision 348932) @@ -1,157 +1,146 @@ //===- AArch64SchedPredExynos.td - AArch64 Sched Preds -----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines scheduling predicate definitions that are used by the // AArch64 Exynos processors. // //===----------------------------------------------------------------------===// // Auxiliary predicates. // Check the shift in arithmetic and logic instructions. def ExynosCheckShift : CheckAny<[CheckShiftBy0, CheckAll< [CheckShiftLSL, CheckAny< [CheckShiftBy1, CheckShiftBy2, CheckShiftBy3]>]>]>; // Exynos predicates. // Identify BLR specifying the LR register as the indirect target register. def ExynosBranchLinkLRPred : MCSchedPredicate< CheckAll<[CheckOpcode<[BLR]>, CheckRegOperand<0, LR>]>>; // Identify arithmetic instructions without or with limited extension or shift. def ExynosArithFn : TIIPredicate< "isExynosArithFast", MCOpcodeSwitchStatement< [MCOpcodeSwitchCase< IsArithExtOp.ValidOpcodes, MCReturnStatement< CheckAny<[CheckExtBy0, CheckAll< [CheckAny< [CheckExtUXTW, CheckExtUXTX]>, CheckAny< [CheckExtBy1, CheckExtBy2, CheckExtBy3]>]>]>>>, MCOpcodeSwitchCase< IsArithShiftOp.ValidOpcodes, MCReturnStatement>, MCOpcodeSwitchCase< IsArithUnshiftOp.ValidOpcodes, MCReturnStatement>], MCReturnStatement>>; def ExynosArithPred : MCSchedPredicate; // Identify logic instructions with limited shift. def ExynosLogicFn : TIIPredicate< "isExynosLogicFast", MCOpcodeSwitchStatement< [MCOpcodeSwitchCase< IsLogicShiftOp.ValidOpcodes, MCReturnStatement>, MCOpcodeSwitchCase< IsLogicUnshiftOp.ValidOpcodes, MCReturnStatement>], MCReturnStatement>>; def ExynosLogicPred : MCSchedPredicate; // Identify more logic instructions with limited shift. def ExynosLogicExFn : TIIPredicate< "isExynosLogicExFast", MCOpcodeSwitchStatement< [MCOpcodeSwitchCase< IsLogicShiftOp.ValidOpcodes, MCReturnStatement< CheckAny< [ExynosCheckShift, CheckAll< [CheckShiftLSL, CheckShiftBy8]>]>>>, MCOpcodeSwitchCase< IsLogicUnshiftOp.ValidOpcodes, MCReturnStatement>], MCReturnStatement>>; def ExynosLogicExPred : MCSchedPredicate; // Identify a load or store using the register offset addressing mode // with a scaled non-extended register. 
def ExynosScaledIdxFn : TIIPredicate<"isExynosScaledAddr", MCOpcodeSwitchStatement< [MCOpcodeSwitchCase< IsLoadStoreRegOffsetOp.ValidOpcodes, MCReturnStatement< CheckAny< [CheckMemExtSXTW, CheckMemExtUXTW, CheckMemScaled]>>>], MCReturnStatement>>; def ExynosScaledIdxPred : MCSchedPredicate; // Identify FP instructions. def ExynosFPPred : MCSchedPredicate>; -// Identify whether an instruction whose result is a long vector -// operates on the upper half of the input registers. -def ExynosLongVectorUpperFn : TIIPredicate< - "isExynosLongVectorUpper", - MCOpcodeSwitchStatement< - [MCOpcodeSwitchCase< - IsLongVectorUpperOp.ValidOpcodes, - MCReturnStatement>], - MCReturnStatement>>; -def ExynosLongVectorUpperPred : MCSchedPredicate; - // Identify 128-bit NEON instructions. def ExynosQFormPred : MCSchedPredicate; // Identify instructions that reset a register efficiently. def ExynosResetFn : TIIPredicate< "isExynosResetFast", MCOpcodeSwitchStatement< [MCOpcodeSwitchCase< [ADR, ADRP, MOVNWi, MOVNXi, MOVZWi, MOVZXi], MCReturnStatement>, MCOpcodeSwitchCase< [ORRWri, ORRXri], MCReturnStatement< CheckAll< [CheckIsRegOperand<1>, CheckAny< [CheckRegOperand<1, WZR>, CheckRegOperand<1, XZR>]>]>>>], MCReturnStatement< CheckAny< [IsCopyIdiomFn, IsZeroFPIdiomFn]>>>>; def ExynosResetPred : MCSchedPredicate; // Identify EXTR as the alias for ROR (immediate). def ExynosRotateRightImmPred : MCSchedPredicate< CheckAll<[CheckOpcode<[EXTRWrri, EXTRXrri]>, CheckSameRegOperand<1, 2>]>>; // Identify cheap arithmetic and logic immediate instructions. def ExynosCheapFn : TIIPredicate< "isExynosCheapAsMove", MCOpcodeSwitchStatement< [MCOpcodeSwitchCase< IsArithLogicImmOp.ValidOpcodes, MCReturnStatement>], MCReturnStatement< CheckAny< [ExynosArithFn, ExynosResetFn, ExynosLogicFn]>>>>; Index: vendor/llvm/dist-release_80/lib/Target/AArch64/AArch64SchedPredicates.td =================================================================== --- vendor/llvm/dist-release_80/lib/Target/AArch64/AArch64SchedPredicates.td (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/AArch64/AArch64SchedPredicates.td (revision 348932) @@ -1,423 +1,370 @@ //===- AArch64SchedPredicates.td - AArch64 Sched Preds -----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines scheduling predicate definitions that are used by the // AArch64 subtargets. // //===----------------------------------------------------------------------===// // Function mappers. // Check the extension type in arithmetic instructions. let FunctionMapper = "AArch64_AM::getArithExtendType" in { def CheckExtUXTB : CheckImmOperand_s<3, "AArch64_AM::UXTB">; def CheckExtUXTH : CheckImmOperand_s<3, "AArch64_AM::UXTH">; def CheckExtUXTW : CheckImmOperand_s<3, "AArch64_AM::UXTW">; def CheckExtUXTX : CheckImmOperand_s<3, "AArch64_AM::UXTX">; def CheckExtSXTB : CheckImmOperand_s<3, "AArch64_AM::SXTB">; def CheckExtSXTH : CheckImmOperand_s<3, "AArch64_AM::SXTH">; def CheckExtSXTW : CheckImmOperand_s<3, "AArch64_AM::SXTW">; def CheckExtSXTX : CheckImmOperand_s<3, "AArch64_AM::SXTX">; } // Check for shifting in extended arithmetic instructions. foreach I = {0-3} in { let FunctionMapper = "AArch64_AM::getArithShiftValue" in def CheckExtBy#I : CheckImmOperand<3, I>; } // Check the extension type in the register offset addressing mode. 
let FunctionMapper = "AArch64_AM::getMemExtendType" in { def CheckMemExtUXTW : CheckImmOperand_s<3, "AArch64_AM::UXTW">; def CheckMemExtLSL : CheckImmOperand_s<3, "AArch64_AM::UXTX">; def CheckMemExtSXTW : CheckImmOperand_s<3, "AArch64_AM::SXTW">; def CheckMemExtSXTX : CheckImmOperand_s<3, "AArch64_AM::SXTX">; } // Check for scaling in the register offset addressing mode. let FunctionMapper = "AArch64_AM::getMemDoShift" in def CheckMemScaled : CheckImmOperandSimple<3>; // Check the shifting type in arithmetic and logic instructions. let FunctionMapper = "AArch64_AM::getShiftType" in { def CheckShiftLSL : CheckImmOperand_s<3, "AArch64_AM::LSL">; def CheckShiftLSR : CheckImmOperand_s<3, "AArch64_AM::LSR">; def CheckShiftASR : CheckImmOperand_s<3, "AArch64_AM::ASR">; def CheckShiftROR : CheckImmOperand_s<3, "AArch64_AM::ROR">; def CheckShiftMSL : CheckImmOperand_s<3, "AArch64_AM::MSL">; } // Check for shifting in arithmetic and logic instructions. foreach I = {0-3, 8} in { let FunctionMapper = "AArch64_AM::getShiftValue" in def CheckShiftBy#I : CheckImmOperand<3, I>; } // Generic predicates. // Identify whether an instruction is the 64-bit NEON form based on its result. def CheckDForm : CheckAll<[CheckIsRegOperand<0>, CheckAny<[CheckRegOperand<0, D0>, CheckRegOperand<0, D1>, CheckRegOperand<0, D2>, CheckRegOperand<0, D3>, CheckRegOperand<0, D4>, CheckRegOperand<0, D5>, CheckRegOperand<0, D6>, CheckRegOperand<0, D7>, CheckRegOperand<0, D8>, CheckRegOperand<0, D9>, CheckRegOperand<0, D10>, CheckRegOperand<0, D11>, CheckRegOperand<0, D12>, CheckRegOperand<0, D13>, CheckRegOperand<0, D14>, CheckRegOperand<0, D15>, CheckRegOperand<0, D16>, CheckRegOperand<0, D17>, CheckRegOperand<0, D18>, CheckRegOperand<0, D19>, CheckRegOperand<0, D20>, CheckRegOperand<0, D21>, CheckRegOperand<0, D22>, CheckRegOperand<0, D23>, CheckRegOperand<0, D24>, CheckRegOperand<0, D25>, CheckRegOperand<0, D26>, CheckRegOperand<0, D27>, CheckRegOperand<0, D28>, CheckRegOperand<0, D29>, CheckRegOperand<0, D30>, CheckRegOperand<0, D31>]>]>; // Identify whether an instruction is the 128-bit NEON form based on its result. def CheckQForm : CheckAll<[CheckIsRegOperand<0>, CheckAny<[CheckRegOperand<0, Q0>, CheckRegOperand<0, Q1>, CheckRegOperand<0, Q2>, CheckRegOperand<0, Q3>, CheckRegOperand<0, Q4>, CheckRegOperand<0, Q5>, CheckRegOperand<0, Q6>, CheckRegOperand<0, Q7>, CheckRegOperand<0, Q8>, CheckRegOperand<0, Q9>, CheckRegOperand<0, Q10>, CheckRegOperand<0, Q11>, CheckRegOperand<0, Q12>, CheckRegOperand<0, Q13>, CheckRegOperand<0, Q14>, CheckRegOperand<0, Q15>, CheckRegOperand<0, Q16>, CheckRegOperand<0, Q17>, CheckRegOperand<0, Q18>, CheckRegOperand<0, Q19>, CheckRegOperand<0, Q20>, CheckRegOperand<0, Q21>, CheckRegOperand<0, Q22>, CheckRegOperand<0, Q23>, CheckRegOperand<0, Q24>, CheckRegOperand<0, Q25>, CheckRegOperand<0, Q26>, CheckRegOperand<0, Q27>, CheckRegOperand<0, Q28>, CheckRegOperand<0, Q29>, CheckRegOperand<0, Q30>, CheckRegOperand<0, Q31>]>]>; // Identify arithmetic instructions with extend. def IsArithExtOp : CheckOpcode<[ADDWrx, ADDXrx, ADDSWrx, ADDSXrx, SUBWrx, SUBXrx, SUBSWrx, SUBSXrx, ADDXrx64, ADDSXrx64, SUBXrx64, SUBSXrx64]>; // Identify arithmetic immediate instructions. def IsArithImmOp : CheckOpcode<[ADDWri, ADDXri, ADDSWri, ADDSXri, SUBWri, SUBXri, SUBSWri, SUBSXri]>; // Identify arithmetic instructions with shift. def IsArithShiftOp : CheckOpcode<[ADDWrs, ADDXrs, ADDSWrs, ADDSXrs, SUBWrs, SUBXrs, SUBSWrs, SUBSXrs]>; // Identify arithmetic instructions without shift. 
def IsArithUnshiftOp : CheckOpcode<[ADDWrr, ADDXrr, ADDSWrr, ADDSXrr, SUBWrr, SUBXrr, SUBSWrr, SUBSXrr]>; // Identify logic immediate instructions. def IsLogicImmOp : CheckOpcode<[ANDWri, ANDXri, EORWri, EORXri, ORRWri, ORRXri]>; // Identify logic instructions with shift. def IsLogicShiftOp : CheckOpcode<[ANDWrs, ANDXrs, ANDSWrs, ANDSXrs, BICWrs, BICXrs, BICSWrs, BICSXrs, EONWrs, EONXrs, EORWrs, EORXrs, ORNWrs, ORNXrs, ORRWrs, ORRXrs]>; // Identify logic instructions without shift. def IsLogicUnshiftOp : CheckOpcode<[ANDWrr, ANDXrr, ANDSWrr, ANDSXrr, BICWrr, BICXrr, BICSWrr, BICSXrr, EONWrr, EONXrr, EORWrr, EORXrr, ORNWrr, ORNXrr, ORRWrr, ORRXrr]>; // Identify arithmetic and logic immediate instructions. def IsArithLogicImmOp : CheckOpcode; // Identify arithmetic and logic instructions with shift. def IsArithLogicShiftOp : CheckOpcode; // Identify arithmetic and logic instructions without shift. def IsArithLogicUnshiftOp : CheckOpcode; // Identify whether an instruction is an ASIMD // load using the post index addressing mode. def IsLoadASIMDPostOp : CheckOpcode<[LD1Onev8b_POST, LD1Onev4h_POST, LD1Onev2s_POST, LD1Onev1d_POST, LD1Onev16b_POST, LD1Onev8h_POST, LD1Onev4s_POST, LD1Onev2d_POST, LD1Twov8b_POST, LD1Twov4h_POST, LD1Twov2s_POST, LD1Twov1d_POST, LD1Twov16b_POST, LD1Twov8h_POST, LD1Twov4s_POST, LD1Twov2d_POST, LD1Threev8b_POST, LD1Threev4h_POST, LD1Threev2s_POST, LD1Threev1d_POST, LD1Threev16b_POST, LD1Threev8h_POST, LD1Threev4s_POST, LD1Threev2d_POST, LD1Fourv8b_POST, LD1Fourv4h_POST, LD1Fourv2s_POST, LD1Fourv1d_POST, LD1Fourv16b_POST, LD1Fourv8h_POST, LD1Fourv4s_POST, LD1Fourv2d_POST, LD1i8_POST, LD1i16_POST, LD1i32_POST, LD1i64_POST, LD1Rv8b_POST, LD1Rv4h_POST, LD1Rv2s_POST, LD1Rv1d_POST, LD1Rv16b_POST, LD1Rv8h_POST, LD1Rv4s_POST, LD1Rv2d_POST, LD2Twov8b_POST, LD2Twov4h_POST, LD2Twov2s_POST, LD2Twov16b_POST, LD2Twov8h_POST, LD2Twov4s_POST, LD2Twov2d_POST, LD2i8_POST, LD2i16_POST, LD2i32_POST, LD2i64_POST, LD2Rv8b_POST, LD2Rv4h_POST, LD2Rv2s_POST, LD2Rv1d_POST, LD2Rv16b_POST, LD2Rv8h_POST, LD2Rv4s_POST, LD2Rv2d_POST, LD3Threev8b_POST, LD3Threev4h_POST, LD3Threev2s_POST, LD3Threev16b_POST, LD3Threev8h_POST, LD3Threev4s_POST, LD3Threev2d_POST, LD3i8_POST, LD3i16_POST, LD3i32_POST, LD3i64_POST, LD3Rv8b_POST, LD3Rv4h_POST, LD3Rv2s_POST, LD3Rv1d_POST, LD3Rv16b_POST, LD3Rv8h_POST, LD3Rv4s_POST, LD3Rv2d_POST, LD4Fourv8b_POST, LD4Fourv4h_POST, LD4Fourv2s_POST, LD4Fourv16b_POST, LD4Fourv8h_POST, LD4Fourv4s_POST, LD4Fourv2d_POST, LD4i8_POST, LD4i16_POST, LD4i32_POST, LD4i64_POST, LD4Rv8b_POST, LD4Rv4h_POST, LD4Rv2s_POST, LD4Rv1d_POST, LD4Rv16b_POST, LD4Rv8h_POST, LD4Rv4s_POST, LD4Rv2d_POST]>; // Identify whether an instruction is an ASIMD // store using the post index addressing mode. 
def IsStoreASIMDPostOp : CheckOpcode<[ST1Onev8b_POST, ST1Onev4h_POST, ST1Onev2s_POST, ST1Onev1d_POST, ST1Onev16b_POST, ST1Onev8h_POST, ST1Onev4s_POST, ST1Onev2d_POST, ST1Twov8b_POST, ST1Twov4h_POST, ST1Twov2s_POST, ST1Twov1d_POST, ST1Twov16b_POST, ST1Twov8h_POST, ST1Twov4s_POST, ST1Twov2d_POST, ST1Threev8b_POST, ST1Threev4h_POST, ST1Threev2s_POST, ST1Threev1d_POST, ST1Threev16b_POST, ST1Threev8h_POST, ST1Threev4s_POST, ST1Threev2d_POST, ST1Fourv8b_POST, ST1Fourv4h_POST, ST1Fourv2s_POST, ST1Fourv1d_POST, ST1Fourv16b_POST, ST1Fourv8h_POST, ST1Fourv4s_POST, ST1Fourv2d_POST, ST1i8_POST, ST1i16_POST, ST1i32_POST, ST1i64_POST, ST2Twov8b_POST, ST2Twov4h_POST, ST2Twov2s_POST, ST2Twov16b_POST, ST2Twov8h_POST, ST2Twov4s_POST, ST2Twov2d_POST, ST2i8_POST, ST2i16_POST, ST2i32_POST, ST2i64_POST, ST3Threev8b_POST, ST3Threev4h_POST, ST3Threev2s_POST, ST3Threev16b_POST, ST3Threev8h_POST, ST3Threev4s_POST, ST3Threev2d_POST, ST3i8_POST, ST3i16_POST, ST3i32_POST, ST3i64_POST, ST4Fourv8b_POST, ST4Fourv4h_POST, ST4Fourv2s_POST, ST4Fourv16b_POST, ST4Fourv8h_POST, ST4Fourv4s_POST, ST4Fourv2d_POST, ST4i8_POST, ST4i16_POST, ST4i32_POST, ST4i64_POST]>; // Identify whether an instruction is an ASIMD load // or store using the post index addressing mode. def IsLoadStoreASIMDPostOp : CheckOpcode; // Identify whether an instruction is a load // using the register offset addressing mode. def IsLoadRegOffsetOp : CheckOpcode<[PRFMroW, PRFMroX, LDRBBroW, LDRBBroX, LDRSBWroW, LDRSBWroX, LDRSBXroW, LDRSBXroX, LDRHHroW, LDRHHroX, LDRSHWroW, LDRSHWroX, LDRSHXroW, LDRSHXroX, LDRWroW, LDRWroX, LDRSWroW, LDRSWroX, LDRXroW, LDRXroX, LDRBroW, LDRBroX, LDRHroW, LDRHroX, LDRSroW, LDRSroX, LDRDroW, LDRDroX]>; // Identify whether an instruction is a load // using the register offset addressing mode. def IsStoreRegOffsetOp : CheckOpcode<[STRBBroW, STRBBroX, STRHHroW, STRHHroX, STRWroW, STRWroX, STRXroW, STRXroX, STRBroW, STRBroX, STRHroW, STRHroX, STRSroW, STRSroX, STRDroW, STRDroX]>; // Identify whether an instruction is a load or // store using the register offset addressing mode. def IsLoadStoreRegOffsetOp : CheckOpcode; -// Identify whether an instruction whose result is a long vector -// operates on the upper half of the input registers. 
-def IsLongVectorUpperOp : CheckOpcode<[FCVTLv8i16, FCVTLv4i32, - FCVTNv8i16, FCVTNv4i32, - FCVTXNv4f32, - PMULLv16i8, PMULLv2i64, - RADDHNv8i16_v16i8, RADDHNv4i32_v8i16, RADDHNv2i64_v4i32, - RSHRNv16i8_shift, RSHRNv8i16_shift, RSHRNv4i32_shift, - RSUBHNv8i16_v16i8, RSUBHNv4i32_v8i16, RSUBHNv2i64_v4i32, - SABALv16i8_v8i16, SABALv8i16_v4i32, SABALv4i32_v2i64, - SABDLv16i8_v8i16, SABDLv8i16_v4i32, SABDLv4i32_v2i64, - SADDLv16i8_v8i16, SADDLv8i16_v4i32, SADDLv4i32_v2i64, - SADDWv16i8_v8i16, SADDWv8i16_v4i32, SADDWv4i32_v2i64, - SHLLv16i8, SHLLv8i16, SHLLv4i32, - SHRNv16i8_shift, SHRNv8i16_shift, SHRNv4i32_shift, - SMLALv16i8_v8i16, SMLALv8i16_v4i32, SMLALv4i32_v2i64, - SMLALv8i16_indexed, SMLALv4i32_indexed, - SMLSLv16i8_v8i16, SMLSLv8i16_v4i32, SMLSLv4i32_v2i64, - SMLSLv8i16_indexed, SMLSLv4i32_indexed, - SMULLv16i8_v8i16, SMULLv8i16_v4i32, SMULLv4i32_v2i64, - SMULLv8i16_indexed, SMULLv4i32_indexed, - SQDMLALv8i16_v4i32, SQDMLALv4i32_v2i64, - SQDMLALv8i16_indexed, SQDMLALv4i32_indexed, - SQDMLSLv8i16_v4i32, SQDMLSLv4i32_v2i64, - SQDMLSLv8i16_indexed, SQDMLSLv4i32_indexed, - SQDMULLv8i16_v4i32, SQDMULLv4i32_v2i64, - SQDMULLv8i16_indexed, SQDMULLv4i32_indexed, - SQRSHRNv16i8_shift, SQRSHRNv8i16_shift, SQRSHRNv4i32_shift, - SQRSHRUNv16i8_shift, SQRSHRUNv8i16_shift, SQRSHRUNv4i32_shift, - SQSHRNv16i8_shift, SQSHRNv8i16_shift, SQSHRNv4i32_shift, - SQSHRUNv16i8_shift, SQSHRUNv8i16_shift, SQSHRUNv4i32_shift, - SQXTNv16i8, SQXTNv8i16, SQXTNv4i32, - SQXTUNv16i8, SQXTUNv8i16, SQXTUNv4i32, - SSHLLv16i8_shift, SSHLLv8i16_shift, SSHLLv4i32_shift, - SSUBLv16i8_v8i16, SSUBLv8i16_v4i32, SSUBLv4i32_v2i64, - SSUBWv16i8_v8i16, SSUBWv8i16_v4i32, SSUBWv4i32_v2i64, - UABALv16i8_v8i16, UABALv8i16_v4i32, UABALv4i32_v2i64, - UABDLv16i8_v8i16, UABDLv8i16_v4i32, UABDLv4i32_v2i64, - UADDLv16i8_v8i16, UADDLv8i16_v4i32, UADDLv4i32_v2i64, - UADDWv16i8_v8i16, UADDWv8i16_v4i32, UADDWv4i32_v2i64, - UMLALv16i8_v8i16, UMLALv8i16_v4i32, UMLALv4i32_v2i64, - UMLALv8i16_indexed, UMLALv4i32_indexed, - UMLSLv16i8_v8i16, UMLSLv8i16_v4i32, UMLSLv4i32_v2i64, - UMLSLv8i16_indexed, UMLSLv4i32_indexed, - UMULLv16i8_v8i16, UMULLv8i16_v4i32, UMULLv4i32_v2i64, - UMULLv8i16_indexed, UMULLv4i32_indexed, - UQSHRNv16i8_shift, UQSHRNv8i16_shift, UQSHRNv4i32_shift, - UQXTNv16i8, UQXTNv8i16, UQXTNv4i32, - USHLLv16i8_shift, USHLLv8i16_shift, USHLLv4i32_shift, - USUBLv16i8_v8i16, USUBLv8i16_v4i32, USUBLv4i32_v2i64, - USUBWv16i8_v8i16, USUBWv8i16_v4i32, USUBWv4i32_v2i64, - XTNv16i8, XTNv8i16, XTNv4i32]>; - // Target predicates. // Identify an instruction that effectively transfers a register to another. def IsCopyIdiomFn : TIIPredicate<"isCopyIdiom", MCOpcodeSwitchStatement< [// MOV {Rd, SP}, {SP, Rn} => // ADD {Rd, SP}, {SP, Rn}, #0 MCOpcodeSwitchCase< [ADDWri, ADDXri], MCReturnStatement< CheckAll< [CheckIsRegOperand<0>, CheckIsRegOperand<1>, CheckAny< [CheckRegOperand<0, WSP>, CheckRegOperand<0, SP>, CheckRegOperand<1, WSP>, CheckRegOperand<1, SP>]>, CheckZeroOperand<2>]>>>, // MOV Rd, Rm => // ORR Rd, ZR, Rm, LSL #0 MCOpcodeSwitchCase< [ORRWrs, ORRXrs], MCReturnStatement< CheckAll< [CheckIsRegOperand<1>, CheckIsRegOperand<2>, CheckAny< [CheckRegOperand<1, WZR>, CheckRegOperand<1, XZR>]>, CheckShiftBy0]>>>], MCReturnStatement>>; def IsCopyIdiomPred : MCSchedPredicate; // Identify arithmetic instructions with an extended register. 
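An illustrative aside before the extended-register predicates that follow, not part of the patch: IsCopyIdiomFn above accepts ADD Rd, {SP|WSP}, #0 and ORR Rd, {XZR|WZR}, Rm, LSL #0 as plain register moves, which is what lets the Exynos reset/cheap predicates treat them as nearly free. The C++ sketch below hand-writes the same check over simplified stand-ins; the Inst and Operand types and the isCopyIdiom helper are hypothetical, not LLVM's MCInst or the TableGen-generated predicate.

// Illustrative only: a hand-written model of the copy-idiom test encoded by
// the TableGen predicate above.  Inst and Operand are hypothetical stand-ins.
#include <cstdio>
#include <string>
#include <vector>

struct Operand { bool IsReg; std::string Reg; long Imm; };
struct Inst { std::string Opcode; std::vector<Operand> Ops; };

// MOV involving SP is encoded as ADD Rd, Rn, #0; MOV Rd, Rm is encoded as
// ORR Rd, ZR, Rm with a zero LSL shift.  Both just transfer a register.
static bool isCopyIdiom(const Inst &MI) {
  if (MI.Opcode == "ADDWri" || MI.Opcode == "ADDXri")
    return MI.Ops[0].IsReg && MI.Ops[1].IsReg &&
           (MI.Ops[0].Reg == "SP" || MI.Ops[0].Reg == "WSP" ||
            MI.Ops[1].Reg == "SP" || MI.Ops[1].Reg == "WSP") &&
           MI.Ops[2].Imm == 0;
  if (MI.Opcode == "ORRWrs" || MI.Opcode == "ORRXrs")
    return MI.Ops[1].IsReg && MI.Ops[2].IsReg &&
           (MI.Ops[1].Reg == "WZR" || MI.Ops[1].Reg == "XZR") &&
           MI.Ops[3].Imm == 0; // shift amount must be LSL #0
  return false;
}

int main() {
  Inst Mov{"ORRXrs", {{true, "X0", 0}, {true, "XZR", 0}, {true, "X1", 0}, {false, "", 0}}};
  std::printf("ORR X0, XZR, X1, LSL #0 is a copy idiom: %d\n", (int)isCopyIdiom(Mov)); // 1
  return 0;
}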
def RegExtendedFn : TIIPredicate<"hasExtendedReg", MCOpcodeSwitchStatement< [MCOpcodeSwitchCase< IsArithExtOp.ValidOpcodes, MCReturnStatement< CheckNot>>>], MCReturnStatement>>; def RegExtendedPred : MCSchedPredicate; // Identify arithmetic and logic instructions with a shifted register. def RegShiftedFn : TIIPredicate<"hasShiftedReg", MCOpcodeSwitchStatement< [MCOpcodeSwitchCase< IsArithLogicShiftOp.ValidOpcodes, MCReturnStatement< CheckNot>>>], MCReturnStatement>>; def RegShiftedPred : MCSchedPredicate; // Identify a load or store using the register offset addressing mode // with an extended or scaled register. def ScaledIdxFn : TIIPredicate<"isScaledAddr", MCOpcodeSwitchStatement< [MCOpcodeSwitchCase< IsLoadStoreRegOffsetOp.ValidOpcodes, MCReturnStatement< CheckAny<[CheckNot, CheckMemScaled]>>>], MCReturnStatement>>; def ScaledIdxPred : MCSchedPredicate; // Identify an instruction that effectively resets a FP register to zero. def IsZeroFPIdiomFn : TIIPredicate<"isZeroFPIdiom", MCOpcodeSwitchStatement< [// MOVI Vd, #0 MCOpcodeSwitchCase< [MOVIv8b_ns, MOVIv16b_ns, MOVID, MOVIv2d_ns], MCReturnStatement>>, // MOVI Vd, #0, LSL #0 MCOpcodeSwitchCase< [MOVIv4i16, MOVIv8i16, MOVIv2i32, MOVIv4i32], MCReturnStatement< CheckAll< [CheckZeroOperand<1>, CheckZeroOperand<2>]>>>], MCReturnStatement>>; def IsZeroFPIdiomPred : MCSchedPredicate; // Identify an instruction that effectively resets a GP register to zero. def IsZeroIdiomFn : TIIPredicate<"isZeroIdiom", MCOpcodeSwitchStatement< [// ORR Rd, ZR, #0 MCOpcodeSwitchCase< [ORRWri, ORRXri], MCReturnStatement< CheckAll< [CheckIsRegOperand<1>, CheckAny< [CheckRegOperand<1, WZR>, CheckRegOperand<1, XZR>]>, CheckZeroOperand<2>]>>>], MCReturnStatement>>; def IsZeroIdiomPred : MCSchedPredicate; Index: vendor/llvm/dist-release_80/lib/Target/AMDGPU/SIFoldOperands.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/Target/AMDGPU/SIFoldOperands.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/AMDGPU/SIFoldOperands.cpp (revision 348932) @@ -1,1160 +1,1169 @@ //===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// /// \file //===----------------------------------------------------------------------===// // #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #define DEBUG_TYPE "si-fold-operands" using namespace llvm; namespace { struct FoldCandidate { MachineInstr *UseMI; union { MachineOperand *OpToFold; uint64_t ImmToFold; int FrameIndexToFold; }; int ShrinkOpcode; unsigned char UseOpNo; MachineOperand::MachineOperandType Kind; bool Commuted; FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp, bool Commuted_ = false, int ShrinkOp = -1) : UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo), Kind(FoldOp->getType()), Commuted(Commuted_) { if (FoldOp->isImm()) { ImmToFold = FoldOp->getImm(); } else if (FoldOp->isFI()) { FrameIndexToFold = FoldOp->getIndex(); } else { assert(FoldOp->isReg()); OpToFold = FoldOp; } } bool isFI() const { return Kind == MachineOperand::MO_FrameIndex; } bool isImm() const { return Kind == MachineOperand::MO_Immediate; } bool isReg() const { return Kind == MachineOperand::MO_Register; } bool isCommuted() const { return Commuted; } bool needsShrink() const { return ShrinkOpcode != -1; } int getShrinkOpcode() const { return ShrinkOpcode; } }; class SIFoldOperands : public MachineFunctionPass { public: static char ID; MachineRegisterInfo *MRI; const SIInstrInfo *TII; const SIRegisterInfo *TRI; const GCNSubtarget *ST; void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx, SmallVectorImpl &FoldList, SmallVectorImpl &CopiesToReplace) const; void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const; const MachineOperand *isClamp(const MachineInstr &MI) const; bool tryFoldClamp(MachineInstr &MI); std::pair isOMod(const MachineInstr &MI) const; bool tryFoldOMod(MachineInstr &MI); public: SIFoldOperands() : MachineFunctionPass(ID) { initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { return "SI Fold Operands"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } }; } // End anonymous namespace. INITIALIZE_PASS(SIFoldOperands, DEBUG_TYPE, "SI Fold Operands", false, false) char SIFoldOperands::ID = 0; char &llvm::SIFoldOperandsID = SIFoldOperands::ID; // Wrapper around isInlineConstant that understands special cases when // instruction types are replaced during operand folding. static bool isInlineConstantIfFolded(const SIInstrInfo *TII, const MachineInstr &UseMI, unsigned OpNo, const MachineOperand &OpToFold) { if (TII->isInlineConstant(UseMI, OpNo, OpToFold)) return true; unsigned Opc = UseMI.getOpcode(); switch (Opc) { case AMDGPU::V_MAC_F32_e64: case AMDGPU::V_MAC_F16_e64: case AMDGPU::V_FMAC_F32_e64: { // Special case for mac. Since this is replaced with mad when folded into // src2, we need to check the legality for the final instruction. 
int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); if (static_cast(OpNo) == Src2Idx) { bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64; bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64; unsigned Opc = IsFMA ? AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16); const MCInstrDesc &MadDesc = TII->get(Opc); return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType); } return false; } default: return false; } } FunctionPass *llvm::createSIFoldOperandsPass() { return new SIFoldOperands(); } static bool updateOperand(FoldCandidate &Fold, const SIInstrInfo &TII, const TargetRegisterInfo &TRI) { MachineInstr *MI = Fold.UseMI; MachineOperand &Old = MI->getOperand(Fold.UseOpNo); assert(Old.isReg()); if (Fold.isImm()) { if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked) { // Set op_sel/op_sel_hi on this operand or bail out if op_sel is // already set. unsigned Opcode = MI->getOpcode(); int OpNo = MI->getOperandNo(&Old); int ModIdx = -1; if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0)) ModIdx = AMDGPU::OpName::src0_modifiers; else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1)) ModIdx = AMDGPU::OpName::src1_modifiers; else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2)) ModIdx = AMDGPU::OpName::src2_modifiers; assert(ModIdx != -1); ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx); MachineOperand &Mod = MI->getOperand(ModIdx); unsigned Val = Mod.getImm(); if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1)) return false; // If upper part is all zero we do not need op_sel_hi. if (!isUInt<16>(Fold.ImmToFold)) { if (!(Fold.ImmToFold & 0xffff)) { Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0); Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1); Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff); return true; } Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1); } } + } - if (Fold.needsShrink()) { - MachineBasicBlock *MBB = MI->getParent(); - auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI); - if (Liveness != MachineBasicBlock::LQR_Dead) - return false; + if ((Fold.isImm() || Fold.isFI()) && Fold.needsShrink()) { + MachineBasicBlock *MBB = MI->getParent(); + auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI); + if (Liveness != MachineBasicBlock::LQR_Dead) + return false; - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - int Op32 = Fold.getShrinkOpcode(); - MachineOperand &Dst0 = MI->getOperand(0); - MachineOperand &Dst1 = MI->getOperand(1); - assert(Dst0.isDef() && Dst1.isDef()); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + int Op32 = Fold.getShrinkOpcode(); + MachineOperand &Dst0 = MI->getOperand(0); + MachineOperand &Dst1 = MI->getOperand(1); + assert(Dst0.isDef() && Dst1.isDef()); - bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg()); + bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg()); - const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg()); - unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC); - const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg()); - unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC); + const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg()); + unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC); - MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32); + MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32); - if (HaveNonDbgCarryUse) { - BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg()) - .addReg(AMDGPU::VCC, 
RegState::Kill); - } - - // Keep the old instruction around to avoid breaking iterators, but - // replace the outputs with dummy registers. - Dst0.setReg(NewReg0); - Dst1.setReg(NewReg1); - - if (Fold.isCommuted()) - TII.commuteInstruction(*Inst32, false); - return true; + if (HaveNonDbgCarryUse) { + BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg()) + .addReg(AMDGPU::VCC, RegState::Kill); } - Old.ChangeToImmediate(Fold.ImmToFold); + // Keep the old instruction around to avoid breaking iterators, but + // replace it with a dummy instruction to remove uses. + // + // FIXME: We should not invert how this pass looks at operands to avoid + // this. Should track set of foldable movs instead of looking for uses + // when looking at a use. + Dst0.setReg(NewReg0); + for (unsigned I = MI->getNumOperands() - 1; I > 0; --I) + MI->RemoveOperand(I); + MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF)); + + if (Fold.isCommuted()) + TII.commuteInstruction(*Inst32, false); return true; } assert(!Fold.needsShrink() && "not handled"); + if (Fold.isImm()) { + Old.ChangeToImmediate(Fold.ImmToFold); + return true; + } + if (Fold.isFI()) { Old.ChangeToFrameIndex(Fold.FrameIndexToFold); return true; } MachineOperand *New = Fold.OpToFold; if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) && TargetRegisterInfo::isVirtualRegister(New->getReg())) { Old.substVirtReg(New->getReg(), New->getSubReg(), TRI); Old.setIsUndef(New->isUndef()); return true; } // FIXME: Handle physical registers. return false; } static bool isUseMIInFoldList(ArrayRef FoldList, const MachineInstr *MI) { for (auto Candidate : FoldList) { if (Candidate.UseMI == MI) return true; } return false; } static bool tryAddToFoldList(SmallVectorImpl &FoldList, MachineInstr *MI, unsigned OpNo, MachineOperand *OpToFold, const SIInstrInfo *TII) { if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) { // Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2 unsigned Opc = MI->getOpcode(); if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 || Opc == AMDGPU::V_FMAC_F32_e64) && (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) { bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64; bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64; unsigned NewOpc = IsFMA ? AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16); // Check if changing this to a v_mad_{f16, f32} instruction will allow us // to fold the operand. MI->setDesc(TII->get(NewOpc)); bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII); if (FoldAsMAD) { MI->untieRegOperand(OpNo); return true; } MI->setDesc(TII->get(Opc)); } // Special case for s_setreg_b32 if (Opc == AMDGPU::S_SETREG_B32 && OpToFold->isImm()) { MI->setDesc(TII->get(AMDGPU::S_SETREG_IMM32_B32)); FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold)); return true; } // If we are already folding into another operand of MI, then // we can't commute the instruction, otherwise we risk making the // other fold illegal. if (isUseMIInFoldList(FoldList, MI)) return false; unsigned CommuteOpNo = OpNo; // Operand is not legal, so try to commute the instruction to // see if this makes it possible to fold. 
unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex; unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex; bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1); if (CanCommute) { if (CommuteIdx0 == OpNo) CommuteOpNo = CommuteIdx1; else if (CommuteIdx1 == OpNo) CommuteOpNo = CommuteIdx0; } // One of operands might be an Imm operand, and OpNo may refer to it after // the call of commuteInstruction() below. Such situations are avoided // here explicitly as OpNo must be a register operand to be a candidate // for memory folding. if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() || !MI->getOperand(CommuteIdx1).isReg())) return false; if (!CanCommute || !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1)) return false; if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) { if ((Opc == AMDGPU::V_ADD_I32_e64 || Opc == AMDGPU::V_SUB_I32_e64 || Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME - OpToFold->isImm()) { + (OpToFold->isImm() || OpToFold->isFI())) { MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); // Verify the other operand is a VGPR, otherwise we would violate the // constant bus restriction. unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0; MachineOperand &OtherOp = MI->getOperand(OtherIdx); if (!OtherOp.isReg() || !TII->getRegisterInfo().isVGPR(MRI, OtherOp.getReg())) return false; assert(MI->getOperand(1).isDef()); - int Op32 = AMDGPU::getVOPe32(Opc); + // Make sure to get the 32-bit version of the commuted opcode. + unsigned MaybeCommutedOpc = MI->getOpcode(); + int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc); + FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true, Op32)); return true; } TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1); return false; } FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true)); return true; } FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold)); return true; } // If the use operand doesn't care about the value, this may be an operand only // used for register indexing, in which case it is unsafe to fold. static bool isUseSafeToFold(const SIInstrInfo *TII, const MachineInstr &MI, const MachineOperand &UseMO) { return !UseMO.isUndef() && !TII->isSDWA(MI); //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg()); } void SIFoldOperands::foldOperand( MachineOperand &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx, SmallVectorImpl &FoldList, SmallVectorImpl &CopiesToReplace) const { const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx); if (!isUseSafeToFold(TII, *UseMI, UseOp)) return; // FIXME: Fold operands with subregs. if (UseOp.isReg() && OpToFold.isReg()) { if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister) return; // Don't fold subregister extracts into tied operands, only if it is a full // copy since a subregister use tied to a full register def doesn't really // make sense. e.g. don't fold: // // %1 = COPY %0:sub1 // %2 = V_MAC_{F16, F32} %3, %4, %1 // // into // %2 = V_MAC_{F16, F32} %3, %4, %0:sub1 if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister) return; } // Special case for REG_SEQUENCE: We can't fold literals into // REG_SEQUENCE instructions, so we have to fold them into the // uses of REG_SEQUENCE. 
if (UseMI->isRegSequence()) { unsigned RegSeqDstReg = UseMI->getOperand(0).getReg(); unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm(); for (MachineRegisterInfo::use_iterator RSUse = MRI->use_begin(RegSeqDstReg), RSE = MRI->use_end(); RSUse != RSE; ++RSUse) { MachineInstr *RSUseMI = RSUse->getParent(); if (RSUse->getSubReg() != RegSeqDstSubReg) continue; foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList, CopiesToReplace); } return; } bool FoldingImm = OpToFold.isImm(); if (FoldingImm && UseMI->isCopy()) { unsigned DestReg = UseMI->getOperand(0).getReg(); const TargetRegisterClass *DestRC = TargetRegisterInfo::isVirtualRegister(DestReg) ? MRI->getRegClass(DestReg) : TRI->getPhysRegClass(DestReg); unsigned SrcReg = UseMI->getOperand(1).getReg(); if (TargetRegisterInfo::isVirtualRegister(DestReg) && TargetRegisterInfo::isVirtualRegister(SrcReg)) { const TargetRegisterClass * SrcRC = MRI->getRegClass(SrcReg); if (TRI->isSGPRClass(SrcRC) && TRI->hasVGPRs(DestRC)) { MachineRegisterInfo::use_iterator NextUse; SmallVector CopyUses; for (MachineRegisterInfo::use_iterator Use = MRI->use_begin(DestReg), E = MRI->use_end(); Use != E; Use = NextUse) { NextUse = std::next(Use); FoldCandidate FC = FoldCandidate(Use->getParent(), Use.getOperandNo(), &UseMI->getOperand(1)); CopyUses.push_back(FC); } for (auto & F : CopyUses) { foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList, CopiesToReplace); } } } // In order to fold immediates into copies, we need to change the // copy to a MOV. unsigned MovOp = TII->getMovOpcode(DestRC); if (MovOp == AMDGPU::COPY) return; UseMI->setDesc(TII->get(MovOp)); CopiesToReplace.push_back(UseMI); } else { if (UseMI->isCopy() && OpToFold.isReg() && TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(0).getReg()) && TargetRegisterInfo::isVirtualRegister(UseMI->getOperand(1).getReg()) && TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) && TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg()) && !UseMI->getOperand(1).getSubReg()) { UseMI->getOperand(1).setReg(OpToFold.getReg()); UseMI->getOperand(1).setSubReg(OpToFold.getSubReg()); UseMI->getOperand(1).setIsKill(false); CopiesToReplace.push_back(UseMI); OpToFold.setIsKill(false); return; } const MCInstrDesc &UseDesc = UseMI->getDesc(); // Don't fold into target independent nodes. Target independent opcodes // don't have defined register classes. if (UseDesc.isVariadic() || UseOp.isImplicit() || UseDesc.OpInfo[UseOpIdx].RegClass == -1) return; } if (!FoldingImm) { tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII); // FIXME: We could try to change the instruction from 64-bit to 32-bit // to enable more folding opportunites. The shrink operands pass // already does this. return; } const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc(); const TargetRegisterClass *FoldRC = TRI->getRegClass(FoldDesc.OpInfo[0].RegClass); // Split 64-bit constants into 32-bits for folding. if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) { unsigned UseReg = UseOp.getReg(); const TargetRegisterClass *UseRC = TargetRegisterInfo::isVirtualRegister(UseReg) ? 
MRI->getRegClass(UseReg) : TRI->getPhysRegClass(UseReg); if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64) return; APInt Imm(64, OpToFold.getImm()); if (UseOp.getSubReg() == AMDGPU::sub0) { Imm = Imm.getLoBits(32); } else { assert(UseOp.getSubReg() == AMDGPU::sub1); Imm = Imm.getHiBits(32); } MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue()); tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII); return; } tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII); } static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result, uint32_t LHS, uint32_t RHS) { switch (Opcode) { case AMDGPU::V_AND_B32_e64: case AMDGPU::V_AND_B32_e32: case AMDGPU::S_AND_B32: Result = LHS & RHS; return true; case AMDGPU::V_OR_B32_e64: case AMDGPU::V_OR_B32_e32: case AMDGPU::S_OR_B32: Result = LHS | RHS; return true; case AMDGPU::V_XOR_B32_e64: case AMDGPU::V_XOR_B32_e32: case AMDGPU::S_XOR_B32: Result = LHS ^ RHS; return true; case AMDGPU::V_LSHL_B32_e64: case AMDGPU::V_LSHL_B32_e32: case AMDGPU::S_LSHL_B32: // The instruction ignores the high bits for out of bounds shifts. Result = LHS << (RHS & 31); return true; case AMDGPU::V_LSHLREV_B32_e64: case AMDGPU::V_LSHLREV_B32_e32: Result = RHS << (LHS & 31); return true; case AMDGPU::V_LSHR_B32_e64: case AMDGPU::V_LSHR_B32_e32: case AMDGPU::S_LSHR_B32: Result = LHS >> (RHS & 31); return true; case AMDGPU::V_LSHRREV_B32_e64: case AMDGPU::V_LSHRREV_B32_e32: Result = RHS >> (LHS & 31); return true; case AMDGPU::V_ASHR_I32_e64: case AMDGPU::V_ASHR_I32_e32: case AMDGPU::S_ASHR_I32: Result = static_cast(LHS) >> (RHS & 31); return true; case AMDGPU::V_ASHRREV_I32_e64: case AMDGPU::V_ASHRREV_I32_e32: Result = static_cast(RHS) >> (LHS & 31); return true; default: return false; } } static unsigned getMovOpc(bool IsScalar) { return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; } /// Remove any leftover implicit operands from mutating the instruction. e.g. /// if we replace an s_and_b32 with a copy, we don't need the implicit scc def /// anymore. static void stripExtraCopyOperands(MachineInstr &MI) { const MCInstrDesc &Desc = MI.getDesc(); unsigned NumOps = Desc.getNumOperands() + Desc.getNumImplicitUses() + Desc.getNumImplicitDefs(); for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I) MI.RemoveOperand(I); } static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) { MI.setDesc(NewDesc); stripExtraCopyOperands(MI); } static MachineOperand *getImmOrMaterializedImm(MachineRegisterInfo &MRI, MachineOperand &Op) { if (Op.isReg()) { // If this has a subregister, it obviously is a register source. if (Op.getSubReg() != AMDGPU::NoSubRegister || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) return &Op; MachineInstr *Def = MRI.getVRegDef(Op.getReg()); if (Def && Def->isMoveImmediate()) { MachineOperand &ImmSrc = Def->getOperand(1); if (ImmSrc.isImm()) return &ImmSrc; } } return &Op; } // Try to simplify operations with a constant that may appear after instruction // selection. // TODO: See if a frame index with a fixed offset can fold. 
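// Illustrative examples of the simplifications attempted below (a sketch,
// not an exhaustive list; register numbers are hypothetical):
//
//   %2 = V_NOT_B32_e32 15      ->  %2 = V_MOV_B32_e32 0xfffffff0
//   %3 = V_OR_B32_e32 %x, 0    ->  %3 = COPY %x
//   %4 = S_AND_B32 7, 12       ->  %4 = S_MOV_B32 4
//
// evalBinaryInstruction() above supplies the arithmetic for the
// two-immediate cases; the identity cases (or x,0 / and x,-1 / xor x,0)
// collapse into copies, with the mov flavor chosen by the destination
// register class.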
static bool tryConstantFoldOp(MachineRegisterInfo &MRI, const SIInstrInfo *TII, MachineInstr *MI, MachineOperand *ImmOp) { unsigned Opc = MI->getOpcode(); if (Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 || Opc == AMDGPU::S_NOT_B32) { MI->getOperand(1).ChangeToImmediate(~ImmOp->getImm()); mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32))); return true; } int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); if (Src1Idx == -1) return false; int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); MachineOperand *Src0 = getImmOrMaterializedImm(MRI, MI->getOperand(Src0Idx)); MachineOperand *Src1 = getImmOrMaterializedImm(MRI, MI->getOperand(Src1Idx)); if (!Src0->isImm() && !Src1->isImm()) return false; if (MI->getOpcode() == AMDGPU::V_LSHL_OR_B32) { if (Src0->isImm() && Src0->getImm() == 0) { // v_lshl_or_b32 0, X, Y -> copy Y // v_lshl_or_b32 0, X, K -> v_mov_b32 K bool UseCopy = TII->getNamedOperand(*MI, AMDGPU::OpName::src2)->isReg(); MI->RemoveOperand(Src1Idx); MI->RemoveOperand(Src0Idx); MI->setDesc(TII->get(UseCopy ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32)); return true; } } // and k0, k1 -> v_mov_b32 (k0 & k1) // or k0, k1 -> v_mov_b32 (k0 | k1) // xor k0, k1 -> v_mov_b32 (k0 ^ k1) if (Src0->isImm() && Src1->isImm()) { int32_t NewImm; if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm())) return false; const SIRegisterInfo &TRI = TII->getRegisterInfo(); bool IsSGPR = TRI.isSGPRReg(MRI, MI->getOperand(0).getReg()); // Be careful to change the right operand, src0 may belong to a different // instruction. MI->getOperand(Src0Idx).ChangeToImmediate(NewImm); MI->RemoveOperand(Src1Idx); mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR))); return true; } if (!MI->isCommutable()) return false; if (Src0->isImm() && !Src1->isImm()) { std::swap(Src0, Src1); std::swap(Src0Idx, Src1Idx); } int32_t Src1Val = static_cast(Src1->getImm()); if (Opc == AMDGPU::V_OR_B32_e64 || Opc == AMDGPU::V_OR_B32_e32 || Opc == AMDGPU::S_OR_B32) { if (Src1Val == 0) { // y = or x, 0 => y = copy x MI->RemoveOperand(Src1Idx); mutateCopyOp(*MI, TII->get(AMDGPU::COPY)); } else if (Src1Val == -1) { // y = or x, -1 => y = v_mov_b32 -1 MI->RemoveOperand(Src1Idx); mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32))); } else return false; return true; } if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 || MI->getOpcode() == AMDGPU::V_AND_B32_e32 || MI->getOpcode() == AMDGPU::S_AND_B32) { if (Src1Val == 0) { // y = and x, 0 => y = v_mov_b32 0 MI->RemoveOperand(Src0Idx); mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32))); } else if (Src1Val == -1) { // y = and x, -1 => y = copy x MI->RemoveOperand(Src1Idx); mutateCopyOp(*MI, TII->get(AMDGPU::COPY)); stripExtraCopyOperands(*MI); } else return false; return true; } if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 || MI->getOpcode() == AMDGPU::V_XOR_B32_e32 || MI->getOpcode() == AMDGPU::S_XOR_B32) { if (Src1Val == 0) { // y = xor x, 0 => y = copy x MI->RemoveOperand(Src1Idx); mutateCopyOp(*MI, TII->get(AMDGPU::COPY)); return true; } } return false; } // Try to fold an instruction into a simpler one static bool tryFoldInst(const SIInstrInfo *TII, MachineInstr *MI) { unsigned Opc = MI->getOpcode(); if (Opc == AMDGPU::V_CNDMASK_B32_e32 || Opc == AMDGPU::V_CNDMASK_B32_e64 || Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) { const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0); const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1); if (Src1->isIdenticalTo(*Src0)) { 
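      // Degenerate select (sketch): when both selected values are identical,
      // e.g. (hypothetical MIR)
      //   %5 = V_CNDMASK_B32_e64 0, %x, 0, %x, %cc
      // the condition no longer matters, so the instruction is rewritten
      // below into a COPY of src0 (or a v_mov_b32 when src0 is an immediate),
      // dropping the src1/src2 operands.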
LLVM_DEBUG(dbgs() << "Folded " << *MI << " into "); int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); if (Src2Idx != -1) MI->RemoveOperand(Src2Idx); MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1)); mutateCopyOp(*MI, TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY : getMovOpc(false))); LLVM_DEBUG(dbgs() << *MI << '\n'); return true; } } return false; } void SIFoldOperands::foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const { // We need mutate the operands of new mov instructions to add implicit // uses of EXEC, but adding them invalidates the use_iterator, so defer // this. SmallVector CopiesToReplace; SmallVector FoldList; MachineOperand &Dst = MI.getOperand(0); bool FoldingImm = OpToFold.isImm() || OpToFold.isFI(); if (FoldingImm) { unsigned NumLiteralUses = 0; MachineOperand *NonInlineUse = nullptr; int NonInlineUseOpNo = -1; MachineRegisterInfo::use_iterator NextUse; for (MachineRegisterInfo::use_iterator Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end(); Use != E; Use = NextUse) { NextUse = std::next(Use); MachineInstr *UseMI = Use->getParent(); unsigned OpNo = Use.getOperandNo(); // Folding the immediate may reveal operations that can be constant // folded or replaced with a copy. This can happen for example after // frame indices are lowered to constants or from splitting 64-bit // constants. // // We may also encounter cases where one or both operands are // immediates materialized into a register, which would ordinarily not // be folded due to multiple uses or operand constraints. if (OpToFold.isImm() && tryConstantFoldOp(*MRI, TII, UseMI, &OpToFold)) { LLVM_DEBUG(dbgs() << "Constant folded " << *UseMI << '\n'); // Some constant folding cases change the same immediate's use to a new // instruction, e.g. and x, 0 -> 0. Make sure we re-visit the user // again. The same constant folded instruction could also have a second // use operand. NextUse = MRI->use_begin(Dst.getReg()); FoldList.clear(); continue; } // Try to fold any inline immediate uses, and then only fold other // constants if they have one use. // // The legality of the inline immediate must be checked based on the use // operand, not the defining instruction, because 32-bit instructions // with 32-bit inline immediate sources may be used to materialize // constants used in 16-bit operands. // // e.g. it is unsafe to fold: // s_mov_b32 s0, 1.0 // materializes 0x3f800000 // v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00 // Folding immediates with more than one use will increase program size. // FIXME: This will also reduce register usage, which may be better // in some cases. A better heuristic is needed. if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) { foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace); } else { if (++NumLiteralUses == 1) { NonInlineUse = &*Use; NonInlineUseOpNo = OpNo; } } } if (NumLiteralUses == 1) { MachineInstr *UseMI = NonInlineUse->getParent(); foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList, CopiesToReplace); } } else { // Folding register. SmallVector UsesToProcess; for (MachineRegisterInfo::use_iterator Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end(); Use != E; ++Use) { UsesToProcess.push_back(Use); } for (auto U : UsesToProcess) { MachineInstr *UseMI = U->getParent(); foldOperand(OpToFold, UseMI, U.getOperandNo(), FoldList, CopiesToReplace); } } MachineFunction *MF = MI.getParent()->getParent(); // Make sure we add EXEC uses to any new v_mov instructions created. 
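  // Background note (a sketch of why this is needed): V_MOV_B32 carries an
  // implicit use of EXEC that a generic COPY does not, and setDesc() alone
  // does not materialize implicit operands. Every COPY rewritten into a mov
  // earlier was therefore recorded in CopiesToReplace so it can be patched up
  // here via addImplicitDefUseOperands().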
for (MachineInstr *Copy : CopiesToReplace) Copy->addImplicitDefUseOperands(*MF); for (FoldCandidate &Fold : FoldList) { if (updateOperand(Fold, *TII, *TRI)) { // Clear kill flags. if (Fold.isReg()) { assert(Fold.OpToFold && Fold.OpToFold->isReg()); // FIXME: Probably shouldn't bother trying to fold if not an // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR // copies. MRI->clearKillFlags(Fold.OpToFold->getReg()); } LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " << static_cast(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n'); tryFoldInst(TII, Fold.UseMI); } else if (Fold.isCommuted()) { // Restoring instruction's original operand order if fold has failed. TII->commuteInstruction(*Fold.UseMI, false); } } } // Clamp patterns are canonically selected to v_max_* instructions, so only // handle them. const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const { unsigned Op = MI.getOpcode(); switch (Op) { case AMDGPU::V_MAX_F32_e64: case AMDGPU::V_MAX_F16_e64: case AMDGPU::V_MAX_F64: case AMDGPU::V_PK_MAX_F16: { if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm()) return nullptr; // Make sure sources are identical. const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); if (!Src0->isReg() || !Src1->isReg() || Src0->getReg() != Src1->getReg() || Src0->getSubReg() != Src1->getSubReg() || Src0->getSubReg() != AMDGPU::NoSubRegister) return nullptr; // Can't fold up if we have modifiers. if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod)) return nullptr; unsigned Src0Mods = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm(); unsigned Src1Mods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm(); // Having a 0 op_sel_hi would require swizzling the output in the source // instruction, which we can't do. unsigned UnsetMods = (Op == AMDGPU::V_PK_MAX_F16) ? SISrcMods::OP_SEL_1 : 0; if (Src0Mods != UnsetMods && Src1Mods != UnsetMods) return nullptr; return Src0; } default: return nullptr; } } // We obviously have multiple uses in a clamp since the register is used twice // in the same instruction. static bool hasOneNonDBGUseInst(const MachineRegisterInfo &MRI, unsigned Reg) { int Count = 0; for (auto I = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end(); I != E; ++I) { if (++Count > 1) return false; } return true; } // FIXME: Clamp for v_mad_mixhi_f16 handled during isel. bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) { const MachineOperand *ClampSrc = isClamp(MI); if (!ClampSrc || !hasOneNonDBGUseInst(*MRI, ClampSrc->getReg())) return false; MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg()); // The type of clamp must be compatible. if (TII->getClampMask(*Def) != TII->getClampMask(MI)) return false; MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp); if (!DefClamp) return false; LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def << '\n'); // Clamp is applied after omod, so it is OK if omod is set. 
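  // Illustrative example of the fold performed below (registers hypothetical):
  //
  //   %a = V_ADD_F32_e64 %x, %y, clamp = 0
  //   %b = V_MAX_F32_e64 %a, %a, clamp = 1
  //
  // becomes %a = V_ADD_F32_e64 %x, %y with clamp = 1, and all uses of %b are
  // redirected to %a. Because the hardware applies clamp after omod, an omod
  // already present on the def does not block the fold.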
DefClamp->setImm(1); MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg()); MI.eraseFromParent(); return true; } static int getOModValue(unsigned Opc, int64_t Val) { switch (Opc) { case AMDGPU::V_MUL_F32_e64: { switch (static_cast(Val)) { case 0x3f000000: // 0.5 return SIOutMods::DIV2; case 0x40000000: // 2.0 return SIOutMods::MUL2; case 0x40800000: // 4.0 return SIOutMods::MUL4; default: return SIOutMods::NONE; } } case AMDGPU::V_MUL_F16_e64: { switch (static_cast(Val)) { case 0x3800: // 0.5 return SIOutMods::DIV2; case 0x4000: // 2.0 return SIOutMods::MUL2; case 0x4400: // 4.0 return SIOutMods::MUL4; default: return SIOutMods::NONE; } } default: llvm_unreachable("invalid mul opcode"); } } // FIXME: Does this really not support denormals with f16? // FIXME: Does this need to check IEEE mode bit? SNaNs are generally not // handled, so will anything other than that break? std::pair SIFoldOperands::isOMod(const MachineInstr &MI) const { unsigned Op = MI.getOpcode(); switch (Op) { case AMDGPU::V_MUL_F32_e64: case AMDGPU::V_MUL_F16_e64: { // If output denormals are enabled, omod is ignored. if ((Op == AMDGPU::V_MUL_F32_e64 && ST->hasFP32Denormals()) || (Op == AMDGPU::V_MUL_F16_e64 && ST->hasFP16Denormals())) return std::make_pair(nullptr, SIOutMods::NONE); const MachineOperand *RegOp = nullptr; const MachineOperand *ImmOp = nullptr; const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); if (Src0->isImm()) { ImmOp = Src0; RegOp = Src1; } else if (Src1->isImm()) { ImmOp = Src1; RegOp = Src0; } else return std::make_pair(nullptr, SIOutMods::NONE); int OMod = getOModValue(Op, ImmOp->getImm()); if (OMod == SIOutMods::NONE || TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) || TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) || TII->hasModifiersSet(MI, AMDGPU::OpName::omod) || TII->hasModifiersSet(MI, AMDGPU::OpName::clamp)) return std::make_pair(nullptr, SIOutMods::NONE); return std::make_pair(RegOp, OMod); } case AMDGPU::V_ADD_F32_e64: case AMDGPU::V_ADD_F16_e64: { // If output denormals are enabled, omod is ignored. if ((Op == AMDGPU::V_ADD_F32_e64 && ST->hasFP32Denormals()) || (Op == AMDGPU::V_ADD_F16_e64 && ST->hasFP16Denormals())) return std::make_pair(nullptr, SIOutMods::NONE); // Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); if (Src0->isReg() && Src1->isReg() && Src0->getReg() == Src1->getReg() && Src0->getSubReg() == Src1->getSubReg() && !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) && !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) && !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) && !TII->hasModifiersSet(MI, AMDGPU::OpName::omod)) return std::make_pair(Src0, SIOutMods::MUL2); return std::make_pair(nullptr, SIOutMods::NONE); } default: return std::make_pair(nullptr, SIOutMods::NONE); } } // FIXME: Does this need to check IEEE bit on function? 
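// Illustrative example of the omod fold (a sketch; registers hypothetical):
//
//   %a = V_ADD_F32_e64 %x, %y, omod = 0
//   %b = V_MUL_F32_e64 2.0, %a, omod = 0
//
// becomes %a = V_ADD_F32_e64 %x, %y, omod = MUL2, with uses of %b rewritten
// to %a. getOModValue() above maps the multiplier constants 0.5/2.0/4.0 to
// DIV2/MUL2/MUL4; any other constant leaves the instruction untouched.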
bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) { const MachineOperand *RegOp; int OMod; std::tie(RegOp, OMod) = isOMod(MI); if (OMod == SIOutMods::NONE || !RegOp->isReg() || RegOp->getSubReg() != AMDGPU::NoSubRegister || !hasOneNonDBGUseInst(*MRI, RegOp->getReg())) return false; MachineInstr *Def = MRI->getVRegDef(RegOp->getReg()); MachineOperand *DefOMod = TII->getNamedOperand(*Def, AMDGPU::OpName::omod); if (!DefOMod || DefOMod->getImm() != SIOutMods::NONE) return false; // Clamp is applied after omod. If the source already has clamp set, don't // fold it. if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp)) return false; LLVM_DEBUG(dbgs() << "Folding omod " << MI << " into " << *Def << '\n'); DefOMod->setImm(OMod); MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg()); MI.eraseFromParent(); return true; } bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; MRI = &MF.getRegInfo(); ST = &MF.getSubtarget(); TII = ST->getInstrInfo(); TRI = &TII->getRegisterInfo(); const SIMachineFunctionInfo *MFI = MF.getInfo(); // omod is ignored by hardware if IEEE bit is enabled. omod also does not // correctly handle signed zeros. // bool IsIEEEMode = ST->enableIEEEBit(MF); bool HasNSZ = MFI->hasNoSignedZerosFPMath(); for (MachineBasicBlock *MBB : depth_first(&MF)) { MachineBasicBlock::iterator I, Next; for (I = MBB->begin(); I != MBB->end(); I = Next) { Next = std::next(I); MachineInstr &MI = *I; tryFoldInst(TII, &MI); if (!TII->isFoldableCopy(MI)) { // TODO: Omod might be OK if there is NSZ only on the source // instruction, and not the omod multiply. if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) || !tryFoldOMod(MI)) tryFoldClamp(MI); continue; } MachineOperand &OpToFold = MI.getOperand(1); bool FoldingImm = OpToFold.isImm() || OpToFold.isFI(); // FIXME: We could also be folding things like TargetIndexes. if (!FoldingImm && !OpToFold.isReg()) continue; if (OpToFold.isReg() && !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg())) continue; // Prevent folding operands backwards in the function. For example, // the COPY opcode must not be replaced by 1 in this example: // // %3 = COPY %vgpr0; VGPR_32:%3 // ... // %vgpr0 = V_MOV_B32_e32 1, implicit %exec MachineOperand &Dst = MI.getOperand(0); if (Dst.isReg() && !TargetRegisterInfo::isVirtualRegister(Dst.getReg())) continue; foldInstOperand(MI, OpToFold); } } return false; } Index: vendor/llvm/dist-release_80/lib/Target/AMDGPU/VOP2Instructions.td =================================================================== --- vendor/llvm/dist-release_80/lib/Target/AMDGPU/VOP2Instructions.td (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/AMDGPU/VOP2Instructions.td (revision 348932) @@ -1,1065 +1,1059 @@ //===-- VOP2Instructions.td - Vector Instruction Defintions ---------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // VOP2 Classes //===----------------------------------------------------------------------===// class VOP2e op, VOPProfile P> : Enc32 { bits<8> vdst; bits<9> src0; bits<8> src1; let Inst{8-0} = !if(P.HasSrc0, src0, 0); let Inst{16-9} = !if(P.HasSrc1, src1, 0); let Inst{24-17} = !if(P.EmitDst, vdst, 0); let Inst{30-25} = op; let Inst{31} = 0x0; //encoding } class VOP2_MADKe op, VOPProfile P> : Enc64 { bits<8> vdst; bits<9> src0; bits<8> src1; bits<32> imm; let Inst{8-0} = !if(P.HasSrc0, src0, 0); let Inst{16-9} = !if(P.HasSrc1, src1, 0); let Inst{24-17} = !if(P.EmitDst, vdst, 0); let Inst{30-25} = op; let Inst{31} = 0x0; // encoding let Inst{63-32} = imm; } class VOP2_SDWAe op, VOPProfile P> : VOP_SDWAe
<P>

{ bits<8> vdst; bits<8> src1; let Inst{8-0} = 0xf9; // sdwa let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); let Inst{30-25} = op; let Inst{31} = 0x0; // encoding } class VOP2_SDWA9Ae op, VOPProfile P> : VOP_SDWA9Ae
<P>
{ bits<8> vdst; bits<9> src1; let Inst{8-0} = 0xf9; // sdwa let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); let Inst{30-25} = op; let Inst{31} = 0x0; // encoding let Inst{63} = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr } class VOP2_Pseudo pattern=[], string suffix = "_e32"> : VOP_Pseudo { let AsmOperands = P.Asm32; let Size = 4; let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; let SubtargetPredicate = isGCN; let VOP2 = 1; let VALU = 1; let Uses = [EXEC]; let AsmVariantName = AMDGPUAsmVariants.Default; } class VOP2_Real : InstSI , SIMCInstr { let isPseudo = 0; let isCodeGenOnly = 0; let Constraints = ps.Constraints; let DisableEncoding = ps.DisableEncoding; // copy relevant pseudo op flags let SubtargetPredicate = ps.SubtargetPredicate; let AsmMatchConverter = ps.AsmMatchConverter; let AsmVariantName = ps.AsmVariantName; let Constraints = ps.Constraints; let DisableEncoding = ps.DisableEncoding; let TSFlags = ps.TSFlags; let UseNamedOperandTable = ps.UseNamedOperandTable; let Uses = ps.Uses; let Defs = ps.Defs; } class VOP2_SDWA_Pseudo pattern=[]> : VOP_SDWA_Pseudo { let AsmMatchConverter = "cvtSdwaVOP2"; } class VOP2_DPP_Pseudo pattern=[]> : VOP_DPP_Pseudo { } class getVOP2Pat64 : LetDummies { list ret = !if(P.HasModifiers, [(set P.DstVT:$vdst, (node (P.Src0VT !if(P.HasOMod, (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]); } multiclass VOP2Inst_e32 { let renamedInGFX9 = GFX9Renamed in { def _e32 : VOP2_Pseudo .ret>, Commutable_REV; } // End renamedInGFX9 = GFX9Renamed } multiclass VOP2Inst_e64 { let renamedInGFX9 = GFX9Renamed in { def _e64 : VOP3_Pseudo .ret>, Commutable_REV; } // End renamedInGFX9 = GFX9Renamed } multiclass VOP2Inst_sdwa { let renamedInGFX9 = GFX9Renamed in { def _sdwa : VOP2_SDWA_Pseudo ; } // End renamedInGFX9 = GFX9Renamed } multiclass VOP2Inst : VOP2Inst_e32, VOP2Inst_e64, VOP2Inst_sdwa { let renamedInGFX9 = GFX9Renamed in { foreach _ = BoolToList.ret in def _dpp : VOP2_DPP_Pseudo ; } } multiclass VOP2bInst { let renamedInGFX9 = GFX9Renamed in { let SchedRW = [Write32Bit, WriteSALU] in { let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in { def _e32 : VOP2_Pseudo .ret>, Commutable_REV; def _sdwa : VOP2_SDWA_Pseudo { let AsmMatchConverter = "cvtSdwaVOP2b"; } foreach _ = BoolToList.ret in def _dpp : VOP2_DPP_Pseudo ; } def _e64 : VOP3_Pseudo .ret>, Commutable_REV; } } } multiclass VOP2eInst { let SchedRW = [Write32Bit] in { let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in { def _e32 : VOP2_Pseudo , Commutable_REV; def _sdwa : VOP2_SDWA_Pseudo { let AsmMatchConverter = "cvtSdwaVOP2b"; } foreach _ = BoolToList.ret in def _dpp : VOP2_DPP_Pseudo ; } def _e64 : VOP3_Pseudo .ret>, Commutable_REV; } } class VOP_MADAK : VOPProfile <[vt, vt, vt, vt]> { field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm); field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm); field bit HasExt = 0; // Hack to stop printing _e64 let DstRC = RegisterOperand; field string Asm32 = " $vdst, $src0, $src1, $imm"; } def VOP_MADAK_F16 : VOP_MADAK ; def VOP_MADAK_F32 : VOP_MADAK ; class VOP_MADMK : VOPProfile <[vt, vt, vt, vt]> { field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm); field dag Ins32 = (ins VCSrc_f32:$src0, ImmOpType:$imm, VGPR_32:$src1); field bit HasExt = 0; // Hack 
to stop printing _e64 let DstRC = RegisterOperand; field string Asm32 = " $vdst, $src0, $imm, $src1"; } def VOP_MADMK_F16 : VOP_MADMK ; def VOP_MADMK_F32 : VOP_MADMK ; // FIXME: Remove src2_modifiers. It isn't used, so is wasting memory // and processing time but it makes it easier to convert to mad. class VOP_MAC : VOPProfile <[vt, vt, vt, vt]> { let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2); let Ins64 = getIns64, 3, 0, HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret; let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1ModDPP:$src1_modifiers, Src1DPP:$src1, VGPR_32:$src2, // stub argument dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, VGPR_32:$src2, // stub argument clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel); let Asm32 = getAsm32<1, 2, vt>.ret; let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, vt>.ret; let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret; let AsmSDWA = getAsmSDWA<1, 2, vt>.ret; let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt>.ret; let HasSrc2 = 0; let HasSrc2Mods = 0; let HasExt = 1; let HasExtDPP = 1; let HasExtSDWA = 1; let HasExtSDWA9 = 0; } def VOP_MAC_F16 : VOP_MAC ; def VOP_MAC_F32 : VOP_MAC ; // Write out to vcc or arbitrary SGPR. def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> { let Asm32 = "$vdst, vcc, $src0, $src1"; let Asm64 = "$vdst, $sdst, $src0, $src1"; let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; let Outs32 = (outs DstRC:$vdst); let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); } // Write out to vcc or arbitrary SGPR and read in from vcc or // arbitrary SGPR. def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { // We use VCSrc_b32 to exclude literal constants, even though the // encoding normally allows them since the implicit VCC use means // using one would always violate the constant bus // restriction. SGPRs are still allowed because it should // technically be possible to use VCC again as src0. let Src0RC32 = VCSrc_b32; let Asm32 = "$vdst, vcc, $src0, $src1, vcc"; let Asm64 = "$vdst, $sdst, $src0, $src1, $src2"; let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel"; let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel"; let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; let Outs32 = (outs DstRC:$vdst); let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); // Suppress src2 implied by type since the 32-bit encoding uses an // implicit VCC use. 
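  // For example (assembly shown only as an illustration), the 32-bit encoding
  // of v_addc_u32 always reads and writes VCC implicitly:
  //   v_addc_u32_e32 v0, vcc, v1, v2, vcc
  // while the VOP3 form can name an arbitrary SGPR pair for the carry:
  //   v_addc_u32_e64 v0, s[0:1], v1, v2, s[2:3]
  // Hence Ins32 below omits the explicit $src2 operand.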
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel); let InsDPP = (ins DstRCDPP:$old, Src0DPP:$src0, Src1DPP:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); let HasExt = 1; let HasExtDPP = 1; let HasExtSDWA = 1; let HasExtSDWA9 = 1; } // Read in from vcc or arbitrary SGPR def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above. let Asm32 = "$vdst, $src0, $src1, vcc"; let Asm64 = "$vdst, $src0, $src1, $src2"; let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel"; let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel"; let AsmDPP = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; let Outs32 = (outs DstRC:$vdst); let Outs64 = (outs DstRC:$vdst); // Suppress src2 implied by type since the 32-bit encoding uses an // implicit VCC use. let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel); let InsDPP = (ins DstRCDPP:$old, Src0DPP:$src0, Src1DPP:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); let HasExt = 1; let HasExtDPP = 1; let HasExtSDWA = 1; let HasExtSDWA9 = 1; } def VOP_READLANE : VOPProfile<[i32, i32, i32]> { let Outs32 = (outs SReg_32:$vdst); let Outs64 = Outs32; let Ins32 = (ins VGPR_32:$src0, SCSrc_b32:$src1); let Ins64 = Ins32; let Asm32 = " $vdst, $src0, $src1"; let Asm64 = Asm32; let HasExt = 0; let HasExtDPP = 0; let HasExtSDWA = 0; let HasExtSDWA9 = 0; } def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> { let Outs32 = (outs VGPR_32:$vdst); let Outs64 = Outs32; let Ins32 = (ins SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in); let Ins64 = Ins32; let Asm32 = " $vdst, $src0, $src1"; let Asm64 = Asm32; let HasSrc2 = 0; let HasSrc2Mods = 0; let HasExt = 0; let HasExtDPP = 0; let HasExtSDWA = 0; let HasExtSDWA9 = 0; } //===----------------------------------------------------------------------===// // VOP2 Instructions //===----------------------------------------------------------------------===// let SubtargetPredicate = isGCN, Predicates = [isGCN] in { defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>; def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>; let isCommutable = 1 in { defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, fadd>; defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, fsub>; defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">; defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>; defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, fmul>; defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_PAT_GEN, AMDGPUmul_i24>; defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN, AMDGPUmulhi_i24>; defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_PAT_GEN, AMDGPUmul_u24>; defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN, AMDGPUmulhi_u24>; defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>; defm V_MAX_F32 
: VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>; defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN, smin>; defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN, smax>; defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN, umin>; defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN, umax>; defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, null_frag, "v_lshr_b32">; defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, null_frag, "v_ashr_i32">; defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, null_frag, "v_lshl_b32">; defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN, and>; defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN, or>; defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN, xor>; let Constraints = "$vdst = $src2", DisableEncoding="$src2", isConvertibleToThreeAddress = 1 in { defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>; } def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>; // No patterns so that the scalar instructions are always selected. // The scalar versions will be replaced with vector when needed later. // V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 where renamed to *_U32 in VI, // but the VI instructions behave the same as the SI versions. defm V_ADD_I32 : VOP2bInst <"v_add_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_i32", 1>; defm V_SUB_I32 : VOP2bInst <"v_sub_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32", 1>; defm V_SUBREV_I32 : VOP2bInst <"v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32", 1>; defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32", 1>; defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>; defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>; let SubtargetPredicate = HasAddNoCarryInsts in { defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32, null_frag, "v_add_u32", 1>; defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32, null_frag, "v_sub_u32", 1>; defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32, null_frag, "v_sub_u32", 1>; } } // End isCommutable = 1 // These are special and do not read the exec mask. 
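// Note (sketch): v_readlane_b32 copies one lane of a VGPR into an SGPR, and
// v_writelane_b32 writes an SGPR value into one lane of a VGPR, e.g.
//   v_readlane_b32 s0, v1, s2   // s0 = lane s2 of v1
// Because they address lanes directly, they are modeled as convergent and,
// unlike ordinary VALU instructions, take no implicit EXEC use (Uses = []).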
let isConvergent = 1, Uses = [] in { def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE, [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>; let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in { def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>; } // End $vdst = $vdst_in, DisableEncoding $vdst_in } // End isConvergent = 1 defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_NO_EXT>; defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_NO_EXT>; defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT, int_amdgcn_mbcnt_lo>; defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT, int_amdgcn_mbcnt_hi>; defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT, AMDGPUldexp>; defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT>; // TODO: set "Uses = dst" defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT, AMDGPUpknorm_i16_f32>; defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT, AMDGPUpknorm_u16_f32>; defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT, AMDGPUpkrtz_f16_f32>; defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT, AMDGPUpk_u16_u32>; defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT, AMDGPUpk_i16_i32>; } // End SubtargetPredicate = isGCN, Predicates = [isGCN] def : GCNPat< (AMDGPUadde i32:$src0, i32:$src1, i1:$src2), (V_ADDC_U32_e64 $src0, $src1, $src2) >; def : GCNPat< (AMDGPUsube i32:$src0, i32:$src1, i1:$src2), (V_SUBB_U32_e64 $src0, $src1, $src2) >; // These instructions only exist on SI and CI let SubtargetPredicate = isSICI, Predicates = [isSICI] in { defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>; defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>; let isCommutable = 1 in { defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>; defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN, srl>; defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN, sra>; defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN, shl>; } // End isCommutable = 1 } // End let SubtargetPredicate = SICI, Predicates = [isSICI] class DivergentBinOp : GCNPat< (getDivergentFrag.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1), !if(!cast(Inst).IsOrig, (Inst $src0, $src1), (Inst $src1, $src0) ) >; let AddedComplexity = 1 in { def : DivergentBinOp; def : DivergentBinOp; def : DivergentBinOp; } let SubtargetPredicate = HasAddNoCarryInsts in { - def : DivergentBinOp; - def : DivergentBinOp; - def : DivergentBinOp; + def : DivergentBinOp; + def : DivergentBinOp; } - -def : DivergentBinOp; - def : DivergentBinOp; -def : DivergentBinOp; - -def : DivergentBinOp; +def : DivergentBinOp; def : DivergentBinOp; def : DivergentBinOp; def : DivergentBinOp; def : DivergentBinOp; def : DivergentBinOp; class divergent_i64_BinOp : GCNPat< (getDivergentFrag.ret i64:$src0, i64:$src1), (REG_SEQUENCE VReg_64, (Inst (i32 (EXTRACT_SUBREG $src0, sub0)), (i32 (EXTRACT_SUBREG $src1, sub0)) ), sub0, (Inst (i32 (EXTRACT_SUBREG $src0, sub1)), (i32 (EXTRACT_SUBREG $src1, sub1)) ), sub1 ) >; def : divergent_i64_BinOp ; def : divergent_i64_BinOp ; def : divergent_i64_BinOp ; let SubtargetPredicate = Has16BitInsts in { let FPDPRounding = 1 in { def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">; defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>; } // 
End FPDPRounding = 1 defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16>; defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16>; defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16>; let isCommutable = 1 in { let FPDPRounding = 1 in { defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, fadd>; defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, fsub>; defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">; defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, fmul>; def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">; } // End FPDPRounding = 1 defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16>; defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16>; defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16, null_frag, "v_sub_u16">; defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16>; defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>; defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>; defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16>; defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16>; defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16>; defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16>; let Constraints = "$vdst = $src2", DisableEncoding="$src2", isConvertibleToThreeAddress = 1 in { defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>; } } // End isCommutable = 1 } // End SubtargetPredicate = Has16BitInsts let SubtargetPredicate = HasDLInsts in { defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32>; let Constraints = "$vdst = $src2", DisableEncoding="$src2", isConvertibleToThreeAddress = 1, isCommutable = 1 in { defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>; } } // End SubtargetPredicate = HasDLInsts // Note: 16-bit instructions produce a 0 result in the high 16-bits. multiclass Arithmetic_i16_Pats { def : GCNPat< (op i16:$src0, i16:$src1), (inst $src0, $src1) >; def : GCNPat< (i32 (zext (op i16:$src0, i16:$src1))), (inst $src0, $src1) >; def : GCNPat< (i64 (zext (op i16:$src0, i16:$src1))), (REG_SEQUENCE VReg_64, (inst $src0, $src1), sub0, (V_MOV_B32_e32 (i32 0)), sub1) >; } multiclass Bits_OpsRev_i16_Pats { def : GCNPat< (op i16:$src0, i16:$src1), (inst $src1, $src0) >; def : GCNPat< (i32 (zext (op i16:$src0, i16:$src1))), (inst $src1, $src0) >; def : GCNPat< (i64 (zext (op i16:$src0, i16:$src1))), (REG_SEQUENCE VReg_64, (inst $src1, $src0), sub0, (V_MOV_B32_e32 (i32 0)), sub1) >; } class ZExt_i16_i1_Pat : GCNPat < (i16 (ext i1:$src)), (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src) >; let Predicates = [Has16BitInsts] in { defm : Arithmetic_i16_Pats; defm : Arithmetic_i16_Pats; defm : Arithmetic_i16_Pats; defm : Arithmetic_i16_Pats; defm : Arithmetic_i16_Pats; defm : Arithmetic_i16_Pats; defm : Arithmetic_i16_Pats; def : GCNPat < (and i16:$src0, i16:$src1), (V_AND_B32_e64 $src0, $src1) >; def : GCNPat < (or i16:$src0, i16:$src1), (V_OR_B32_e64 $src0, $src1) >; def : GCNPat < (xor i16:$src0, i16:$src1), (V_XOR_B32_e64 $src0, $src1) >; defm : Bits_OpsRev_i16_Pats; defm : Bits_OpsRev_i16_Pats; defm : Bits_OpsRev_i16_Pats; def : ZExt_i16_i1_Pat; def : ZExt_i16_i1_Pat; def : GCNPat < (i16 (sext i1:$src)), (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src) >; // Undo sub x, c -> add x, -c canonicalization since c is more likely // an inline immediate than -c. // TODO: Also do for 64-bit. 
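// For example (values illustrative): DAG canonicalization turns
//   sub x, 32   into   add x, -32
// but -32 is outside the integer inline-immediate range (-16..64) while 32 is
// inside it, so the pattern below turns the add back into v_sub_u16 x, 32 and
// avoids encoding a literal.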
def : GCNPat< (add i16:$src0, (i16 NegSubInlineConst16:$src1)), (V_SUB_U16_e64 $src0, NegSubInlineConst16:$src1) >; } // End Predicates = [Has16BitInsts] //===----------------------------------------------------------------------===// // SI //===----------------------------------------------------------------------===// let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in { multiclass VOP2_Real_si op> { def _si : VOP2_Real(NAME), SIEncodingFamily.SI>, VOP2e(NAME).Pfl>; } multiclass VOP2_Real_MADK_si op> { def _si : VOP2_Real(NAME), SIEncodingFamily.SI>, VOP2_MADKe(NAME).Pfl>; } multiclass VOP2_Real_e32_si op> { def _e32_si : VOP2_Real(NAME#"_e32"), SIEncodingFamily.SI>, VOP2e(NAME#"_e32").Pfl>; } multiclass VOP2_Real_e32e64_si op> : VOP2_Real_e32_si { def _e64_si : VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>, VOP3e_si <{1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl>; } multiclass VOP2be_Real_e32e64_si op> : VOP2_Real_e32_si { def _e64_si : VOP3_Real(NAME#"_e64"), SIEncodingFamily.SI>, VOP3be_si <{1, 0, 0, op{5-0}}, !cast(NAME#"_e64").Pfl>; } } // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" defm V_CNDMASK_B32 : VOP2_Real_e32e64_si <0x0>; defm V_ADD_F32 : VOP2_Real_e32e64_si <0x3>; defm V_SUB_F32 : VOP2_Real_e32e64_si <0x4>; defm V_SUBREV_F32 : VOP2_Real_e32e64_si <0x5>; defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_si <0x7>; defm V_MUL_F32 : VOP2_Real_e32e64_si <0x8>; defm V_MUL_I32_I24 : VOP2_Real_e32e64_si <0x9>; defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_si <0xa>; defm V_MUL_U32_U24 : VOP2_Real_e32e64_si <0xb>; defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_si <0xc>; defm V_MIN_F32 : VOP2_Real_e32e64_si <0xf>; defm V_MAX_F32 : VOP2_Real_e32e64_si <0x10>; defm V_MIN_I32 : VOP2_Real_e32e64_si <0x11>; defm V_MAX_I32 : VOP2_Real_e32e64_si <0x12>; defm V_MIN_U32 : VOP2_Real_e32e64_si <0x13>; defm V_MAX_U32 : VOP2_Real_e32e64_si <0x14>; defm V_LSHRREV_B32 : VOP2_Real_e32e64_si <0x16>; defm V_ASHRREV_I32 : VOP2_Real_e32e64_si <0x18>; defm V_LSHLREV_B32 : VOP2_Real_e32e64_si <0x1a>; defm V_AND_B32 : VOP2_Real_e32e64_si <0x1b>; defm V_OR_B32 : VOP2_Real_e32e64_si <0x1c>; defm V_XOR_B32 : VOP2_Real_e32e64_si <0x1d>; defm V_MAC_F32 : VOP2_Real_e32e64_si <0x1f>; defm V_MADMK_F32 : VOP2_Real_MADK_si <0x20>; defm V_MADAK_F32 : VOP2_Real_MADK_si <0x21>; defm V_ADD_I32 : VOP2be_Real_e32e64_si <0x25>; defm V_SUB_I32 : VOP2be_Real_e32e64_si <0x26>; defm V_SUBREV_I32 : VOP2be_Real_e32e64_si <0x27>; defm V_ADDC_U32 : VOP2be_Real_e32e64_si <0x28>; defm V_SUBB_U32 : VOP2be_Real_e32e64_si <0x29>; defm V_SUBBREV_U32 : VOP2be_Real_e32e64_si <0x2a>; defm V_READLANE_B32 : VOP2_Real_si <0x01>; let InOperandList = (ins SSrc_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in { defm V_WRITELANE_B32 : VOP2_Real_si <0x02>; } defm V_MAC_LEGACY_F32 : VOP2_Real_e32e64_si <0x6>; defm V_MIN_LEGACY_F32 : VOP2_Real_e32e64_si <0xd>; defm V_MAX_LEGACY_F32 : VOP2_Real_e32e64_si <0xe>; defm V_LSHR_B32 : VOP2_Real_e32e64_si <0x15>; defm V_ASHR_I32 : VOP2_Real_e32e64_si <0x17>; defm V_LSHL_B32 : VOP2_Real_e32e64_si <0x19>; defm V_BFM_B32 : VOP2_Real_e32e64_si <0x1e>; defm V_BCNT_U32_B32 : VOP2_Real_e32e64_si <0x22>; defm V_MBCNT_LO_U32_B32 : VOP2_Real_e32e64_si <0x23>; defm V_MBCNT_HI_U32_B32 : VOP2_Real_e32e64_si <0x24>; defm V_LDEXP_F32 : VOP2_Real_e32e64_si <0x2b>; defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e32e64_si <0x2c>; defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e32e64_si <0x2d>; defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e32e64_si <0x2e>; defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e32e64_si <0x2f>; defm V_CVT_PK_U16_U32 : 
VOP2_Real_e32e64_si <0x30>; defm V_CVT_PK_I16_I32 : VOP2_Real_e32e64_si <0x31>; //===----------------------------------------------------------------------===// // VI //===----------------------------------------------------------------------===// class VOP2_DPPe op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> : VOP_DPPe
<P>
{ bits<8> vdst; bits<8> src1; let Inst{8-0} = 0xfa; //dpp let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); let Inst{30-25} = op; let Inst{31} = 0x0; //encoding } let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in { multiclass VOP2_Real_MADK_vi op> { def _vi : VOP2_Real(NAME), SIEncodingFamily.VI>, VOP2_MADKe(NAME).Pfl>; } multiclass VOP2_Real_e32_vi op> { def _e32_vi : VOP2_Real(NAME#"_e32"), SIEncodingFamily.VI>, VOP2e(NAME#"_e32").Pfl>; } multiclass VOP2_Real_e64_vi op> { def _e64_vi : VOP3_Real(NAME#"_e64"), SIEncodingFamily.VI>, VOP3e_vi (NAME#"_e64").Pfl>; } multiclass VOP2_Real_e64only_vi op> { def _e64_vi : VOP3_Real(NAME#"_e64"), SIEncodingFamily.VI>, VOP3e_vi (NAME#"_e64").Pfl> { // Hack to stop printing _e64 VOP3_Pseudo ps = !cast(NAME#"_e64"); let OutOperandList = (outs VGPR_32:$vdst); let AsmString = ps.Mnemonic # " " # ps.AsmOperands; } } multiclass Base_VOP2_Real_e32e64_vi op> : VOP2_Real_e32_vi, VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>; } // End AssemblerPredicates = [isVI], DecoderNamespace = "VI" multiclass VOP2_SDWA_Real op> { def _sdwa_vi : VOP_SDWA_Real (NAME#"_sdwa")>, VOP2_SDWAe (NAME#"_sdwa").Pfl>; } multiclass VOP2_SDWA9_Real op> { def _sdwa_gfx9 : VOP_SDWA9_Real (NAME#"_sdwa")>, VOP2_SDWA9Ae (NAME#"_sdwa").Pfl>; } let AssemblerPredicates = [isVIOnly] in { multiclass VOP2be_Real_e32e64_vi_only op, string OpName, string AsmName> { def _e32_vi : VOP2_Real(OpName#"_e32"), SIEncodingFamily.VI>, VOP2e(OpName#"_e32").Pfl> { VOP2_Pseudo ps = !cast(OpName#"_e32"); let AsmString = AsmName # ps.AsmOperands; let DecoderNamespace = "VI"; } def _e64_vi : VOP3_Real(OpName#"_e64"), SIEncodingFamily.VI>, VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast(OpName#"_e64").Pfl> { VOP3_Pseudo ps = !cast(OpName#"_e64"); let AsmString = AsmName # ps.AsmOperands; let DecoderNamespace = "VI"; } def _sdwa_vi : VOP_SDWA_Real (OpName#"_sdwa")>, VOP2_SDWAe (OpName#"_sdwa").Pfl> { VOP2_SDWA_Pseudo ps = !cast(OpName#"_sdwa"); let AsmString = AsmName # ps.AsmOperands; } foreach _ = BoolToList(OpName#"_e32").Pfl.HasExtDPP>.ret in def _dpp_vi : VOP_DPP_Real(OpName#"_dpp"), SIEncodingFamily.VI>, VOP2_DPPe(OpName#"_dpp")> { VOP2_DPP_Pseudo ps = !cast(OpName#"_dpp"); let AsmString = AsmName # ps.AsmOperands; } } } let AssemblerPredicates = [isGFX9] in { multiclass VOP2be_Real_e32e64_gfx9 op, string OpName, string AsmName> { def _e32_gfx9 : VOP2_Real(OpName#"_e32"), SIEncodingFamily.GFX9>, VOP2e(OpName#"_e32").Pfl> { VOP2_Pseudo ps = !cast(OpName#"_e32"); let AsmString = AsmName # ps.AsmOperands; let DecoderNamespace = "GFX9"; } def _e64_gfx9 : VOP3_Real(OpName#"_e64"), SIEncodingFamily.GFX9>, VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast(OpName#"_e64").Pfl> { VOP3_Pseudo ps = !cast(OpName#"_e64"); let AsmString = AsmName # ps.AsmOperands; let DecoderNamespace = "GFX9"; } def _sdwa_gfx9 : VOP_SDWA9_Real (OpName#"_sdwa")>, VOP2_SDWA9Ae (OpName#"_sdwa").Pfl> { VOP2_SDWA_Pseudo ps = !cast(OpName#"_sdwa"); let AsmString = AsmName # ps.AsmOperands; } foreach _ = BoolToList(OpName#"_e32").Pfl.HasExtDPP>.ret in def _dpp_gfx9 : VOP_DPP_Real(OpName#"_dpp"), SIEncodingFamily.GFX9>, VOP2_DPPe(OpName#"_dpp")> { VOP2_DPP_Pseudo ps = !cast(OpName#"_dpp"); let AsmString = AsmName # ps.AsmOperands; let DecoderNamespace = "SDWA9"; } } multiclass VOP2_Real_e32e64_gfx9 op> { def _e32_gfx9 : VOP2_Real(NAME#"_e32"), SIEncodingFamily.GFX9>, VOP2e(NAME#"_e32").Pfl>{ let DecoderNamespace = "GFX9"; } def _e64_gfx9 : VOP3_Real(NAME#"_e64"), SIEncodingFamily.GFX9>, VOP3e_vi <{0, 1, 0, 0, 
op{5-0}}, !cast(NAME#"_e64").Pfl> { let DecoderNamespace = "GFX9"; } def _sdwa_gfx9 : VOP_SDWA9_Real (NAME#"_sdwa")>, VOP2_SDWA9Ae (NAME#"_sdwa").Pfl> { } foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtDPP>.ret in def _dpp_gfx9 : VOP_DPP_Real(NAME#"_dpp"), SIEncodingFamily.GFX9>, VOP2_DPPe(NAME#"_dpp")> { let DecoderNamespace = "SDWA9"; } } } // AssemblerPredicates = [isGFX9] multiclass VOP2_Real_e32e64_vi op> : Base_VOP2_Real_e32e64_vi, VOP2_SDWA_Real, VOP2_SDWA9_Real { foreach _ = BoolToList(NAME#"_e32").Pfl.HasExtDPP>.ret in def _dpp_vi : VOP_DPP_Real(NAME#"_dpp"), SIEncodingFamily.VI>, VOP2_DPPe(NAME#"_dpp")>; } defm V_CNDMASK_B32 : VOP2_Real_e32e64_vi <0x0>; defm V_ADD_F32 : VOP2_Real_e32e64_vi <0x1>; defm V_SUB_F32 : VOP2_Real_e32e64_vi <0x2>; defm V_SUBREV_F32 : VOP2_Real_e32e64_vi <0x3>; defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_vi <0x4>; defm V_MUL_F32 : VOP2_Real_e32e64_vi <0x5>; defm V_MUL_I32_I24 : VOP2_Real_e32e64_vi <0x6>; defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_vi <0x7>; defm V_MUL_U32_U24 : VOP2_Real_e32e64_vi <0x8>; defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_vi <0x9>; defm V_MIN_F32 : VOP2_Real_e32e64_vi <0xa>; defm V_MAX_F32 : VOP2_Real_e32e64_vi <0xb>; defm V_MIN_I32 : VOP2_Real_e32e64_vi <0xc>; defm V_MAX_I32 : VOP2_Real_e32e64_vi <0xd>; defm V_MIN_U32 : VOP2_Real_e32e64_vi <0xe>; defm V_MAX_U32 : VOP2_Real_e32e64_vi <0xf>; defm V_LSHRREV_B32 : VOP2_Real_e32e64_vi <0x10>; defm V_ASHRREV_I32 : VOP2_Real_e32e64_vi <0x11>; defm V_LSHLREV_B32 : VOP2_Real_e32e64_vi <0x12>; defm V_AND_B32 : VOP2_Real_e32e64_vi <0x13>; defm V_OR_B32 : VOP2_Real_e32e64_vi <0x14>; defm V_XOR_B32 : VOP2_Real_e32e64_vi <0x15>; defm V_MAC_F32 : VOP2_Real_e32e64_vi <0x16>; defm V_MADMK_F32 : VOP2_Real_MADK_vi <0x17>; defm V_MADAK_F32 : VOP2_Real_MADK_vi <0x18>; defm V_ADD_U32 : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_I32", "v_add_u32">; defm V_SUB_U32 : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_I32", "v_sub_u32">; defm V_SUBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_I32", "v_subrev_u32">; defm V_ADDC_U32 : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32", "v_addc_u32">; defm V_SUBB_U32 : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32", "v_subb_u32">; defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">; defm V_ADD_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_I32", "v_add_co_u32">; defm V_SUB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_I32", "v_sub_co_u32">; defm V_SUBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_I32", "v_subrev_co_u32">; defm V_ADDC_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32", "v_addc_co_u32">; defm V_SUBB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32", "v_subb_co_u32">; defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">; defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>; defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>; defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>; defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>; defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>; defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>; defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64only_vi <0x28d>; defm V_LDEXP_F32 : VOP2_Real_e64only_vi <0x288>; defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>; defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>; defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>; defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64only_vi <0x296>; defm V_CVT_PK_U16_U32 : VOP2_Real_e64only_vi <0x297>; defm V_CVT_PK_I16_I32 : VOP2_Real_e64only_vi <0x298>; defm V_ADD_F16 : 
VOP2_Real_e32e64_vi <0x1f>; defm V_SUB_F16 : VOP2_Real_e32e64_vi <0x20>; defm V_SUBREV_F16 : VOP2_Real_e32e64_vi <0x21>; defm V_MUL_F16 : VOP2_Real_e32e64_vi <0x22>; defm V_MAC_F16 : VOP2_Real_e32e64_vi <0x23>; defm V_MADMK_F16 : VOP2_Real_MADK_vi <0x24>; defm V_MADAK_F16 : VOP2_Real_MADK_vi <0x25>; defm V_ADD_U16 : VOP2_Real_e32e64_vi <0x26>; defm V_SUB_U16 : VOP2_Real_e32e64_vi <0x27>; defm V_SUBREV_U16 : VOP2_Real_e32e64_vi <0x28>; defm V_MUL_LO_U16 : VOP2_Real_e32e64_vi <0x29>; defm V_LSHLREV_B16 : VOP2_Real_e32e64_vi <0x2a>; defm V_LSHRREV_B16 : VOP2_Real_e32e64_vi <0x2b>; defm V_ASHRREV_I16 : VOP2_Real_e32e64_vi <0x2c>; defm V_MAX_F16 : VOP2_Real_e32e64_vi <0x2d>; defm V_MIN_F16 : VOP2_Real_e32e64_vi <0x2e>; defm V_MAX_U16 : VOP2_Real_e32e64_vi <0x2f>; defm V_MAX_I16 : VOP2_Real_e32e64_vi <0x30>; defm V_MIN_U16 : VOP2_Real_e32e64_vi <0x31>; defm V_MIN_I16 : VOP2_Real_e32e64_vi <0x32>; defm V_LDEXP_F16 : VOP2_Real_e32e64_vi <0x33>; let SubtargetPredicate = isVI in { // Aliases to simplify matching of floating-point instructions that // are VOP2 on SI and VOP3 on VI. class SI2_VI3Alias : InstAlias < name#" $dst, $src0, $src1", !if(inst.Pfl.HasOMod, (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0), (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0)) >, PredicateControl { let UseInstAsmMatchConverter = 0; let AsmVariantName = AMDGPUAsmVariants.VOP3; } def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>; def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>; def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>; def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>; def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>; } // End SubtargetPredicate = isVI let SubtargetPredicate = HasDLInsts in { defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>; defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>; } // End SubtargetPredicate = HasDLInsts Index: vendor/llvm/dist-release_80/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/Target/ARM/ARMISelLowering.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/ARM/ARMISelLowering.cpp (revision 348932) @@ -1,15186 +1,15164 @@ //===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines the interfaces that ARM uses to lower LLVM code into a // selection DAG. 
// //===----------------------------------------------------------------------===// #include "ARMISelLowering.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "ARMPerfectShuffle.h" #include "ARMRegisterInfo.h" #include "ARMSelectionDAGInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "Utils/ARMBaseInfo.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSchedule.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include #include #include #include #include #include #include #include #include #include using namespace llvm; #define DEBUG_TYPE "arm-isel" STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); STATISTIC(NumLoopByVals, "Number of 
loops generated for byval arguments"); STATISTIC(NumConstpoolPromoted, "Number of constants with their storage promoted into constant pools"); static cl::opt<bool> ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true)); static cl::opt<bool> EnableConstpoolPromotion( "arm-promote-constant", cl::Hidden, cl::desc("Enable / disable promotion of unnamed_addr constants into " "constant pools"), cl::init(false)); // FIXME: set to true by default once PR32780 is fixed static cl::opt<unsigned> ConstpoolPromotionMaxSize( "arm-promote-constant-max-size", cl::Hidden, cl::desc("Maximum size of constant to promote into a constant pool"), cl::init(64)); static cl::opt<unsigned> ConstpoolPromotionMaxTotal( "arm-promote-constant-max-total", cl::Hidden, cl::desc("Maximum size of ALL constants to promote into a constant pool"), cl::init(128)); // The APCS parameter registers. static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT) { if (VT != PromotedLdStVT) { setOperationAction(ISD::LOAD, VT, Promote); AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT); setOperationAction(ISD::STORE, VT, Promote); AddPromotedToType (ISD::STORE, VT, PromotedLdStVT); } MVT ElemTy = VT.getVectorElementType(); if (ElemTy != MVT::f64) setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); if (ElemTy == MVT::i32) { setOperationAction(ISD::SINT_TO_FP, VT, Custom); setOperationAction(ISD::UINT_TO_FP, VT, Custom); setOperationAction(ISD::FP_TO_SINT, VT, Custom); setOperationAction(ISD::FP_TO_UINT, VT, Custom); } else { setOperationAction(ISD::SINT_TO_FP, VT, Expand); setOperationAction(ISD::UINT_TO_FP, VT, Expand); setOperationAction(ISD::FP_TO_SINT, VT, Expand); setOperationAction(ISD::FP_TO_UINT, VT, Expand); } setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); setOperationAction(ISD::SELECT, VT, Expand); setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); if (VT.isInteger()) { setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); setOperationAction(ISD::SRL, VT, Custom); } // Promote all bit-wise operations. if (VT.isInteger() && VT != PromotedBitwiseVT) { setOperationAction(ISD::AND, VT, Promote); AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT); setOperationAction(ISD::OR, VT, Promote); AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT); setOperationAction(ISD::XOR, VT, Promote); AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT); } // Neon does not support vector divide/remainder operations.
setOperationAction(ISD::SDIV, VT, Expand); setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64) for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) setOperationAction(Opcode, VT, Legal); } void ARMTargetLowering::addDRTypeForNEON(MVT VT) { addRegisterClass(VT, &ARM::DPRRegClass); addTypeForNEON(VT, MVT::f64, MVT::v2i32); } void ARMTargetLowering::addQRTypeForNEON(MVT VT) { addRegisterClass(VT, &ARM::DPairRegClass); addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); } ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { RegInfo = Subtarget->getRegisterInfo(); Itins = Subtarget->getInstrItineraryData(); setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() && !Subtarget->isTargetWatchOS()) { bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard; for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID) setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID), IsHFTarget ? CallingConv::ARM_AAPCS_VFP : CallingConv::ARM_AAPCS); } if (Subtarget->isTargetMachO()) { // Uses VFP for Thumb libfuncs if available. if (Subtarget->isThumb() && Subtarget->hasVFP2() && Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) { static const struct { const RTLIB::Libcall Op; const char * const Name; const ISD::CondCode Cond; } LibraryCalls[] = { // Single-precision floating-point arithmetic. { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID }, { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID }, { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID }, { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID }, // Double-precision floating-point arithmetic. { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID }, { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID }, { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID }, { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID }, // Single-precision comparisons. { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE }, { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE }, { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE }, { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE }, { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE }, { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE }, { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE }, { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ }, // Double-precision comparisons. { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE }, { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE }, { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE }, { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE }, { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE }, { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE }, { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE }, { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ }, // Floating-point to integer conversions. // i64 conversions are done via library routines even when generating VFP // instructions, so use the same ones. { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID }, { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID }, { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID }, { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID }, // Conversions between floating types.
{ RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID }, { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID }, // Integer to floating-point conversions. // i64 conversions are done via library routines even when generating VFP // instructions, so use the same ones. // FIXME: There appears to be some naming inconsistency in ARM libgcc: // e.g., __floatunsidf vs. __floatunssidfvfp. { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID }, { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID }, { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID }, { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID }, }; for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); if (LC.Cond != ISD::SETCC_INVALID) setCmpLibcallCC(LC.Op, LC.Cond); } } } // These libcalls are not available in 32-bit. setLibcallName(RTLIB::SHL_I128, nullptr); setLibcallName(RTLIB::SRL_I128, nullptr); setLibcallName(RTLIB::SRA_I128, nullptr); // RTLIB if (Subtarget->isAAPCS_ABI() && (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) { static const struct { const RTLIB::Libcall Op; const char * const Name; const CallingConv::ID CC; const ISD::CondCode Cond; } LibraryCalls[] = { // Double-precision floating-point arithmetic helper functions // RTABI chapter 4.1.2, Table 2 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Double-precision floating-point comparison helper functions // RTABI chapter 4.1.2, Table 3 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, // Single-precision floating-point arithmetic helper functions // RTABI chapter 4.1.2, Table 4 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Single-precision floating-point comparison helper functions // RTABI chapter 4.1.2, Table 5 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, // Floating-point to integer 
conversions. // RTABI chapter 4.1.2, Table 6 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Conversions between floating types. // RTABI chapter 4.1.2, Table 7 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Integer to floating-point conversions. // RTABI chapter 4.1.2, Table 8 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Long long helper functions // RTABI chapter 4.2, Table 9 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Integer division functions // RTABI chapter 4.3.1 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, }; for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); setLibcallCallingConv(LC.Op, LC.CC); if (LC.Cond != ISD::SETCC_INVALID) setCmpLibcallCC(LC.Op, LC.Cond); } // EABI dependent RTLIB if (TM.Options.EABIVersion == EABI::EABI4 || TM.Options.EABIVersion == EABI::EABI5) { static const struct { const RTLIB::Libcall Op; const char *const Name; const CallingConv::ID CC; const ISD::CondCode Cond; } MemOpsLibraryCalls[] = { // Memory operations // RTABI chapter 4.3.4 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::MEMMOVE, 
"__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, }; for (const auto &LC : MemOpsLibraryCalls) { setLibcallName(LC.Op, LC.Name); setLibcallCallingConv(LC.Op, LC.CC); if (LC.Cond != ISD::SETCC_INVALID) setCmpLibcallCC(LC.Op, LC.Cond); } } } if (Subtarget->isTargetWindows()) { static const struct { const RTLIB::Libcall Op; const char * const Name; const CallingConv::ID CC; } LibraryCalls[] = { { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP }, { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP }, { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP }, { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP }, { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP }, { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP }, { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP }, { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP }, }; for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); setLibcallCallingConv(LC.Op, LC.CC); } } // Use divmod compiler-rt calls for iOS 5.0 and later. if (Subtarget->isTargetMachO() && !(Subtarget->isTargetIOS() && Subtarget->getTargetTriple().isOSVersionLT(5, 0))) { setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); } // The half <-> float conversion functions are always soft-float on // non-watchos platforms, but are needed for some targets which use a // hard-float calling convention by default. if (!Subtarget->isTargetWatchABI()) { if (Subtarget->isAAPCS_ABI()) { setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS); } else { setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS); setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS); setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS); } } // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have // a __gnu_ prefix (which is the default). 
if (Subtarget->isTargetAEABI()) { static const struct { const RTLIB::Libcall Op; const char * const Name; const CallingConv::ID CC; } LibraryCalls[] = { { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS }, { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS }, { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS }, }; for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); setLibcallCallingConv(LC.Op, LC.CC); } } if (Subtarget->isThumb1Only()) addRegisterClass(MVT::i32, &ARM::tGPRRegClass); else addRegisterClass(MVT::i32, &ARM::GPRRegClass); if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { addRegisterClass(MVT::f32, &ARM::SPRRegClass); addRegisterClass(MVT::f64, &ARM::DPRRegClass); } if (Subtarget->hasFullFP16()) { addRegisterClass(MVT::f16, &ARM::HPRRegClass); setOperationAction(ISD::BITCAST, MVT::i16, Custom); setOperationAction(ISD::BITCAST, MVT::i32, Custom); setOperationAction(ISD::BITCAST, MVT::f16, Custom); setOperationAction(ISD::FMINNUM, MVT::f16, Legal); setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); } for (MVT VT : MVT::vector_valuetypes()) { for (MVT InnerVT : MVT::vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); } setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); setOperationAction(ISD::BSWAP, VT, Expand); } setOperationAction(ISD::ConstantFP, MVT::f32, Custom); setOperationAction(ISD::ConstantFP, MVT::f64, Custom); setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom); setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom); if (Subtarget->hasNEON()) { addDRTypeForNEON(MVT::v2f32); addDRTypeForNEON(MVT::v8i8); addDRTypeForNEON(MVT::v4i16); addDRTypeForNEON(MVT::v2i32); addDRTypeForNEON(MVT::v1i64); addQRTypeForNEON(MVT::v4f32); addQRTypeForNEON(MVT::v2f64); addQRTypeForNEON(MVT::v16i8); addQRTypeForNEON(MVT::v8i16); addQRTypeForNEON(MVT::v4i32); addQRTypeForNEON(MVT::v2i64); if (Subtarget->hasFullFP16()) { addQRTypeForNEON(MVT::v8f16); addDRTypeForNEON(MVT::v4f16); } // v2f64 is legal so that QR subregs can be extracted as f64 elements, but // neither Neon nor VFP support any arithmetic operations on it. // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively // supported for v4f32. setOperationAction(ISD::FADD, MVT::v2f64, Expand); setOperationAction(ISD::FSUB, MVT::v2f64, Expand); setOperationAction(ISD::FMUL, MVT::v2f64, Expand); // FIXME: Code duplication: FDIV and FREM are expanded always, see // ARMTargetLowering::addTypeForNEON method for details. setOperationAction(ISD::FDIV, MVT::v2f64, Expand); setOperationAction(ISD::FREM, MVT::v2f64, Expand); // FIXME: Create unittest. // In another words, find a way when "copysign" appears in DAG with vector // operands. setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); // FIXME: Code duplication: SETCC has custom operation action, see // ARMTargetLowering::addTypeForNEON method for details. setOperationAction(ISD::SETCC, MVT::v2f64, Expand); // FIXME: Create unittest for FNEG and for FABS. 
setOperationAction(ISD::FNEG, MVT::v2f64, Expand); setOperationAction(ISD::FABS, MVT::v2f64, Expand); setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); setOperationAction(ISD::FSIN, MVT::v2f64, Expand); setOperationAction(ISD::FCOS, MVT::v2f64, Expand); setOperationAction(ISD::FPOW, MVT::v2f64, Expand); setOperationAction(ISD::FLOG, MVT::v2f64, Expand); setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); setOperationAction(ISD::FEXP, MVT::v2f64, Expand); setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR. setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); setOperationAction(ISD::FRINT, MVT::v2f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); setOperationAction(ISD::FMA, MVT::v2f64, Expand); setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); setOperationAction(ISD::FSIN, MVT::v4f32, Expand); setOperationAction(ISD::FCOS, MVT::v4f32, Expand); setOperationAction(ISD::FPOW, MVT::v4f32, Expand); setOperationAction(ISD::FLOG, MVT::v4f32, Expand); setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); setOperationAction(ISD::FEXP, MVT::v4f32, Expand); setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); setOperationAction(ISD::FCEIL, MVT::v4f32, Expand); setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand); setOperationAction(ISD::FRINT, MVT::v4f32, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand); // Mark v2f32 intrinsics. setOperationAction(ISD::FSQRT, MVT::v2f32, Expand); setOperationAction(ISD::FSIN, MVT::v2f32, Expand); setOperationAction(ISD::FCOS, MVT::v2f32, Expand); setOperationAction(ISD::FPOW, MVT::v2f32, Expand); setOperationAction(ISD::FLOG, MVT::v2f32, Expand); setOperationAction(ISD::FLOG2, MVT::v2f32, Expand); setOperationAction(ISD::FLOG10, MVT::v2f32, Expand); setOperationAction(ISD::FEXP, MVT::v2f32, Expand); setOperationAction(ISD::FEXP2, MVT::v2f32, Expand); setOperationAction(ISD::FCEIL, MVT::v2f32, Expand); setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand); setOperationAction(ISD::FRINT, MVT::v2f32, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand); setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand); // Neon does not support some operations on v1i64 and v2i64 types. setOperationAction(ISD::MUL, MVT::v1i64, Expand); // Custom handling for some quad-vector types to detect VMULL. setOperationAction(ISD::MUL, MVT::v8i16, Custom); setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v2i64, Custom); // Custom handling for some vector types to avoid expensive expansions setOperationAction(ISD::SDIV, MVT::v4i16, Custom); setOperationAction(ISD::SDIV, MVT::v8i8, Custom); setOperationAction(ISD::UDIV, MVT::v4i16, Custom); setOperationAction(ISD::UDIV, MVT::v8i8, Custom); // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with // a destination type that is wider than the source, and nor does // it have a FP_TO_[SU]INT instruction with a narrower destination than // source. 
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom); setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); // NEON does not have single instruction CTPOP for vectors with element // types wider than 8-bits. However, custom lowering can leverage the // v8i8/v16i8 vcnt instruction. setOperationAction(ISD::CTPOP, MVT::v2i32, Custom); setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); setOperationAction(ISD::CTPOP, MVT::v4i16, Custom); setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); setOperationAction(ISD::CTPOP, MVT::v1i64, Custom); setOperationAction(ISD::CTPOP, MVT::v2i64, Custom); setOperationAction(ISD::CTLZ, MVT::v1i64, Expand); setOperationAction(ISD::CTLZ, MVT::v2i64, Expand); // NEON does not have single instruction CTTZ for vectors. setOperationAction(ISD::CTTZ, MVT::v8i8, Custom); setOperationAction(ISD::CTTZ, MVT::v4i16, Custom); setOperationAction(ISD::CTTZ, MVT::v2i32, Custom); setOperationAction(ISD::CTTZ, MVT::v1i64, Custom); setOperationAction(ISD::CTTZ, MVT::v16i8, Custom); setOperationAction(ISD::CTTZ, MVT::v8i16, Custom); setOperationAction(ISD::CTTZ, MVT::v4i32, Custom); setOperationAction(ISD::CTTZ, MVT::v2i64, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom); // NEON only has FMA instructions as of VFP4. if (!Subtarget->hasVFP4()) { setOperationAction(ISD::FMA, MVT::v2f32, Expand); setOperationAction(ISD::FMA, MVT::v4f32, Expand); } setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::BUILD_VECTOR); setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::FP_TO_SINT); setTargetDAGCombine(ISD::FP_TO_UINT); setTargetDAGCombine(ISD::FDIV); setTargetDAGCombine(ISD::LOAD); // It is legal to extload from v4i8 to v4i16 or v4i32. 
for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16, MVT::v2i32}) { for (MVT VT : MVT::integer_vector_valuetypes()) { setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal); setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal); setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal); } } } if (Subtarget->isFPOnlySP()) { // When targeting a floating-point unit with only single-precision // operations, f64 is legal for the few double-precision instructions which // are present However, no double-precision operations other than moves, // loads and stores are provided by the hardware. setOperationAction(ISD::FADD, MVT::f64, Expand); setOperationAction(ISD::FSUB, MVT::f64, Expand); setOperationAction(ISD::FMUL, MVT::f64, Expand); setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FDIV, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FGETSIGN, MVT::f64, Expand); setOperationAction(ISD::FNEG, MVT::f64, Expand); setOperationAction(ISD::FABS, MVT::f64, Expand); setOperationAction(ISD::FSQRT, MVT::f64, Expand); setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FLOG, MVT::f64, Expand); setOperationAction(ISD::FLOG2, MVT::f64, Expand); setOperationAction(ISD::FLOG10, MVT::f64, Expand); setOperationAction(ISD::FEXP, MVT::f64, Expand); setOperationAction(ISD::FEXP2, MVT::f64, Expand); setOperationAction(ISD::FCEIL, MVT::f64, Expand); setOperationAction(ISD::FTRUNC, MVT::f64, Expand); setOperationAction(ISD::FRINT, MVT::f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); setOperationAction(ISD::FFLOOR, MVT::f64, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom); setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); } computeRegisterProperties(Subtarget->getRegisterInfo()); // ARM does not have floating-point extending loads. for (MVT VT : MVT::fp_valuetypes()) { setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); } // ... or truncating stores setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); // ARM does not have i1 sign extending load. for (MVT VT : MVT::integer_valuetypes()) setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); // ARM supports all 4 flavors of integer indexed load / store. if (!Subtarget->isThumb1Only()) { for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { setIndexedLoadAction(im, MVT::i1, Legal); setIndexedLoadAction(im, MVT::i8, Legal); setIndexedLoadAction(im, MVT::i16, Legal); setIndexedLoadAction(im, MVT::i32, Legal); setIndexedStoreAction(im, MVT::i1, Legal); setIndexedStoreAction(im, MVT::i8, Legal); setIndexedStoreAction(im, MVT::i16, Legal); setIndexedStoreAction(im, MVT::i32, Legal); } } else { // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}. 
setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal); setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal); } setOperationAction(ISD::SADDO, MVT::i32, Custom); setOperationAction(ISD::UADDO, MVT::i32, Custom); setOperationAction(ISD::SSUBO, MVT::i32, Custom); setOperationAction(ISD::USUBO, MVT::i32, Custom); setOperationAction(ISD::ADDCARRY, MVT::i32, Custom); setOperationAction(ISD::SUBCARRY, MVT::i32, Custom); // i64 operation support. setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); if (Subtarget->isThumb1Only()) { setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); } if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops() || (Subtarget->isThumb2() && !Subtarget->hasDSP())) setOperationAction(ISD::MULHS, MVT::i32, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL, MVT::i64, Custom); setOperationAction(ISD::SRA, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1. if (Subtarget->isThumb1Only()) { setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); } if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); // ARM does not have ROTL. setOperationAction(ISD::ROTL, MVT::i32, Expand); for (MVT VT : MVT::vector_valuetypes()) { setOperationAction(ISD::ROTL, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); } setOperationAction(ISD::CTTZ, MVT::i32, Custom); setOperationAction(ISD::CTPOP, MVT::i32, Expand); if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) { setOperationAction(ISD::CTLZ, MVT::i32, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall); } // @llvm.readcyclecounter requires the Performance Monitors extension. // Default to the 0 expansion on unsupported platforms. // FIXME: Technically there are older ARM CPUs that have // implementation-specific ways of obtaining this information. if (Subtarget->hasPerfMon()) setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); // Only ARMv6 has BSWAP. if (!Subtarget->hasV6Ops()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode() : Subtarget->hasDivideInARMMode(); if (!hasDivide) { // These are expanded into libcalls if the cpu doesn't have HW divider. 
setOperationAction(ISD::SDIV, MVT::i32, LibCall); setOperationAction(ISD::UDIV, MVT::i32, LibCall); } if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) { setOperationAction(ISD::SDIV, MVT::i32, Custom); setOperationAction(ISD::UDIV, MVT::i32, Custom); setOperationAction(ISD::SDIV, MVT::i64, Custom); setOperationAction(ISD::UDIV, MVT::i64, Custom); } setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); // Register based DivRem for AEABI (RTABI 4.2) if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() || Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || Subtarget->isTargetWindows()) { setOperationAction(ISD::SREM, MVT::i64, Custom); setOperationAction(ISD::UREM, MVT::i64, Custom); HasStandaloneRem = false; if (Subtarget->isTargetWindows()) { const struct { const RTLIB::Libcall Op; const char * const Name; const CallingConv::ID CC; } LibraryCalls[] = { { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS }, { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS }, { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS }, { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS }, }; for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); setLibcallCallingConv(LC.Op, LC.CC); } } else { const struct { const RTLIB::Libcall Op; const char * const Name; const CallingConv::ID CC; } LibraryCalls[] = { { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS }, { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS }, { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS }, }; for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); setLibcallCallingConv(LC.Op, LC.CC); } } setOperationAction(ISD::SDIVREM, MVT::i32, Custom); setOperationAction(ISD::UDIVREM, MVT::i32, Custom); setOperationAction(ISD::SDIVREM, MVT::i64, Custom); setOperationAction(ISD::UDIVREM, MVT::i64, Custom); } else { setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Expand); } if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT()) for (auto &VT : {MVT::f32, MVT::f64}) setOperationAction(ISD::FPOWI, VT, Custom); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); setOperationAction(ISD::TRAP, MVT::Other, Legal); setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); // Use the default implementation. 
setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::Other, Expand); setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::VAEND, MVT::Other, Expand); setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); if (Subtarget->isTargetWindows()) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); else setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use // the default expansion. InsertFencesForAtomic = false; if (Subtarget->hasAnyDataBarrier() && (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) { // ATOMIC_FENCE needs custom lowering; the others should have been expanded // to ldrex/strex loops already. setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); if (!Subtarget->isThumb() || !Subtarget->isMClass()) setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); // On v8, we have particularly efficient implementations of atomic fences // if they can be combined with nearby atomic loads and stores. if (!Subtarget->hasAcquireRelease() || getTargetMachine().getOptLevel() == 0) { // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc. InsertFencesForAtomic = true; } } else { // If there's anything we can use as a barrier, go through custom lowering // for ATOMIC_FENCE. // If target has DMB in thumb, Fences can be inserted. if (Subtarget->hasDataBarrier()) InsertFencesForAtomic = true; setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Subtarget->hasAnyDataBarrier() ? Custom : Expand); // Set them all for expansion, which will force libcalls. setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the // Unordered/Monotonic case. if (!InsertFencesForAtomic) { setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); } } setOperationAction(ISD::PREFETCH, MVT::Other, Custom); // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. if (!Subtarget->hasV6Ops()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); } setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR // iff target supports vfp2. setOperationAction(ISD::BITCAST, MVT::i64, Custom); setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); } // We want to custom lower some of our intrinsics. 
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); if (Subtarget->useSjLjEH()) setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); setOperationAction(ISD::SETCC, MVT::f64, Expand); setOperationAction(ISD::SELECT, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); if (Subtarget->hasFullFP16()) { setOperationAction(ISD::SETCC, MVT::f16, Expand); setOperationAction(ISD::SELECT, MVT::f16, Custom); setOperationAction(ISD::SELECT_CC, MVT::f16, Custom); } setOperationAction(ISD::SETCCCARRY, MVT::i32, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::BR_CC, MVT::i32, Custom); if (Subtarget->hasFullFP16()) setOperationAction(ISD::BR_CC, MVT::f16, Custom); setOperationAction(ISD::BR_CC, MVT::f32, Custom); setOperationAction(ISD::BR_CC, MVT::f64, Custom); setOperationAction(ISD::BR_JT, MVT::Other, Custom); // We don't support sin/cos/fmod/copysign/pow setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand); if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); } setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); if (!Subtarget->hasVFP4()) { setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FMA, MVT::f32, Expand); } // Various VFP goodness if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) { // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded. if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) { setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); } // fp16 is a special v7 extension that adds f16 <-> f32 conversions. if (!Subtarget->hasFP16()) { setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); } } // Use __sincos_stret if available. if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { setOperationAction(ISD::FSINCOS, MVT::f64, Custom); setOperationAction(ISD::FSINCOS, MVT::f32, Custom); } // FP-ARMv8 implements a lot of rounding-like FP operations. 
if (Subtarget->hasFPARMv8()) { setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FCEIL, MVT::f32, Legal); setOperationAction(ISD::FROUND, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); setOperationAction(ISD::FRINT, MVT::f32, Legal); setOperationAction(ISD::FMINNUM, MVT::f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal); setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); if (!Subtarget->isFPOnlySP()) { setOperationAction(ISD::FFLOOR, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); setOperationAction(ISD::FROUND, MVT::f64, Legal); setOperationAction(ISD::FTRUNC, MVT::f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); setOperationAction(ISD::FRINT, MVT::f64, Legal); setOperationAction(ISD::FMINNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); } } if (Subtarget->hasNEON()) { // vmin and vmax aren't available in a scalar form, so we use // a NEON instruction with an undef lane instead. setOperationAction(ISD::FMINIMUM, MVT::f16, Legal); setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal); setOperationAction(ISD::FMINIMUM, MVT::f32, Legal); setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal); setOperationAction(ISD::FMINIMUM, MVT::v2f32, Legal); setOperationAction(ISD::FMAXIMUM, MVT::v2f32, Legal); setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal); setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal); if (Subtarget->hasFullFP16()) { setOperationAction(ISD::FMINNUM, MVT::v4f16, Legal); setOperationAction(ISD::FMAXNUM, MVT::v4f16, Legal); setOperationAction(ISD::FMINNUM, MVT::v8f16, Legal); setOperationAction(ISD::FMAXNUM, MVT::v8f16, Legal); setOperationAction(ISD::FMINIMUM, MVT::v4f16, Legal); setOperationAction(ISD::FMAXIMUM, MVT::v4f16, Legal); setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal); setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal); } } // We have target-specific dag combine patterns for the following nodes: // ARMISD::VMOVRRD - No need to call setTargetDAGCombine setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::XOR); if (Subtarget->hasV6Ops()) setTargetDAGCombine(ISD::SRL); setStackPointerRegisterToSaveRestore(ARM::SP); if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() || !Subtarget->hasVFP2()) setSchedulingPreference(Sched::RegPressure); else setSchedulingPreference(Sched::Hybrid); //// temporary - rewrite interface to use type MaxStoresPerMemset = 8; MaxStoresPerMemsetOptSize = 4; MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores MaxStoresPerMemcpyOptSize = 2; MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores MaxStoresPerMemmoveOptSize = 2; // On ARM arguments smaller than 4 bytes are extended, so all arguments // are at least 4 bytes aligned. setMinStackArgumentAlignment(4); // Prefer likely predicted branches to selects on out-of-order cores. PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder(); setPrefLoopAlignment(Subtarget->getPrefLoopAlignment()); setMinFunctionAlignment(Subtarget->isThumb() ? 
1 : 2); } bool ARMTargetLowering::useSoftFloat() const { return Subtarget->useSoftFloat(); } // FIXME: It might make sense to define the representative register class as the // nearest super-register that has a non-null superset. For example, DPR_VFP2 is // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently, // SPR's representative would be DPR_VFP2. This should work well if register // pressure tracking were modified such that a register use would increment the // pressure of the register class's representative and all of its super // classes' representatives transitively. We have not implemented this because // of the difficulty prior to coalescing of modeling operand register classes // due to the common occurrence of cross class copies and subregister insertions // and extractions. std::pair<const TargetRegisterClass *, uint8_t> ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const { const TargetRegisterClass *RRC = nullptr; uint8_t Cost = 1; switch (VT.SimpleTy) { default: return TargetLowering::findRepresentativeClass(TRI, VT); // Use DPR as representative register class for all floating point // and vector types. Since there are 32 SPR registers and 32 DPR registers, // the cost is 1 for both f32 and f64. case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16: case MVT::v2i32: case MVT::v1i64: case MVT::v2f32: RRC = &ARM::DPRRegClass; // When NEON is used for SP, only half of the register file is available // because operations that define both SP and DP results will be constrained // to the VFP2 class (D0-D15). We currently model this constraint prior to // coalescing by double-counting the SP regs. See the FIXME above. if (Subtarget->useNEONForSinglePrecisionFP()) Cost = 2; break; case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: case MVT::v4f32: case MVT::v2f64: RRC = &ARM::DPRRegClass; Cost = 2; break; case MVT::v4i64: RRC = &ARM::DPRRegClass; Cost = 4; break; case MVT::v8i64: RRC = &ARM::DPRRegClass; Cost = 8; break; } return std::make_pair(RRC, Cost); } const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((ARMISD::NodeType)Opcode) { case ARMISD::FIRST_NUMBER: break; case ARMISD::Wrapper: return "ARMISD::Wrapper"; case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL"; case ARMISD::CALL: return "ARMISD::CALL"; case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; case ARMISD::BRCOND: return "ARMISD::BRCOND"; case ARMISD::BR_JT: return "ARMISD::BR_JT"; case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG"; case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; case ARMISD::CMP: return "ARMISD::CMP"; case ARMISD::CMN: return "ARMISD::CMN"; case ARMISD::CMPZ: return "ARMISD::CMPZ"; case ARMISD::CMPFP: return "ARMISD::CMPFP"; case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; case ARMISD::CMOV: return "ARMISD::CMOV"; case ARMISD::SUBS: return "ARMISD::SUBS"; case ARMISD::SSAT: return "ARMISD::SSAT"; case ARMISD::USAT: return "ARMISD::USAT"; case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; case ARMISD::ADDC: return "ARMISD::ADDC"; case ARMISD::ADDE: return "ARMISD::ADDE";
case ARMISD::SUBC: return "ARMISD::SUBC"; case ARMISD::SUBE: return "ARMISD::SUBE"; case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; case ARMISD::VMOVhr: return "ARMISD::VMOVhr"; case ARMISD::VMOVrh: return "ARMISD::VMOVrh"; case ARMISD::VMOVSR: return "ARMISD::VMOVSR"; case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP"; case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH"; case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN"; case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR"; case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK"; case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK"; case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; case ARMISD::VCGE: return "ARMISD::VCGE"; case ARMISD::VCGEZ: return "ARMISD::VCGEZ"; case ARMISD::VCLEZ: return "ARMISD::VCLEZ"; case ARMISD::VCGEU: return "ARMISD::VCGEU"; case ARMISD::VCGT: return "ARMISD::VCGT"; case ARMISD::VCGTZ: return "ARMISD::VCGTZ"; case ARMISD::VCLTZ: return "ARMISD::VCLTZ"; case ARMISD::VCGTU: return "ARMISD::VCGTU"; case ARMISD::VTST: return "ARMISD::VTST"; case ARMISD::VSHL: return "ARMISD::VSHL"; case ARMISD::VSHRs: return "ARMISD::VSHRs"; case ARMISD::VSHRu: return "ARMISD::VSHRu"; case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; case ARMISD::VQSHLs: return "ARMISD::VQSHLs"; case ARMISD::VQSHLu: return "ARMISD::VQSHLu"; case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu"; case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs"; case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; case ARMISD::VSLI: return "ARMISD::VSLI"; case ARMISD::VSRI: return "ARMISD::VSRI"; case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM"; case ARMISD::VDUP: return "ARMISD::VDUP"; case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; case ARMISD::VEXT: return "ARMISD::VEXT"; case ARMISD::VREV64: return "ARMISD::VREV64"; case ARMISD::VREV32: return "ARMISD::VREV32"; case ARMISD::VREV16: return "ARMISD::VREV16"; case ARMISD::VZIP: return "ARMISD::VZIP"; case ARMISD::VUZP: return "ARMISD::VUZP"; case ARMISD::VTRN: return "ARMISD::VTRN"; case ARMISD::VTBL1: return "ARMISD::VTBL1"; case ARMISD::VTBL2: return "ARMISD::VTBL2"; case ARMISD::VMULLs: return "ARMISD::VMULLs"; case ARMISD::VMULLu: return "ARMISD::VMULLu"; case ARMISD::UMAAL: return "ARMISD::UMAAL"; case ARMISD::UMLAL: return "ARMISD::UMLAL"; case ARMISD::SMLAL: return "ARMISD::SMLAL"; case ARMISD::SMLALBB: return "ARMISD::SMLALBB"; case ARMISD::SMLALBT: return "ARMISD::SMLALBT"; case ARMISD::SMLALTB: return "ARMISD::SMLALTB"; case ARMISD::SMLALTT: return "ARMISD::SMLALTT"; case ARMISD::SMULWB: return "ARMISD::SMULWB"; case ARMISD::SMULWT: return "ARMISD::SMULWT"; case ARMISD::SMLALD: return "ARMISD::SMLALD"; case ARMISD::SMLALDX: return "ARMISD::SMLALDX"; case ARMISD::SMLSLD: return 
"ARMISD::SMLSLD"; case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX"; case ARMISD::SMMLAR: return "ARMISD::SMMLAR"; case ARMISD::SMMLSR: return "ARMISD::SMMLSR"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; case ARMISD::VBSL: return "ARMISD::VBSL"; case ARMISD::MEMCPY: return "ARMISD::MEMCPY"; case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP"; case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD"; case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD"; case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD"; case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD"; case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD"; case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD"; case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD"; case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD"; case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD"; case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD"; case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD"; case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD"; case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD"; case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD"; case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD"; case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; } return nullptr; } EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const { if (!VT.isVector()) return getPointerTy(DL); return VT.changeVectorElementTypeToInteger(); } /// getRegClassFor - Return the register class that should be used for the /// specified value type. const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const { // Map v4i64 to QQ registers but do not make the type legal. Similarly map // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to // load / store 4 to 8 consecutive D registers. if (Subtarget->hasNEON()) { if (VT == MVT::v4i64) return &ARM::QQPRRegClass; if (VT == MVT::v8i64) return &ARM::QQQQPRRegClass; } return TargetLowering::getRegClassFor(VT); } // memcpy, and other memory intrinsics, typically tries to use LDM/STM if the // source/dest is aligned and the copy size is large enough. We therefore want // to align such objects passed to memory intrinsics. bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, unsigned &PrefAlign) const { if (!isa(CI)) return false; MinSize = 8; // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1 // cycle faster than 4-byte aligned LDM. PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4); return true; } // Create a fast isel object. 
FastISel * ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const { return ARM::createFastISel(funcInfo, libInfo); } Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { unsigned NumVals = N->getNumValues(); if (!NumVals) return Sched::RegPressure; for (unsigned i = 0; i != NumVals; ++i) { EVT VT = N->getValueType(i); if (VT == MVT::Glue || VT == MVT::Other) continue; if (VT.isFloatingPoint() || VT.isVector()) return Sched::ILP; } if (!N->isMachineOpcode()) return Sched::RegPressure; // Loads are scheduled for latency even if the instruction itinerary // is not available. const TargetInstrInfo *TII = Subtarget->getInstrInfo(); const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); if (MCID.getNumDefs() == 0) return Sched::RegPressure; if (!Itins->isEmpty() && Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2) return Sched::ILP; return Sched::RegPressure; } //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// static bool isSRL16(const SDValue &Op) { if (Op.getOpcode() != ISD::SRL) return false; if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) return Const->getZExtValue() == 16; return false; } static bool isSRA16(const SDValue &Op) { if (Op.getOpcode() != ISD::SRA) return false; if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) return Const->getZExtValue() == 16; return false; } static bool isSHL16(const SDValue &Op) { if (Op.getOpcode() != ISD::SHL) return false; if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1))) return Const->getZExtValue() == 16; return false; } // Check for a signed 16-bit value. We special case SRA because it makes it // simpler when also looking for SRAs that aren't sign extending a // smaller value. Without the check, we'd need to take extra care with // checking order for some operations. static bool isS16(const SDValue &Op, SelectionDAG &DAG) { if (isSRA16(Op)) return isSHL16(Op.getOperand(0)); return DAG.ComputeNumSignBits(Op) == 17; } /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { switch (CC) { default: llvm_unreachable("Unknown condition code!"); case ISD::SETNE: return ARMCC::NE; case ISD::SETEQ: return ARMCC::EQ; case ISD::SETGT: return ARMCC::GT; case ISD::SETGE: return ARMCC::GE; case ISD::SETLT: return ARMCC::LT; case ISD::SETLE: return ARMCC::LE; case ISD::SETUGT: return ARMCC::HI; case ISD::SETUGE: return ARMCC::HS; case ISD::SETULT: return ARMCC::LO; case ISD::SETULE: return ARMCC::LS; } } /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) { CondCode2 = ARMCC::AL; InvalidOnQNaN = true; switch (CC) { default: llvm_unreachable("Unknown FP condition!"); case ISD::SETEQ: case ISD::SETOEQ: CondCode = ARMCC::EQ; InvalidOnQNaN = false; break; case ISD::SETGT: case ISD::SETOGT: CondCode = ARMCC::GT; break; case ISD::SETGE: case ISD::SETOGE: CondCode = ARMCC::GE; break; case ISD::SETOLT: CondCode = ARMCC::MI; break; case ISD::SETOLE: CondCode = ARMCC::LS; break; case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; InvalidOnQNaN = false; break; case ISD::SETO: CondCode = ARMCC::VC; break; case ISD::SETUO: CondCode = ARMCC::VS; break; case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; InvalidOnQNaN = false; break; case ISD::SETUGT: CondCode = ARMCC::HI; break; case ISD::SETUGE: CondCode = ARMCC::PL; break; case ISD::SETLT: case ISD::SETULT: CondCode = ARMCC::LT; break; case ISD::SETLE: case ISD::SETULE: CondCode = ARMCC::LE; break; case ISD::SETNE: case ISD::SETUNE: CondCode = ARMCC::NE; InvalidOnQNaN = false; break; } } //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// #include "ARMGenCallingConv.inc" /// getEffectiveCallingConv - Get the effective calling convention, taking into /// account presence of floating point hardware and calling convention /// limitations, such as support for variadic functions. CallingConv::ID ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, bool isVarArg) const { switch (CC) { default: report_fatal_error("Unsupported calling convention"); case CallingConv::ARM_AAPCS: case CallingConv::ARM_APCS: case CallingConv::GHC: return CC; case CallingConv::PreserveMost: return CallingConv::PreserveMost; case CallingConv::ARM_AAPCS_VFP: case CallingConv::Swift: return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; case CallingConv::C: if (!Subtarget->isAAPCS_ABI()) return CallingConv::ARM_APCS; else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && getTargetMachine().Options.FloatABIType == FloatABI::Hard && !isVarArg) return CallingConv::ARM_AAPCS_VFP; else return CallingConv::ARM_AAPCS; case CallingConv::Fast: case CallingConv::CXX_FAST_TLS: if (!Subtarget->isAAPCS_ABI()) { if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) return CallingConv::Fast; return CallingConv::ARM_APCS; } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) return CallingConv::ARM_AAPCS_VFP; else return CallingConv::ARM_AAPCS; } } CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const { return CCAssignFnForNode(CC, false, isVarArg); } CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const { return CCAssignFnForNode(CC, true, isVarArg); } /// CCAssignFnForNode - Selects the correct CCAssignFn for the given /// CallingConvention. CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const { switch (getEffectiveCallingConv(CC, isVarArg)) { default: report_fatal_error("Unsupported calling convention"); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); case CallingConv::ARM_AAPCS: return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); case CallingConv::ARM_AAPCS_VFP: return (Return ? 
RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); case CallingConv::Fast: return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); case CallingConv::GHC: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); case CallingConv::PreserveMost: return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); } } /// LowerCallResult - Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. SDValue ARMTargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals, bool isThisReturn, SDValue ThisVal) const { // Assign locations to each value returned by this call. SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg)); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign VA = RVLocs[i]; // Pass 'this' value directly from the argument to return value, to avoid // reg unit interference if (i == 0 && isThisReturn) { assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 && "unexpected return calling convention register assignment"); InVals.push_back(ThisVal); continue; } SDValue Val; if (VA.needsCustom()) { // Handle f64 or half of a v2f64. SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Lo.getValue(1); InFlag = Lo.getValue(2); VA = RVLocs[++i]; // skip ahead to next loc SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); if (!Subtarget->isLittle()) std::swap (Lo, Hi); Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); if (VA.getLocVT() == MVT::v2f64) { SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, DAG.getConstant(0, dl, MVT::i32)); VA = RVLocs[++i]; // skip ahead to next loc Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Lo.getValue(1); InFlag = Lo.getValue(2); VA = RVLocs[++i]; // skip ahead to next loc Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); if (!Subtarget->isLittle()) std::swap (Lo, Hi); Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, DAG.getConstant(1, dl, MVT::i32)); } } else { Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), InFlag); Chain = Val.getValue(1); InFlag = Val.getValue(2); } switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); break; } InVals.push_back(Val); } return Chain; } /// LowerMemOpCallTo - Store the argument to the stack. 
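// A minimal sketch of what the f64 reassembly in LowerCallResult above
// computes, expressed on plain integers: the two i32 register copies (Lo/Hi)
// are glued back into one 64-bit value, with the halves swapped on
// big-endian targets just as in the !isLittle() case. Illustrative only; the
// helper name and inputs are assumptions of this sketch, not part of the
// lowering code.
#include <cstdint>
#include <cstring>
#include <utility>
static double rebuildF64FromRegPair(uint32_t Lo, uint32_t Hi, bool IsLittle) {
  if (!IsLittle)
    std::swap(Lo, Hi);                         // halves arrive swapped on BE
  uint64_t Bits = (uint64_t(Hi) << 32) | Lo;   // Hi:Lo form the raw f64 bits
  double D;
  std::memcpy(&D, &Bits, sizeof(D));           // bit-cast, as VMOVDRR does
  return D;
}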
SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const { unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), StackPtr, PtrOff); return DAG.getStore( Chain, dl, Arg, PtrOff, MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset)); } void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg, RegsToPassVector &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, SDValue &StackPtr, SmallVectorImpl &MemOpChains, ISD::ArgFlagsTy Flags) const { SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); unsigned id = Subtarget->isLittle() ? 0 : 1; RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id))); if (NextVA.isRegLoc()) RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id))); else { assert(NextVA.isMemLoc()); if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout())); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id), dl, DAG, NextVA, Flags)); } } /// LowerCall - Lowering a call into a callseq_start <- /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter /// nodes. SDValue ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &dl = CLI.DL; SmallVectorImpl &Outs = CLI.Outs; SmallVectorImpl &OutVals = CLI.OutVals; SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool doesNotRet = CLI.DoesNotReturn; bool isVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); bool isThisReturn = false; bool isSibCall = false; auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls"); // Disable tail calls if they're not supported. if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true") isTailCall = false; if (isTailCall) { // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, isStructRet, MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG); if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall()) report_fatal_error("failed to perform tail call elimination on a call " "site marked musttail"); // We don't support GuaranteedTailCallOpt for ARM, only automatically // detected sibcalls. if (isTailCall) { ++NumTailCalls; isSibCall = true; } } // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg)); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); // For tail calls, memory operands are available in our caller's stack. if (isSibCall) NumBytes = 0; // Adjust the stack pointer for the new arguments... 
// These operations are automatically eliminated by the prolog/epilog pass if (!isSibCall) Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout())); RegsToPassVector RegsToPass; SmallVector MemOpChains; // Walk the register/memloc assignments, inserting copies/loads. In the case // of tail call optimization, arguments are handled later. for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; ++i, ++realArgIdx) { CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[realArgIdx]; ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; bool isByVal = Flags.isByVal(); // Promote the value if needed. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::ZExt: Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::AExt: Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::BCvt: Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); break; } // f64 and v2f64 might be passed in i32 pairs and must be split into pieces if (VA.needsCustom()) { if (VA.getLocVT() == MVT::v2f64) { SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, DAG.getConstant(0, dl, MVT::i32)); SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, DAG.getConstant(1, dl, MVT::i32)); PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); VA = ArgLocs[++i]; // skip ahead to next loc if (VA.isRegLoc()) { PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); } else { assert(VA.isMemLoc()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1, dl, DAG, VA, Flags)); } } else { PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); } } else if (VA.isRegLoc()) { if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() && Outs[0].VT == MVT::i32) { assert(VA.getLocVT() == MVT::i32 && "unexpected calling convention register assignment"); assert(!Ins.empty() && Ins[0].VT == MVT::i32 && "unexpected use of 'returned'"); isThisReturn = true; } RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else if (isByVal) { assert(VA.isMemLoc()); unsigned offset = 0; // True if this byval aggregate will be split between registers // and memory. unsigned ByValArgsCount = CCInfo.getInRegsParamsCount(); unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed(); if (CurByValIdx < ByValArgsCount) { unsigned RegBegin, RegEnd; CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); unsigned int i, j; for (i = 0, j = RegBegin; j < RegEnd; i++, j++) { SDValue Const = DAG.getConstant(4*i, dl, MVT::i32); SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(), DAG.InferPtrAlignment(AddArg)); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(j, Load)); } // If parameter size outsides register area, "offset" value // helps us to calculate stack slot for remained part properly. 
        offset = RegEnd - RegBegin;

        CCInfo.nextInRegsParam();
      }

      if (Flags.getByValSize() > 4*offset) {
        auto PtrVT = getPointerTy(DAG.getDataLayout());
        unsigned LocMemOffset = VA.getLocMemOffset();
        SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
        SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
        SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
        SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
        SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
                                           MVT::i32);
        SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
                                            MVT::i32);

        SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
        MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
                                          Ops));
      }
    } else if (!isSibCall) {
      assert(VA.isMemLoc());

      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                             dl, DAG, VA, Flags));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
-  // Tail call byval lowering might overwrite argument registers so in case of
-  // tail call optimization the copies to registers are lowered later.
-  if (!isTailCall)
-    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
-      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
-                               RegsToPass[i].second, InFlag);
-      InFlag = Chain.getValue(1);
-    }
-
-  // For tail calls lower the arguments to the 'real' stack slot.
-  if (isTailCall) {
-    // Force all the incoming stack arguments to be loaded from the stack
-    // before any new outgoing arguments are stored to the stack, because the
-    // outgoing stack slots may alias the incoming argument stack slots, and
-    // the alias isn't otherwise explicit. This is slightly more conservative
-    // than necessary, because it means that each store effectively depends
-    // on every argument instead of just those arguments it would clobber.
-
-    // Do not flag preceding copytoreg stuff together with the following stuff.
-    InFlag = SDValue();
-    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
-      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
-                               RegsToPass[i].second, InFlag);
-      InFlag = Chain.getValue(1);
-    }
-    InFlag = SDValue();
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                             RegsToPass[i].second, InFlag);
+    InFlag = Chain.getValue(1);
   }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  bool isDirect = false;

  const TargetMachine &TM = getTargetMachine();
  const Module *Mod = MF.getFunction().getParent();
  const GlobalValue *GV = nullptr;
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    GV = G->getGlobal();
  bool isStub =
      !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();

  bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
  bool isLocalARMFunc = false;
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  auto PtrVt = getPointerTy(DAG.getDataLayout());

  if (Subtarget->genLongCalls()) {
    assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
           "long-calls codegen is not position independent!");
    // Handle a global address or an external symbol.
If it's not one of // those, the target's already in a register, so we don't need to do // anything extra. if (isa(Callee)) { // Create a constant pool entry for the callee address unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0); // Get the address of the callee into a register SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad( PtrVt, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } else if (ExternalSymbolSDNode *S=dyn_cast(Callee)) { const char *Sym = S->getSymbol(); // Create a constant pool entry for the callee address unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, ARMPCLabelIndex, 0); // Get the address of the callee into a register SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad( PtrVt, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } } else if (isa(Callee)) { // If we're optimizing for minimum size and the function is called three or // more times in this block, we can improve codesize by calling indirectly // as BLXr has a 16-bit encoding. auto *GV = cast(Callee)->getGlobal(); auto *BB = CLI.CS.getParent(); bool PreferIndirect = Subtarget->isThumb() && MF.getFunction().optForMinSize() && count_if(GV->users(), [&BB](const User *U) { return isa(U) && cast(U)->getParent() == BB; }) > 2; if (!PreferIndirect) { isDirect = true; bool isDef = GV->isStrongDefinitionForLinker(); // ARM call to a local ARM function is predicable. isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking); // tBX takes a register source operand. if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); Callee = DAG.getNode( ARMISD::WrapperPIC, dl, PtrVt, DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY)); Callee = DAG.getLoad( PtrVt, dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(DAG.getMachineFunction()), /* Alignment = */ 0, MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant); } else if (Subtarget->isTargetCOFF()) { assert(Subtarget->isTargetWindows() && "Windows is the only supported COFF target"); unsigned TargetFlags = GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG; Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, TargetFlags); if (GV->hasDLLImportStorageClass()) Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee), MachinePointerInfo::getGOT(DAG.getMachineFunction())); } else { Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0); } } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { isDirect = true; // tBX takes a register source operand. 
const char *Sym = S->getSymbol(); if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, ARMPCLabelIndex, 4); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad( PtrVt, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel); } else { Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0); } } // FIXME: handle tail calls differently. unsigned CallOpc; if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; else CallOpc = ARMISD::CALL; } else { if (!isDirect && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() && // Emit regular call when code size is the priority !MF.getFunction().optForMinSize()) // "mov lr, pc; b _foo" to avoid confusing the RSP CallOpc = ARMISD::CALL_NOLINK; else CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL; } std::vector Ops; Ops.push_back(Chain); Ops.push_back(Callee); // Add argument registers to the end of the list so that they are known live // into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. if (!isTailCall) { const uint32_t *Mask; const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo(); if (isThisReturn) { // For 'this' returns, use the R0-preserving mask if applicable Mask = ARI->getThisReturnPreservedMask(MF, CallConv); if (!Mask) { // Set isThisReturn to false if the calling convention is not one that // allows 'returned' to be modeled in this way, so LowerCallResult does // not try to pass 'this' straight through isThisReturn = false; Mask = ARI->getCallPreservedMask(MF, CallConv); } } else Mask = ARI->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); } if (InFlag.getNode()) Ops.push_back(InFlag); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); if (isTailCall) { MF.getFrameInfo().setHasTailCall(); return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); } // Returns a chain and a flag for retval copy to use. Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), DAG.getIntPtrConstant(0, dl, true), InFlag, dl); if (!Ins.empty()) InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we // return. return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, InVals, isThisReturn, isThisReturn ? OutVals[0] : SDValue()); } /// HandleByVal - Every parameter *after* a byval parameter is passed /// on the stack. Remember the next parameter register to allocate, /// and then confiscate the rest of the parameter registers to insure /// this. void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, unsigned Align) const { // Byval (as with any stack) slots are always at least 4 byte aligned. 
  Align = std::max(Align, 4U);

  unsigned Reg = State->AllocateReg(GPRArgRegs);
  if (!Reg)
    return;

  unsigned AlignInRegs = Align / 4;
  unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
  for (unsigned i = 0; i < Waste; ++i)
    Reg = State->AllocateReg(GPRArgRegs);

  if (!Reg)
    return;

  unsigned Excess = 4 * (ARM::R4 - Reg);

  // Special case when NSAA != SP and parameter size greater than size of
  // all remaining GPR regs. In that case we can't split parameter, we must
  // send it to stack. We also must set NCRN to R4, so waste all
  // remaining registers.
  const unsigned NSAAOffset = State->getNextStackOffset();
  if (NSAAOffset != 0 && Size > Excess) {
    while (State->AllocateReg(GPRArgRegs))
      ;
    return;
  }

  // First register for byval parameter is the first register that wasn't
  // allocated before this method call, so it would be "reg".
  // If parameter is small enough to be saved in range [reg, r4), then
  // the end (first after last) register would be reg + param-size-in-regs,
  // else parameter would be split between registers and stack,
  // end register would be r4 in this case.
  unsigned ByValRegBegin = Reg;
  unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
  State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
  // Note, first register is allocated in the beginning of function already,
  // allocate the remaining number of registers we need.
  for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
    State->AllocateReg(GPRArgRegs);
  // A byval parameter that is split between registers and memory needs its
  // size truncated here.
  // In the case where the entire structure fits in registers, we set the
  // size in memory to zero.
  Size = std::max<int>(Size - Excess, 0);
}

/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
static bool MatchingStackOffset(SDValue Arg, unsigned Offset,
                                ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI,
                                const MachineRegisterInfo *MRI,
                                const TargetInstrInfo *TII) {
  unsigned Bytes = Arg.getValueSizeInBits() / 8;
  int FI = std::numeric_limits<int>::max();
  if (Arg.getOpcode() == ISD::CopyFromReg) {
    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
    if (!TargetRegisterInfo::isVirtualRegister(VR))
      return false;
    MachineInstr *Def = MRI->getVRegDef(VR);
    if (!Def)
      return false;
    if (!Flags.isByVal()) {
      if (!TII->isLoadFromStackSlot(*Def, FI))
        return false;
    } else {
      return false;
    }
  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
    if (Flags.isByVal())
      // ByVal argument is passed in as a pointer but it's now being
      // dereferenced. e.g.
      // define @foo(%struct.X* %A) {
      //   tail call @bar(%struct.X* byval %A)
      // }
      return false;
    SDValue Ptr = Ld->getBasePtr();
    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
    if (!FINode)
      return false;
    FI = FINode->getIndex();
  } else
    return false;

  assert(FI != std::numeric_limits<int>::max());
  if (!MFI.isFixedObjectIndex(FI))
    return false;
  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet, bool isCallerStructRet, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, SelectionDAG& DAG) const { MachineFunction &MF = DAG.getMachineFunction(); const Function &CallerF = MF.getFunction(); CallingConv::ID CallerCC = CallerF.getCallingConv(); assert(Subtarget->supportsTailCall()); // Tail calls to function pointers cannot be optimized for Thumb1 if the args // to the call take up r0-r3. The reason is that there are no legal registers // left to hold the pointer to the function to be called. if (Subtarget->isThumb1Only() && Outs.size() >= 4 && !isa(Callee.getNode())) return false; // Look for obvious safe cases to perform tail call optimization that do not // require ABI changes. This is what gcc calls sibcall. // Exception-handling functions need a special set of instructions to indicate // a return to the hardware. Tail-calling another function would probably // break this. if (CallerF.hasFnAttribute("interrupt")) return false; // Also avoid sibcall optimization if either caller or callee uses struct // return semantics. if (isCalleeStructRet || isCallerStructRet) return false; // Externally-defined functions with weak linkage should not be // tail-called on ARM when the OS does not support dynamic // pre-emption of symbols, as the AAELF spec requires normal calls // to undefined weak functions to be replaced with a NOP or jump to the // next instruction. The behaviour of branch instructions in this // situation (as used for tail calls) is implementation-defined, so we // cannot rely on the linker replacing the tail call with a return. if (GlobalAddressSDNode *G = dyn_cast(Callee)) { const GlobalValue *GV = G->getGlobal(); const Triple &TT = getTargetMachine().getTargetTriple(); if (GV->hasExternalWeakLinkage() && (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO())) return false; } // Check that the call results are passed in the same way. LLVMContext &C = *DAG.getContext(); if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, CCAssignFnForReturn(CalleeCC, isVarArg), CCAssignFnForReturn(CallerCC, isVarArg))) return false; // The callee has to preserve all registers the caller needs to preserve. const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); if (CalleeCC != CallerCC) { const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) return false; } // If Caller's vararg or byval argument has been split between registers and // stack, do not perform tail call, since part of the argument is in caller's // local frame. const ARMFunctionInfo *AFI_Caller = MF.getInfo(); if (AFI_Caller->getArgRegsSaveSize()) return false; // If the callee takes no arguments then go on to check the results of the // call. if (!Outs.empty()) { // Check if stack adjustment is needed. For now, do not do this if any // argument is passed on the stack. SmallVector ArgLocs; CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg)); if (CCInfo.getNextStackOffset()) { // Check if the arguments are already laid out in the right way as // the caller's fixed stack objects. 
MachineFrameInfo &MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; ++i, ++realArgIdx) { CCValAssign &VA = ArgLocs[i]; EVT RegVT = VA.getLocVT(); SDValue Arg = OutVals[realArgIdx]; ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; if (VA.getLocInfo() == CCValAssign::Indirect) return false; if (VA.needsCustom()) { // f64 and vector types are split into multiple registers or // register/stack-slot combinations. The types will not match // the registers; give up on memory f64 refs until we figure // out what to do about this. if (!VA.isRegLoc()) return false; if (!ArgLocs[++i].isRegLoc()) return false; if (RegVT == MVT::v2f64) { if (!ArgLocs[++i].isRegLoc()) return false; if (!ArgLocs[++i].isRegLoc()) return false; } } else if (!VA.isRegLoc()) { if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI, TII)) return false; } } } const MachineRegisterInfo &MRI = MF.getRegInfo(); if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) return false; } return true; } bool ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg)); } static SDValue LowerInterruptReturn(SmallVectorImpl &RetOps, const SDLoc &DL, SelectionDAG &DAG) { const MachineFunction &MF = DAG.getMachineFunction(); const Function &F = MF.getFunction(); StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString(); // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset // version of the "preferred return address". These offsets affect the return // instruction if this is a return from PL1 without hypervisor extensions. // IRQ/FIQ: +4 "subs pc, lr, #4" // SWI: 0 "subs pc, lr, #0" // ABORT: +4 "subs pc, lr, #4" // UNDEF: +4/+2 "subs pc, lr, #0" // UNDEF varies depending on where the exception came from ARM or Thumb // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0. int64_t LROffset; if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" || IntKind == "ABORT") LROffset = 4; else if (IntKind == "SWI" || IntKind == "UNDEF") LROffset = 0; else report_fatal_error("Unsupported interrupt attribute. If present, value " "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF"); RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, DL, MVT::i32, false)); return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps); } SDValue ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location. SmallVector RVLocs; // CCState - Info about the registers and stack slots. CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); // Analyze outgoing return values. CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg)); SDValue Flag; SmallVector RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) bool isLittleEndian = Subtarget->isLittle(); MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); AFI->setReturnRegsCount(RVLocs.size()); // Copy the result values into the output registers. 
for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size(); ++i, ++realRVLocIdx) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); SDValue Arg = OutVals[realRVLocIdx]; bool ReturnF16 = false; if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) { // Half-precision return values can be returned like this: // // t11 f16 = fadd ... // t12: i16 = bitcast t11 // t13: i32 = zero_extend t12 // t14: f32 = bitcast t13 <~~~~~~~ Arg // // to avoid code generation for bitcasts, we simply set Arg to the node // that produces the f16 value, t11 in this case. // if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) { SDValue ZE = Arg.getOperand(0); if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) { SDValue BC = ZE.getOperand(0); if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) { Arg = BC.getOperand(0); ReturnF16 = true; } } } } switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: if (!ReturnF16) Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); break; } if (VA.needsCustom()) { if (VA.getLocVT() == MVT::v2f64) { // Extract the first half and return it in two registers. SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, DAG.getConstant(0, dl, MVT::i32)); SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Half); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc // Extract the 2nd half and fall through to handle it as an f64 value. Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, DAG.getConstant(1, dl, MVT::i32)); } // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is // available. SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(isLittleEndian ? 0 : 1), Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(isLittleEndian ? 1 : 0), Flag); } else Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); // Guarantee that all emitted copies are // stuck together, avoiding something bad. Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), ReturnF16 ? MVT::f16 : VA.getLocVT())); } const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); if (I) { for (; *I; ++I) { if (ARM::GPRRegClass.contains(*I)) RetOps.push_back(DAG.getRegister(*I, MVT::i32)); else if (ARM::DPRRegClass.contains(*I)) RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); } } // Update chain and glue. 
RetOps[0] = Chain; if (Flag.getNode()) RetOps.push_back(Flag); // CPUs which aren't M-class use a special sequence to return from // exceptions (roughly, any instruction setting pc and cpsr simultaneously, // though we use "subs pc, lr, #N"). // // M-class CPUs actually use a normal return sequence with a special // (hardware-provided) value in LR, so the normal code path works. if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") && !Subtarget->isMClass()) { if (Subtarget->isThumb1Only()) report_fatal_error("interrupt attribute is not supported in Thumb1"); return LowerInterruptReturn(RetOps, dl, DAG); } return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps); } bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { if (N->getNumValues() != 1) return false; if (!N->hasNUsesOfValue(1, 0)) return false; SDValue TCChain = Chain; SDNode *Copy = *N->use_begin(); if (Copy->getOpcode() == ISD::CopyToReg) { // If the copy has a glue operand, we conservatively assume it isn't safe to // perform a tail call. if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) return false; TCChain = Copy->getOperand(0); } else if (Copy->getOpcode() == ARMISD::VMOVRRD) { SDNode *VMov = Copy; // f64 returned in a pair of GPRs. SmallPtrSet Copies; for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); UI != UE; ++UI) { if (UI->getOpcode() != ISD::CopyToReg) return false; Copies.insert(*UI); } if (Copies.size() > 2) return false; for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); UI != UE; ++UI) { SDValue UseChain = UI->getOperand(0); if (Copies.count(UseChain.getNode())) // Second CopyToReg Copy = *UI; else { // We are at the top of this chain. // If the copy has a glue operand, we conservatively assume it // isn't safe to perform a tail call. if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue) return false; // First CopyToReg TCChain = UseChain; } } } else if (Copy->getOpcode() == ISD::BITCAST) { // f32 returned in a single GPR. if (!Copy->hasOneUse()) return false; Copy = *Copy->use_begin(); if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) return false; // If the copy has a glue operand, we conservatively assume it isn't safe to // perform a tail call. if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) return false; TCChain = Copy->getOperand(0); } else { return false; } bool HasRet = false; for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); UI != UE; ++UI) { if (UI->getOpcode() != ARMISD::RET_FLAG && UI->getOpcode() != ARMISD::INTRET_FLAG) return false; HasRet = true; } if (!HasRet) return false; Chain = TCChain; return true; } bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { if (!Subtarget->supportsTailCall()) return false; auto Attr = CI->getParent()->getParent()->getFnAttribute("disable-tail-calls"); if (!CI->isTailCall() || Attr.getValueAsString() == "true") return false; return true; } // Trying to write a 64 bit value so need to split into two 32 bit values first, // and pass the lower and high parts through. static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) { SDLoc DL(Op); SDValue WriteValue = Op->getOperand(2); // This function is only supposed to be called for i64 type argument. 
assert(WriteValue.getValueType() == MVT::i64 && "LowerWRITE_REGISTER called for non-i64 type argument."); SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue, DAG.getConstant(0, DL, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue, DAG.getConstant(1, DL, MVT::i32)); SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi }; return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops); } // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is // one of the above mentioned nodes. It has to be wrapped because otherwise // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only // be used to form addressing mode. These wrapped nodes will be selected // into MOVi. SDValue ARMTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); // FIXME there is no actual debug info here SDLoc dl(Op); ConstantPoolSDNode *CP = cast(Op); SDValue Res; // When generating execute-only code Constant Pools must be promoted to the // global data section. It's a bit ugly that we can't share them across basic // blocks, but this way we guarantee that execute-only behaves correct with // position-independent addressing modes. if (Subtarget->genExecuteOnly()) { auto AFI = DAG.getMachineFunction().getInfo(); auto T = const_cast(CP->getType()); auto C = const_cast(CP->getConstVal()); auto M = const_cast(DAG.getMachineFunction(). getFunction().getParent()); auto GV = new GlobalVariable( *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C, Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" + Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" + Twine(AFI->createPICLabelUId()) ); SDValue GA = DAG.getTargetGlobalAddress(dyn_cast(GV), dl, PtrVT); return LowerGlobalAddress(GA, DAG); } if (CP->isMachineConstantPoolEntry()) Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlignment()); else Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment()); return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); } unsigned ARMTargetLowering::getJumpTableEncoding() const { return MachineJumpTableInfo::EK_Inline; } SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); unsigned ARMPCLabelIndex = 0; SDLoc DL(Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); const BlockAddress *BA = cast(Op)->getBlockAddress(); SDValue CPAddr; bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI(); if (!IsPositionIndependent) { CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); } else { unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex, ARMCP::CPBlockAddress, PCAdj); CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); } CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); SDValue Result = DAG.getLoad( PtrVT, DL, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); if (!IsPositionIndependent) return Result; SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32); return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); } /// Convert a TLS address reference into the correct sequence of loads /// and calls to compute the variable's address for Darwin, and return an /// SDValue containing the final node. /// Darwin only has one TLS scheme which must be capable of dealing with the /// fully general situation, in the worst case. This means: /// + "extern __thread" declaration. /// + Defined in a possibly unknown dynamic library. /// /// The general system is that each __thread variable has a [3 x i32] descriptor /// which contains information used by the runtime to calculate the address. The /// only part of this the compiler needs to know about is the first word, which /// contains a function pointer that must be called with the address of the /// entire descriptor in "r0". /// /// Since this descriptor may be in a different unit, in general access must /// proceed along the usual ARM rules. A common sequence to produce is: /// /// movw rT1, :lower16:_var$non_lazy_ptr /// movt rT1, :upper16:_var$non_lazy_ptr /// ldr r0, [rT1] /// ldr rT2, [r0] /// blx rT2 /// [...address now in r0...] SDValue ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetDarwin() && "This function expects a Darwin target"); SDLoc DL(Op); // First step is to get the address of the actua global symbol. This is where // the TLS descriptor lives. SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG); // The first entry in the descriptor is a function pointer that we must call // to obtain the address of the variable. SDValue Chain = DAG.getEntryNode(); SDValue FuncTLVGet = DAG.getLoad( MVT::i32, DL, Chain, DescAddr, MachinePointerInfo::getGOT(DAG.getMachineFunction()), /* Alignment = */ 4, MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant); Chain = FuncTLVGet.getValue(1); MachineFunction &F = DAG.getMachineFunction(); MachineFrameInfo &MFI = F.getFrameInfo(); MFI.setAdjustsStack(true); // TLS calls preserve all registers except those that absolutely must be // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be // silly). auto TRI = getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo(); auto ARI = static_cast(TRI); const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction()); // Finally, we can make the call. This is just a degenerate version of a // normal AArch64 call node: r0 takes the address of the descriptor, and // returns the address of the variable in this thread. 
Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue()); Chain = DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue), Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32), DAG.getRegisterMask(Mask), Chain.getValue(1)); return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1)); } SDValue ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering"); SDValue Chain = DAG.getEntryNode(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); // Load the current TEB (thread environment block) SDValue Ops[] = {Chain, DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32), DAG.getConstant(15, DL, MVT::i32), DAG.getConstant(0, DL, MVT::i32), DAG.getConstant(13, DL, MVT::i32), DAG.getConstant(0, DL, MVT::i32), DAG.getConstant(2, DL, MVT::i32)}; SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList(MVT::i32, MVT::Other), Ops); SDValue TEB = CurrentTEB.getValue(0); Chain = CurrentTEB.getValue(1); // Load the ThreadLocalStoragePointer from the TEB // A pointer to the TLS array is located at offset 0x2c from the TEB. SDValue TLSArray = DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL)); TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo()); // The pointer to the thread's TLS data area is at the TLS Index scaled by 4 // offset into the TLSArray. // Load the TLS index from the C runtime SDValue TLSIndex = DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG); TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex); TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo()); SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex, DAG.getConstant(2, DL, MVT::i32)); SDValue TLS = DAG.getLoad(PtrVT, DL, Chain, DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot), MachinePointerInfo()); // Get the offset of the start of the .tls section (section base) const auto *GA = cast(Op); auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL); SDValue Offset = DAG.getLoad( PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32, DAG.getTargetConstantPool(CPV, PtrVT, 4)), MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset); } // Lower ISD::GlobalTLSAddress using the "general dynamic" model SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG) const { SDLoc dl(GA); EVT PtrVT = getPointerTy(DAG.getDataLayout()); unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); Argument = DAG.getLoad( PtrVT, dl, DAG.getEntryNode(), Argument, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); SDValue Chain = Argument.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); // call __tls_get_addr. 
ArgListTy Args; ArgListEntry Entry; Entry.Node = Argument; Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); Args.push_back(Entry); // FIXME: is there useful debug info available here? TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain).setLibCallee( CallingConv::C, Type::getInt32Ty(*DAG.getContext()), DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args)); std::pair CallResult = LowerCallTo(CLI); return CallResult.first; } // Lower ISD::GlobalTLSAddress using the "initial exec" or // "local exec" model. SDValue ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, SelectionDAG &DAG, TLSModel::Model model) const { const GlobalValue *GV = GA->getGlobal(); SDLoc dl(GA); SDValue Offset; SDValue Chain = DAG.getEntryNode(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Get the Thread Pointer SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); if (model == TLSModel::InitialExec) { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); // Initial exec model. unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad( PtrVT, dl, Chain, Offset, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); Chain = Offset.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); Offset = DAG.getLoad( PtrVT, dl, Chain, Offset, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } else { // local exec model assert(model == TLSModel::LocalExec); ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad( PtrVT, dl, Chain, Offset, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } // The address of the thread local variable is the add of the thread // pointer with the offset of the variable. return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); } SDValue ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *GA = cast(Op); if (DAG.getTarget().useEmulatedTLS()) return LowerToTLSEmulatedModel(GA, DAG); if (Subtarget->isTargetDarwin()) return LowerGlobalTLSAddressDarwin(Op, DAG); if (Subtarget->isTargetWindows()) return LowerGlobalTLSAddressWindows(Op, DAG); // TODO: implement the "local dynamic" model assert(Subtarget->isTargetELF() && "Only ELF implemented here"); TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); switch (model) { case TLSModel::GeneralDynamic: case TLSModel::LocalDynamic: return LowerToTLSGeneralDynamicModel(GA, DAG); case TLSModel::InitialExec: case TLSModel::LocalExec: return LowerToTLSExecModels(GA, DAG, model); } llvm_unreachable("bogus TLS model"); } /// Return true if all users of V are within function F, looking through /// ConstantExprs. 
static bool allUsersAreInFunction(const Value *V, const Function *F) { SmallVector Worklist; for (auto *U : V->users()) Worklist.push_back(U); while (!Worklist.empty()) { auto *U = Worklist.pop_back_val(); if (isa(U)) { for (auto *UU : U->users()) Worklist.push_back(UU); continue; } auto *I = dyn_cast(U); if (!I || I->getParent()->getParent() != F) return false; } return true; } static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, const GlobalValue *GV, SelectionDAG &DAG, EVT PtrVT, const SDLoc &dl) { // If we're creating a pool entry for a constant global with unnamed address, // and the global is small enough, we can emit it inline into the constant pool // to save ourselves an indirection. // // This is a win if the constant is only used in one function (so it doesn't // need to be duplicated) or duplicating the constant wouldn't increase code // size (implying the constant is no larger than 4 bytes). const Function &F = DAG.getMachineFunction().getFunction(); // We rely on this decision to inline being idemopotent and unrelated to the // use-site. We know that if we inline a variable at one use site, we'll // inline it elsewhere too (and reuse the constant pool entry). Fast-isel // doesn't know about this optimization, so bail out if it's enabled else // we could decide to inline here (and thus never emit the GV) but require // the GV from fast-isel generated code. if (!EnableConstpoolPromotion || DAG.getMachineFunction().getTarget().Options.EnableFastISel) return SDValue(); auto *GVar = dyn_cast(GV); if (!GVar || !GVar->hasInitializer() || !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() || !GVar->hasLocalLinkage()) return SDValue(); // If we inline a value that contains relocations, we move the relocations // from .data to .text. This is not allowed in position-independent code. auto *Init = GVar->getInitializer(); if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) && Init->needsRelocation()) return SDValue(); // The constant islands pass can only really deal with alignment requests // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote // any type wanting greater alignment requirements than 4 bytes. We also // can only promote constants that are multiples of 4 bytes in size or // are paddable to a multiple of 4. Currently we only try and pad constants // that are strings for simplicity. auto *CDAInit = dyn_cast(Init); unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType()); unsigned Align = DAG.getDataLayout().getPreferredAlignment(GVar); unsigned RequiredPadding = 4 - (Size % 4); bool PaddingPossible = RequiredPadding == 4 || (CDAInit && CDAInit->isString()); if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize || Size == 0) return SDValue(); unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding); MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); // We can't bloat the constant pool too much, else the ConstantIslands pass // may fail to converge. If we haven't promoted this global yet (it may have // multiple uses), and promoting it would increase the constant pool size (Sz // > 4), ensure we have space to do so up to MaxTotal. if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4) if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >= ConstpoolPromotionMaxTotal) return SDValue(); // This is only valid if all users are in a single function; we can't clone // the constant in general. 
The LLVM IR unnamed_addr allows merging // constants, but not cloning them. // // We could potentially allow cloning if we could prove all uses of the // constant in the current function don't care about the address, like // printf format strings. But that isn't implemented for now. if (!allUsersAreInFunction(GVar, &F)) return SDValue(); // We're going to inline this global. Pad it out if needed. if (RequiredPadding != 4) { StringRef S = CDAInit->getAsString(); SmallVector V(S.size()); std::copy(S.bytes_begin(), S.bytes_end(), V.begin()); while (RequiredPadding--) V.push_back(0); Init = ConstantDataArray::get(*DAG.getContext(), V); } auto CPVal = ARMConstantPoolConstant::Create(GVar, Init); SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4); if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) { AFI->markGlobalAsPromotedToConstantPool(GVar); AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() + PaddedSize - 4); } ++NumConstpoolPromoted; return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); } bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const { if (const GlobalAlias *GA = dyn_cast(GV)) if (!(GV = GA->getBaseObject())) return false; if (const auto *V = dyn_cast(GV)) return V->isConstant(); return isa(GV); } SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { switch (Subtarget->getTargetTriple().getObjectFormat()) { default: llvm_unreachable("unknown object format"); case Triple::COFF: return LowerGlobalAddressWindows(Op, DAG); case Triple::ELF: return LowerGlobalAddressELF(Op, DAG); case Triple::MachO: return LowerGlobalAddressDarwin(Op, DAG); } } SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); const GlobalValue *GV = cast(Op)->getGlobal(); const TargetMachine &TM = getTargetMachine(); bool IsRO = isReadOnly(GV); // promoteToConstantPool only if not generating XO text section if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly()) if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl)) return V; if (isPositionIndependent()) { bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV); SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, UseGOT_PREL ? ARMII::MO_GOT : 0); SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G); if (UseGOT_PREL) Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, MachinePointerInfo::getGOT(DAG.getMachineFunction())); return Result; } else if (Subtarget->isROPI() && IsRO) { // PC-relative. SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT); SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G); return Result; } else if (Subtarget->isRWPI() && !IsRO) { // SB-relative. 
SDValue RelAddr; if (Subtarget->useMovt(DAG.getMachineFunction())) { ++NumMovwMovt; SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL); RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G); } else { // use literal pool for address constant ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, ARMCP::SBREL); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); RelAddr = DAG.getLoad( PtrVT, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT); SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr); return Result; } // If we have T2 ops, we can materialize the address directly via movt/movw // pair. This is always cheaper. if (Subtarget->useMovt(DAG.getMachineFunction())) { ++NumMovwMovt; // FIXME: Once remat is capable of dealing with instructions with register // operands, expand this into two nodes. return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, DAG.getTargetGlobalAddress(GV, dl, PtrVT)); } else { SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); return DAG.getLoad( PtrVT, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } } SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const { assert(!Subtarget->isROPI() && !Subtarget->isRWPI() && "ROPI/RWPI not currently supported for Darwin"); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); const GlobalValue *GV = cast(Op)->getGlobal(); if (Subtarget->useMovt(DAG.getMachineFunction())) ++NumMovwMovt; // FIXME: Once remat is capable of dealing with instructions with register // operands, expand this into multiple nodes unsigned Wrapper = isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper; SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY); SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G); if (Subtarget->isGVIndirectSymbol(GV)) Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, MachinePointerInfo::getGOT(DAG.getMachineFunction())); return Result; } SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported"); assert(Subtarget->useMovt(DAG.getMachineFunction()) && "Windows on ARM expects to use movw/movt"); assert(!Subtarget->isROPI() && !Subtarget->isRWPI() && "ROPI/RWPI not currently supported for Windows"); const TargetMachine &TM = getTargetMachine(); const GlobalValue *GV = cast(Op)->getGlobal(); ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG; if (GV->hasDLLImportStorageClass()) TargetFlags = ARMII::MO_DLLIMPORT; else if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) TargetFlags = ARMII::MO_COFFSTUB; EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result; SDLoc DL(Op); ++NumMovwMovt; // FIXME: Once remat is capable of dealing with instructions with register // operands, expand this into two nodes. 
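  // For illustration ("g" is an example symbol; spelling approximate): with
  // MO_DLLIMPORT the movw/movt pair below materializes the address of the
  // import-table slot (the __imp_g symbol) rather than g itself, and the
  // extra load that follows fetches the real address, roughly
  //   movw r0, :lower16:__imp_g
  //   movt r0, :upper16:__imp_g
  //   ldr  r0, [r0]
  // MO_COFFSTUB takes the same load path, only through a stub slot.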
Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0, TargetFlags)); if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB)) Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo::getGOT(DAG.getMachineFunction())); return Result; } SDValue ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); SDValue Val = DAG.getConstant(0, dl, MVT::i32); return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), Op.getOperand(1), Val); } SDValue ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32)); } SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other, Op.getOperand(0)); } SDValue ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const { unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); SDLoc dl(Op); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. case Intrinsic::thread_pointer: { EVT PtrVT = getPointerTy(DAG.getDataLayout()); return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); } case Intrinsic::eh_sjlj_lsda: { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue CPAddr; bool IsPositionIndependent = isPositionIndependent(); unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0; ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex, ARMCP::CPLSDA, PCAdj); CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad( PtrVT, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); if (IsPositionIndependent) { SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } return Result; } case Intrinsic::arm_neon_vabs: return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(), Op.getOperand(1)); case Intrinsic::arm_neon_vmulls: case Intrinsic::arm_neon_vmullu: { unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) ? ARMISD::VMULLs : ARMISD::VMULLu; return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } case Intrinsic::arm_neon_vminnm: case Intrinsic::arm_neon_vmaxnm: { unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm) ? ISD::FMINNUM : ISD::FMAXNUM; return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } case Intrinsic::arm_neon_vminu: case Intrinsic::arm_neon_vmaxu: { if (Op.getValueType().isFloatingPoint()) return SDValue(); unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu) ? ISD::UMIN : ISD::UMAX; return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } case Intrinsic::arm_neon_vmins: case Intrinsic::arm_neon_vmaxs: { // v{min,max}s is overloaded between signed integers and floats. if (!Op.getValueType().isFloatingPoint()) { unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins) ? 
ISD::SMIN : ISD::SMAX; return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins) ? ISD::FMINIMUM : ISD::FMAXIMUM; return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } case Intrinsic::arm_neon_vtbl1: return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); case Intrinsic::arm_neon_vtbl2: return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); } } static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { SDLoc dl(Op); ConstantSDNode *SSIDNode = cast(Op.getOperand(2)); auto SSID = static_cast(SSIDNode->getZExtValue()); if (SSID == SyncScope::SingleThread) return Op; if (!Subtarget->hasDataBarrier()) { // Some ARMv6 cpus can support data barriers with an mcr instruction. // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get // here. assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!"); return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), DAG.getConstant(0, dl, MVT::i32)); } ConstantSDNode *OrdN = cast(Op.getOperand(1)); AtomicOrdering Ord = static_cast(OrdN->getZExtValue()); ARM_MB::MemBOpt Domain = ARM_MB::ISH; if (Subtarget->isMClass()) { // Only a full system barrier exists in the M-class architectures. Domain = ARM_MB::SY; } else if (Subtarget->preferISHSTBarriers() && Ord == AtomicOrdering::Release) { // Swift happens to implement ISHST barriers in a way that's compatible with // Release semantics but weaker than ISH so we'd be fools not to use // it. Beware: other processors probably don't! Domain = ARM_MB::ISHST; } return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0), DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32), DAG.getConstant(Domain, dl, MVT::i32)); } static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { // ARM pre v5TE and Thumb1 does not have preload instructions. if (!(Subtarget->isThumb2() || (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) // Just preserve the chain. return Op.getOperand(0); SDLoc dl(Op); unsigned isRead = ~cast(Op.getOperand(2))->getZExtValue() & 1; if (!isRead && (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) // ARMv7 with MP extension has PLDW. return Op.getOperand(0); unsigned isData = cast(Op.getOperand(4))->getZExtValue(); if (Subtarget->isThumb()) { // Invert the bits. isRead = ~isRead & 1; isData = ~isData & 1; } return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32), DAG.getConstant(isData, dl, MVT::i32)); } static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *FuncInfo = MF.getInfo(); // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. 
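  // For illustration: given an example function
  //   void f(int n, ...) { va_list ap; va_start(ap, n); ... }
  // only r0 is used by the fixed argument, so r1-r3 are spilled next to the
  // caller's stack arguments (see StoreByValRegs below) and va_start simply
  // stores the address of that spill area (VarArgsFrameIndex) into ap.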
SDLoc dl(Op); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), MachinePointerInfo(SV)); } SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, const SDLoc &dl) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); const TargetRegisterClass *RC; if (AFI->isThumb1OnlyFunction()) RC = &ARM::tGPRRegClass; else RC = &ARM::GPRRegClass; // Transform the arguments stored in physical registers into virtual ones. unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); SDValue ArgValue2; if (NextVA.isMemLoc()) { MachineFrameInfo &MFI = MF.getFrameInfo(); int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true); // Create load node to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); ArgValue2 = DAG.getLoad( MVT::i32, dl, Root, FIN, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); } else { Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); } if (!Subtarget->isLittle()) std::swap (ArgValue, ArgValue2); return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); } // The remaining GPRs hold either the beginning of variable-argument // data, or the beginning of an aggregate passed by value (usually // byval). Either way, we allocate stack slots adjacent to the data // provided by our caller, and store the unallocated registers there. // If this is a variadic function, the va_list pointer will begin with // these values; otherwise, this reassembles a (byval) structure that // was split between registers and memory. // Return: The frame index registers were stored into. int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &dl, SDValue &Chain, const Value *OrigArg, unsigned InRegsParamRecordIdx, int ArgOffset, unsigned ArgSize) const { // Currently, two use-cases possible: // Case #1. Non-var-args function, and we meet first byval parameter. // Setup first unallocated register as first byval register; // eat all remained registers // (these two actions are performed by HandleByVal method). // Then, here, we initialize stack frame with // "store-reg" instructions. // Case #2. Var-args function, that doesn't contain byval parameters. // The same: eat all remained unallocated registers, // initialize stack frame. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo(); unsigned RBegin, REnd; if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); } else { unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs); RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx]; REnd = ARM::R4; } if (REnd != RBegin) ArgOffset = -4 * (ARM::R4 - RBegin); auto PtrVT = getPointerTy(DAG.getDataLayout()); int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false); SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT); SmallVector MemOps; const TargetRegisterClass *RC = AFI->isThumb1OnlyFunction() ? 
&ARM::tGPRRegClass : &ARM::GPRRegClass; for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) { unsigned VReg = MF.addLiveIn(Reg, RC); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(OrigArg, 4 * i)); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT)); } if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return FrameIndex; } // Setup stack frame, the va_list pointer will start from. void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &dl, SDValue &Chain, unsigned ArgOffset, unsigned TotalArgRegsSaveSize, bool ForceMutable) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); // Try to store any remaining integer argument regs // to their spots on the stack so that they may be loaded by dereferencing // the result of va_next. // If there is no regs to be stored, just point address after last // argument passed via stack. int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr, CCInfo.getInRegsParamsCount(), CCInfo.getNextStackOffset(), 4); AFI->setVarArgsFrameIndex(FrameIndex); } SDValue ARMTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo(); // Assign locations to all of the incoming arguments. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg)); SmallVector ArgValues; SDValue ArgValue; Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin(); unsigned CurArgIdx = 0; // Initially ArgRegsSaveSize is zero. // Then we increase this value each time we meet byval parameter. // We also increase this value in case of varargs function. AFI->setArgRegsSaveSize(0); // Calculate the amount of stack space that we need to allocate to store // byval and variadic arguments that are passed in registers. // We need to know this before we allocate the first byval or variadic // argument, as they will be allocated a stack slot below the CFA (Canonical // Frame Address, the stack pointer at entry to the function). 
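  // For illustration: if the first register that must be saved is r2 (say a
  // variadic function whose fixed arguments use r0 and r1), then r2 and r3
  // are stored below the CFA and the save area computed below is
  //   4 * (ARM::R4 - ArgRegBegin) = 4 * (r4 - r2) = 8 bytes.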
unsigned ArgRegBegin = ARM::R4; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount()) break; CCValAssign &VA = ArgLocs[i]; unsigned Index = VA.getValNo(); ISD::ArgFlagsTy Flags = Ins[Index].Flags; if (!Flags.isByVal()) continue; assert(VA.isMemLoc() && "unexpected byval pointer in reg"); unsigned RBegin, REnd; CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd); ArgRegBegin = std::min(ArgRegBegin, RBegin); CCInfo.nextInRegsParam(); } CCInfo.rewindByValRegsInfo(); int lastInsIndex = -1; if (isVarArg && MFI.hasVAStart()) { unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs); if (RegIdx != array_lengthof(GPRArgRegs)) ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]); } unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin); AFI->setArgRegsSaveSize(TotalArgRegsSaveSize); auto PtrVT = getPointerTy(DAG.getDataLayout()); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (Ins[VA.getValNo()].isOrigArg()) { std::advance(CurOrigArg, Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx); CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex(); } // Arguments stored in registers. if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); if (VA.needsCustom()) { // f64 and vector types are split up into multiple registers or // combinations of registers and stack slots. if (VA.getLocVT() == MVT::v2f64) { SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); VA = ArgLocs[++i]; // skip ahead to next loc SDValue ArgValue2; if (VA.isMemLoc()) { int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), FI)); } else { ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); } ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue, ArgValue1, DAG.getIntPtrConstant(0, dl)); ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue, ArgValue2, DAG.getIntPtrConstant(1, dl)); } else ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); } else { const TargetRegisterClass *RC; if (RegVT == MVT::f16) RC = &ARM::HPRRegClass; else if (RegVT == MVT::f32) RC = &ARM::SPRRegClass; else if (RegVT == MVT::f64 || RegVT == MVT::v4f16) RC = &ARM::DPRRegClass; else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16) RC = &ARM::QPRRegClass; else if (RegVT == MVT::i32) RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass; else llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); // Transform the arguments in physical registers into virtual ones. unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); } // If this is an 8 or 16-bit value, it is really passed promoted // to 32 bits. Insert an assert[sz]ext to capture this, then // truncate to the right size. 
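      // For illustration: an "i8 signext %c" argument arrives in the low bits
      // of an i32 register, so the SExt case below produces
      //   (trunc i8 (AssertSext (CopyFromReg), i8))
      // recording the known sign extension without emitting any extra code.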
switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); break; case CCValAssign::SExt: ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); break; case CCValAssign::ZExt: ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); break; } InVals.push_back(ArgValue); } else { // VA.isRegLoc() // sanity check assert(VA.isMemLoc()); assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); int index = VA.getValNo(); // Some Ins[] entries become multiple ArgLoc[] entries. // Process them only once. if (index != lastInsIndex) { ISD::ArgFlagsTy Flags = Ins[index].Flags; // FIXME: For now, all byval parameter objects are marked mutable. // This can be changed with more analysis. // In case of tail call optimization mark all arguments mutable. // Since they could be overwritten by lowering of arguments in case of // a tail call. if (Flags.isByVal()) { assert(Ins[index].isOrigArg() && "Byval arguments cannot be implicit"); unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed(); int FrameIndex = StoreByValRegs( CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex, VA.getLocMemOffset(), Flags.getByValSize()); InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT)); CCInfo.nextInRegsParam(); } else { unsigned FIOffset = VA.getLocMemOffset(); int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8, FIOffset, true); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), FI))); } lastInsIndex = index; } } } // varargs if (isVarArg && MFI.hasVAStart()) VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(), TotalArgRegsSaveSize); AFI->setArgumentStackSize(CCInfo.getNextStackOffset()); return Chain; } /// isFloatingPointZero - Return true if this is +0.0. static bool isFloatingPointZero(SDValue Op) { if (ConstantFPSDNode *CFP = dyn_cast(Op)) return CFP->getValueAPF().isPosZero(); else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { // Maybe this has already been legalized into the constant pool? if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { SDValue WrapperOp = Op.getOperand(1).getOperand(0); if (ConstantPoolSDNode *CP = dyn_cast(WrapperOp)) if (const ConstantFP *CFP = dyn_cast(CP->getConstVal())) return CFP->getValueAPF().isPosZero(); } } else if (Op->getOpcode() == ISD::BITCAST && Op->getValueType(0) == MVT::f64) { // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64) // created by LowerConstantFP(). SDValue BitcastOp = Op->getOperand(0); if (BitcastOp->getOpcode() == ARMISD::VMOVIMM && isNullConstant(BitcastOp->getOperand(0))) return true; } return false; } /// Returns appropriate ARM CMP (cmp) and corresponding condition code for /// the given operands. SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const { if (ConstantSDNode *RHSC = dyn_cast(RHS.getNode())) { unsigned C = RHSC->getZExtValue(); if (!isLegalICmpImmediate((int32_t)C)) { // Constant does not fit, try adjusting it by one. 
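    // For illustration: 257 (0x101) is not a valid ARM modified immediate but
    // 256 is, so "x < 257" is rewritten below as "x <= 256" (SETLT with C
    // becomes SETLE with C-1); the unsigned and greater-than cases are
    // adjusted analogously.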
switch (CC) { default: break; case ISD::SETLT: case ISD::SETGE: if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; RHS = DAG.getConstant(C - 1, dl, MVT::i32); } break; case ISD::SETULT: case ISD::SETUGE: if (C != 0 && isLegalICmpImmediate(C-1)) { CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; RHS = DAG.getConstant(C - 1, dl, MVT::i32); } break; case ISD::SETLE: case ISD::SETGT: if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) { CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; RHS = DAG.getConstant(C + 1, dl, MVT::i32); } break; case ISD::SETULE: case ISD::SETUGT: if (C != 0xffffffff && isLegalICmpImmediate(C+1)) { CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; RHS = DAG.getConstant(C + 1, dl, MVT::i32); } break; } } } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) && (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) { // In ARM and Thumb-2, the compare instructions can shift their second // operand. CC = ISD::getSetCCSwappedOperands(CC); std::swap(LHS, RHS); } ARMCC::CondCodes CondCode = IntCCToARMCC(CC); ARMISD::NodeType CompareType; switch (CondCode) { default: CompareType = ARMISD::CMP; break; case ARMCC::EQ: case ARMCC::NE: // Uses only Z Flag CompareType = ARMISD::CMPZ; break; } ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS); } /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl, bool InvalidOnQNaN) const { assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64); SDValue Cmp; SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32); if (!isFloatingPointZero(RHS)) Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C); else Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C); return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); } /// duplicateCmp - Glue values can have only one use, so this function /// duplicates a comparison node. SDValue ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { unsigned Opc = Cmp.getOpcode(); SDLoc DL(Cmp); if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); Cmp = Cmp.getOperand(0); Opc = Cmp.getOpcode(); if (Opc == ARMISD::CMPFP) Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0), Cmp.getOperand(1), Cmp.getOperand(2)); else { assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0), Cmp.getOperand(1)); } return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); } // This function returns three things: the arithmetic computation itself // (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The // comparison and the condition code define the case in which the arithmetic // computation *does not* overflow. std::pair ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, SDValue &ARMcc) const { assert(Op.getValueType() == MVT::i32 && "Unsupported value type"); SDValue Value, OverflowCmp; SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDLoc dl(Op); // FIXME: We are currently always generating CMPs because we don't support // generating CMN through the backend. This is not as good as the natural // CMP case because it causes a register dependency and cannot be folded // later. 
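  // For illustration (UADDO): with LHS = 0xFFFFFFFF and RHS = 1 the ADDC
  // result is 0, so CMP(result, LHS) leaves the HS ("no overflow") condition
  // false and overflow is reported; with LHS = 1 and RHS = 2 the result 3 is
  // unsigned >= LHS, HS holds, and no overflow is reported.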
switch (Op.getOpcode()) { default: llvm_unreachable("Unknown overflow instruction!"); case ISD::SADDO: ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32); Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS); OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS); break; case ISD::UADDO: ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32); // We use ADDC here to correspond to its use in LowerUnsignedALUO. // We do not use it in the USUBO case as Value may not be used. Value = DAG.getNode(ARMISD::ADDC, dl, DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS) .getValue(0); OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS); break; case ISD::SSUBO: ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32); Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS); OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS); break; case ISD::USUBO: ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32); Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS); OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS); break; case ISD::UMULO: // We generate a UMUL_LOHI and then check if the high word is 0. ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32); Value = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(Op.getValueType(), Op.getValueType()), LHS, RHS); OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1), DAG.getConstant(0, dl, MVT::i32)); Value = Value.getValue(0); // We only want the low 32 bits for the result. break; case ISD::SMULO: // We generate a SMUL_LOHI and then check if all the bits of the high word // are the same as the sign bit of the low word. ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32); Value = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(Op.getValueType(), Op.getValueType()), LHS, RHS); OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1), DAG.getNode(ISD::SRA, dl, Op.getValueType(), Value.getValue(0), DAG.getConstant(31, dl, MVT::i32))); Value = Value.getValue(0); // We only want the low 32 bits for the result. break; } // switch (...) return std::make_pair(Value, OverflowCmp); } SDValue ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const { // Let legalize expand this if it isn't a legal type yet. if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) return SDValue(); SDValue Value, OverflowCmp; SDValue ARMcc; std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDLoc dl(Op); // We use 0 and 1 as false and true values. SDValue TVal = DAG.getConstant(1, dl, MVT::i32); SDValue FVal = DAG.getConstant(0, dl, MVT::i32); EVT VT = Op.getValueType(); SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, CCR, OverflowCmp); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); } static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, SelectionDAG &DAG) { SDLoc DL(BoolCarry); EVT CarryVT = BoolCarry.getValueType(); // This converts the boolean value carry into the carry flag by doing // ARMISD::SUBC Carry, 1 SDValue Carry = DAG.getNode(ARMISD::SUBC, DL, DAG.getVTList(CarryVT, MVT::i32), BoolCarry, DAG.getConstant(1, DL, CarryVT)); return Carry.getValue(1); } static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, SelectionDAG &DAG) { SDLoc DL(Flags); // Now convert the carry flag into a boolean carry. 
We do this // using ARMISD:ADDE 0, 0, Carry return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32), DAG.getConstant(0, DL, MVT::i32), DAG.getConstant(0, DL, MVT::i32), Flags); } SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const { // Let legalize expand this if it isn't a legal type yet. if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) return SDValue(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDLoc dl(Op); EVT VT = Op.getValueType(); SDVTList VTs = DAG.getVTList(VT, MVT::i32); SDValue Value; SDValue Overflow; switch (Op.getOpcode()) { default: llvm_unreachable("Unknown overflow instruction!"); case ISD::UADDO: Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS); // Convert the carry flag into a boolean value. Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG); break; case ISD::USUBO: { Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS); // Convert the carry flag into a boolean value. Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG); // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow // value. So compute 1 - C. Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32, DAG.getConstant(1, dl, MVT::i32), Overflow); break; } } return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); } SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); SDValue SelectFalse = Op.getOperand(2); SDLoc dl(Op); unsigned Opc = Cond.getOpcode(); if (Cond.getResNo() == 1 && (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || Opc == ISD::USUBO)) { if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) return SDValue(); SDValue Value, OverflowCmp; SDValue ARMcc; std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); EVT VT = Op.getValueType(); return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR, OverflowCmp, DAG); } // Convert: // // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) // if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { const ConstantSDNode *CMOVTrue = dyn_cast(Cond.getOperand(0)); const ConstantSDNode *CMOVFalse = dyn_cast(Cond.getOperand(1)); if (CMOVTrue && CMOVFalse) { unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); SDValue True; SDValue False; if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { True = SelectTrue; False = SelectFalse; } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { True = SelectFalse; False = SelectTrue; } if (True.getNode() && False.getNode()) { EVT VT = Op.getValueType(); SDValue ARMcc = Cond.getOperand(2); SDValue CCR = Cond.getOperand(3); SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); assert(True.getValueType() == VT); return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG); } } } // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the // undefined bits before doing a full-word comparison with zero. 
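  // For illustration: a condition whose upper bits are undefined, e.g. the
  // i32 value 0xFFFFFFFE carrying an i1 "false" in bit 0, would otherwise
  // compare not-equal to zero; masking with 1 first yields
  //   select ((Cond & 1) != 0), TrueVal, FalseVal
  // which is what is built below.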
Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond, DAG.getConstant(1, dl, Cond.getValueType())); return DAG.getSelectCC(dl, Cond, DAG.getConstant(0, dl, Cond.getValueType()), SelectTrue, SelectFalse, ISD::SETNE); } static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool &swpCmpOps, bool &swpVselOps) { // Start by selecting the GE condition code for opcodes that return true for // 'equality' if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE || CC == ISD::SETULE) CondCode = ARMCC::GE; // and GT for opcodes that return false for 'equality'. else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT || CC == ISD::SETULT) CondCode = ARMCC::GT; // Since we are constrained to GE/GT, if the opcode contains 'less', we need // to swap the compare operands. if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT || CC == ISD::SETULT) swpCmpOps = true; // Both GT and GE are ordered comparisons, and return false for 'unordered'. // If we have an unordered opcode, we need to swap the operands to the VSEL // instruction (effectively negating the condition). // // This also has the effect of swapping which one of 'less' or 'greater' // returns true, so we also swap the compare operands. It also switches // whether we return true for 'equality', so we compensate by picking the // opposite condition code to our original choice. if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE || CC == ISD::SETUGT) { swpCmpOps = !swpCmpOps; swpVselOps = !swpVselOps; CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT; } // 'ordered' is 'anything but unordered', so use the VS condition code and // swap the VSEL operands. if (CC == ISD::SETO) { CondCode = ARMCC::VS; swpVselOps = true; } // 'unordered or not equal' is 'anything but equal', so use the EQ condition // code and swap the VSEL operands. if (CC == ISD::SETUNE) { CondCode = ARMCC::EQ; swpVselOps = true; } } SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal, SDValue ARMcc, SDValue CCR, SDValue Cmp, SelectionDAG &DAG) const { if (Subtarget->isFPOnlySP() && VT == MVT::f64) { FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), FalseVal); TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), TrueVal); SDValue TrueLow = TrueVal.getValue(0); SDValue TrueHigh = TrueVal.getValue(1); SDValue FalseLow = FalseVal.getValue(0); SDValue FalseHigh = FalseVal.getValue(1); SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow, ARMcc, CCR, Cmp); SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh, ARMcc, CCR, duplicateCmp(Cmp, DAG)); return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High); } else { return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp); } } static bool isGTorGE(ISD::CondCode CC) { return CC == ISD::SETGT || CC == ISD::SETGE; } static bool isLTorLE(ISD::CondCode CC) { return CC == ISD::SETLT || CC == ISD::SETLE; } // See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating. // All of these conditions (and their <= and >= counterparts) will do: // x < k ? k : x // x > k ? x : k // k < x ? x : k // k > x ? 
k : x static bool isLowerSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K) { return (isGTorGE(CC) && ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) || (isLTorLE(CC) && ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))); } // Similar to isLowerSaturate(), but checks for upper-saturating conditions. static bool isUpperSaturate(const SDValue LHS, const SDValue RHS, const SDValue TrueVal, const SDValue FalseVal, const ISD::CondCode CC, const SDValue K) { return (isGTorGE(CC) && ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) || (isLTorLE(CC) && ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))); } // Check if two chained conditionals could be converted into SSAT or USAT. // // SSAT can replace a set of two conditional selectors that bound a number to an // interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples: // // x < -k ? -k : (x > k ? k : x) // x < -k ? -k : (x < k ? x : k) // x > -k ? (x > k ? k : x) : -k // x < k ? (x < -k ? -k : x) : k // etc. // // USAT works similarily to SSAT but bounds on the interval [0, k] where k + 1 is // a power of 2. // // It returns true if the conversion can be done, false otherwise. // Additionally, the variable is returned in parameter V, the constant in K and // usat is set to true if the conditional represents an unsigned saturation static bool isSaturatingConditional(const SDValue &Op, SDValue &V, uint64_t &K, bool &usat) { SDValue LHS1 = Op.getOperand(0); SDValue RHS1 = Op.getOperand(1); SDValue TrueVal1 = Op.getOperand(2); SDValue FalseVal1 = Op.getOperand(3); ISD::CondCode CC1 = cast(Op.getOperand(4))->get(); const SDValue Op2 = isa(TrueVal1) ? FalseVal1 : TrueVal1; if (Op2.getOpcode() != ISD::SELECT_CC) return false; SDValue LHS2 = Op2.getOperand(0); SDValue RHS2 = Op2.getOperand(1); SDValue TrueVal2 = Op2.getOperand(2); SDValue FalseVal2 = Op2.getOperand(3); ISD::CondCode CC2 = cast(Op2.getOperand(4))->get(); // Find out which are the constants and which are the variables // in each conditional SDValue *K1 = isa(LHS1) ? &LHS1 : isa(RHS1) ? &RHS1 : nullptr; SDValue *K2 = isa(LHS2) ? &LHS2 : isa(RHS2) ? &RHS2 : nullptr; SDValue K2Tmp = isa(TrueVal2) ? TrueVal2 : FalseVal2; SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1; SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2; SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2; // We must detect cases where the original operations worked with 16- or // 8-bit values. In such case, V2Tmp != V2 because the comparison operations // must work with sign-extended values but the select operations return // the original non-extended value. SDValue V2TmpReg = V2Tmp; if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG) V2TmpReg = V2Tmp->getOperand(0); // Check that the registers and the constants have the correct values // in both conditionals if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp || V2TmpReg != V2) return false; // Figure out which conditional is saturating the lower/upper bound. const SDValue *LowerCheckOp = isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1) ? &Op : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2 : nullptr; const SDValue *UpperCheckOp = isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1) ? &Op : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? 
&Op2 : nullptr; if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp) return false; // Check that the constant in the lower-bound check is // the opposite of the constant in the upper-bound check // in 1's complement. int64_t Val1 = cast(*K1)->getSExtValue(); int64_t Val2 = cast(*K2)->getSExtValue(); int64_t PosVal = std::max(Val1, Val2); int64_t NegVal = std::min(Val1, Val2); if (((Val1 > Val2 && UpperCheckOp == &Op) || (Val1 < Val2 && UpperCheckOp == &Op2)) && isPowerOf2_64(PosVal + 1)) { // Handle the difference between USAT (unsigned) and SSAT (signed) saturation if (Val1 == ~Val2) usat = false; else if (NegVal == 0) usat = true; else return false; V = V2; K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive return true; } return false; } // Check if a condition of the type x < k ? k : x can be converted into a // bit operation instead of conditional moves. // Currently this is allowed given: // - The conditions and values match up // - k is 0 or -1 (all ones) // This function will not check the last condition, thats up to the caller // It returns true if the transformation can be made, and in such case // returns x in V, and k in SatK. static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, SDValue &SatK) { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); ISD::CondCode CC = cast(Op.getOperand(4))->get(); SDValue TrueVal = Op.getOperand(2); SDValue FalseVal = Op.getOperand(3); SDValue *K = isa(LHS) ? &LHS : isa(RHS) ? &RHS : nullptr; // No constant operation in comparison, early out if (!K) return false; SDValue KTmp = isa(TrueVal) ? TrueVal : FalseVal; V = (KTmp == TrueVal) ? FalseVal : TrueVal; SDValue VTmp = (K && *K == LHS) ? RHS : LHS; // If the constant on left and right side, or variable on left and right, // does not match, early out if (*K != KTmp || V != VTmp) return false; if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) { SatK = *K; return true; } return false; } SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc dl(Op); // Try to convert two saturating conditional selects into a single SSAT SDValue SatValue; uint64_t SatConstant; bool SatUSat; if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) && isSaturatingConditional(Op, SatValue, SatConstant, SatUSat)) { if (SatUSat) return DAG.getNode(ARMISD::USAT, dl, VT, SatValue, DAG.getConstant(countTrailingOnes(SatConstant), dl, VT)); else return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue, DAG.getConstant(countTrailingOnes(SatConstant), dl, VT)); } // Try to convert expressions of the form x < k ? k : x (and similar forms) // into more efficient bit operations, which is possible when k is 0 or -1 // On ARM and Thumb-2 which have flexible operand 2 this will result in // single instructions. On Thumb the shift and the bit operation will be two // instructions. 
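  // For illustration: with k == 0, "x < 0 ? 0 : x" becomes x & ~(x >> 31)
  // (x = -5: -5 >> 31 = -1, ~(-1) = 0, so the result is 0; x = 7: 7 & ~0 = 7),
  // and with k == -1, "x < -1 ? -1 : x" becomes x | (x >> 31).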
// Only allow this transformation on full-width (32-bit) operations SDValue LowerSatConstant; if (VT == MVT::i32 && isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) { SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue, DAG.getConstant(31, dl, VT)); if (isNullConstant(LowerSatConstant)) { SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV, DAG.getAllOnesConstant(dl, VT)); return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV); } else if (isAllOnesConstant(LowerSatConstant)) return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV); } SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); ISD::CondCode CC = cast(Op.getOperand(4))->get(); SDValue TrueVal = Op.getOperand(2); SDValue FalseVal = Op.getOperand(3); if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, dl); // If softenSetCCOperands only returned one value, we should compare it to // zero. if (!RHS.getNode()) { RHS = DAG.getConstant(0, dl, LHS.getValueType()); CC = ISD::SETNE; } } if (LHS.getValueType() == MVT::i32) { // Try to generate VSEL on ARMv8. // The VSEL instruction can't use all the usual ARM condition // codes: it only has two bits to select the condition code, so it's // constrained to use only GE, GT, VS and EQ. // // To implement all the various ISD::SETXXX opcodes, we sometimes need to // swap the operands of the previous compare instruction (effectively // inverting the compare condition, swapping 'less' and 'greater') and // sometimes need to swap the operands to the VSEL (which inverts the // condition in the sense of firing whenever the previous condition didn't) if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) { ARMCC::CondCodes CondCode = IntCCToARMCC(CC); if (CondCode == ARMCC::LT || CondCode == ARMCC::LE || CondCode == ARMCC::VC || CondCode == ARMCC::NE) { CC = ISD::getSetCCInverse(CC, true); std::swap(TrueVal, FalseVal); } } SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); } ARMCC::CondCodes CondCode, CondCode2; bool InvalidOnQNaN; FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN); // Normalize the fp compare. If RHS is zero we keep it there so we match // CMPFPw0 instead of CMPFP. if (Subtarget->hasFPARMv8() && !isFloatingPointZero(RHS) && (TrueVal.getValueType() == MVT::f16 || TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) { bool swpCmpOps = false; bool swpVselOps = false; checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps); if (CondCode == ARMCC::GT || CondCode == ARMCC::GE || CondCode == ARMCC::VS || CondCode == ARMCC::EQ) { if (swpCmpOps) std::swap(LHS, RHS); if (swpVselOps) std::swap(TrueVal, FalseVal); } } SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); if (CondCode2 != ARMCC::AL) { SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32); // FIXME: Needs another CMP because flag can have but one use. 
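  // For illustration: conditions such as "ordered and not equal" map to a
  // pair of ARM condition codes, so a second conditional move predicated on
  // CondCode2 is chained here, and the compare is re-issued because a glue
  // result may only have a single use.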
SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN); Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG); } return Result; } /// canChangeToInt - Given the fp compare operand, return true if it is suitable /// to morph to an integer compare sequence. static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget) { SDNode *N = Op.getNode(); if (!N->hasOneUse()) // Otherwise it requires moving the value from fp to integer registers. return false; if (!N->getNumValues()) return false; EVT VT = Op.getValueType(); if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) // f32 case is generally profitable. f64 case only makes sense when vcmpe + // vmrs are very slow, e.g. cortex-a8. return false; if (isFloatingPointZero(Op)) { SeenZero = true; return true; } return ISD::isNormalLoad(N); } static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { if (isFloatingPointZero(Op)) return DAG.getConstant(0, SDLoc(Op), MVT::i32); if (LoadSDNode *Ld = dyn_cast(Op)) return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->getAlignment(), Ld->getMemOperand()->getFlags()); llvm_unreachable("Unknown VFP cmp argument!"); } static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue &RetVal1, SDValue &RetVal2) { SDLoc dl(Op); if (isFloatingPointZero(Op)) { RetVal1 = DAG.getConstant(0, dl, MVT::i32); RetVal2 = DAG.getConstant(0, dl, MVT::i32); return; } if (LoadSDNode *Ld = dyn_cast(Op)) { SDValue Ptr = Ld->getBasePtr(); RetVal1 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), Ld->getAlignment(), Ld->getMemOperand()->getFlags()); EVT PtrType = Ptr.getValueType(); unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); SDValue NewPtr = DAG.getNode(ISD::ADD, dl, PtrType, Ptr, DAG.getConstant(4, dl, PtrType)); RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr, Ld->getPointerInfo().getWithOffset(4), NewAlign, Ld->getMemOperand()->getFlags()); return; } llvm_unreachable("Unknown VFP cmp argument!"); } /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some /// f32 and even f64 comparisons to integer ones. SDValue ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); SDValue RHS = Op.getOperand(3); SDValue Dest = Op.getOperand(4); SDLoc dl(Op); bool LHSSeenZero = false; bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); bool RHSSeenZero = false; bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget); if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) { // If unsafe fp math optimization is enabled and there are no other uses of // the CMP operands, and the condition code is EQ or NE, we can optimize it // to an integer comparison. 
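  // For illustration: "x == 0.0f" can be tested with integer instructions
  // because, once the sign bit is cleared, only +0.0/-0.0 have an all-zero
  // bit pattern:
  //   (bitcast<i32>(x) & 0x7fffffff) == 0
  // which is exactly what the masking with 0x7fffffff below implements.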
if (CC == ISD::SETOEQ) CC = ISD::SETEQ; else if (CC == ISD::SETUNE) CC = ISD::SETNE; SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32); SDValue ARMcc; if (LHS.getValueType() == MVT::f32) { LHS = DAG.getNode(ISD::AND, dl, MVT::i32, bitcastf32Toi32(LHS, DAG), Mask); RHS = DAG.getNode(ISD::AND, dl, MVT::i32, bitcastf32Toi32(RHS, DAG), Mask); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, Cmp); } SDValue LHS1, LHS2; SDValue RHS1, RHS2; expandf64Toi32(LHS, DAG, LHS1, LHS2); expandf64Toi32(RHS, DAG, RHS1, RHS2); LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask); RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); ARMCC::CondCodes CondCode = IntCCToARMCC(CC); ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops); } return SDValue(); } SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Cond = Op.getOperand(1); SDValue Dest = Op.getOperand(2); SDLoc dl(Op); // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch // instruction. unsigned Opc = Cond.getOpcode(); bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) && !Subtarget->isThumb1Only(); if (Cond.getResNo() == 1 && (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || Opc == ISD::USUBO || OptimizeMul)) { // Only lower legal XALUO ops. if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) return SDValue(); // The actual operation with overflow check. SDValue Value, OverflowCmp; SDValue ARMcc; std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); // Reverse the condition code. ARMCC::CondCodes CondCode = (ARMCC::CondCodes)cast(ARMcc)->getZExtValue(); CondCode = ARMCC::getOppositeCondition(CondCode); ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, OverflowCmp); } return SDValue(); } SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); SDValue RHS = Op.getOperand(3); SDValue Dest = Op.getOperand(4); SDLoc dl(Op); if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, dl); // If softenSetCCOperands only returned one value, we should compare it to // zero. if (!RHS.getNode()) { RHS = DAG.getConstant(0, dl, LHS.getValueType()); CC = ISD::SETNE; } } // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch // instruction. unsigned Opc = LHS.getOpcode(); bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) && !Subtarget->isThumb1Only(); if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) && (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || Opc == ISD::USUBO || OptimizeMul) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { // Only lower legal XALUO ops. if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0))) return SDValue(); // The actual operation with overflow check. 
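    // For illustration: for source like
    //   if (__builtin_uadd_overflow(a, b, &r)) ...
    // the overflow bit feeds straight into the branch, so rather than
    // materializing a 0/1 value we branch on the (possibly reversed)
    // condition produced by getARMXALUOOp below.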
SDValue Value, OverflowCmp; SDValue ARMcc; std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc); if ((CC == ISD::SETNE) != isOneConstant(RHS)) { // Reverse the condition code. ARMCC::CondCodes CondCode = (ARMCC::CondCodes)cast(ARMcc)->getZExtValue(); CondCode = ARMCC::getOppositeCondition(CondCode); ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32); } SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, OverflowCmp); } if (LHS.getValueType() == MVT::i32) { SDValue ARMcc; SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, Cmp); } if (getTargetMachine().Options.UnsafeFPMath && (CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETNE || CC == ISD::SETUNE)) { if (SDValue Result = OptimizeVFPBrcond(Op, DAG)) return Result; } ARMCC::CondCodes CondCode, CondCode2; bool InvalidOnQNaN; FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN); SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); if (CondCode2 != ARMCC::AL) { ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32); SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); } return Res; } SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Table = Op.getOperand(1); SDValue Index = Op.getOperand(2); SDLoc dl(Op); EVT PTy = getPointerTy(DAG.getDataLayout()); JumpTableSDNode *JT = cast(Table); SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI); Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy)); SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index); if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) { // Thumb2 and ARMv8-M use a two-level jump. That is, it jumps into the jump table // which does another jump to the destination. This also makes it easier // to translate it to TBB / TBH later (Thumb2 only). // FIXME: This might not work if the function is extremely large. 
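    // For illustration: on Thumb-2 this typically becomes a table-branch
    // instruction, roughly
    //   tbb [pc, r0]   @ byte offsets, scaled by 2 (tbh for halfword offsets)
    // with the offset table placed immediately after the instruction.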
return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, Addr, Op.getOperand(2), JTI); } if (isPositionIndependent() || Subtarget->isROPI()) { Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, MachinePointerInfo::getJumpTable(DAG.getMachineFunction())); Chain = Addr.getValue(1); Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); } else { Addr = DAG.getLoad(PTy, dl, Chain, Addr, MachinePointerInfo::getJumpTable(DAG.getMachineFunction())); Chain = Addr.getValue(1); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); } } static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); SDLoc dl(Op); if (Op.getValueType().getVectorElementType() == MVT::i32) { if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) return Op; return DAG.UnrollVectorOp(Op.getNode()); } const bool HasFullFP16 = static_cast(DAG.getSubtarget()).hasFullFP16(); EVT NewTy; const EVT OpTy = Op.getOperand(0).getValueType(); if (OpTy == MVT::v4f32) NewTy = MVT::v4i32; else if (OpTy == MVT::v4f16 && HasFullFP16) NewTy = MVT::v4i16; else if (OpTy == MVT::v8f16 && HasFullFP16) NewTy = MVT::v8i16; else llvm_unreachable("Invalid type for custom lowering!"); if (VT != MVT::v4i16 && VT != MVT::v8i16) return DAG.UnrollVectorOp(Op.getNode()); Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); } SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorFP_TO_INT(Op, DAG); if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) { RTLIB::Libcall LC; if (Op.getOpcode() == ISD::FP_TO_SINT) LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType()); else LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0), /*isSigned*/ false, SDLoc(Op)).first; } return Op; } static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); SDLoc dl(Op); if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { if (VT.getVectorElementType() == MVT::f32) return Op; return DAG.UnrollVectorOp(Op.getNode()); } assert((Op.getOperand(0).getValueType() == MVT::v4i16 || Op.getOperand(0).getValueType() == MVT::v8i16) && "Invalid type for custom lowering!"); const bool HasFullFP16 = static_cast(DAG.getSubtarget()).hasFullFP16(); EVT DestVecType; if (VT == MVT::v4f32) DestVecType = MVT::v4i32; else if (VT == MVT::v4f16 && HasFullFP16) DestVecType = MVT::v4i16; else if (VT == MVT::v8f16 && HasFullFP16) DestVecType = MVT::v8i16; else return DAG.UnrollVectorOp(Op.getNode()); unsigned CastOpc; unsigned Opc; switch (Op.getOpcode()) { default: llvm_unreachable("Invalid opcode!"); case ISD::SINT_TO_FP: CastOpc = ISD::SIGN_EXTEND; Opc = ISD::SINT_TO_FP; break; case ISD::UINT_TO_FP: CastOpc = ISD::ZERO_EXTEND; Opc = ISD::UINT_TO_FP; break; } Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0)); return DAG.getNode(Opc, dl, VT, Op); } SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorINT_TO_FP(Op, DAG); if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) { RTLIB::Libcall LC; if (Op.getOpcode() == ISD::SINT_TO_FP) LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); else LC = 
RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0), /*isSigned*/ false, SDLoc(Op)).first; } return Op; } SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // Implement fcopysign with a fabs and a conditional fneg. SDValue Tmp0 = Op.getOperand(0); SDValue Tmp1 = Op.getOperand(1); SDLoc dl(Op); EVT VT = Op.getValueType(); EVT SrcVT = Tmp1.getValueType(); bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR; bool UseNEON = !InGPR && Subtarget->hasNEON(); if (UseNEON) { // Use VBSL to copy the sign bit. unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, DAG.getTargetConstant(EncodedVal, dl, MVT::i32)); EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64; if (VT == MVT::f64) Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT, DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), DAG.getConstant(32, dl, MVT::i32)); else /*if (VT == MVT::f32)*/ Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); if (SrcVT == MVT::f32) { Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); if (VT == MVT::f64) Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), DAG.getConstant(32, dl, MVT::i32)); } else if (VT == MVT::f32) Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64, DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), DAG.getConstant(32, dl, MVT::i32)); Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), dl, MVT::i32); AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); if (VT == MVT::f32) { Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, DAG.getConstant(0, dl, MVT::i32)); } else { Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res); } return Res; } // Bitcast operand 1 to i32. if (SrcVT == MVT::f64) Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Tmp1).getValue(1); Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); // Or in the signbit with integer operations. SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32); SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32); Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); if (VT == MVT::f32) { Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); return DAG.getNode(ISD::BITCAST, dl, MVT::f32, DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); } // f64: Or the high part with signbit and then combine two parts. 
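As an illustration of the integer fallback above (not part of the original source): fcopysign reduces to masking the sign bit. A minimal scalar-f32 sketch, with memcpy standing in for the DAG bitcasts and a hypothetical helper name:

#include <cstdint>
#include <cstring>

// Sketch only: result = (magnitude bits of Mag) | (sign bit of Sign),
// mirroring the Mask1/Mask2 logic above.
static float copysignF32Sketch(float Mag, float Sign) {
  uint32_t M, S;
  std::memcpy(&M, &Mag, sizeof(M));    // bitcast f32 -> i32
  std::memcpy(&S, &Sign, sizeof(S));
  uint32_t R = (M & 0x7fffffffu) | (S & 0x80000000u);
  float Out;
  std::memcpy(&Out, &R, sizeof(Out));  // bitcast i32 -> f32
  return Out;
}

The f64 code that follows applies the same masks to the high word only and then rebuilds the double from its two halves with VMOVDRR.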
Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Tmp0); SDValue Lo = Tmp0.getValue(0); SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); } SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setReturnAddressIsTaken(true); if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); EVT VT = Op.getValueType(); SDLoc dl(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); if (Depth) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = DAG.getConstant(4, dl, MVT::i32); return DAG.getLoad(VT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), MachinePointerInfo()); } // Return LR, which contains the return address. Mark it an implicit live-in. unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); } SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { const ARMBaseRegisterInfo &ARI = *static_cast(RegInfo); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); SDLoc dl(Op); // FIXME probably not meaningful unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); unsigned FrameReg = ARI.getFrameRegister(MF); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, MachinePointerInfo()); return FrameAddr; } // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const { unsigned Reg = StringSwitch(RegName) .Case("sp", ARM::SP) .Default(0); if (Reg) return Reg; report_fatal_error(Twine("Invalid register name \"" + StringRef(RegName) + "\".")); } // Result is 64 bit value so split into two 32 bit values and return as a // pair of values. static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) { SDLoc DL(N); // This function is only supposed to be called for i64 type destination. assert(N->getValueType(0) == MVT::i64 && "ExpandREAD_REGISTER called for non-i64 type result."); SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL, DAG.getVTList(MVT::i32, MVT::i32, MVT::Other), N->getOperand(0), N->getOperand(1)); Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0), Read.getValue(1))); Results.push_back(Read.getOperand(0)); } /// \p BC is a bitcast that is about to be turned into a VMOVDRR. /// When \p DstVT, the destination type of \p BC, is on the vector /// register bank and the source of bitcast, \p Op, operates on the same bank, /// it might be possible to combine them, such that everything stays on the /// vector register bank. /// \p return The node that would replace \p BT, if the combine /// is possible. static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, SelectionDAG &DAG) { SDValue Op = BC->getOperand(0); EVT DstVT = BC->getValueType(0); // The only vector instruction that can produce a scalar (remember, // since the bitcast was about to be turned into VMOVDRR, the source // type is i64) from a vector is EXTRACT_VECTOR_ELT. 
// Moreover, we can do this combine only if there is one use. // Finally, if the destination type is not a vector, there is not // much point on forcing everything on the vector bank. if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || !Op.hasOneUse()) return SDValue(); // If the index is not constant, we will introduce an additional // multiply that will stick. // Give up in that case. ConstantSDNode *Index = dyn_cast(Op.getOperand(1)); if (!Index) return SDValue(); unsigned DstNumElt = DstVT.getVectorNumElements(); // Compute the new index. const APInt &APIntIndex = Index->getAPIntValue(); APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt); NewIndex *= APIntIndex; // Check if the new constant index fits into i32. if (NewIndex.getBitWidth() > 32) return SDValue(); // vMTy bitcast(i64 extractelt vNi64 src, i32 index) -> // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M) SDLoc dl(Op); SDValue ExtractSrc = Op.getOperand(0); EVT VecVT = EVT::getVectorVT( *DAG.getContext(), DstVT.getScalarType(), ExtractSrc.getValueType().getVectorNumElements() * DstNumElt); SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast, DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32)); } /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 /// operand type is illegal (e.g., v2f32 for a target that doesn't support /// vectors), since the legalizer won't know what to do with that. static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc dl(N); SDValue Op = N->getOperand(0); // This function is only supposed to be called for i64 types, either as the // source or destination of the bit convert. EVT SrcVT = Op.getValueType(); EVT DstVT = N->getValueType(0); const bool HasFullFP16 = Subtarget->hasFullFP16(); if (SrcVT == MVT::f32 && DstVT == MVT::i32) { // FullFP16: half values are passed in S-registers, and we don't // need any of the bitcast and moves: // // t2: f32,ch = CopyFromReg t0, Register:f32 %0 // t5: i32 = bitcast t2 // t18: f16 = ARMISD::VMOVhr t5 if (Op.getOpcode() != ISD::CopyFromReg || Op.getValueType() != MVT::f32) return SDValue(); auto Move = N->use_begin(); if (Move->getOpcode() != ARMISD::VMOVhr) return SDValue(); SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) }; SDValue Copy = DAG.getNode(ISD::CopyFromReg, SDLoc(Op), MVT::f16, Ops); DAG.ReplaceAllUsesWith(*Move, &Copy); return Copy; } if (SrcVT == MVT::i16 && DstVT == MVT::f16) { if (!HasFullFP16) return SDValue(); // SoftFP: read half-precision arguments: // // t2: i32,ch = ... 
// t7: i16 = truncate t2 <~~~~ Op // t8: f16 = bitcast t7 <~~~~ N // if (Op.getOperand(0).getValueType() == MVT::i32) return DAG.getNode(ARMISD::VMOVhr, SDLoc(Op), MVT::f16, Op.getOperand(0)); return SDValue(); } // Half-precision return values if (SrcVT == MVT::f16 && DstVT == MVT::i16) { if (!HasFullFP16) return SDValue(); // // t11: f16 = fadd t8, t10 // t12: i16 = bitcast t11 <~~~ SDNode N // t13: i32 = zero_extend t12 // t16: ch,glue = CopyToReg t0, Register:i32 %r0, t13 // t17: ch = ARMISD::RET_FLAG t16, Register:i32 %r0, t16:1 // // transform this into: // // t20: i32 = ARMISD::VMOVrh t11 // t16: ch,glue = CopyToReg t0, Register:i32 %r0, t20 // auto ZeroExtend = N->use_begin(); if (N->use_size() != 1 || ZeroExtend->getOpcode() != ISD::ZERO_EXTEND || ZeroExtend->getValueType(0) != MVT::i32) return SDValue(); auto Copy = ZeroExtend->use_begin(); if (Copy->getOpcode() == ISD::CopyToReg && Copy->use_begin()->getOpcode() == ARMISD::RET_FLAG) { SDValue Cvt = DAG.getNode(ARMISD::VMOVrh, SDLoc(Op), MVT::i32, Op); DAG.ReplaceAllUsesWith(*ZeroExtend, &Cvt); return Cvt; } return SDValue(); } if (!(SrcVT == MVT::i64 || DstVT == MVT::i64)) return SDValue(); // Turn i64->f64 into VMOVDRR. if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { // Do not force values to GPRs (this is what VMOVDRR does for the inputs) // if we can combine the bitcast with its source. if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG)) return Val; SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(0, dl, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(1, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, DstVT, DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); } // Turn f64->i64 into VMOVRRD. if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { SDValue Cvt; if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() && SrcVT.getVectorNumElements() > 1) Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op)); else Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Op); // Merge the pieces into a single i64 value. return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); } return SDValue(); } /// getZeroVector - Returns a vector of specified type with all zero elements. /// Zero vectors are used to represent vector negation and in those cases /// will be implemented with the NEON VNEG instruction. However, VNEG does /// not support i64 elements, so sometimes the zero vectors will need to be /// explicitly constructed. Regardless, use a canonical VMOV to create the /// zero vector. static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) { assert(VT.isVector() && "Expected a vector type"); // The canonical modified immediate encoding of a zero vector is....0! SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32); EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); } /// LowerShiftRightParts - Lower SRA_PARTS, which returns two /// i32 values and take a 2 x i32 value to shift plus a shift amount. 
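The SRA_PARTS/SRL_PARTS lowering that follows selects between a "small shift" and a "big shift" result with two CMOVs. A standalone sketch of the logical-shift-right case on plain 32-bit halves (hypothetical helper, illustration only; the arithmetic variant fills the high word with copies of the sign bit instead of zeros):

#include <cstdint>

// Sketch: logical shift right of a 64-bit value held as {Lo, Hi} by Amt,
// with 0 < Amt < 64, matching the small-shift/big-shift split selected below.
static void lshr64PartsSketch(uint32_t Lo, uint32_t Hi, unsigned Amt,
                              uint32_t &OutLo, uint32_t &OutHi) {
  if (Amt < 32) {                       // small shift: bits spill from Hi into Lo
    OutLo = (Lo >> Amt) | (Hi << (32 - Amt));
    OutHi = Hi >> Amt;
  } else {                              // big shift: Lo comes entirely from Hi
    OutLo = Hi >> (Amt - 32);
    OutHi = 0;                          // SRA_PARTS would use Hi >> 31 here
  }
}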
SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); SDLoc dl(Op); SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, DAG.getConstant(VTBits, dl, MVT::i32), ShAmt); SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, DAG.getConstant(VTBits, dl, MVT::i32)); SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), ISD::SETGE, ARMcc, DAG, dl); SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, ARMcc, CCR, CmpLo); SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); SDValue HiBigShift = Opc == ISD::SRA ? DAG.getNode(Opc, dl, VT, ShOpHi, DAG.getConstant(VTBits - 1, dl, VT)) : DAG.getConstant(0, dl, VT); SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), ISD::SETGE, ARMcc, DAG, dl); SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CCR, CmpHi); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); } /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two /// i32 values and take a 2 x i32 value to shift plus a shift amount. SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); SDLoc dl(Op); SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); assert(Op.getOpcode() == ISD::SHL_PARTS); SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, DAG.getConstant(VTBits, dl, MVT::i32), ShAmt); SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, DAG.getConstant(VTBits, dl, MVT::i32)); SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), ISD::SETGE, ARMcc, DAG, dl); SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CCR, CmpHi); SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), ISD::SETGE, ARMcc, DAG, dl); SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); } SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const { // The rounding mode is in bits 23:22 of the FPSCR. 
// The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0 // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) // so that the shift + and get folded into a bitfield extract. SDLoc dl(Op); SDValue Ops[] = { DAG.getEntryNode(), DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) }; SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops); SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, DAG.getConstant(1U << 22, dl, MVT::i32)); SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, DAG.getConstant(22, dl, MVT::i32)); return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, DAG.getConstant(3, dl, MVT::i32)); } static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { SDLoc dl(N); EVT VT = N->getValueType(0); if (VT.isVector()) { assert(ST->hasNEON()); // Compute the least significant set bit: LSB = X & -X SDValue X = N->getOperand(0); SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X); SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX); EVT ElemTy = VT.getVectorElementType(); if (ElemTy == MVT::i8) { // Compute with: cttz(x) = ctpop(lsb - 1) SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT, DAG.getTargetConstant(1, dl, ElemTy)); SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One); return DAG.getNode(ISD::CTPOP, dl, VT, Bits); } if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) && (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) { // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0 unsigned NumBits = ElemTy.getSizeInBits(); SDValue WidthMinus1 = DAG.getNode(ARMISD::VMOVIMM, dl, VT, DAG.getTargetConstant(NumBits - 1, dl, ElemTy)); SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB); return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ); } // Compute with: cttz(x) = ctpop(lsb - 1) // Compute LSB - 1. SDValue Bits; if (ElemTy == MVT::i64) { // Load constant 0xffff'ffff'ffff'ffff to register. SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT, DAG.getTargetConstant(0x1eff, dl, MVT::i32)); Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF); } else { SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT, DAG.getTargetConstant(1, dl, ElemTy)); Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One); } return DAG.getNode(ISD::CTPOP, dl, VT, Bits); } if (!ST->hasV6T2Ops()) return SDValue(); SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0)); return DAG.getNode(ISD::CTLZ, dl, VT, rbit); } static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); SDLoc DL(N); assert(ST->hasNEON() && "Custom ctpop lowering requires NEON."); assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 || VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) && "Unexpected type for custom ctpop lowering"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8; SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0)); Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res); // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds. unsigned EltSize = 8; unsigned NumElts = VT.is64BitVector() ? 
8 : 16; while (EltSize != VT.getScalarSizeInBits()) { SmallVector Ops; Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL, TLI.getPointerTy(DAG.getDataLayout()))); Ops.push_back(Res); EltSize *= 2; NumElts /= 2; MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops); } return Res; } static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); SDLoc dl(N); if (!VT.isVector()) return SDValue(); // Lower vector shifts on NEON to use VSHL. assert(ST->hasNEON() && "unexpected vector shift"); // Left shifts translate directly to the vshiftu intrinsic. if (N->getOpcode() == ISD::SHL) return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl, MVT::i32), N->getOperand(0), N->getOperand(1)); assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); // NEON uses the same intrinsics for both left and right shifts. For // right shifts, the shift amounts are negative, so negate the vector of // shift amounts. EVT ShiftVT = N->getOperand(1).getValueType(); SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1)); Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? Intrinsic::arm_neon_vshifts : Intrinsic::arm_neon_vshiftu); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(vshiftInt, dl, MVT::i32), N->getOperand(0), NegatedCount); } static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); SDLoc dl(N); // We can get here for a node like i32 = ISD::SHL i32, i64 if (VT != MVT::i64) return SDValue(); assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && "Unknown shift to lower!"); // We only lower SRA, SRL of 1 here, all others use generic lowering. if (!isOneConstant(N->getOperand(1))) return SDValue(); // If we are in thumb mode, we don't have RRX. if (ST->isThumb1Only()) return SDValue(); // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), DAG.getConstant(0, dl, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), DAG.getConstant(1, dl, MVT::i32)); // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and // captures the result into a carry flag. unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi); // The low part is an ARMISD::RRX operand, which shifts the carry in. Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); // Merge the pieces into a single i64 value. return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); } static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { SDValue TmpOp0, TmpOp1; bool Invert = false; bool Swap = false; unsigned Opc = 0; SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger(); EVT VT = Op.getValueType(); ISD::CondCode SetCCOpcode = cast(CC)->get(); SDLoc dl(Op); if (Op0.getValueType().getVectorElementType() == MVT::i64 && (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) { // Special-case integer 64-bit equality comparisons. They aren't legal, // but they can be lowered with a few vector instructions. 
unsigned CmpElements = CmpVT.getVectorNumElements() * 2; EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements); SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0); SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1); SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1, DAG.getCondCode(ISD::SETEQ)); SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp); SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed); Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged); if (SetCCOpcode == ISD::SETNE) Merged = DAG.getNOT(dl, Merged, CmpVT); Merged = DAG.getSExtOrTrunc(Merged, dl, VT); return Merged; } if (CmpVT.getVectorElementType() == MVT::i64) // 64-bit comparisons are not legal in general. return SDValue(); if (Op1.getValueType().isFloatingPoint()) { switch (SetCCOpcode) { default: llvm_unreachable("Illegal FP comparison"); case ISD::SETUNE: case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH; case ISD::SETOEQ: case ISD::SETEQ: Opc = ARMISD::VCEQ; break; case ISD::SETOLT: case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETOGT: case ISD::SETGT: Opc = ARMISD::VCGT; break; case ISD::SETOLE: case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETOGE: case ISD::SETGE: Opc = ARMISD::VCGE; break; case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH; case ISD::SETONE: // Expand this to (OLT | OGT). TmpOp0 = Op0; TmpOp1 = Op1; Opc = ISD::OR; Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1); break; case ISD::SETUO: Invert = true; LLVM_FALLTHROUGH; case ISD::SETO: // Expand this to (OLT | OGE). TmpOp0 = Op0; TmpOp1 = Op1; Opc = ISD::OR; Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1); break; } } else { // Integer comparisons. switch (SetCCOpcode) { default: llvm_unreachable("Illegal integer comparison"); case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH; case ISD::SETEQ: Opc = ARMISD::VCEQ; break; case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETGT: Opc = ARMISD::VCGT; break; case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETGE: Opc = ARMISD::VCGE; break; case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETUGT: Opc = ARMISD::VCGTU; break; case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETUGE: Opc = ARMISD::VCGEU; break; } // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). if (Opc == ARMISD::VCEQ) { SDValue AndOp; if (ISD::isBuildVectorAllZeros(Op1.getNode())) AndOp = Op0; else if (ISD::isBuildVectorAllZeros(Op0.getNode())) AndOp = Op1; // Ignore bitconvert. if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST) AndOp = AndOp.getOperand(0); if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { Opc = ARMISD::VTST; Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0)); Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1)); Invert = !Invert; } } } if (Swap) std::swap(Op0, Op1); // If one of the operands is a constant vector zero, attempt to fold the // comparison to a specialized compare-against-zero form. 
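The 64-bit SETEQ/SETNE special case at the top of LowerVSETCC compares the two 32-bit halves of each lane and then ANDs each half's result with the result of its partner half (the VREV64 step), so a lane ends up all-ones only when both halves matched. A scalar sketch of that identity (hypothetical helper, illustration only):

#include <cstdint>

// Sketch: i64 equality via two i32 equalities, as in the VCEQ.i32 +
// VREV64 + VAND sequence above.
static uint64_t eq64ViaEq32Sketch(uint64_t A, uint64_t B) {
  uint32_t LoMask = (uint32_t)A == (uint32_t)B ? 0xffffffffu : 0;
  uint32_t HiMask = (uint32_t)(A >> 32) == (uint32_t)(B >> 32) ? 0xffffffffu : 0;
  uint32_t Both = LoMask & HiMask;       // AND with the lane-swapped copy
  return ((uint64_t)Both << 32) | Both;  // all-ones iff A == B
}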
SDValue SingleOp; if (ISD::isBuildVectorAllZeros(Op1.getNode())) SingleOp = Op0; else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { if (Opc == ARMISD::VCGE) Opc = ARMISD::VCLEZ; else if (Opc == ARMISD::VCGT) Opc = ARMISD::VCLTZ; SingleOp = Op1; } SDValue Result; if (SingleOp.getNode()) { switch (Opc) { case ARMISD::VCEQ: Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break; case ARMISD::VCGE: Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break; case ARMISD::VCLEZ: Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break; case ARMISD::VCGT: Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break; case ARMISD::VCLTZ: Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break; default: Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1); } } else { Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1); } Result = DAG.getSExtOrTrunc(Result, dl, VT); if (Invert) Result = DAG.getNOT(dl, Result, VT); return Result; } static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue Carry = Op.getOperand(2); SDValue Cond = Op.getOperand(3); SDLoc DL(Op); assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only."); // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we // have to invert the carry first. Carry = DAG.getNode(ISD::SUB, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32), Carry); // This converts the boolean value carry into the carry flag. Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG); SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry); SDValue FVal = DAG.getConstant(0, DL, MVT::i32); SDValue TVal = DAG.getConstant(1, DL, MVT::i32); SDValue ARMcc = DAG.getConstant( IntCCToARMCC(cast(Cond)->get()), DL, MVT::i32); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR, Cmp.getValue(1), SDValue()); return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc, CCR, Chain.getValue(1)); } /// isNEONModifiedImm - Check if the specified splat value corresponds to a /// valid vector constant for a NEON instruction with a "modified immediate" /// operand (e.g., VMOV). If so, return the encoded value. static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, bool is128Bits, NEONModImmType type) { unsigned OpCmode, Imm; // SplatBitSize is set to the smallest size that splats the vector, so a // zero vector will always have SplatBitSize == 8. However, NEON modified // immediate instructions others than VMOV do not support the 8-bit encoding // of a zero vector, and the default encoding of zero is supposed to be the // 32-bit version. if (SplatBits == 0) SplatBitSize = 32; switch (SplatBitSize) { case 8: if (type != VMOVModImm) return SDValue(); // Any 1-byte value is OK. Op=0, Cmode=1110. assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); OpCmode = 0xe; Imm = SplatBits; VT = is128Bits ? MVT::v16i8 : MVT::v8i8; break; case 16: // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. VT = is128Bits ? MVT::v8i16 : MVT::v4i16; if ((SplatBits & ~0xff) == 0) { // Value = 0x00nn: Op=x, Cmode=100x. OpCmode = 0x8; Imm = SplatBits; break; } if ((SplatBits & ~0xff00) == 0) { // Value = 0xnn00: Op=x, Cmode=101x. 
OpCmode = 0xa; Imm = SplatBits >> 8; break; } return SDValue(); case 32: // NEON's 32-bit VMOV supports splat values where: // * only one byte is nonzero, or // * the least significant byte is 0xff and the second byte is nonzero, or // * the least significant 2 bytes are 0xff and the third is nonzero. VT = is128Bits ? MVT::v4i32 : MVT::v2i32; if ((SplatBits & ~0xff) == 0) { // Value = 0x000000nn: Op=x, Cmode=000x. OpCmode = 0; Imm = SplatBits; break; } if ((SplatBits & ~0xff00) == 0) { // Value = 0x0000nn00: Op=x, Cmode=001x. OpCmode = 0x2; Imm = SplatBits >> 8; break; } if ((SplatBits & ~0xff0000) == 0) { // Value = 0x00nn0000: Op=x, Cmode=010x. OpCmode = 0x4; Imm = SplatBits >> 16; break; } if ((SplatBits & ~0xff000000) == 0) { // Value = 0xnn000000: Op=x, Cmode=011x. OpCmode = 0x6; Imm = SplatBits >> 24; break; } // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC if (type == OtherModImm) return SDValue(); if ((SplatBits & ~0xffff) == 0 && ((SplatBits | SplatUndef) & 0xff) == 0xff) { // Value = 0x0000nnff: Op=x, Cmode=1100. OpCmode = 0xc; Imm = SplatBits >> 8; break; } if ((SplatBits & ~0xffffff) == 0 && ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { // Value = 0x00nnffff: Op=x, Cmode=1101. OpCmode = 0xd; Imm = SplatBits >> 16; break; } // Note: there are a few 32-bit splat values (specifically: 00ffff00, // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not // VMOV.I32. A (very) minor optimization would be to replicate the value // and fall through here to test for a valid 64-bit splat. But, then the // caller would also need to check and handle the change in size. return SDValue(); case 64: { if (type != VMOVModImm) return SDValue(); // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. uint64_t BitMask = 0xff; uint64_t Val = 0; unsigned ImmMask = 1; Imm = 0; for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { if (((SplatBits | SplatUndef) & BitMask) == BitMask) { Val |= BitMask; Imm |= ImmMask; } else if ((SplatBits & BitMask) != 0) { return SDValue(); } BitMask <<= 8; ImmMask <<= 1; } if (DAG.getDataLayout().isBigEndian()) // swap higher and lower 32 bit word Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4); // Op=1, Cmode=1110. OpCmode = 0x1e; VT = is128Bits ? MVT::v2i64 : MVT::v1i64; break; } default: llvm_unreachable("unexpected size for isNEONModifiedImm"); } unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); return DAG.getTargetConstant(EncodedVal, dl, MVT::i32); } SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const { EVT VT = Op.getValueType(); bool IsDouble = (VT == MVT::f64); ConstantFPSDNode *CFP = cast(Op); const APFloat &FPVal = CFP->getValueAPF(); // Prevent floating-point constants from using literal loads // when execute-only is enabled. 
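The 32-bit cases of isNEONModifiedImm above accept splats with exactly one nonzero byte, plus the 0x0000nnff / 0x00nnffff forms that only VMOV supports. A condensed standalone sketch of that classification, ignoring the SplatUndef handling (hypothetical helper, illustration only):

#include <cstdint>

// Sketch: classify a 32-bit splat value as a NEON modified immediate.
// One nonzero byte -> cmode 000x/001x/010x/011x; 0x0000nnff -> cmode 1100;
// 0x00nnffff -> cmode 1101 (the last two are VMOV-only, as noted above).
static bool classifySplat32Sketch(uint32_t V, unsigned &Cmode, unsigned &Imm) {
  for (unsigned Byte = 0; Byte < 4; ++Byte) {
    uint32_t Mask = 0xffu << (8 * Byte);
    if ((V & ~Mask) == 0) {
      Cmode = 2 * Byte;                  // 0x0, 0x2, 0x4, 0x6
      Imm = (V >> (8 * Byte)) & 0xff;
      return true;
    }
  }
  if ((V & ~0xffffu) == 0 && (V & 0xff) == 0xff) {
    Cmode = 0xc; Imm = V >> 8;  return true;
  }
  if ((V & ~0xffffffu) == 0 && (V & 0xffff) == 0xffff) {
    Cmode = 0xd; Imm = V >> 16; return true;
  }
  return false;
}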
if (ST->genExecuteOnly()) { // If we can represent the constant as an immediate, don't lower it if (isFPImmLegal(FPVal, VT)) return Op; // Otherwise, construct as integer, and move to float register APInt INTVal = FPVal.bitcastToAPInt(); SDLoc DL(CFP); switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unknown floating point type!"); break; case MVT::f64: { SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32); SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32); if (!ST->isLittle()) std::swap(Lo, Hi); return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi); } case MVT::f32: return DAG.getNode(ARMISD::VMOVSR, DL, VT, DAG.getConstant(INTVal, DL, MVT::i32)); } } if (!ST->hasVFP3()) return SDValue(); // Use the default (constant pool) lowering for double constants when we have // an SP-only FPU if (IsDouble && Subtarget->isFPOnlySP()) return SDValue(); // Try splatting with a VMOV.f32... int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal); if (ImmVal != -1) { if (IsDouble || !ST->useNEONForSinglePrecisionFP()) { // We have code in place to select a valid ConstantFP already, no need to // do any mangling. return Op; } // It's a float and we are trying to use NEON operations where // possible. Lower it to a splat followed by an extract. SDLoc DL(Op); SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32); SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, NewVal); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant, DAG.getConstant(0, DL, MVT::i32)); } // The rest of our options are NEON only, make sure that's allowed before // proceeding.. if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP())) return SDValue(); EVT VMovVT; uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue(); // It wouldn't really be worth bothering for doubles except for one very // important value, which does happen to match: 0.0. So make sure we don't do // anything stupid. if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32)) return SDValue(); // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too). SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT, false, VMOVModImm); if (NewVal != SDValue()) { SDLoc DL(Op); SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT, NewVal); if (IsDouble) return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); // It's a float: cast and extract a vector element. SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, VecConstant); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, DAG.getConstant(0, DL, MVT::i32)); } // Finally, try a VMVN.i32 NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT, false, VMVNModImm); if (NewVal != SDValue()) { SDLoc DL(Op); SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal); if (IsDouble) return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); // It's a float: cast and extract a vector element. SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, VecConstant); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, DAG.getConstant(0, DL, MVT::i32)); } return SDValue(); } // check if an VEXT instruction can handle the shuffle mask when the // vector sources of the shuffle are the same. static bool isSingletonVEXTMask(ArrayRef M, EVT VT, unsigned &Imm) { unsigned NumElts = VT.getVectorNumElements(); // Assume that the first shuffle index is not UNDEF. Fail if it is. 
if (M[0] < 0) return false; Imm = M[0]; // If this is a VEXT shuffle, the immediate value is the index of the first // element. The other shuffle indices must be the successive elements after // the first one. unsigned ExpectedElt = Imm; for (unsigned i = 1; i < NumElts; ++i) { // Increment the expected index. If it wraps around, just follow it // back to index zero and keep going. ++ExpectedElt; if (ExpectedElt == NumElts) ExpectedElt = 0; if (M[i] < 0) continue; // ignore UNDEF indices if (ExpectedElt != static_cast(M[i])) return false; } return true; } static bool isVEXTMask(ArrayRef M, EVT VT, bool &ReverseVEXT, unsigned &Imm) { unsigned NumElts = VT.getVectorNumElements(); ReverseVEXT = false; // Assume that the first shuffle index is not UNDEF. Fail if it is. if (M[0] < 0) return false; Imm = M[0]; // If this is a VEXT shuffle, the immediate value is the index of the first // element. The other shuffle indices must be the successive elements after // the first one. unsigned ExpectedElt = Imm; for (unsigned i = 1; i < NumElts; ++i) { // Increment the expected index. If it wraps around, it may still be // a VEXT but the source vectors must be swapped. ExpectedElt += 1; if (ExpectedElt == NumElts * 2) { ExpectedElt = 0; ReverseVEXT = true; } if (M[i] < 0) continue; // ignore UNDEF indices if (ExpectedElt != static_cast(M[i])) return false; } // Adjust the index value if the source operands will be swapped. if (ReverseVEXT) Imm -= NumElts; return true; } /// isVREVMask - Check if a vector shuffle corresponds to a VREV /// instruction with the specified blocksize. (The order of the elements /// within each block of the vector is reversed.) static bool isVREVMask(ArrayRef M, EVT VT, unsigned BlockSize) { assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && "Only possible block sizes for VREV are: 16, 32, 64"); unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); unsigned BlockElts = M[0] + 1; // If the first shuffle index is UNDEF, be optimistic. if (M[0] < 0) BlockElts = BlockSize / EltSz; if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) return false; for (unsigned i = 0; i < NumElts; ++i) { if (M[i] < 0) continue; // ignore UNDEF indices if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) return false; } return true; } static bool isVTBLMask(ArrayRef M, EVT VT) { // We can handle <8 x i8> vector shuffles. If the index in the mask is out of // range, then 0 is placed into the resulting vector. So pretty much any mask // of 8 elements can work here. return VT == MVT::v8i8 && M.size() == 8; } static unsigned SelectPairHalf(unsigned Elements, ArrayRef Mask, unsigned Index) { if (Mask.size() == Elements * 2) return Index / Elements; return Mask[Index] == 0 ? 0 : 1; } // Checks whether the shuffle mask represents a vector transpose (VTRN) by // checking that pairs of elements in the shuffle mask represent the same index // in each vector, incrementing the expected index by 2 at each step. // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6] // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g} // v2={e,f,g,h} // WhichResult gives the offset for each element in the mask based on which // of the two results it belongs to. 
// // The transpose can be represented either as: // result1 = shufflevector v1, v2, result1_shuffle_mask // result2 = shufflevector v1, v2, result2_shuffle_mask // where v1/v2 and the shuffle masks have the same number of elements // (here WhichResult (see below) indicates which result is being checked) // // or as: // results = shufflevector v1, v2, shuffle_mask // where both results are returned in one vector and the shuffle mask has twice // as many elements as v1/v2 (here WhichResult will always be 0 if true) here we // want to check the low half and high half of the shuffle mask as if it were // the other case static bool isVTRNMask(ArrayRef M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); if (M.size() != NumElts && M.size() != NumElts*2) return false; // If the mask is twice as long as the input vector then we need to check the // upper and lower parts of the mask with a matching value for WhichResult // FIXME: A mask with only even values will be rejected in case the first // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only // M[0] is used to determine WhichResult for (unsigned i = 0; i < M.size(); i += NumElts) { WhichResult = SelectPairHalf(NumElts, M, i); for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) || (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult)) return false; } } if (M.size() == NumElts*2) WhichResult = 0; return true; } /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. static bool isVTRN_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); if (M.size() != NumElts && M.size() != NumElts*2) return false; for (unsigned i = 0; i < M.size(); i += NumElts) { WhichResult = SelectPairHalf(NumElts, M, i); for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) || (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult)) return false; } } if (M.size() == NumElts*2) WhichResult = 0; return true; } // Checks whether the shuffle mask represents a vector unzip (VUZP) by checking // that the mask elements are either all even and in steps of size 2 or all odd // and in steps of size 2. // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6] // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g} // v2={e,f,g,h} // Requires similar checks to that of isVTRNMask with // respect the how results are returned. static bool isVUZPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); if (M.size() != NumElts && M.size() != NumElts*2) return false; for (unsigned i = 0; i < M.size(); i += NumElts) { WhichResult = SelectPairHalf(NumElts, M, i); for (unsigned j = 0; j < NumElts; ++j) { if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult) return false; } } if (M.size() == NumElts*2) WhichResult = 0; // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 
if (VT.is64BitVector() && EltSz == 32) return false; return true; } /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, static bool isVUZP_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); if (M.size() != NumElts && M.size() != NumElts*2) return false; unsigned Half = NumElts / 2; for (unsigned i = 0; i < M.size(); i += NumElts) { WhichResult = SelectPairHalf(NumElts, M, i); for (unsigned j = 0; j < NumElts; j += Half) { unsigned Idx = WhichResult; for (unsigned k = 0; k < Half; ++k) { int MIdx = M[i + j + k]; if (MIdx >= 0 && (unsigned) MIdx != Idx) return false; Idx += 2; } } } if (M.size() == NumElts*2) WhichResult = 0; // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. if (VT.is64BitVector() && EltSz == 32) return false; return true; } // Checks whether the shuffle mask represents a vector zip (VZIP) by checking // that pairs of elements of the shufflemask represent the same index in each // vector incrementing sequentially through the vectors. // e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5] // v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f} // v2={e,f,g,h} // Requires similar checks to that of isVTRNMask with respect the how results // are returned. static bool isVZIPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); if (M.size() != NumElts && M.size() != NumElts*2) return false; for (unsigned i = 0; i < M.size(); i += NumElts) { WhichResult = SelectPairHalf(NumElts, M, i); unsigned Idx = WhichResult * NumElts / 2; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts)) return false; Idx += 1; } } if (M.size() == NumElts*2) WhichResult = 0; // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. if (VT.is64BitVector() && EltSz == 32) return false; return true; } /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. static bool isVZIP_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getScalarSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); if (M.size() != NumElts && M.size() != NumElts*2) return false; for (unsigned i = 0; i < M.size(); i += NumElts) { WhichResult = SelectPairHalf(NumElts, M, i); unsigned Idx = WhichResult * NumElts / 2; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx)) return false; Idx += 1; } } if (M.size() == NumElts*2) WhichResult = 0; // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. if (VT.is64BitVector() && EltSz == 32) return false; return true; } /// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), /// and return the corresponding ARMISD opcode if it is, or 0 if it isn't. 
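For reference, the masks these predicates accept for a v4i32 shuffle of two sources (indices 0-3 select from the first source, 4-7 from the second). Example arrays only, not part of the original source; WhichResult = 0 is shown, and the corresponding right-result masks are given in the comments:

// VTRN keeps each element's lane and interleaves the partners; VUZP takes the
// even (or odd) lanes of the concatenation; VZIP interleaves the low (or high)
// halves of the two sources.
static const int VTRNLeftMask[4] = {0, 4, 2, 6};  // right result: {1, 5, 3, 7}
static const int VUZPLeftMask[4] = {0, 2, 4, 6};  // right result: {1, 3, 5, 7}
static const int VZIPLeftMask[4] = {0, 4, 1, 5};  // right result: {2, 6, 3, 7}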
static unsigned isNEONTwoResultShuffleMask(ArrayRef ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF) { isV_UNDEF = false; if (isVTRNMask(ShuffleMask, VT, WhichResult)) return ARMISD::VTRN; if (isVUZPMask(ShuffleMask, VT, WhichResult)) return ARMISD::VUZP; if (isVZIPMask(ShuffleMask, VT, WhichResult)) return ARMISD::VZIP; isV_UNDEF = true; if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) return ARMISD::VTRN; if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) return ARMISD::VUZP; if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) return ARMISD::VZIP; return 0; } /// \return true if this is a reverse operation on an vector. static bool isReverseMask(ArrayRef M, EVT VT) { unsigned NumElts = VT.getVectorNumElements(); // Make sure the mask has the right size. if (NumElts != M.size()) return false; // Look for <15, ..., 3, -1, 1, 0>. for (unsigned i = 0; i != NumElts; ++i) if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i)) return false; return true; } // If N is an integer constant that can be moved into a register in one // instruction, return an SDValue of such a constant (will become a MOV // instruction). Otherwise return null. static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, const SDLoc &dl) { uint64_t Val; if (!isa(N)) return SDValue(); Val = cast(N)->getZExtValue(); if (ST->isThumb1Only()) { if (Val <= 255 || ~Val <= 255) return DAG.getConstant(Val, dl, MVT::i32); } else { if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) return DAG.getConstant(Val, dl, MVT::i32); } return SDValue(); } // If this is a case we can't handle, return null and let the default // expansion code take care of it. SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const { BuildVectorSDNode *BVN = cast(Op.getNode()); SDLoc dl(Op); EVT VT = Op.getValueType(); APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatUndef.isAllOnesValue()) return DAG.getUNDEF(VT); if (SplatBitSize <= 64) { // Check if an immediate VMOV works. EVT VmovVT; SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT, VT.is128BitVector(), VMOVModImm); if (Val.getNode()) { SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); } // Try an immediate VMVN. uint64_t NegatedImm = (~SplatBits).getZExtValue(); Val = isNEONModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT, VT.is128BitVector(), VMVNModImm); if (Val.getNode()) { SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); } // Use vmov.f32 to materialize other v2f32 and v4f32 splats. if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) { int ImmVal = ARM_AM::getFP32Imm(SplatBits); if (ImmVal != -1) { SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32); return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val); } } } } // Scan through the operands to see if only one value is used. // // As an optimisation, even if more than one value is used it may be more // profitable to splat with one value then change some lanes. // // Heuristically we decide to do this if the vector has a "dominant" value, // defined as splatted to more than half of the lanes. 
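A standalone sketch of the "dominant value" test described above: a value dominates when it fills strictly more than half of the lanes (hypothetical helper using plain ints in place of SDValues, illustration only):

#include <cstddef>

// Sketch: return true if some lane value occurs in more than NumElts/2 lanes,
// the condition under which the splat-then-patch strategy below is used.
static bool hasDominantLaneSketch(const int *Lanes, size_t NumElts) {
  for (size_t i = 0; i < NumElts; ++i) {
    size_t Count = 0;
    for (size_t j = 0; j < NumElts; ++j)
      if (Lanes[j] == Lanes[i])
        ++Count;
    if (Count > NumElts / 2)
      return true;
  }
  return false;
}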
unsigned NumElts = VT.getVectorNumElements(); bool isOnlyLowElement = true; bool usesOnlyOneValue = true; bool hasDominantValue = false; bool isConstant = true; // Map of the number of times a particular SDValue appears in the // element list. DenseMap ValueCounts; SDValue Value; for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); if (V.isUndef()) continue; if (i > 0) isOnlyLowElement = false; if (!isa(V) && !isa(V)) isConstant = false; ValueCounts.insert(std::make_pair(V, 0)); unsigned &Count = ValueCounts[V]; // Is this value dominant? (takes up more than half of the lanes) if (++Count > (NumElts / 2)) { hasDominantValue = true; Value = V; } } if (ValueCounts.size() != 1) usesOnlyOneValue = false; if (!Value.getNode() && !ValueCounts.empty()) Value = ValueCounts.begin()->first; if (ValueCounts.empty()) return DAG.getUNDEF(VT); // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR. // Keep going if we are hitting this case. if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode())) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); unsigned EltSize = VT.getScalarSizeInBits(); // Use VDUP for non-constant splats. For f32 constant splats, reduce to // i32 and try again. if (hasDominantValue && EltSize <= 32) { if (!isConstant) { SDValue N; // If we are VDUPing a value that comes directly from a vector, that will // cause an unnecessary move to and from a GPR, where instead we could // just use VDUPLANE. We can only do this if the lane being extracted // is at a constant index, as the VDUP from lane instructions only have // constant-index forms. ConstantSDNode *constIndex; if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT && (constIndex = dyn_cast(Value->getOperand(1)))) { // We need to create a new undef vector to use for the VDUPLANE if the // size of the vector from which we get the value is different than the // size of the vector that we need to create. We will insert the element // such that the register coalescer will remove unnecessary copies. if (VT != Value->getOperand(0).getValueType()) { unsigned index = constIndex->getAPIntValue().getLimitedValue() % VT.getVectorNumElements(); N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT), Value, DAG.getConstant(index, dl, MVT::i32)), DAG.getConstant(index, dl, MVT::i32)); } else N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, Value->getOperand(0), Value->getOperand(1)); } else N = DAG.getNode(ARMISD::VDUP, dl, VT, Value); if (!usesOnlyOneValue) { // The dominant value was splatted as 'N', but we now have to insert // all differing elements. 
for (unsigned I = 0; I < NumElts; ++I) { if (Op.getOperand(I) == Value) continue; SmallVector Ops; Ops.push_back(N); Ops.push_back(Op.getOperand(I)); Ops.push_back(DAG.getConstant(I, dl, MVT::i32)); N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops); } } return N; } if (VT.getVectorElementType().isFloatingPoint()) { SmallVector Ops; for (unsigned i = 0; i < NumElts; ++i) Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(i))); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); SDValue Val = DAG.getBuildVector(VecVT, dl, Ops); Val = LowerBUILD_VECTOR(Val, DAG, ST); if (Val.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, Val); } if (usesOnlyOneValue) { SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); if (isConstant && Val.getNode()) return DAG.getNode(ARMISD::VDUP, dl, VT, Val); } } // If all elements are constants and the case above didn't get hit, fall back // to the default expansion, which will generate a load from the constant // pool. if (isConstant) return SDValue(); // Empirical tests suggest this is rarely worth it for vectors of length <= 2. if (NumElts >= 4) { SDValue shuffle = ReconstructShuffle(Op, DAG); if (shuffle != SDValue()) return shuffle; } if (VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) { // If we haven't found an efficient lowering, try splitting a 128-bit vector // into two 64-bit vectors; we might discover a better way to lower it. SmallVector Ops(Op->op_begin(), Op->op_begin() + NumElts); EVT ExtVT = VT.getVectorElementType(); EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2); SDValue Lower = DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2)); if (Lower.getOpcode() == ISD::BUILD_VECTOR) Lower = LowerBUILD_VECTOR(Lower, DAG, ST); SDValue Upper = DAG.getBuildVector( HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2)); if (Upper.getOpcode() == ISD::BUILD_VECTOR) Upper = LowerBUILD_VECTOR(Upper, DAG, ST); if (Lower && Upper) return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper); } // Vectors with 32- or 64-bit elements can be built by directly assigning // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands // will be legalized. if (EltSize >= 32) { // Do the expansion with floating-point types, since that is what the VFP // registers are defined to use, and since i64 is not legal. EVT EltVT = EVT::getFloatingPointVT(EltSize); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); SmallVector Ops; for (unsigned i = 0; i < NumElts; ++i) Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, Val); } // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we // know the default expansion would otherwise fall back on something even // worse. For a vector with one or two non-undef values, that's // scalar_to_vector for the elements followed by a shuffle (provided the // shuffle is valid for the target) and materialization element by element // on the stack followed by a load for everything else. if (!isConstant && !usesOnlyOneValue) { SDValue Vec = DAG.getUNDEF(VT); for (unsigned i = 0 ; i < NumElts; ++i) { SDValue V = Op.getOperand(i); if (V.isUndef()) continue; SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32); Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx); } return Vec; } return SDValue(); } // Gather data to see if the operation can be modelled as a // shuffle in combination with VEXTs. 
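ReconstructShuffle, below, rewrites a BUILD_VECTOR whose operands are extractelements from at most two source vectors as a single shuffle with a constant mask; lanes taken from the second source are offset by the first source's length. A simplified standalone sketch of the mask construction, ignoring the bitcast/VEXT windowing (hypothetical helper, illustration only):

#include <cstddef>

// Sketch: element i of the result reads lane SrcLane[i] of source SrcIsB[i];
// lanes from the second source B are offset by the source length, much like
// the ExtractBase computation in ReconstructShuffle.
static void buildShuffleMaskSketch(const bool *SrcIsB, const int *SrcLane,
                                   size_t NumElts, size_t SrcLen, int *Mask) {
  for (size_t i = 0; i < NumElts; ++i)
    Mask[i] = SrcLane[i] + (SrcIsB[i] ? (int)SrcLen : 0);
}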
SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!"); SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned NumElts = VT.getVectorNumElements(); struct ShuffleSourceInfo { SDValue Vec; unsigned MinElt = std::numeric_limits::max(); unsigned MaxElt = 0; // We may insert some combination of BITCASTs and VEXT nodes to force Vec to // be compatible with the shuffle we intend to construct. As a result // ShuffleVec will be some sliding window into the original Vec. SDValue ShuffleVec; // Code should guarantee that element i in Vec starts at element "WindowBase // + i * WindowScale in ShuffleVec". int WindowBase = 0; int WindowScale = 1; ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {} bool operator ==(SDValue OtherVec) { return Vec == OtherVec; } }; // First gather all vectors used as an immediate source for this BUILD_VECTOR // node. SmallVector Sources; for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); if (V.isUndef()) continue; else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) { // A shuffle can only come from building a vector from various // elements of other vectors. return SDValue(); } else if (!isa(V.getOperand(1))) { // Furthermore, shuffles require a constant mask, whereas extractelts // accept variable indices. return SDValue(); } // Add this element source to the list if it's not already there. SDValue SourceVec = V.getOperand(0); auto Source = llvm::find(Sources, SourceVec); if (Source == Sources.end()) Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec)); // Update the minimum and maximum lane number seen. unsigned EltNo = cast(V.getOperand(1))->getZExtValue(); Source->MinElt = std::min(Source->MinElt, EltNo); Source->MaxElt = std::max(Source->MaxElt, EltNo); } // Currently only do something sane when at most two source vectors // are involved. if (Sources.size() > 2) return SDValue(); // Find out the smallest element size among result and two sources, and use // it as element size to build the shuffle_vector. EVT SmallestEltTy = VT.getVectorElementType(); for (auto &Source : Sources) { EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType(); if (SrcEltTy.bitsLT(SmallestEltTy)) SmallestEltTy = SrcEltTy; } unsigned ResMultiplier = VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits(); NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits(); EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts); // If the source vector is too wide or too narrow, we may nevertheless be able // to construct a compatible shuffle either by concatenating it with UNDEF or // extracting a suitable range of elements. for (auto &Src : Sources) { EVT SrcVT = Src.ShuffleVec.getValueType(); if (SrcVT.getSizeInBits() == VT.getSizeInBits()) continue; // This stage of the search produces a source with the same element type as // the original, but with a total width matching the BUILD_VECTOR output. EVT EltVT = SrcVT.getVectorElementType(); unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits(); EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts); if (SrcVT.getSizeInBits() < VT.getSizeInBits()) { if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits()) return SDValue(); // We can pad out the smaller vector for free, so if it's part of a // shuffle... 
Src.ShuffleVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec, DAG.getUNDEF(Src.ShuffleVec.getValueType())); continue; } if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits()) return SDValue(); if (Src.MaxElt - Src.MinElt >= NumSrcElts) { // Span too large for a VEXT to cope return SDValue(); } if (Src.MinElt >= NumSrcElts) { // The extraction can just take the second half Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, DAG.getConstant(NumSrcElts, dl, MVT::i32)); Src.WindowBase = -NumSrcElts; } else if (Src.MaxElt < NumSrcElts) { // The extraction can just take the first half Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, DAG.getConstant(0, dl, MVT::i32)); } else { // An actual VEXT is needed SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, DAG.getConstant(0, dl, MVT::i32)); SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, DAG.getConstant(NumSrcElts, dl, MVT::i32)); Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1, VEXTSrc2, DAG.getConstant(Src.MinElt, dl, MVT::i32)); Src.WindowBase = -Src.MinElt; } } // Another possible incompatibility occurs from the vector element types. We // can fix this by bitcasting the source vectors to the same type we intend // for the shuffle. for (auto &Src : Sources) { EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType(); if (SrcEltTy == SmallestEltTy) continue; assert(ShuffleVT.getVectorElementType() == SmallestEltTy); Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec); Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits(); Src.WindowBase *= Src.WindowScale; } // Final sanity check before we try to actually produce a shuffle. LLVM_DEBUG(for (auto Src : Sources) assert(Src.ShuffleVec.getValueType() == ShuffleVT);); // The stars all align, our next step is to produce the mask for the shuffle. SmallVector Mask(ShuffleVT.getVectorNumElements(), -1); int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits(); for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { SDValue Entry = Op.getOperand(i); if (Entry.isUndef()) continue; auto Src = llvm::find(Sources, Entry.getOperand(0)); int EltNo = cast(Entry.getOperand(1))->getSExtValue(); // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit // trunc. So only std::min(SrcBits, DestBits) actually get defined in this // segment. EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType(); int BitsDefined = std::min(OrigEltTy.getSizeInBits(), VT.getScalarSizeInBits()); int LanesDefined = BitsDefined / BitsPerShuffleLane; // This source is expected to fill ResMultiplier lanes of the final shuffle, // starting at the appropriate offset. int *LaneMask = &Mask[i * ResMultiplier]; int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase; ExtractBase += NumElts * (Src - Sources.begin()); for (int j = 0; j < LanesDefined; ++j) LaneMask[j] = ExtractBase + j; } // Final check before we try to produce nonsense... if (!isShuffleMaskLegal(Mask, ShuffleVT)) return SDValue(); // We can't handle more than two sources. This should have already // been checked before this point. 
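  // Worked example of the mask arithmetic above (illustrative numbers only):
  // if the result is v4i32 but the smallest source element type is i16, the
  // shuffle is built as v8i16, so WindowScale = 32/16 = 2. Extracting element
  // 3 of the second of two sources (WindowBase = 0) then gives
  //
  //   ExtractBase = 3 * 2 + 0 + 8 = 14
  //
  // so the two i16 lanes that element defines get mask values 14 and 15;
  // the assert just below only re-confirms the two-source invariant
  // established earlier.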
assert(Sources.size() <= 2 && "Too many sources!"); SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) }; for (unsigned i = 0; i < Sources.size(); ++i) ShuffleOps[i] = Sources[i].ShuffleVec; SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0], ShuffleOps[1], Mask); return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle); } /// isShuffleMaskLegal - Targets can use this to indicate that they only /// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values /// are assumed to be legal. bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef M, EVT VT) const { if (VT.getVectorNumElements() == 4 && (VT.is128BitVector() || VT.is64BitVector())) { unsigned PFIndexes[4]; for (unsigned i = 0; i != 4; ++i) { if (M[i] < 0) PFIndexes[i] = 8; else PFIndexes[i] = M[i]; } // Compute the index in the perfect shuffle table. unsigned PFTableIndex = PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; unsigned Cost = (PFEntry >> 30); if (Cost <= 4) return true; } bool ReverseVEXT, isV_UNDEF; unsigned Imm, WhichResult; unsigned EltSize = VT.getScalarSizeInBits(); return (EltSize >= 32 || ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isVREVMask(M, VT, 64) || isVREVMask(M, VT, 32) || isVREVMask(M, VT, 16) || isVEXTMask(M, VT, ReverseVEXT, Imm) || isVTBLMask(M, VT) || isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF) || ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT))); } /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit /// the specified operations to build the shuffle. static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl) { unsigned OpNum = (PFEntry >> 26) & 0x0F; unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); enum { OP_COPY = 0, // Copy, used for things like to say it is <0,1,2,3> OP_VREV, OP_VDUP0, OP_VDUP1, OP_VDUP2, OP_VDUP3, OP_VEXT1, OP_VEXT2, OP_VEXT3, OP_VUZPL, // VUZP, left result OP_VUZPR, // VUZP, right result OP_VZIPL, // VZIP, left result OP_VZIPR, // VZIP, right result OP_VTRNL, // VTRN, left result OP_VTRNR // VTRN, right result }; if (OpNum == OP_COPY) { if (LHSID == (1*9+2)*9+3) return LHS; assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); return RHS; } SDValue OpLHS, OpRHS; OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); EVT VT = OpLHS.getValueType(); switch (OpNum) { default: llvm_unreachable("Unknown shuffle opcode!"); case OP_VREV: // VREV divides the vector in half and swaps within the half. 
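  // For example (illustrative): VREV64 on <4 x i16> reverses the 16-bit
  // lanes within each 64-bit chunk, so <0,1,2,3> becomes <3,2,1,0>; on a
  // 128-bit <8 x i16> the two halves are reversed independently, giving
  // <3,2,1,0,7,6,5,4>. A scalar model of the lane remapping, with
  // GroupSize = ReversedBits / EltBits (a hedged sketch):
  //
  //   unsigned vrevLane(unsigned I, unsigned GroupSize) {
  //     return (I & ~(GroupSize - 1)) + (GroupSize - 1) - (I & (GroupSize - 1));
  //   }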
if (VT.getVectorElementType() == MVT::i32 || VT.getVectorElementType() == MVT::f32) return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); // vrev <4 x i16> -> VREV32 if (VT.getVectorElementType() == MVT::i16) return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS); // vrev <4 x i8> -> VREV16 assert(VT.getVectorElementType() == MVT::i8); return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS); case OP_VDUP0: case OP_VDUP1: case OP_VDUP2: case OP_VDUP3: return DAG.getNode(ARMISD::VDUPLANE, dl, VT, OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32)); case OP_VEXT1: case OP_VEXT2: case OP_VEXT3: return DAG.getNode(ARMISD::VEXT, dl, VT, OpLHS, OpRHS, DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32)); case OP_VUZPL: case OP_VUZPR: return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); case OP_VZIPL: case OP_VZIPR: return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); case OP_VTRNL: case OP_VTRNR: return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); } } static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, ArrayRef ShuffleMask, SelectionDAG &DAG) { // Check to see if we can use the VTBL instruction. SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); SDLoc DL(Op); SmallVector VTBLMask; for (ArrayRef::iterator I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I) VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32)); if (V2.getNode()->isUndef()) return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, DAG.getBuildVector(MVT::v8i8, DL, VTBLMask)); return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, DAG.getBuildVector(MVT::v8i8, DL, VTBLMask)); } static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, SelectionDAG &DAG) { SDLoc DL(Op); SDValue OpLHS = Op.getOperand(0); EVT VT = OpLHS.getValueType(); assert((VT == MVT::v8i16 || VT == MVT::v16i8) && "Expect an v8i16/v16i8 type"); OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS); // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now, // extract the first 8 bytes into the top double word and the last 8 bytes // into the bottom double word. The v8i16 case is similar. unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4; return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS, DAG.getConstant(ExtractNum, DL, MVT::i32)); } static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); SDLoc dl(Op); EVT VT = Op.getValueType(); ShuffleVectorSDNode *SVN = cast(Op.getNode()); // Convert shuffles that are directly supported on NEON to target-specific // DAG nodes, instead of keeping them as shuffles and matching them again // during code selection. This is more efficient and avoids the possibility // of inconsistencies between legalization and selection. // FIXME: floating-point vectors should be canonicalized to integer vectors // of the same time so that they get CSEd properly. ArrayRef ShuffleMask = SVN->getMask(); unsigned EltSize = VT.getScalarSizeInBits(); if (EltSize <= 32) { if (SVN->isSplat()) { int Lane = SVN->getSplatIndex(); // If this is undef splat, generate it via "just" vdup, if possible. if (Lane == -1) Lane = 0; // Test if V1 is a SCALAR_TO_VECTOR. 
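  // Illustrative scalar model of a splat of lane L (not code from this file):
  //
  //   for (unsigned I = 0; I < NumElts; ++I)
  //     Out[I] = In[L];
  //
  // If the splatted value is still available as a scalar (the
  // SCALAR_TO_VECTOR / trivial BUILD_VECTOR cases tested below), VDUP can
  // broadcast it straight from a core register; otherwise VDUPLANE reads
  // lane L back out of the vector register.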
if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); } // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR // (and probably will turn into a SCALAR_TO_VECTOR once legalization // reaches it). if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && !isa(V1.getOperand(0))) { bool IsScalarToVector = true; for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) if (!V1.getOperand(i).isUndef()) { IsScalarToVector = false; break; } if (IsScalarToVector) return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); } return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, DAG.getConstant(Lane, dl, MVT::i32)); } bool ReverseVEXT; unsigned Imm; if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { if (ReverseVEXT) std::swap(V1, V2); return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, DAG.getConstant(Imm, dl, MVT::i32)); } if (isVREVMask(ShuffleMask, VT, 64)) return DAG.getNode(ARMISD::VREV64, dl, VT, V1); if (isVREVMask(ShuffleMask, VT, 32)) return DAG.getNode(ARMISD::VREV32, dl, VT, V1); if (isVREVMask(ShuffleMask, VT, 16)) return DAG.getNode(ARMISD::VREV16, dl, VT, V1); if (V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) { return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1, DAG.getConstant(Imm, dl, MVT::i32)); } // Check for Neon shuffles that modify both input vectors in place. // If both results are used, i.e., if there are two shuffles with the same // source operands and with masks corresponding to both results of one of // these operations, DAG memoization will ensure that a single node is // used for both shuffles. unsigned WhichResult; bool isV_UNDEF; if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask( ShuffleMask, VT, WhichResult, isV_UNDEF)) { if (isV_UNDEF) V2 = V1; return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2) .getValue(WhichResult); } // Also check for these shuffles through CONCAT_VECTORS: we canonicalize // shuffles that produce a result larger than their operands with: // shuffle(concat(v1, undef), concat(v2, undef)) // -> // shuffle(concat(v1, v2), undef) // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine). // // This is useful in the general case, but there are special cases where // native shuffles produce larger results: the two-result ops. // // Look through the concat when lowering them: // shuffle(concat(v1, v2), undef) // -> // concat(VZIP(v1, v2):0, :1) // if (V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) { SDValue SubV1 = V1->getOperand(0); SDValue SubV2 = V1->getOperand(1); EVT SubVT = SubV1.getValueType(); // We expect these to have been canonicalized to -1. assert(llvm::all_of(ShuffleMask, [&](int i) { return i < (int)VT.getVectorNumElements(); }) && "Unexpected shuffle index into UNDEF operand!"); if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask( ShuffleMask, SubVT, WhichResult, isV_UNDEF)) { if (isV_UNDEF) SubV2 = SubV1; assert((WhichResult == 0) && "In-place shuffle of concat can only have one result!"); SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT), SubV1, SubV2); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0), Res.getValue(1)); } } } // If the shuffle is not directly supported and it has 4 elements, use // the PerfectShuffle-generated table to synthesize it from other shuffles. 
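  // The table is indexed by the four mask entries encoded base 9, with 8
  // standing for an undef/out-of-range lane, exactly as computed below; the
  // top two bits of each entry are the cost. A minimal standalone sketch of
  // the encoding (illustrative only):
  //
  //   unsigned perfectShuffleIndex(const int M[4]) {
  //     unsigned Idx = 0;
  //     for (int I = 0; I < 4; ++I)
  //       Idx = Idx * 9 + (M[I] < 0 ? 8u : (unsigned)M[I]);
  //     return Idx;              // == M0*9*9*9 + M1*9*9 + M2*9 + M3
  //   }
  //   unsigned Cost = PerfectShuffleTable[Idx] >> 30;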
unsigned NumElts = VT.getVectorNumElements(); if (NumElts == 4) { unsigned PFIndexes[4]; for (unsigned i = 0; i != 4; ++i) { if (ShuffleMask[i] < 0) PFIndexes[i] = 8; else PFIndexes[i] = ShuffleMask[i]; } // Compute the index in the perfect shuffle table. unsigned PFTableIndex = PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; unsigned Cost = (PFEntry >> 30); if (Cost <= 4) return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); } // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. if (EltSize >= 32) { // Do the expansion with floating-point types, since that is what the VFP // registers are defined to use, and since i64 is not legal. EVT EltVT = EVT::getFloatingPointVT(EltSize); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1); V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2); SmallVector Ops; for (unsigned i = 0; i < NumElts; ++i) { if (ShuffleMask[i] < 0) Ops.push_back(DAG.getUNDEF(EltVT)); else Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ShuffleMask[i] < (int)NumElts ? V1 : V2, DAG.getConstant(ShuffleMask[i] & (NumElts-1), dl, MVT::i32))); } SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, Val); } if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT)) return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG); if (VT == MVT::v8i8) if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG)) return NewOp; return SDValue(); } static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // INSERT_VECTOR_ELT is legal only for immediate indexes. SDValue Lane = Op.getOperand(2); if (!isa(Lane)) return SDValue(); return Op; } static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // EXTRACT_VECTOR_ELT is legal only for immediate indexes. SDValue Lane = Op.getOperand(1); if (!isa(Lane)) return SDValue(); SDValue Vec = Op.getOperand(0); if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) { SDLoc dl(Op); return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); } return Op; } static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { // The only time a CONCAT_VECTORS operation can have legal types is when // two 64-bit vectors are concatenated to a 128-bit vector. assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && "unexpected CONCAT_VECTORS"); SDLoc dl(Op); SDValue Val = DAG.getUNDEF(MVT::v2f64); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); if (!Op0.isUndef()) Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0), DAG.getIntPtrConstant(0, dl)); if (!Op1.isUndef()) Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1), DAG.getIntPtrConstant(1, dl)); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val); } /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each /// element has been zero/sign-extended, depending on the isSigned parameter, /// from an integer type half its size. static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, bool isSigned) { // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32. 
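  // In that legalized form each i64 element is visible as a (Lo, Hi) pair of
  // i32 constants, and the check below reduces to (a hedged scalar model,
  // assuming arithmetic shift of negative values, as on ARM):
  //
  //   bool isSExtPair(int32_t Lo, int32_t Hi) { return Hi == (Lo >> 31); }
  //   bool isZExtPair(int32_t Hi)             { return Hi == 0; }
  //
  // i.e. the high word must be the sign fill of the low word for a
  // sign-extended element, and zero for a zero-extended one.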
EVT VT = N->getValueType(0); if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) { SDNode *BVN = N->getOperand(0).getNode(); if (BVN->getValueType(0) != MVT::v4i32 || BVN->getOpcode() != ISD::BUILD_VECTOR) return false; unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0; unsigned HiElt = 1 - LoElt; ConstantSDNode *Lo0 = dyn_cast(BVN->getOperand(LoElt)); ConstantSDNode *Hi0 = dyn_cast(BVN->getOperand(HiElt)); ConstantSDNode *Lo1 = dyn_cast(BVN->getOperand(LoElt+2)); ConstantSDNode *Hi1 = dyn_cast(BVN->getOperand(HiElt+2)); if (!Lo0 || !Hi0 || !Lo1 || !Hi1) return false; if (isSigned) { if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 && Hi1->getSExtValue() == Lo1->getSExtValue() >> 32) return true; } else { if (Hi0->isNullValue() && Hi1->isNullValue()) return true; } return false; } if (N->getOpcode() != ISD::BUILD_VECTOR) return false; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDNode *Elt = N->getOperand(i).getNode(); if (ConstantSDNode *C = dyn_cast(Elt)) { unsigned EltSize = VT.getScalarSizeInBits(); unsigned HalfSize = EltSize / 2; if (isSigned) { if (!isIntN(HalfSize, C->getSExtValue())) return false; } else { if (!isUIntN(HalfSize, C->getZExtValue())) return false; } continue; } return false; } return true; } /// isSignExtended - Check if a node is a vector value that is sign-extended /// or a constant BUILD_VECTOR with sign-extended elements. static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N)) return true; if (isExtendedBUILD_VECTOR(N, DAG, true)) return true; return false; } /// isZeroExtended - Check if a node is a vector value that is zero-extended /// or a constant BUILD_VECTOR with zero-extended elements. static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N)) return true; if (isExtendedBUILD_VECTOR(N, DAG, false)) return true; return false; } static EVT getExtensionTo64Bits(const EVT &OrigVT) { if (OrigVT.getSizeInBits() >= 64) return OrigVT; assert(OrigVT.isSimple() && "Expecting a simple value type"); MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy; switch (OrigSimpleTy) { default: llvm_unreachable("Unexpected Vector Type"); case MVT::v2i8: case MVT::v2i16: return MVT::v2i32; case MVT::v4i8: return MVT::v4i16; } } /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL. /// We insert the required extension here to get the vector to fill a D register. static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode) { // The vector originally had a size of OrigTy. It was then extended to ExtTy. // We expect the ExtTy to be 128-bits total. If the OrigTy is less than // 64-bits we need to insert a new extension so that it will be 64-bits. assert(ExtTy.is128BitVector() && "Unexpected extension size"); if (OrigTy.getSizeInBits() >= 64) return N; // Must extend size to at least 64 bits to be used as an operand for VMULL. EVT NewVT = getExtensionTo64Bits(OrigTy); return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N); } /// SkipLoadExtensionForVMULL - return a load of the original vector size that /// does not do any sign/zero extension. If the original vector is less /// than 64 bits, an appropriate extension will be added after the load to /// reach a total size of 64 bits. 
We have to add the extension separately /// because ARM does not have a sign/zero extending load for vectors. static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT()); // The load already has the right type. if (ExtendedTy == LD->getMemoryVT()) return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->getAlignment(), LD->getMemOperand()->getFlags()); // We need to create a zextload/sextload. We cannot just create a load // followed by a zext/zext node because LowerMUL is also run during normal // operation legalization where we can't create illegal types. return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy, LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->getMemoryVT(), LD->getAlignment(), LD->getMemOperand()->getFlags()); } /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, /// extending load, or BUILD_VECTOR with extended elements, return the /// unextended value. The unextended vector should be 64 bits so that it can /// be used as an operand to a VMULL instruction. If the original vector size /// before extension is less than 64 bits we add a an extension to resize /// the vector to 64 bits. static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) return AddRequiredExtensionForVMULL(N->getOperand(0), DAG, N->getOperand(0)->getValueType(0), N->getValueType(0), N->getOpcode()); if (LoadSDNode *LD = dyn_cast(N)) { assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) && "Expected extending load"); SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1)); unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; SDValue extLoad = DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad); return newLoad; } // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will // have been legalized as a BITCAST from v4i32. if (N->getOpcode() == ISD::BITCAST) { SDNode *BVN = N->getOperand(0).getNode(); assert(BVN->getOpcode() == ISD::BUILD_VECTOR && BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0; return DAG.getBuildVector( MVT::v2i32, SDLoc(N), {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)}); } // Construct a new BUILD_VECTOR with elements truncated to half the size. assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); EVT VT = N->getValueType(0); unsigned EltSize = VT.getScalarSizeInBits() / 2; unsigned NumElts = VT.getVectorNumElements(); MVT TruncVT = MVT::getIntegerVT(EltSize); SmallVector Ops; SDLoc dl(N); for (unsigned i = 0; i != NumElts; ++i) { ConstantSDNode *C = cast(N->getOperand(i)); const APInt &CInt = C->getAPIntValue(); // Element types smaller than 32 bits are not legal, so use i32 elements. // The values are implicitly truncated so sext vs. zext doesn't matter. 
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32)); } return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops); } static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { unsigned Opcode = N->getOpcode(); if (Opcode == ISD::ADD || Opcode == ISD::SUB) { SDNode *N0 = N->getOperand(0).getNode(); SDNode *N1 = N->getOperand(1).getNode(); return N0->hasOneUse() && N1->hasOneUse() && isSignExtended(N0, DAG) && isSignExtended(N1, DAG); } return false; } static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { unsigned Opcode = N->getOpcode(); if (Opcode == ISD::ADD || Opcode == ISD::SUB) { SDNode *N0 = N->getOperand(0).getNode(); SDNode *N1 = N->getOperand(1).getNode(); return N0->hasOneUse() && N1->hasOneUse() && isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); } return false; } static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { // Multiplications are only custom-lowered for 128-bit vectors so that // VMULL can be detected. Otherwise v2i64 multiplications are not legal. EVT VT = Op.getValueType(); assert(VT.is128BitVector() && VT.isInteger() && "unexpected type for custom-lowering ISD::MUL"); SDNode *N0 = Op.getOperand(0).getNode(); SDNode *N1 = Op.getOperand(1).getNode(); unsigned NewOpc = 0; bool isMLA = false; bool isN0SExt = isSignExtended(N0, DAG); bool isN1SExt = isSignExtended(N1, DAG); if (isN0SExt && isN1SExt) NewOpc = ARMISD::VMULLs; else { bool isN0ZExt = isZeroExtended(N0, DAG); bool isN1ZExt = isZeroExtended(N1, DAG); if (isN0ZExt && isN1ZExt) NewOpc = ARMISD::VMULLu; else if (isN1SExt || isN1ZExt) { // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these // into (s/zext A * s/zext C) + (s/zext B * s/zext C) if (isN1SExt && isAddSubSExt(N0, DAG)) { NewOpc = ARMISD::VMULLs; isMLA = true; } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { NewOpc = ARMISD::VMULLu; isMLA = true; } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { std::swap(N0, N1); NewOpc = ARMISD::VMULLu; isMLA = true; } } if (!NewOpc) { if (VT == MVT::v2i64) // Fall through to expand this. It is not legal. return SDValue(); else // Other vector multiplications are legal. return Op; } } // Legalize to a VMULL instruction. SDLoc DL(Op); SDValue Op0; SDValue Op1 = SkipExtensionForVMULL(N1, DAG); if (!isMLA) { Op0 = SkipExtensionForVMULL(N0, DAG); assert(Op0.getValueType().is64BitVector() && Op1.getValueType().is64BitVector() && "unexpected types for extended operands to VMULL"); return DAG.getNode(NewOpc, DL, VT, Op0, Op1); } // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during // isel lowering to take advantage of no-stall back to back vmul + vmla. // vmull q0, d4, d6 // vmlal q0, d5, d6 // is faster than // vaddl q0, d4, d5 // vmovl q1, d6 // vmul q0, q0, q1 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG); SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG); EVT Op1VT = Op1.getValueType(); return DAG.getNode(N0->getOpcode(), DL, VT, DAG.getNode(NewOpc, DL, VT, DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), DAG.getNode(NewOpc, DL, VT, DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); } static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl, SelectionDAG &DAG) { // TODO: Should this propagate fast-math-flags? 
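  // (Note on the MLA rewrite in LowerMUL above: it is plain distributivity in
  // the widened per-lane type,
  //
  //   (sext(a) + sext(b)) * sext(c) == sext(a)*sext(c) + sext(b)*sext(c),
  //
  // and likewise for zext; each product on the right is a single VMULL and
  // the final add pairs with it as VMLAL, which is the vmull/vmlal sequence
  // shown in the comment there.)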
  // Convert to float
  // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
  // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
  X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
  Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
  X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
  Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);

  // Get reciprocal estimate.
  // float4 recip = vrecpeq_f32(yf);
  Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                  DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
                  Y);
  // Because char has a smaller range than uchar, we can actually get away
  // without any newton steps. This requires that we use a weird bias
  // of 0xb000, however (again, this has been exhaustively tested).
  // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
  X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
  Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
  X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);

  // Convert back to short.
  X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
  X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
  return X;
}

static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
                               SelectionDAG &DAG) {
  // TODO: Should this propagate fast-math-flags?
  SDValue N2;
  // Convert to float.
  // float4 yf = vcvt_f32_s32(vmovl_s16(y));
  // float4 xf = vcvt_f32_s32(vmovl_s16(x));
  N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
  N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
  N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);

  // Use reciprocal estimate and one refinement step.
  // float4 recip = vrecpeq_f32(yf);
  // recip *= vrecpsq_f32(yf, recip);
  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
                   N1);
  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                   DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
                   N1, N2);
  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
  // Because short has a smaller range than ushort, we can actually get away
  // with only a single newton step. This requires that we use a weird bias
  // of 89, however (again, this has been exhaustively tested).
  // float4 result = as_float4(as_int4(xf*recip) + 0x89);
  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
  N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);

  // Convert back to integer and return.
// return vmovn_s32(vcvt_s32_f32(result)); N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); return N0; } static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); assert((VT == MVT::v4i16 || VT == MVT::v8i8) && "unexpected type for custom-lowering ISD::SDIV"); SDLoc dl(Op); SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2, N3; if (VT == MVT::v8i8) { N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0); N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1); N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(4, dl)); N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, DAG.getIntPtrConstant(4, dl)); N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(0, dl)); N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, DAG.getIntPtrConstant(0, dl)); N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); N0 = LowerCONCAT_VECTORS(N0, DAG); N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0); return N0; } return LowerSDIV_v4i16(N0, N1, dl, DAG); } static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { // TODO: Should this propagate fast-math-flags? EVT VT = Op.getValueType(); assert((VT == MVT::v4i16 || VT == MVT::v8i8) && "unexpected type for custom-lowering ISD::UDIV"); SDLoc dl(Op); SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2, N3; if (VT == MVT::v8i8) { N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0); N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1); N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(4, dl)); N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, DAG.getIntPtrConstant(4, dl)); N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(0, dl)); N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, DAG.getIntPtrConstant(0, dl)); N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); N0 = LowerCONCAT_VECTORS(N0, DAG); N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl, MVT::i32), N0); return N0; } // v4i16 sdiv ... Convert to float. // float4 yf = vcvt_f32_s32(vmovl_u16(y)); // float4 xf = vcvt_f32_s32(vmovl_u16(x)); N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0); N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1); N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); // Use reciprocal estimate and two refinement steps. 
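  // Each refinement below is one Newton-Raphson step for 1/y: VRECPS(a, b)
  // evaluates 2 - a*b, so the estimate is updated as (an illustrative scalar
  // model, assuming that VRECPS semantics):
  //
  //   float refine(float Y, float R) { return R * (2.0f - Y * R); }
  //
  // Each step roughly doubles the number of correct bits; the unsigned i16
  // range needs two steps where the signed v4i16 path above used only one.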
// float4 recip = vrecpeq_f32(yf); // recip *= vrecpsq_f32(yf, recip); // recip *= vrecpsq_f32(yf, recip); N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32), BN1); N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32), BN1, N2); N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32), BN1, N2); N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); // Simply multiplying by the reciprocal estimate can leave us a few ulps // too low, so we add 2 ulps (exhaustive testing shows that this is enough, // and that it will never cause us to return an answer too large). // float4 result = as_float4(as_int4(xf*recip) + 2); N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); N1 = DAG.getConstant(2, dl, MVT::v4i32); N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); // Convert back to integer and return. // return vmovn_u32(vcvt_s32_f32(result)); N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); return N0; } static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) { SDNode *N = Op.getNode(); EVT VT = N->getValueType(0); SDVTList VTs = DAG.getVTList(VT, MVT::i32); SDValue Carry = Op.getOperand(2); SDLoc DL(Op); SDValue Result; if (Op.getOpcode() == ISD::ADDCARRY) { // This converts the boolean value carry into the carry flag. Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG); // Do the addition proper using the carry flag we wanted. Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0), Op.getOperand(1), Carry); // Now convert the carry flag into a boolean value. Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG); } else { // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we // have to invert the carry first. Carry = DAG.getNode(ISD::SUB, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32), Carry); // This converts the boolean value carry into the carry flag. Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG); // Do the subtraction proper using the carry flag we wanted. Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0), Op.getOperand(1), Carry); // Now convert the carry flag into a boolean value. Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG); // But the carry returned by ARMISD::SUBE is not a borrow as expected // by ISD::SUBCARRY, so compute 1 - C. Carry = DAG.getNode(ISD::SUB, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32), Carry); } // Return both values. return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry); } SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetDarwin()); // For iOS, we want to call an alternative entry point: __sincos_stret, // return values are passed via sret. SDLoc dl(Op); SDValue Arg = Op.getOperand(0); EVT ArgVT = Arg.getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); auto PtrVT = getPointerTy(DAG.getDataLayout()); MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Pair of floats / doubles used to pass the result. 
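  // When the sret path is taken (APCS), the callee fills in a two-field
  // struct through the pointer; a hedged sketch of the layout and call shape
  // (the struct name and prototype are illustrative, not the actual libcall
  // declaration):
  //
  //   struct SinCosRet { float Sin; float Cos; };   // f64 variant: doubles
  //   void __sincos_stret(struct SinCosRet *Ret, float Arg);
  //
  // The two loads at the end of this function read Sin from offset 0 and Cos
  // from offset getStoreSize(ArgVT), i.e. sizeof(float) or sizeof(double).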
Type *RetTy = StructType::get(ArgTy, ArgTy); auto &DL = DAG.getDataLayout(); ArgListTy Args; bool ShouldUseSRet = Subtarget->isAPCS_ABI(); SDValue SRet; if (ShouldUseSRet) { // Create stack object for sret. const uint64_t ByteSize = DL.getTypeAllocSize(RetTy); const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy); int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false); SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL)); ArgListEntry Entry; Entry.Node = SRet; Entry.Ty = RetTy->getPointerTo(); Entry.IsSExt = false; Entry.IsZExt = false; Entry.IsSRet = true; Args.push_back(Entry); RetTy = Type::getVoidTy(*DAG.getContext()); } ArgListEntry Entry; Entry.Node = Arg; Entry.Ty = ArgTy; Entry.IsSExt = false; Entry.IsZExt = false; Args.push_back(Entry); RTLIB::Libcall LC = (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32; const char *LibcallName = getLibcallName(LC); CallingConv::ID CC = getLibcallCallingConv(LC); SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL)); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(DAG.getEntryNode()) .setCallee(CC, RetTy, Callee, std::move(Args)) .setDiscardResult(ShouldUseSRet); std::pair CallResult = LowerCallTo(CLI); if (!ShouldUseSRet) return CallResult.first; SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo()); // Address of cos field. SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet, DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl)); SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo()); SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, LoadSin.getValue(0), LoadCos.getValue(0)); } SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG, bool Signed, SDValue &Chain) const { EVT VT = Op.getValueType(); assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected type for custom lowering DIV"); SDLoc dl(Op); const auto &DL = DAG.getDataLayout(); const auto &TLI = DAG.getTargetLoweringInfo(); const char *Name = nullptr; if (Signed) Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64"; else Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64"; SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL)); ARMTargetLowering::ArgListTy Args; for (auto AI : {1, 0}) { ArgListEntry Arg; Arg.Node = Op.getOperand(AI); Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext()); Args.push_back(Arg); } CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(Chain) .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()), ES, std::move(Args)); return LowerCallTo(CLI).first; } // This is a code size optimisation: return the original SDIV node to // DAGCombiner when we don't want to expand SDIV into a sequence of // instructions, and an empty node otherwise which will cause the // SDIV to be expanded in DAGCombine. SDValue ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl &Created) const { // TODO: Support SREM if (N->getOpcode() != ISD::SDIV) return SDValue(); const auto &ST = static_cast(DAG.getSubtarget()); const auto &MF = DAG.getMachineFunction(); const bool MinSize = MF.getFunction().optForMinSize(); const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode() : ST.hasDivideInARMMode(); // Don't touch vector types; rewriting this may lead to scalarizing // the int divs. 
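  // For reference, the expansion being gated here (returning the original
  // SDIV keeps the divide instruction, returning an empty SDValue lets
  // DAGCombiner expand it) is the usual add-bias-then-shift sequence for
  // X / (1 << K); a hedged scalar sketch (assuming arithmetic right shift of
  // negative values, as on ARM), not code from this file:
  //
  //   int32_t sdiv_pow2(int32_t X, unsigned K) {
  //     int32_t Bias = (X >> 31) & ((1 << K) - 1);  // add 2^K-1 only if X < 0
  //     return (X + Bias) >> K;                     // truncates towards zero
  //   }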
if (N->getOperand(0).getValueType().isVector()) return SDValue(); // Bail if MinSize is not set, and also for both ARM and Thumb mode we need // hwdiv support for this to be really profitable. if (!(MinSize && HasDivide)) return SDValue(); // ARM mode is a bit simpler than Thumb: we can handle large power // of 2 immediates with 1 mov instruction; no further checks required, // just return the sdiv node. if (!ST.isThumb()) return SDValue(N, 0); // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV, // and thus lose the code size benefits of a MOVS that requires only 2. // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here, // but as it's doing exactly this, it's not worth the trouble to get TTI. if (Divisor.sgt(128)) return SDValue(); return SDValue(N, 0); } SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const { assert(Op.getValueType() == MVT::i32 && "unexpected type for custom lowering DIV"); SDLoc dl(Op); SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other, DAG.getEntryNode(), Op.getOperand(1)); return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK); } static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) { SDLoc DL(N); SDValue Op = N->getOperand(1); if (N->getValueType(0) == MVT::i32) return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op); SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op, DAG.getConstant(0, DL, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op, DAG.getConstant(1, DL, MVT::i32)); return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi)); } void ARMTargetLowering::ExpandDIV_Windows( SDValue Op, SelectionDAG &DAG, bool Signed, SmallVectorImpl &Results) const { const auto &DL = DAG.getDataLayout(); const auto &TLI = DAG.getTargetLoweringInfo(); assert(Op.getValueType() == MVT::i64 && "unexpected type for custom lowering DIV"); SDLoc dl(Op); SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode()); SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK); SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result); SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result, DAG.getConstant(32, dl, TLI.getPointerTy(DL))); Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper); Results.push_back(Lower); Results.push_back(Upper); } static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { if (isStrongerThanMonotonic(cast(Op)->getOrdering())) // Acquire/Release load/store is not legal for targets without a dmb or // equivalent available. return SDValue(); // Monotonic load/store is legal for all targets. 
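  // For example (illustrative):
  //
  //   load atomic i32, monotonic  ->  plain LDR, legal as-is right here
  //   load atomic i32, acquire    ->  SDValue() above; lowered elsewhere
  //                                   with the required DMB barriers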
return Op; } static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { SDLoc DL(N); // Under Power Management extensions, the cycle-count is: // mrc p15, #0, , c9, c13, #0 SDValue Ops[] = { N->getOperand(0), // Chain DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32), DAG.getConstant(15, DL, MVT::i32), DAG.getConstant(0, DL, MVT::i32), DAG.getConstant(9, DL, MVT::i32), DAG.getConstant(13, DL, MVT::i32), DAG.getConstant(0, DL, MVT::i32) }; SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList(MVT::i32, MVT::Other), Ops); Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32, DAG.getConstant(0, DL, MVT::i32))); Results.push_back(Cycles32.getValue(1)); } static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) { SDLoc dl(V.getNode()); SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32); SDValue VHi = DAG.getAnyExtOrTrunc( DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)), dl, MVT::i32); bool isBigEndian = DAG.getDataLayout().isBigEndian(); if (isBigEndian) std::swap (VLo, VHi); SDValue RegClass = DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32); SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32); SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32); const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 }; return SDValue( DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0); } static void ReplaceCMP_SWAP_64Results(SDNode *N, SmallVectorImpl & Results, SelectionDAG &DAG) { assert(N->getValueType(0) == MVT::i64 && "AtomicCmpSwap on types less than 64 should be legal"); SDValue Ops[] = {N->getOperand(1), createGPRPairNode(DAG, N->getOperand(2)), createGPRPairNode(DAG, N->getOperand(3)), N->getOperand(0)}; SDNode *CmpSwap = DAG.getMachineNode( ARM::CMP_SWAP_64, SDLoc(N), DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops); MachineMemOperand *MemOp = cast(N)->getMemOperand(); DAG.setNodeMemRefs(cast(CmpSwap), {MemOp}); bool isBigEndian = DAG.getDataLayout().isBigEndian(); Results.push_back( DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0, SDLoc(N), MVT::i32, SDValue(CmpSwap, 0))); Results.push_back( DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1, SDLoc(N), MVT::i32, SDValue(CmpSwap, 0))); Results.push_back(SDValue(CmpSwap, 2)); } static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget, SelectionDAG &DAG) { const auto &TLI = DAG.getTargetLoweringInfo(); assert(Subtarget.getTargetTriple().isOSMSVCRT() && "Custom lowering is MSVCRT specific!"); SDLoc dl(Op); SDValue Val = Op.getOperand(0); MVT Ty = Val->getSimpleValueType(0); SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, dl, Ty, Op.getOperand(1)); SDValue Callee = DAG.getExternalSymbol(Ty == MVT::f32 ? "powf" : "pow", TLI.getPointerTy(DAG.getDataLayout())); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Val; Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext()); Entry.IsZExt = true; Args.push_back(Entry); Entry.Node = Exponent; Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext()); Entry.IsZExt = true; Args.push_back(Entry); Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext()); // In the in-chain to the call is the entry node If we are emitting a // tailcall, the chain will be mutated if the node has a non-entry input // chain. 
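  // The call built below is effectively (an illustrative model; presumably
  // the usual __powi* helpers are not available in MSVCRT, hence the custom
  // lowering):
  //
  //   double powi(double X, int N) { return pow(X, (double)N); }  // f32: powf
  //
  // i.e. the integer exponent has already been converted with SINT_TO_FP and
  // the whole node becomes an ordinary pow/powf libcall, emitted as a tail
  // call when the position allows it.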
SDValue InChain = DAG.getEntryNode(); SDValue TCChain = InChain; const Function &F = DAG.getMachineFunction().getFunction(); bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) && F.getReturnType() == LCRTy; if (IsTC) InChain = TCChain; TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(InChain) .setCallee(CallingConv::ARM_AAPCS_VFP, LCRTy, Callee, std::move(Args)) .setTailCall(IsTC); std::pair CI = TLI.LowerCallTo(CLI); // Return the chain (the DAG root) if it is a tail call return !CI.second.getNode() ? DAG.getRoot() : CI.first; } SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump()); switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget); case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, Subtarget); case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget); case ISD::SHL: case ISD::SRL: case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); case ISD::SREM: return LowerREM(Op.getNode(), DAG); case ISD::UREM: return LowerREM(Op.getNode(), DAG); case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); case ISD::SRL_PARTS: case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget); case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget); case ISD::SETCC: return LowerVSETCC(Op, DAG); case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG); case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); case ISD::SDIV: if (Subtarget->isTargetWindows() && !Op.getValueType().isVector()) return LowerDIV_Windows(Op, DAG, /* Signed */ true); return LowerSDIV(Op, DAG); case ISD::UDIV: if (Subtarget->isTargetWindows() && !Op.getValueType().isVector()) return 
LowerDIV_Windows(Op, DAG, /* Signed */ false); return LowerUDIV(Op, DAG); case ISD::ADDCARRY: case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG); case ISD::SADDO: case ISD::SSUBO: return LowerSignedALUO(Op, DAG); case ISD::UADDO: case ISD::USUBO: return LowerUnsignedALUO(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); case ISD::SDIVREM: case ISD::UDIVREM: return LowerDivRem(Op, DAG); case ISD::DYNAMIC_STACKALLOC: if (Subtarget->isTargetWindows()) return LowerDYNAMIC_STACKALLOC(Op, DAG); llvm_unreachable("Don't know how to custom lower this!"); case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); case ISD::FPOWI: return LowerFPOWI(Op, *Subtarget, DAG); case ARMISD::WIN__DBZCHK: return SDValue(); } } static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) { unsigned IntNo = cast(N->getOperand(0))->getZExtValue(); unsigned Opc = 0; if (IntNo == Intrinsic::arm_smlald) Opc = ARMISD::SMLALD; else if (IntNo == Intrinsic::arm_smlaldx) Opc = ARMISD::SMLALDX; else if (IntNo == Intrinsic::arm_smlsld) Opc = ARMISD::SMLSLD; else if (IntNo == Intrinsic::arm_smlsldx) Opc = ARMISD::SMLSLDX; else return; SDLoc dl(N); SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3), DAG.getConstant(0, dl, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3), DAG.getConstant(1, dl, MVT::i32)); SDValue LongMul = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::i32), N->getOperand(1), N->getOperand(2), Lo, Hi); Results.push_back(LongMul.getValue(0)); Results.push_back(LongMul.getValue(1)); } /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. void ARMTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { SDValue Res; switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom expand this!"); case ISD::READ_REGISTER: ExpandREAD_REGISTER(N, Results, DAG); break; case ISD::BITCAST: Res = ExpandBITCAST(N, DAG, Subtarget); break; case ISD::SRL: case ISD::SRA: Res = Expand64BitShift(N, DAG, Subtarget); break; case ISD::SREM: case ISD::UREM: Res = LowerREM(N, DAG); break; case ISD::SDIVREM: case ISD::UDIVREM: Res = LowerDivRem(SDValue(N, 0), DAG); assert(Res.getNumOperands() == 2 && "DivRem needs two values"); Results.push_back(Res.getValue(0)); Results.push_back(Res.getValue(1)); return; case ISD::READCYCLECOUNTER: ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); return; case ISD::UDIV: case ISD::SDIV: assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows"); return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV, Results); case ISD::ATOMIC_CMP_SWAP: ReplaceCMP_SWAP_64Results(N, Results, DAG); return; case ISD::INTRINSIC_WO_CHAIN: return ReplaceLongIntrinsic(N, Results, DAG); } if (Res.getNode()) Results.push_back(Res); } //===----------------------------------------------------------------------===// // ARM Scheduler Hooks //===----------------------------------------------------------------------===// /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and /// registers the function context. 
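// Roughly, for the Thumb2 path (an illustrative sketch of the sequence built
// below; byte offset 36 is the jump-buffer pc slot used by this code, not a
// documented layout):
//
//   Addr  = <constant pool entry for DispatchBB>   // PC-relative load
//   Addr |= 1;                                     // stay in Thumb state
//   Addr += PC;                                    // materialise the address
//   *(FunctionContext + 36) = Addr;                // register the dispatch pc
//
// The Thumb1 and ARM variants below build the same address with different
// instruction sequences (ARM does not need the low-bit trick).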
void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, MachineBasicBlock *DispatchBB, int FI) const { assert(!Subtarget->isROPI() && !Subtarget->isRWPI() && "ROPI/RWPI not currently supported with SjLj"); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); MachineConstantPool *MCP = MF->getConstantPool(); ARMFunctionInfo *AFI = MF->getInfo(); const Function &F = MF->getFunction(); bool isThumb = Subtarget->isThumb(); bool isThumb2 = Subtarget->isThumb2(); unsigned PCLabelId = AFI->createPICLabelUId(); unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8; ARMConstantPoolValue *CPV = ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj); unsigned CPI = MCP->getConstantPoolIndex(CPV, 4); const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass; // Grab constant pool and fixed stack memory operands. MachineMemOperand *CPMMO = MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF), MachineMemOperand::MOLoad, 4, 4); MachineMemOperand *FIMMOSt = MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4, 4); // Load the address of the dispatch MBB into the jump buffer. if (isThumb2) { // Incoming value: jbuf // ldr.n r5, LCPI1_1 // orr r5, r5, #1 // add r5, pc // str r5, [$jbuf, #+4] ; &jbuf[1] unsigned NewVReg1 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1) .addConstantPoolIndex(CPI) .addMemOperand(CPMMO) .add(predOps(ARMCC::AL)); // Set the low bit because of thumb mode. unsigned NewVReg2 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2) .addReg(NewVReg1, RegState::Kill) .addImm(0x01) .add(predOps(ARMCC::AL)) .add(condCodeOp()); unsigned NewVReg3 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3) .addReg(NewVReg2, RegState::Kill) .addImm(PCLabelId); BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12)) .addReg(NewVReg3, RegState::Kill) .addFrameIndex(FI) .addImm(36) // &jbuf[1] :: pc .addMemOperand(FIMMOSt) .add(predOps(ARMCC::AL)); } else if (isThumb) { // Incoming value: jbuf // ldr.n r1, LCPI1_4 // add r1, pc // mov r2, #1 // orrs r1, r2 // add r2, $jbuf, #+4 ; &jbuf[1] // str r1, [r2] unsigned NewVReg1 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1) .addConstantPoolIndex(CPI) .addMemOperand(CPMMO) .add(predOps(ARMCC::AL)); unsigned NewVReg2 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2) .addReg(NewVReg1, RegState::Kill) .addImm(PCLabelId); // Set the low bit because of thumb mode. 
unsigned NewVReg3 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3) .addReg(ARM::CPSR, RegState::Define) .addImm(1) .add(predOps(ARMCC::AL)); unsigned NewVReg4 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4) .addReg(ARM::CPSR, RegState::Define) .addReg(NewVReg2, RegState::Kill) .addReg(NewVReg3, RegState::Kill) .add(predOps(ARMCC::AL)); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5) .addFrameIndex(FI) .addImm(36); // &jbuf[1] :: pc BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) .addReg(NewVReg4, RegState::Kill) .addReg(NewVReg5, RegState::Kill) .addImm(0) .addMemOperand(FIMMOSt) .add(predOps(ARMCC::AL)); } else { // Incoming value: jbuf // ldr r1, LCPI1_1 // add r1, pc, r1 // str r1, [$jbuf, #+4] ; &jbuf[1] unsigned NewVReg1 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1) .addConstantPoolIndex(CPI) .addImm(0) .addMemOperand(CPMMO) .add(predOps(ARMCC::AL)); unsigned NewVReg2 = MRI->createVirtualRegister(TRC); BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2) .addReg(NewVReg1, RegState::Kill) .addImm(PCLabelId) .add(predOps(ARMCC::AL)); BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12)) .addReg(NewVReg2, RegState::Kill) .addFrameIndex(FI) .addImm(36) // &jbuf[1] :: pc .addMemOperand(FIMMOSt) .add(predOps(ARMCC::AL)); } } void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, MachineBasicBlock *MBB) const { const TargetInstrInfo *TII = Subtarget->getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); MachineFrameInfo &MFI = MF->getFrameInfo(); int FI = MFI.getFunctionContextIndex(); const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass : &ARM::GPRnopcRegClass; // Get a mapping of the call site numbers to all of the landing pads they're // associated with. DenseMap> CallSiteNumToLPad; unsigned MaxCSNum = 0; for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) { if (!BB->isEHPad()) continue; // FIXME: We should assert that the EH_LABEL is the first MI in the landing // pad. for (MachineBasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { if (!II->isEHLabel()) continue; MCSymbol *Sym = II->getOperand(0).getMCSymbol(); if (!MF->hasCallSiteLandingPad(Sym)) continue; SmallVectorImpl &CallSiteIdxs = MF->getCallSiteLandingPad(Sym); for (SmallVectorImpl::iterator CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end(); CSI != CSE; ++CSI) { CallSiteNumToLPad[*CSI].push_back(&*BB); MaxCSNum = std::max(MaxCSNum, *CSI); } break; } } // Get an ordered list of the machine basic blocks for the jump table. std::vector LPadList; SmallPtrSet InvokeBBs; LPadList.reserve(CallSiteNumToLPad.size()); for (unsigned I = 1; I <= MaxCSNum; ++I) { SmallVectorImpl &MBBList = CallSiteNumToLPad[I]; for (SmallVectorImpl::iterator II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) { LPadList.push_back(*II); InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end()); } } assert(!LPadList.empty() && "No landing pad destinations for the dispatch jump table!"); // Create the jump table and associated information. MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline); unsigned MJTI = JTI->createJumpTableIndex(LPadList); // Create the MBBs for the dispatch code. // Shove the dispatch's address into the return slot in the function context. 
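// The dispatch code built below behaves roughly like this (illustrative
// pseudo-code; the call-site index is read back out of the function context
// slot filled in by the SjLj runtime):
//
//   DispatchBB:
//     CallSite = <load from the function context>;
//     if (CallSite > NumLPads) goto TrapBB;     // CMP + Bcc HI below
//   DispContBB:
//     goto JumpTable[CallSite];                 // indexed branch to landing pad
//   TrapBB:
//     trap;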
MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock(); DispatchBB->setIsEHPad(); MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); unsigned trap_opcode; if (Subtarget->isThumb()) trap_opcode = ARM::tTRAP; else trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP; BuildMI(TrapBB, dl, TII->get(trap_opcode)); DispatchBB->addSuccessor(TrapBB); MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); DispatchBB->addSuccessor(DispContBB); // Insert and MBBs. MF->insert(MF->end(), DispatchBB); MF->insert(MF->end(), DispContBB); MF->insert(MF->end(), TrapBB); // Insert code into the entry block that creates and registers the function // context. SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI); MachineMemOperand *FIMMOLd = MF->getMachineMemOperand( MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4); MachineInstrBuilder MIB; MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); const ARMBaseInstrInfo *AII = static_cast(TII); const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); // Add a register mask with no preserved registers. This results in all // registers being marked as clobbered. This can't work if the dispatch block // is in a Thumb1 function and is linked with ARM code which uses the FP // registers, as there is no way to preserve the FP registers in Thumb1 mode. MIB.addRegMask(RI.getSjLjDispatchPreservedMask(*MF)); bool IsPositionIndependent = isPositionIndependent(); unsigned NumLPads = LPadList.size(); if (Subtarget->isThumb2()) { unsigned NewVReg1 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1) .addFrameIndex(FI) .addImm(4) .addMemOperand(FIMMOLd) .add(predOps(ARMCC::AL)); if (NumLPads < 256) { BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri)) .addReg(NewVReg1) .addImm(LPadList.size()) .add(predOps(ARMCC::AL)); } else { unsigned VReg1 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1) .addImm(NumLPads & 0xFFFF) .add(predOps(ARMCC::AL)); unsigned VReg2 = VReg1; if ((NumLPads & 0xFFFF0000) != 0) { VReg2 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2) .addReg(VReg1) .addImm(NumLPads >> 16) .add(predOps(ARMCC::AL)); } BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr)) .addReg(NewVReg1) .addReg(VReg2) .add(predOps(ARMCC::AL)); } BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc)) .addMBB(TrapBB) .addImm(ARMCC::HI) .addReg(ARM::CPSR); unsigned NewVReg3 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3) .addJumpTableIndex(MJTI) .add(predOps(ARMCC::AL)); unsigned NewVReg4 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4) .addReg(NewVReg3, RegState::Kill) .addReg(NewVReg1) .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)) .add(predOps(ARMCC::AL)) .add(condCodeOp()); BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT)) .addReg(NewVReg4, RegState::Kill) .addReg(NewVReg1) .addJumpTableIndex(MJTI); } else if (Subtarget->isThumb()) { unsigned NewVReg1 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1) .addFrameIndex(FI) .addImm(1) .addMemOperand(FIMMOLd) .add(predOps(ARMCC::AL)); if (NumLPads < 256) { BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8)) .addReg(NewVReg1) .addImm(NumLPads) .add(predOps(ARMCC::AL)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext()); const 
Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty); if (Align == 0) Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); unsigned VReg1 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci)) .addReg(VReg1, RegState::Define) .addConstantPoolIndex(Idx) .add(predOps(ARMCC::AL)); BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr)) .addReg(NewVReg1) .addReg(VReg1) .add(predOps(ARMCC::AL)); } BuildMI(DispatchBB, dl, TII->get(ARM::tBcc)) .addMBB(TrapBB) .addImm(ARMCC::HI) .addReg(ARM::CPSR); unsigned NewVReg2 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2) .addReg(ARM::CPSR, RegState::Define) .addReg(NewVReg1) .addImm(2) .add(predOps(ARMCC::AL)); unsigned NewVReg3 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3) .addJumpTableIndex(MJTI) .add(predOps(ARMCC::AL)); unsigned NewVReg4 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4) .addReg(ARM::CPSR, RegState::Define) .addReg(NewVReg2, RegState::Kill) .addReg(NewVReg3) .add(predOps(ARMCC::AL)); MachineMemOperand *JTMMOLd = MF->getMachineMemOperand( MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) .addReg(NewVReg4, RegState::Kill) .addImm(0) .addMemOperand(JTMMOLd) .add(predOps(ARMCC::AL)); unsigned NewVReg6 = NewVReg5; if (IsPositionIndependent) { NewVReg6 = MRI->createVirtualRegister(TRC); BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) .addReg(ARM::CPSR, RegState::Define) .addReg(NewVReg5, RegState::Kill) .addReg(NewVReg3) .add(predOps(ARMCC::AL)); } BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr)) .addReg(NewVReg6, RegState::Kill) .addJumpTableIndex(MJTI); } else { unsigned NewVReg1 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1) .addFrameIndex(FI) .addImm(4) .addMemOperand(FIMMOLd) .add(predOps(ARMCC::AL)); if (NumLPads < 256) { BuildMI(DispatchBB, dl, TII->get(ARM::CMPri)) .addReg(NewVReg1) .addImm(NumLPads) .add(predOps(ARMCC::AL)); } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) { unsigned VReg1 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1) .addImm(NumLPads & 0xFFFF) .add(predOps(ARMCC::AL)); unsigned VReg2 = VReg1; if ((NumLPads & 0xFFFF0000) != 0) { VReg2 = MRI->createVirtualRegister(TRC); BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2) .addReg(VReg1) .addImm(NumLPads >> 16) .add(predOps(ARMCC::AL)); } BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) .addReg(NewVReg1) .addReg(VReg2) .add(predOps(ARMCC::AL)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext()); const Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. 
      unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty);
      if (Align == 0)
        Align = MF->getDataLayout().getTypeAllocSize(C->getType());
      unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);

      unsigned VReg1 = MRI->createVirtualRegister(TRC);
      BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
          .addReg(VReg1, RegState::Define)
          .addConstantPoolIndex(Idx)
          .addImm(0)
          .add(predOps(ARMCC::AL));
      BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
          .addReg(NewVReg1)
          .addReg(VReg1, RegState::Kill)
          .add(predOps(ARMCC::AL));
    }

    BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
        .addMBB(TrapBB)
        .addImm(ARMCC::HI)
        .addReg(ARM::CPSR);

    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
    BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
        .addReg(NewVReg1)
        .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
    BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
        .addJumpTableIndex(MJTI)
        .add(predOps(ARMCC::AL));

    MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
        MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);

    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
    BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
        .addReg(NewVReg3, RegState::Kill)
        .addReg(NewVReg4)
        .addImm(0)
        .addMemOperand(JTMMOLd)
        .add(predOps(ARMCC::AL));

    if (IsPositionIndependent) {
      BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
          .addReg(NewVReg5, RegState::Kill)
          .addReg(NewVReg4)
          .addJumpTableIndex(MJTI);
    } else {
      BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
          .addReg(NewVReg5, RegState::Kill)
          .addJumpTableIndex(MJTI);
    }
  }

  // Add the jump table entries as successors to the MBB.
  SmallPtrSet<MachineBasicBlock*, 64> SeenMBBs;
  for (std::vector<MachineBasicBlock*>::iterator
         I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
    MachineBasicBlock *CurMBB = *I;
    if (SeenMBBs.insert(CurMBB).second)
      DispContBB->addSuccessor(CurMBB);
  }

  // N.B. the order the invoke BBs are processed in doesn't matter here.
  const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
  SmallVector<MachineBasicBlock*, 64> MBBLPads;
  for (MachineBasicBlock *BB : InvokeBBs) {
    // Remove the landing pad successor from the invoke block and replace it
    // with the new dispatch block.
    SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
                                                  BB->succ_end());
    while (!Successors.empty()) {
      MachineBasicBlock *SMBB = Successors.pop_back_val();
      if (SMBB->isEHPad()) {
        BB->removeSuccessor(SMBB);
        MBBLPads.push_back(SMBB);
      }
    }

    BB->addSuccessor(DispatchBB, BranchProbability::getZero());
    BB->normalizeSuccProbs();

    // Find the invoke call and mark all of the callee-saved registers as
    // 'implicit defined' so that they're spilled. This prevents code from
    // moving instructions to before the EH block, where they will never be
    // executed.
    for (MachineBasicBlock::reverse_iterator
           II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
      if (!II->isCall()) continue;

      DenseMap<unsigned, bool> DefRegs;
      for (MachineInstr::mop_iterator
             OI = II->operands_begin(), OE = II->operands_end();
           OI != OE; ++OI) {
        if (!OI->isReg()) continue;
        DefRegs[OI->getReg()] = true;
      }

      MachineInstrBuilder MIB(*MF, &*II);

      for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
        unsigned Reg = SavedRegs[i];
        if (Subtarget->isThumb2() &&
            !ARM::tGPRRegClass.contains(Reg) &&
            !ARM::hGPRRegClass.contains(Reg))
          continue;
        if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
          continue;
        if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
          continue;
        if (!DefRegs[Reg])
          MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
      }

      break;
    }
  }

  // Mark all former landing pads as non-landing pads. The dispatch is the only
  // landing pad now.
  for (SmallVectorImpl<MachineBasicBlock*>::iterator
         I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
    (*I)->setIsEHPad(false);

  // The instruction is gone now.
  MI.eraseFromParent();
}

static MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB,
                                    MachineBasicBlock *Succ) {
  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
                                        E = MBB->succ_end(); I != E; ++I)
    if (*I != Succ)
      return *I;
  llvm_unreachable("Expecting a BB with two successors!");
}

/// Return the load opcode for a given load size. If load size >= 8,
/// neon opcode will be returned.
static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
  if (LdSize >= 8)
    return LdSize == 16 ? ARM::VLD1q32wb_fixed
                        : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
  if (IsThumb1)
    return LdSize == 4 ? ARM::tLDRi
                       : LdSize == 2 ? ARM::tLDRHi
                                     : LdSize == 1 ? ARM::tLDRBi : 0;
  if (IsThumb2)
    return LdSize == 4 ? ARM::t2LDR_POST
                       : LdSize == 2 ? ARM::t2LDRH_POST
                                     : LdSize == 1 ? ARM::t2LDRB_POST : 0;
  return LdSize == 4 ? ARM::LDR_POST_IMM
                     : LdSize == 2 ? ARM::LDRH_POST
                                   : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
}

/// Return the store opcode for a given store size. If store size >= 8,
/// neon opcode will be returned.
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
  if (StSize >= 8)
    return StSize == 16 ? ARM::VST1q32wb_fixed
                        : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
  if (IsThumb1)
    return StSize == 4 ? ARM::tSTRi
                       : StSize == 2 ? ARM::tSTRHi
                                     : StSize == 1 ? ARM::tSTRBi : 0;
  if (IsThumb2)
    return StSize == 4 ? ARM::t2STR_POST
                       : StSize == 2 ? ARM::t2STRH_POST
                                     : StSize == 1 ? ARM::t2STRB_POST : 0;
  return StSize == 4 ? ARM::STR_POST_IMM
                     : StSize == 2 ? ARM::STRH_POST
                                   : StSize == 1 ? ARM::STRB_POST_IMM : 0;
}

/// Emit a post-increment load operation with given size. The instructions
/// will be added to BB at Pos.
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
                       const TargetInstrInfo *TII, const DebugLoc &dl,
                       unsigned LdSize, unsigned Data, unsigned AddrIn,
                       unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
  unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
  assert(LdOpc != 0 && "Should have a load opcode");
  if (LdSize >= 8) {
    BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
        .addReg(AddrOut, RegState::Define)
        .addReg(AddrIn)
        .addImm(0)
        .add(predOps(ARMCC::AL));
  } else if (IsThumb1) {
    // load + update AddrIn
    BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
        .addReg(AddrIn)
        .addImm(0)
        .add(predOps(ARMCC::AL));
    BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
        .add(t1CondCodeOp())
        .addReg(AddrIn)
        .addImm(LdSize)
        .add(predOps(ARMCC::AL));
  } else if (IsThumb2) {
    BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
        .addReg(AddrOut, RegState::Define)
        .addReg(AddrIn)
        .addImm(LdSize)
        .add(predOps(ARMCC::AL));
  } else { // arm
    BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
        .addReg(AddrOut, RegState::Define)
        .addReg(AddrIn)
        .addReg(0)
        .addImm(LdSize)
        .add(predOps(ARMCC::AL));
  }
}

/// Emit a post-increment store operation with given size. The instructions
/// will be added to BB at Pos.
static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned StSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2) { unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2); assert(StOpc != 0 && "Should have a store opcode"); if (StSize >= 8) { BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) .addReg(AddrIn) .addImm(0) .addReg(Data) .add(predOps(ARMCC::AL)); } else if (IsThumb1) { // store + update AddrIn BuildMI(*BB, Pos, dl, TII->get(StOpc)) .addReg(Data) .addReg(AddrIn) .addImm(0) .add(predOps(ARMCC::AL)); BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut) .add(t1CondCodeOp()) .addReg(AddrIn) .addImm(StSize) .add(predOps(ARMCC::AL)); } else if (IsThumb2) { BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) .addReg(Data) .addReg(AddrIn) .addImm(StSize) .add(predOps(ARMCC::AL)); } else { // arm BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) .addReg(Data) .addReg(AddrIn) .addReg(0) .addImm(StSize) .add(predOps(ARMCC::AL)); } } MachineBasicBlock * ARMTargetLowering::EmitStructByval(MachineInstr &MI, MachineBasicBlock *BB) const { // This pseudo instruction has 3 operands: dst, src, size // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). // Otherwise, we will generate unrolled scalar copies. const TargetInstrInfo *TII = Subtarget->getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = ++BB->getIterator(); unsigned dest = MI.getOperand(0).getReg(); unsigned src = MI.getOperand(1).getReg(); unsigned SizeVal = MI.getOperand(2).getImm(); unsigned Align = MI.getOperand(3).getImm(); DebugLoc dl = MI.getDebugLoc(); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned UnitSize = 0; const TargetRegisterClass *TRC = nullptr; const TargetRegisterClass *VecTRC = nullptr; bool IsThumb1 = Subtarget->isThumb1Only(); bool IsThumb2 = Subtarget->isThumb2(); bool IsThumb = Subtarget->isThumb(); if (Align & 1) { UnitSize = 1; } else if (Align & 2) { UnitSize = 2; } else { // Check whether we can use NEON instructions. if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) && Subtarget->hasNEON()) { if ((Align % 16 == 0) && SizeVal >= 16) UnitSize = 16; else if ((Align % 8 == 0) && SizeVal >= 8) UnitSize = 8; } // Can't use NEON instructions. if (UnitSize == 0) UnitSize = 4; } // Select the correct opcode and register class for unit size load/store bool IsNeon = UnitSize >= 8; TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass; if (IsNeon) VecTRC = UnitSize == 16 ? &ARM::DPairRegClass : UnitSize == 8 ? &ARM::DPRRegClass : nullptr; unsigned BytesLeft = SizeVal % UnitSize; unsigned LoopSize = SizeVal - BytesLeft; if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) { // Use LDR and STR to copy. // [scratch, srcOut] = LDR_POST(srcIn, UnitSize) // [destOut] = STR_POST(scratch, destIn, UnitSize) unsigned srcIn = src; unsigned destIn = dest; for (unsigned i = 0; i < LoopSize; i+=UnitSize) { unsigned srcOut = MRI.createVirtualRegister(TRC); unsigned destOut = MRI.createVirtualRegister(TRC); unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC); emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut, IsThumb1, IsThumb2); emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut, IsThumb1, IsThumb2); srcIn = srcOut; destIn = destOut; } // Handle the leftover bytes with LDRB and STRB. 
// [scratch, srcOut] = LDRB_POST(srcIn, 1) // [destOut] = STRB_POST(scratch, destIn, 1) for (unsigned i = 0; i < BytesLeft; i++) { unsigned srcOut = MRI.createVirtualRegister(TRC); unsigned destOut = MRI.createVirtualRegister(TRC); unsigned scratch = MRI.createVirtualRegister(TRC); emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut, IsThumb1, IsThumb2); emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut, IsThumb1, IsThumb2); srcIn = srcOut; destIn = destOut; } MI.eraseFromParent(); // The instruction is gone now. return BB; } // Expand the pseudo op to a loop. // thisMBB: // ... // movw varEnd, # --> with thumb2 // movt varEnd, # // ldrcp varEnd, idx --> without thumb2 // fallthrough --> loopMBB // loopMBB: // PHI varPhi, varEnd, varLoop // PHI srcPhi, src, srcLoop // PHI destPhi, dst, destLoop // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) // [destLoop] = STR_POST(scratch, destPhi, UnitSize) // subs varLoop, varPhi, #UnitSize // bne loopMBB // fallthrough --> exitMBB // exitMBB: // epilogue to handle left-over bytes // [scratch, srcOut] = LDRB_POST(srcLoop, 1) // [destOut] = STRB_POST(scratch, destLoop, 1) MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MF->insert(It, loopMBB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); // Load an immediate to varEnd. unsigned varEnd = MRI.createVirtualRegister(TRC); if (Subtarget->useMovt(*MF)) { unsigned Vtmp = varEnd; if ((LoopSize & 0xFFFF0000) != 0) Vtmp = MRI.createVirtualRegister(TRC); BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), Vtmp) .addImm(LoopSize & 0xFFFF) .add(predOps(ARMCC::AL)); if ((LoopSize & 0xFFFF0000) != 0) BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), varEnd) .addReg(Vtmp) .addImm(LoopSize >> 16) .add(predOps(ARMCC::AL)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext()); const Constant *C = ConstantInt::get(Int32Ty, LoopSize); // MachineConstantPool wants an explicit alignment. 
unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty); if (Align == 0) Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); if (IsThumb) BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)) .addReg(varEnd, RegState::Define) .addConstantPoolIndex(Idx) .add(predOps(ARMCC::AL)); else BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)) .addReg(varEnd, RegState::Define) .addConstantPoolIndex(Idx) .addImm(0) .add(predOps(ARMCC::AL)); } BB->addSuccessor(loopMBB); // Generate the loop body: // varPhi = PHI(varLoop, varEnd) // srcPhi = PHI(srcLoop, src) // destPhi = PHI(destLoop, dst) MachineBasicBlock *entryBB = BB; BB = loopMBB; unsigned varLoop = MRI.createVirtualRegister(TRC); unsigned varPhi = MRI.createVirtualRegister(TRC); unsigned srcLoop = MRI.createVirtualRegister(TRC); unsigned srcPhi = MRI.createVirtualRegister(TRC); unsigned destLoop = MRI.createVirtualRegister(TRC); unsigned destPhi = MRI.createVirtualRegister(TRC); BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi) .addReg(varLoop).addMBB(loopMBB) .addReg(varEnd).addMBB(entryBB); BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi) .addReg(srcLoop).addMBB(loopMBB) .addReg(src).addMBB(entryBB); BuildMI(BB, dl, TII->get(ARM::PHI), destPhi) .addReg(destLoop).addMBB(loopMBB) .addReg(dest).addMBB(entryBB); // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) // [destLoop] = STR_POST(scratch, destPhi, UnitSiz) unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC); emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop, IsThumb1, IsThumb2); emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop, IsThumb1, IsThumb2); // Decrement loop variable by UnitSize. if (IsThumb1) { BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop) .add(t1CondCodeOp()) .addReg(varPhi) .addImm(UnitSize) .add(predOps(ARMCC::AL)); } else { MachineInstrBuilder MIB = BuildMI(*BB, BB->end(), dl, TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop); MIB.addReg(varPhi) .addImm(UnitSize) .add(predOps(ARMCC::AL)) .add(condCodeOp()); MIB->getOperand(5).setReg(ARM::CPSR); MIB->getOperand(5).setIsDef(true); } BuildMI(*BB, BB->end(), dl, TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc)) .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); // loopMBB can loop back to loopMBB or fall through to exitMBB. BB->addSuccessor(loopMBB); BB->addSuccessor(exitMBB); // Add epilogue to handle BytesLeft. BB = exitMBB; auto StartOfExit = exitMBB->begin(); // [scratch, srcOut] = LDRB_POST(srcLoop, 1) // [destOut] = STRB_POST(scratch, destLoop, 1) unsigned srcIn = srcLoop; unsigned destIn = destLoop; for (unsigned i = 0; i < BytesLeft; i++) { unsigned srcOut = MRI.createVirtualRegister(TRC); unsigned destOut = MRI.createVirtualRegister(TRC); unsigned scratch = MRI.createVirtualRegister(TRC); emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut, IsThumb1, IsThumb2); emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut, IsThumb1, IsThumb2); srcIn = srcOut; destIn = destOut; } MI.eraseFromParent(); // The instruction is gone now. 
return BB; } MachineBasicBlock * ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI, MachineBasicBlock *MBB) const { const TargetMachine &TM = getTargetMachine(); const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); assert(Subtarget->isTargetWindows() && "__chkstk is only supported on Windows"); assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode"); // __chkstk takes the number of words to allocate on the stack in R4, and // returns the stack adjustment in number of bytes in R4. This will not // clober any other registers (other than the obvious lr). // // Although, technically, IP should be considered a register which may be // clobbered, the call itself will not touch it. Windows on ARM is a pure // thumb-2 environment, so there is no interworking required. As a result, we // do not expect a veneer to be emitted by the linker, clobbering IP. // // Each module receives its own copy of __chkstk, so no import thunk is // required, again, ensuring that IP is not clobbered. // // Finally, although some linkers may theoretically provide a trampoline for // out of range calls (which is quite common due to a 32M range limitation of // branches for Thumb), we can generate the long-call version via // -mcmodel=large, alleviating the need for the trampoline which may clobber // IP. switch (TM.getCodeModel()) { case CodeModel::Tiny: llvm_unreachable("Tiny code model not available on ARM."); case CodeModel::Small: case CodeModel::Medium: case CodeModel::Kernel: BuildMI(*MBB, MI, DL, TII.get(ARM::tBL)) .add(predOps(ARMCC::AL)) .addExternalSymbol("__chkstk") .addReg(ARM::R4, RegState::Implicit | RegState::Kill) .addReg(ARM::R4, RegState::Implicit | RegState::Define) .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead) .addReg(ARM::CPSR, RegState::Implicit | RegState::Define | RegState::Dead); break; case CodeModel::Large: { MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass); BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg) .addExternalSymbol("__chkstk"); BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr)) .add(predOps(ARMCC::AL)) .addReg(Reg, RegState::Kill) .addReg(ARM::R4, RegState::Implicit | RegState::Kill) .addReg(ARM::R4, RegState::Implicit | RegState::Define) .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead) .addReg(ARM::CPSR, RegState::Implicit | RegState::Define | RegState::Dead); break; } } BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP) .addReg(ARM::SP, RegState::Kill) .addReg(ARM::R4, RegState::Kill) .setMIFlags(MachineInstr::FrameSetup) .add(predOps(ARMCC::AL)) .add(condCodeOp()); MI.eraseFromParent(); return MBB; } MachineBasicBlock * ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock(); MF->insert(++MBB->getIterator(), ContBB); ContBB->splice(ContBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), MBB->end()); ContBB->transferSuccessorsAndUpdatePHIs(MBB); MBB->addSuccessor(ContBB); MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0)); MF->push_back(TrapBB); MBB->addSuccessor(TrapBB); BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8)) .addReg(MI.getOperand(0).getReg()) .addImm(0) .add(predOps(ARMCC::AL)); BuildMI(*MBB, MI, DL, 
TII->get(ARM::t2Bcc)) .addMBB(TrapBB) .addImm(ARMCC::EQ) .addReg(ARM::CPSR); MI.eraseFromParent(); return ContBB; } // The CPSR operand of SelectItr might be missing a kill marker // because there were multiple uses of CPSR, and ISel didn't know // which to mark. Figure out whether SelectItr should have had a // kill marker, and set it if it should. Returns the correct kill // marker value. static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr, MachineBasicBlock* BB, const TargetRegisterInfo* TRI) { // Scan forward through BB for a use/def of CPSR. MachineBasicBlock::iterator miI(std::next(SelectItr)); for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) { const MachineInstr& mi = *miI; if (mi.readsRegister(ARM::CPSR)) return false; if (mi.definesRegister(ARM::CPSR)) break; // Should have kill-flag - update below. } // If we hit the end of the block, check whether CPSR is live into a // successor. if (miI == BB->end()) { for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(), sEnd = BB->succ_end(); sItr != sEnd; ++sItr) { MachineBasicBlock* succ = *sItr; if (succ->isLiveIn(ARM::CPSR)) return false; } } // We found a def, or hit the end of the basic block and CPSR wasn't live // out. SelectMI should have a kill flag on CPSR. SelectItr->addRegisterKilled(ARM::CPSR, TRI); return true; } MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = Subtarget->getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); switch (MI.getOpcode()) { default: { MI.print(errs()); llvm_unreachable("Unexpected instr type to insert"); } // Thumb1 post-indexed loads are really just single-register LDMs. case ARM::tLDR_postidx: { MachineOperand Def(MI.getOperand(1)); BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD)) .add(Def) // Rn_wb .add(MI.getOperand(2)) // Rn .add(MI.getOperand(3)) // PredImm .add(MI.getOperand(4)) // PredReg .add(MI.getOperand(0)); // Rt MI.eraseFromParent(); return BB; } // The Thumb2 pre-indexed stores have the same MI operands, they just // define them differently in the .td files from the isel patterns, so // they need pseudos. case ARM::t2STR_preidx: MI.setDesc(TII->get(ARM::t2STR_PRE)); return BB; case ARM::t2STRB_preidx: MI.setDesc(TII->get(ARM::t2STRB_PRE)); return BB; case ARM::t2STRH_preidx: MI.setDesc(TII->get(ARM::t2STRH_PRE)); return BB; case ARM::STRi_preidx: case ARM::STRBi_preidx: { unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM; // Decode the offset. 
unsigned Offset = MI.getOperand(4).getImm(); bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub; Offset = ARM_AM::getAM2Offset(Offset); if (isSub) Offset = -Offset; MachineMemOperand *MMO = *MI.memoperands_begin(); BuildMI(*BB, MI, dl, TII->get(NewOpc)) .add(MI.getOperand(0)) // Rn_wb .add(MI.getOperand(1)) // Rt .add(MI.getOperand(2)) // Rn .addImm(Offset) // offset (skip GPR==zero_reg) .add(MI.getOperand(5)) // pred .add(MI.getOperand(6)) .addMemOperand(MMO); MI.eraseFromParent(); return BB; } case ARM::STRr_preidx: case ARM::STRBr_preidx: case ARM::STRH_preidx: { unsigned NewOpc; switch (MI.getOpcode()) { default: llvm_unreachable("unexpected opcode!"); case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break; case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break; case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break; } MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); for (unsigned i = 0; i < MI.getNumOperands(); ++i) MIB.add(MI.getOperand(i)); MI.eraseFromParent(); return BB; } case ARM::tMOVCCr_pseudo: { // To "insert" a SELECT_CC instruction, we actually have to insert the // diamond control-flow pattern. The incoming instruction knows the // destination vreg to set, the condition code register to branch on, the // true/false values to select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = ++BB->getIterator(); // thisMBB: // ... // TrueVal = ... // cmpTY ccX, r1, r2 // bCC copy1MBB // fallthrough --> copy0MBB MachineBasicBlock *thisMBB = BB; MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, copy0MBB); F->insert(It, sinkMBB); // Check whether CPSR is live past the tMOVCCr_pseudo. const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); if (!MI.killsRegister(ARM::CPSR) && !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) { copy0MBB->addLiveIn(ARM::CPSR); sinkMBB->addLiveIn(ARM::CPSR); } // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); BuildMI(BB, dl, TII->get(ARM::tBcc)) .addMBB(sinkMBB) .addImm(MI.getOperand(3).getImm()) .addReg(MI.getOperand(4).getReg()); // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB BB = copy0MBB; // Update machine-CFG edges BB->addSuccessor(sinkMBB); // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg()) .addReg(MI.getOperand(1).getReg()) .addMBB(copy0MBB) .addReg(MI.getOperand(2).getReg()) .addMBB(thisMBB); MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } case ARM::BCCi64: case ARM::BCCZi64: { // If there is an unconditional branch to the other successor, remove it. BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end()); // Compare both parts that make up the double comparison separately for // equality. bool RHSisZero = MI.getOpcode() == ARM::BCCZi64; unsigned LHS1 = MI.getOperand(1).getReg(); unsigned LHS2 = MI.getOperand(2).getReg(); if (RHSisZero) { BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(LHS1) .addImm(0) .add(predOps(ARMCC::AL)); BuildMI(BB, dl, TII->get(isThumb2 ? 
ARM::t2CMPri : ARM::CMPri)) .addReg(LHS2).addImm(0) .addImm(ARMCC::EQ).addReg(ARM::CPSR); } else { unsigned RHS1 = MI.getOperand(3).getReg(); unsigned RHS2 = MI.getOperand(4).getReg(); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) .addReg(LHS1) .addReg(RHS1) .add(predOps(ARMCC::AL)); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) .addReg(LHS2).addReg(RHS2) .addImm(ARMCC::EQ).addReg(ARM::CPSR); } MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB(); MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); if (MI.getOperand(0).getImm() == ARMCC::NE) std::swap(destMBB, exitMBB); BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); if (isThumb2) BuildMI(BB, dl, TII->get(ARM::t2B)) .addMBB(exitMBB) .add(predOps(ARMCC::AL)); else BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB); MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } case ARM::Int_eh_sjlj_setjmp: case ARM::Int_eh_sjlj_setjmp_nofp: case ARM::tInt_eh_sjlj_setjmp: case ARM::t2Int_eh_sjlj_setjmp: case ARM::t2Int_eh_sjlj_setjmp_nofp: return BB; case ARM::Int_eh_sjlj_setup_dispatch: EmitSjLjDispatchBlock(MI, BB); return BB; case ARM::ABS: case ARM::t2ABS: { // To insert an ABS instruction, we have to insert the // diamond control-flow pattern. The incoming instruction knows the // source vreg to test against 0, the destination vreg to set, // the condition code register to branch on, the // true/false values to select between, and a branch opcode to use. // It transforms // V1 = ABS V0 // into // V2 = MOVS V0 // BCC (branch to SinkBB if V0 >= 0) // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0) // SinkBB: V1 = PHI(V2, V3) const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator BBI = ++BB->getIterator(); MachineFunction *Fn = BB->getParent(); MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB); Fn->insert(BBI, RSBBB); Fn->insert(BBI, SinkBB); unsigned int ABSSrcReg = MI.getOperand(1).getReg(); unsigned int ABSDstReg = MI.getOperand(0).getReg(); bool ABSSrcKIll = MI.getOperand(1).isKill(); bool isThumb2 = Subtarget->isThumb2(); MachineRegisterInfo &MRI = Fn->getRegInfo(); // In Thumb mode S must not be specified if source register is the SP or // PC and if destination register is the SP, so restrict register class unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass); // Transfer the remainder of BB and its successor edges to sinkMBB. SinkBB->splice(SinkBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); SinkBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(RSBBB); BB->addSuccessor(SinkBB); // fall through to SinkMBB RSBBB->addSuccessor(SinkBB); // insert a cmp at the end of BB BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(ABSSrcReg) .addImm(0) .add(predOps(ARMCC::AL)); // insert a bcc with opposite CC to ARMCC::MI at the end of BB BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB) .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR); // insert rsbri in RSBBB // Note: BCC and rsbri will be converted into predicated rsbmi // by if-conversion pass BuildMI(*RSBBB, RSBBB->begin(), dl, TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) .addReg(ABSSrcReg, ABSSrcKIll ? 
RegState::Kill : 0) .addImm(0) .add(predOps(ARMCC::AL)) .add(condCodeOp()); // insert PHI in SinkBB, // reuse ABSDstReg to not change uses of ABS instruction BuildMI(*SinkBB, SinkBB->begin(), dl, TII->get(ARM::PHI), ABSDstReg) .addReg(NewRsbDstReg).addMBB(RSBBB) .addReg(ABSSrcReg).addMBB(BB); // remove ABS instruction MI.eraseFromParent(); // return last added BB return SinkBB; } case ARM::COPY_STRUCT_BYVAL_I32: ++NumLoopByVals; return EmitStructByval(MI, BB); case ARM::WIN__CHKSTK: return EmitLowered__chkstk(MI, BB); case ARM::WIN__DBZCHK: return EmitLowered__dbzchk(MI, BB); } } /// Attaches vregs to MEMCPY that it will use as scratch registers /// when it is expanded into LDM/STM. This is done as a post-isel lowering /// instead of as a custom inserter because we need the use list from the SDNode. static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, MachineInstr &MI, const SDNode *Node) { bool isThumb1 = Subtarget->isThumb1Only(); DebugLoc DL = MI.getDebugLoc(); MachineFunction *MF = MI.getParent()->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); MachineInstrBuilder MIB(*MF, MI); // If the new dst/src is unused mark it as dead. if (!Node->hasAnyUseOfValue(0)) { MI.getOperand(0).setIsDead(true); } if (!Node->hasAnyUseOfValue(1)) { MI.getOperand(1).setIsDead(true); } // The MEMCPY both defines and kills the scratch registers. for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) { unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass); MIB.addReg(TmpReg, RegState::Define|RegState::Dead); } } void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const { if (MI.getOpcode() == ARM::MEMCPY) { attachMEMCPYScratchRegs(Subtarget, MI, Node); return; } const MCInstrDesc *MCID = &MI.getDesc(); // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, // RSC. Coming out of isel, they have an implicit CPSR def, but the optional // operand is still set to noreg. If needed, set the optional operand's // register to CPSR, and remove the redundant implicit def. // // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR). // Rename pseudo opcodes. unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode()); unsigned ccOutIdx; if (NewOpc) { const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo(); MCID = &TII->get(NewOpc); assert(MCID->getNumOperands() == MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize() && "converted opcode should be the same except for cc_out" " (and, on Thumb1, pred)"); MI.setDesc(*MCID); // Add the optional cc_out operand MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true)); // On Thumb1, move all input operands to the end, then add the predicate if (Subtarget->isThumb1Only()) { for (unsigned c = MCID->getNumOperands() - 4; c--;) { MI.addOperand(MI.getOperand(1)); MI.RemoveOperand(1); } // Restore the ties for (unsigned i = MI.getNumOperands(); i--;) { const MachineOperand& op = MI.getOperand(i); if (op.isReg() && op.isUse()) { int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO); if (DefIdx != -1) MI.tieOperands(DefIdx, i); } } MI.addOperand(MachineOperand::CreateImm(ARMCC::AL)); MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false)); ccOutIdx = 1; } else ccOutIdx = MCID->getNumOperands() - 1; } else ccOutIdx = MCID->getNumOperands() - 1; // Any ARM instruction that sets the 's' bit should specify an optional // "cc_out" operand in the last operand position. 
if (!MI.hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { assert(!NewOpc && "Optional cc_out operand required"); return; } // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it // since we already have an optional CPSR def. bool definesCPSR = false; bool deadCPSR = false; for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) { definesCPSR = true; if (MO.isDead()) deadCPSR = true; MI.RemoveOperand(i); break; } } if (!definesCPSR) { assert(!NewOpc && "Optional cc_out operand required"); return; } assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag"); if (deadCPSR) { assert(!MI.getOperand(ccOutIdx).getReg() && "expect uninitialized optional cc_out operand"); // Thumb1 instructions must have the S bit even if the CPSR is dead. if (!Subtarget->isThumb1Only()) return; } // If this instruction was defined with an optional CPSR def and its dag node // had a live implicit CPSR def, then activate the optional CPSR def. MachineOperand &MO = MI.getOperand(ccOutIdx); MO.setReg(ARM::CPSR); MO.setIsDef(true); } //===----------------------------------------------------------------------===// // ARM Optimization Hooks //===----------------------------------------------------------------------===// // Helper function that checks if N is a null or all ones constant. static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) { return AllOnes ? isAllOnesConstant(N) : isNullConstant(N); } // Return true if N is conditionally 0 or all ones. // Detects these expressions where cc is an i1 value: // // (select cc 0, y) [AllOnes=0] // (select cc y, 0) [AllOnes=0] // (zext cc) [AllOnes=0] // (sext cc) [AllOnes=0/1] // (select cc -1, y) [AllOnes=1] // (select cc y, -1) [AllOnes=1] // // Invert is set when N is the null/all ones constant when CC is false. // OtherOp is set to the alternative value of N. static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC, bool &Invert, SDValue &OtherOp, SelectionDAG &DAG) { switch (N->getOpcode()) { default: return false; case ISD::SELECT: { CC = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); if (isZeroOrAllOnes(N1, AllOnes)) { Invert = false; OtherOp = N2; return true; } if (isZeroOrAllOnes(N2, AllOnes)) { Invert = true; OtherOp = N1; return true; } return false; } case ISD::ZERO_EXTEND: // (zext cc) can never be the all ones value. if (AllOnes) return false; LLVM_FALLTHROUGH; case ISD::SIGN_EXTEND: { SDLoc dl(N); EVT VT = N->getValueType(0); CC = N->getOperand(0); if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC) return false; Invert = !AllOnes; if (AllOnes) // When looking for an AllOnes constant, N is an sext, and the 'other' // value is 0. OtherOp = DAG.getConstant(0, dl, VT); else if (N->getOpcode() == ISD::ZERO_EXTEND) // When looking for a 0 constant, N can be zext or sext. 
OtherOp = DAG.getConstant(1, dl, VT); else OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, VT); return true; } } } // Combine a constant select operand into its use: // // (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) // (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) // (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1] // (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) // (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) // // The transform is rejected if the select doesn't have a constant operand that // is null, or all ones when AllOnes is set. // // Also recognize sext/zext from i1: // // (add (zext cc), x) -> (select cc (add x, 1), x) // (add (sext cc), x) -> (select cc (add x, -1), x) // // These transformations eventually create predicated instructions. // // @param N The node to transform. // @param Slct The N operand that is a select. // @param OtherOp The other N operand (x above). // @param DCI Context. // @param AllOnes Require the select constant to be all ones instead of null. // @returns The new node, or SDValue() on failure. static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes = false) { SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); SDValue NonConstantVal; SDValue CCOp; bool SwapSelectOps; if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps, NonConstantVal, DAG)) return SDValue(); // Slct is now know to be the desired identity constant when CC is true. SDValue TrueVal = OtherOp; SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal); // Unless SwapSelectOps says CC should be false. if (SwapSelectOps) std::swap(TrueVal, FalseVal); return DAG.getNode(ISD::SELECT, SDLoc(N), VT, CCOp, TrueVal, FalseVal); } // Attempt combineSelectAndUse on each operand of a commutative operator N. static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); if (N0.getNode()->hasOneUse()) if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes)) return Result; if (N1.getNode()->hasOneUse()) if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes)) return Result; return SDValue(); } static bool IsVUZPShuffleNode(SDNode *N) { // VUZP shuffle node. if (N->getOpcode() == ARMISD::VUZP) return true; // "VUZP" on i32 is an alias for VTRN. if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32) return true; return false; } static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Look for ADD(VUZP.0, VUZP.1). if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() || N0 == N1) return SDValue(); // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD. if (!N->getValueType(0).is64BitVector()) return SDValue(); // Generate vpadd. 
SelectionDAG &DAG = DCI.DAG; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc dl(N); SDNode *Unzip = N0.getNode(); EVT VT = N->getValueType(0); SmallVector Ops; Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl, TLI.getPointerTy(DAG.getDataLayout()))); Ops.push_back(Unzip->getOperand(0)); Ops.push_back(Unzip->getOperand(1)); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops); } static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Check for two extended operands. if (!(N0.getOpcode() == ISD::SIGN_EXTEND && N1.getOpcode() == ISD::SIGN_EXTEND) && !(N0.getOpcode() == ISD::ZERO_EXTEND && N1.getOpcode() == ISD::ZERO_EXTEND)) return SDValue(); SDValue N00 = N0.getOperand(0); SDValue N10 = N1.getOperand(0); // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1)) if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() || N00 == N10) return SDValue(); // We only recognize Q register paddl here; this can't be reached until // after type legalization. if (!N00.getValueType().is64BitVector() || !N0.getValueType().is128BitVector()) return SDValue(); // Generate vpaddl. SelectionDAG &DAG = DCI.DAG; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc dl(N); EVT VT = N->getValueType(0); SmallVector Ops; // Form vpaddl.sN or vpaddl.uN depending on the kind of extension. unsigned Opcode; if (N0.getOpcode() == ISD::SIGN_EXTEND) Opcode = Intrinsic::arm_neon_vpaddls; else Opcode = Intrinsic::arm_neon_vpaddlu; Ops.push_back(DAG.getConstant(Opcode, dl, TLI.getPointerTy(DAG.getDataLayout()))); EVT ElemTy = N00.getValueType().getVectorElementType(); unsigned NumElts = VT.getVectorNumElements(); EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2); SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT, N00.getOperand(0), N00.getOperand(1)); Ops.push_back(Concat); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops); } // FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in // an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is // much easier to match. static SDValue AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Only perform optimization if after legalize, and if NEON is available. We // also expected both operands to be BUILD_VECTORs. if (DCI.isBeforeLegalize() || !Subtarget->hasNEON() || N0.getOpcode() != ISD::BUILD_VECTOR || N1.getOpcode() != ISD::BUILD_VECTOR) return SDValue(); // Check output type since VPADDL operand elements can only be 8, 16, or 32. EVT VT = N->getValueType(0); if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64) return SDValue(); // Check that the vector operands are of the right form. // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR // operands, where N is the size of the formed vector. // Each EXTRACT_VECTOR should have the same input vector and odd or even // index such that we have a pair wise add pattern. // Grab the vector that all EXTRACT_VECTOR nodes should be referencing. if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); SDValue Vec = N0->getOperand(0)->getOperand(0); SDNode *V = Vec.getNode(); unsigned nextIndex = 0; // For each operands to the ADD which are BUILD_VECTORs, // check to see if each of their operands are an EXTRACT_VECTOR with // the same vector and appropriate index. 
for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) { if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { SDValue ExtVec0 = N0->getOperand(i); SDValue ExtVec1 = N1->getOperand(i); // First operand is the vector, verify its the same. if (V != ExtVec0->getOperand(0).getNode() || V != ExtVec1->getOperand(0).getNode()) return SDValue(); // Second is the constant, verify its correct. ConstantSDNode *C0 = dyn_cast(ExtVec0->getOperand(1)); ConstantSDNode *C1 = dyn_cast(ExtVec1->getOperand(1)); // For the constant, we want to see all the even or all the odd. if (!C0 || !C1 || C0->getZExtValue() != nextIndex || C1->getZExtValue() != nextIndex+1) return SDValue(); // Increment index. nextIndex+=2; } else return SDValue(); } // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure // we're using the entire input vector, otherwise there's a size/legality // mismatch somewhere. if (nextIndex != Vec.getValueType().getVectorNumElements() || Vec.getValueType().getVectorElementType() == VT.getVectorElementType()) return SDValue(); // Create VPADDL node. SelectionDAG &DAG = DCI.DAG; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc dl(N); // Build operand list. SmallVector Ops; Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl, TLI.getPointerTy(DAG.getDataLayout()))); // Input is the vector. Ops.push_back(Vec); // Get widened type and narrowed type. MVT widenType; unsigned numElem = VT.getVectorNumElements(); EVT inputLaneType = Vec.getValueType().getVectorElementType(); switch (inputLaneType.getSimpleVT().SimpleTy) { case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break; case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break; case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break; default: llvm_unreachable("Invalid vector element type for padd optimization."); } SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops); unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE; return DAG.getNode(ExtOp, dl, VT, tmp); } static SDValue findMUL_LOHI(SDValue V) { if (V->getOpcode() == ISD::UMUL_LOHI || V->getOpcode() == ISD::SMUL_LOHI) return V; return SDValue(); } static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { if (Subtarget->isThumb()) { if (!Subtarget->hasDSP()) return SDValue(); } else if (!Subtarget->hasV5TEOps()) return SDValue(); // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and // accumulates the product into a 64-bit value. 
  // The 16-bit values will be sign extended somehow or SRA'd into 32-bit values
  // (addc (adde (mul 16bit, 16bit), lo), hi)
  SDValue Mul = AddcNode->getOperand(0);
  SDValue Lo = AddcNode->getOperand(1);
  if (Mul.getOpcode() != ISD::MUL) {
    Lo = AddcNode->getOperand(0);
    Mul = AddcNode->getOperand(1);
    if (Mul.getOpcode() != ISD::MUL)
      return SDValue();
  }

  SDValue SRA = AddeNode->getOperand(0);
  SDValue Hi = AddeNode->getOperand(1);
  if (SRA.getOpcode() != ISD::SRA) {
    SRA = AddeNode->getOperand(1);
    Hi = AddeNode->getOperand(0);
    if (SRA.getOpcode() != ISD::SRA)
      return SDValue();
  }
  if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
    if (Const->getZExtValue() != 31)
      return SDValue();
  } else
    return SDValue();

  if (SRA.getOperand(0) != Mul)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(AddcNode);
  unsigned Opcode = 0;
  SDValue Op0;
  SDValue Op1;

  if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
    Opcode = ARMISD::SMLALBB;
    Op0 = Mul.getOperand(0);
    Op1 = Mul.getOperand(1);
  } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
    Opcode = ARMISD::SMLALBT;
    Op0 = Mul.getOperand(0);
    Op1 = Mul.getOperand(1).getOperand(0);
  } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
    Opcode = ARMISD::SMLALTB;
    Op0 = Mul.getOperand(0).getOperand(0);
    Op1 = Mul.getOperand(1);
  } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
    Opcode = ARMISD::SMLALTT;
    Op0 = Mul->getOperand(0).getOperand(0);
    Op1 = Mul->getOperand(1).getOperand(0);
  }

  if (!Op0 || !Op1)
    return SDValue();

  SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
                              Op0, Op1, Lo, Hi);
  // Replace the ADDs' nodes uses by the MLA node's values.
  SDValue HiMLALResult(SMLAL.getNode(), 1);
  SDValue LoMLALResult(SMLAL.getNode(), 0);

  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);

  // Return original node to notify the driver to stop replacing.
  SDValue resNode(AddcNode, 0);
  return resNode;
}

static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     const ARMSubtarget *Subtarget) {
  // Look for multiply add opportunities.
  // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where
  // each add nodes consumes a value from ISD::UMUL_LOHI and there is
  // a glue link from the first add to the second add.
  // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
  // a S/UMLAL instruction.
  //
  //                  UMUL_LOHI
  //                 / :lo    \ :hi
  //                V          \          [no multiline comment]
  //    loAdd ->  ADDC         |
  //                 \ :carry /
  //                   V      V
  //                    ADDE <- hiAdd
  //
  // In the special case where only the higher part of a signed result is used
  // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
  // a constant with the exact value of 0x80000000, we recognize we are dealing
  // with a "rounded multiply and add" (or subtract) and transform it into
  // either a ARMISD::SMMLAR or ARMISD::SMMLSR respectively.
  assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
          AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
         "Expect an ADDE or SUBE");

  assert(AddeSubeNode->getNumOperands() == 3 &&
         AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
         "ADDE node has the wrong inputs");

  // Check that we are chained to the right ADDC or SUBC node.
SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode(); if ((AddeSubeNode->getOpcode() == ARMISD::ADDE && AddcSubcNode->getOpcode() != ARMISD::ADDC) || (AddeSubeNode->getOpcode() == ARMISD::SUBE && AddcSubcNode->getOpcode() != ARMISD::SUBC)) return SDValue(); SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0); SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1); // Check if the two operands are from the same mul_lohi node. if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode()) return SDValue(); assert(AddcSubcNode->getNumValues() == 2 && AddcSubcNode->getValueType(0) == MVT::i32 && "Expect ADDC with two result values. First: i32"); // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it // maybe a SMLAL which multiplies two 16-bit values. if (AddeSubeNode->getOpcode() == ARMISD::ADDE && AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI && AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI && AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI && AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI) return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget); // Check for the triangle shape. SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0); SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1); // Make sure that the ADDE/SUBE operands are not coming from the same node. if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode()) return SDValue(); // Find the MUL_LOHI node walking up ADDE/SUBE's operands. bool IsLeftOperandMUL = false; SDValue MULOp = findMUL_LOHI(AddeSubeOp0); if (MULOp == SDValue()) MULOp = findMUL_LOHI(AddeSubeOp1); else IsLeftOperandMUL = true; if (MULOp == SDValue()) return SDValue(); // Figure out the right opcode. unsigned Opc = MULOp->getOpcode(); unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL; // Figure out the high and low input values to the MLAL node. SDValue *HiAddSub = nullptr; SDValue *LoMul = nullptr; SDValue *LowAddSub = nullptr; // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI. if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1))) return SDValue(); if (IsLeftOperandMUL) HiAddSub = &AddeSubeOp1; else HiAddSub = &AddeSubeOp0; // Ensure that LoMul and LowAddSub are taken from correct ISD::SMUL_LOHI node // whose low result is fed to the ADDC/SUBC we are checking. if (AddcSubcOp0 == MULOp.getValue(0)) { LoMul = &AddcSubcOp0; LowAddSub = &AddcSubcOp1; } if (AddcSubcOp1 == MULOp.getValue(0)) { LoMul = &AddcSubcOp1; LowAddSub = &AddcSubcOp0; } if (!LoMul) return SDValue(); // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC // the replacement below will create a cycle. if (AddcSubcNode == HiAddSub->getNode() || AddcSubcNode->isPredecessorOf(HiAddSub->getNode())) return SDValue(); // Create the merged node. SelectionDAG &DAG = DCI.DAG; // Start building operand list. SmallVector Ops; Ops.push_back(LoMul->getOperand(0)); Ops.push_back(LoMul->getOperand(1)); // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be // the case, we must be doing signed multiplication and only use the higher // part of the result of the MLAL, furthermore the LowAddSub must be a constant // addition or subtraction with the value of 0x800000. 
  if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
      FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
      LowAddSub->getNode()->getOpcode() == ISD::Constant &&
      static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
          0x80000000) {
    Ops.push_back(*HiAddSub);
    if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
      FinalOpc = ARMISD::SMMLSR;
    } else {
      FinalOpc = ARMISD::SMMLAR;
    }
    SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
    DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);

    return SDValue(AddeSubeNode, 0);
  } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
    // SMMLS is generated during instruction selection and the rest of this
    // function cannot handle the case where AddcSubcNode is a SUBC.
    return SDValue();

  // Finish building the operand list for {U/S}MLAL
  Ops.push_back(*LowAddSub);
  Ops.push_back(*HiAddSub);

  SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
                                 DAG.getVTList(MVT::i32, MVT::i32), Ops);

  // Replace the ADDs' nodes uses by the MLA node's values.
  SDValue HiMLALResult(MLALNode.getNode(), 1);
  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);

  SDValue LoMLALResult(MLALNode.getNode(), 0);
  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);

  // Return original node to notify the driver to stop replacing.
  return SDValue(AddeSubeNode, 0);
}

static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const ARMSubtarget *Subtarget) {
  // UMAAL is similar to UMLAL except that it adds two unsigned values.
  // While trying to combine for the other MLAL nodes, first search for the
  // chance to use UMAAL. Check if Addc uses a node which has already
  // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
  // as the addend, and it's handled in PerformUMLALCombine.
  if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
    return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);

  // Check that we have a glued ADDC node.
  SDNode *AddcNode = AddeNode->getOperand(2).getNode();
  if (AddcNode->getOpcode() != ARMISD::ADDC)
    return SDValue();

  // Find the converted UMAAL or quit if it doesn't exist.
  SDNode *UmlalNode = nullptr;
  SDValue AddHi;
  if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
    UmlalNode = AddcNode->getOperand(0).getNode();
    AddHi = AddcNode->getOperand(1);
  } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
    UmlalNode = AddcNode->getOperand(1).getNode();
    AddHi = AddcNode->getOperand(0);
  } else {
    return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
  }

  // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
  // the ADDC as well as Zero.
  if (!isNullConstant(UmlalNode->getOperand(3)))
    return SDValue();

  if ((isNullConstant(AddeNode->getOperand(0)) &&
       AddeNode->getOperand(1).getNode() == UmlalNode) ||
      (AddeNode->getOperand(0).getNode() == UmlalNode &&
       isNullConstant(AddeNode->getOperand(1)))) {
    SelectionDAG &DAG = DCI.DAG;
    SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
                      UmlalNode->getOperand(2), AddHi };
    SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
                                DAG.getVTList(MVT::i32, MVT::i32), Ops);

    // Replace the ADDs' nodes uses by the UMAAL node's values.
    DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0),
                                  SDValue(UMAAL.getNode(), 1));
    DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0),
                                  SDValue(UMAAL.getNode(), 0));

    // Return original node to notify the driver to stop replacing.
return SDValue(AddeNode, 0); } return SDValue(); } static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) return SDValue(); // Check that we have a pair of ADDC and ADDE as operands. // Both addends of the ADDE must be zero. SDNode* AddcNode = N->getOperand(2).getNode(); SDNode* AddeNode = N->getOperand(3).getNode(); if ((AddcNode->getOpcode() == ARMISD::ADDC) && (AddeNode->getOpcode() == ARMISD::ADDE) && isNullConstant(AddeNode->getOperand(0)) && isNullConstant(AddeNode->getOperand(1)) && (AddeNode->getOperand(2).getNode() == AddcNode)) return DAG.getNode(ARMISD::UMAAL, SDLoc(N), DAG.getVTList(MVT::i32, MVT::i32), {N->getOperand(0), N->getOperand(1), AddcNode->getOperand(0), AddcNode->getOperand(1)}); else return SDValue(); } static SDValue PerformAddcSubcCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { SelectionDAG &DAG(DCI.DAG); if (N->getOpcode() == ARMISD::SUBC) { // (SUBC (ADDE 0, 0, C), 1) -> C SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); if (LHS->getOpcode() == ARMISD::ADDE && isNullConstant(LHS->getOperand(0)) && isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) { return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2)); } } if (Subtarget->isThumb1Only()) { SDValue RHS = N->getOperand(1); if (ConstantSDNode *C = dyn_cast(RHS)) { int32_t imm = C->getSExtValue(); if (imm < 0 && imm > std::numeric_limits::min()) { SDLoc DL(N); RHS = DAG.getConstant(-imm, DL, MVT::i32); unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC : ARMISD::ADDC; return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS); } } } return SDValue(); } static SDValue PerformAddeSubeCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { if (Subtarget->isThumb1Only()) { SelectionDAG &DAG = DCI.DAG; SDValue RHS = N->getOperand(1); if (ConstantSDNode *C = dyn_cast(RHS)) { int64_t imm = C->getSExtValue(); if (imm < 0) { SDLoc DL(N); // The with-carry-in form matches bitwise not instead of the negation. // Effectively, the inverse interpretation of the carry flag already // accounts for part of the negation. RHS = DAG.getConstant(~imm, DL, MVT::i32); unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE : ARMISD::ADDE; return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS, N->getOperand(2)); } } } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) { return AddCombineTo64bitMLAL(N, DCI, Subtarget); } return SDValue(); } /// PerformADDECombine - Target-specific dag combine transform from /// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or /// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL static SDValue PerformADDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Only ARM and Thumb2 support UMLAL/SMLAL. if (Subtarget->isThumb1Only()) return PerformAddeSubeCombine(N, DCI, Subtarget); // Only perform the checks after legalize when the pattern is available. if (DCI.isBeforeLegalize()) return SDValue(); return AddCombineTo64bitUMAAL(N, DCI, Subtarget); } /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with /// operands N0 and N1. This is a helper for PerformADDCombine that is /// called with the default operands, and if that fails, with commuted /// operands. 
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget){ // Attempt to create vpadd for this add. if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget)) return Result; // Attempt to create vpaddl for this add. if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget)) return Result; if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI, Subtarget)) return Result; // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) if (N0.getNode()->hasOneUse()) if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI)) return Result; return SDValue(); } bool ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const { if (Level == BeforeLegalizeTypes) return true; if (Subtarget->isThumb() && Subtarget->isThumb1Only()) return true; if (N->getOpcode() != ISD::SHL) return true; // Turn off commute-with-shift transform after legalization, so it doesn't // conflict with PerformSHLSimplify. (We could try to detect when // PerformSHLSimplify would trigger more precisely, but it isn't // really necessary.) return false; } bool ARMTargetLowering::shouldFoldShiftPairToMask(const SDNode *N, CombineLevel Level) const { if (!Subtarget->isThumb1Only()) return true; if (Level == BeforeLegalizeTypes) return true; return false; } static SDValue PerformSHLSimplify(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST) { // Allow the generic combiner to identify potential bswaps. if (DCI.isBeforeLegalize()) return SDValue(); // DAG combiner will fold: // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2 // Other code patterns that can be also be modified have the following form: // b + ((a << 1) | 510) // b + ((a << 1) & 510) // b + ((a << 1) ^ 510) // b + ((a << 1) + 510) // Many instructions can perform the shift for free, but it requires both // the operands to be registers. If c1 << c2 is too large, a mov immediate // instruction will needed. So, unfold back to the original pattern if: // - if c1 and c2 are small enough that they don't require mov imms. // - the user(s) of the node can perform an shl // No shifted operands for 16-bit instructions. if (ST->isThumb() && ST->isThumb1Only()) return SDValue(); // Check that all the users could perform the shl themselves. for (auto U : N->uses()) { switch(U->getOpcode()) { default: return SDValue(); case ISD::SUB: case ISD::ADD: case ISD::AND: case ISD::OR: case ISD::XOR: case ISD::SETCC: case ARMISD::CMP: // Check that the user isn't already using a constant because there // aren't any instructions that support an immediate operand and a // shifted operand. if (isa(U->getOperand(0)) || isa(U->getOperand(1))) return SDValue(); // Check that it's not already using a shift. if (U->getOperand(0).getOpcode() == ISD::SHL || U->getOperand(1).getOpcode() == ISD::SHL) return SDValue(); break; } } if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR && N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND) return SDValue(); if (N->getOperand(0).getOpcode() != ISD::SHL) return SDValue(); SDValue SHL = N->getOperand(0); auto *C1ShlC2 = dyn_cast(N->getOperand(1)); auto *C2 = dyn_cast(SHL.getOperand(1)); if (!C1ShlC2 || !C2) return SDValue(); APInt C2Int = C2->getAPIntValue(); APInt C1Int = C1ShlC2->getAPIntValue(); // Check that performing a lshr will not lose any information. 
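  // For example (illustrative values): with c2 == 4 and c1 == 0x7F0, the low
  // four bits of c1 are already zero, so c1 >> 4 == 0x7F loses nothing and
  // the unfolding below is safe; with c1 == 0x7F1 the mask test fails and the
  // original pattern is kept.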
APInt Mask = APInt::getHighBitsSet(C2Int.getBitWidth(), C2Int.getBitWidth() - C2->getZExtValue()); if ((C1Int & Mask) != C1Int) return SDValue(); // Shift the first constant. C1Int.lshrInPlace(C2Int); // The immediates are encoded as an 8-bit value that can be rotated. auto LargeImm = [](const APInt &Imm) { unsigned Zeros = Imm.countLeadingZeros() + Imm.countTrailingZeros(); return Imm.getBitWidth() - Zeros > 8; }; if (LargeImm(C1Int) || LargeImm(C2Int)) return SDValue(); SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); SDValue X = SHL.getOperand(0); SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X, DAG.getConstant(C1Int, dl, MVT::i32)); // Shift left to compensate for the lshr of C1Int. SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1)); LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump(); SHL.dump(); N->dump()); LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump()); return Res; } /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. /// static SDValue PerformADDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // Only works one way, because it needs an immediate operand. if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget)) return Result; // First try with the default operand order. if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget)) return Result; // If that didn't work, try again with the operands commuted. return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget); } /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. /// static SDValue PerformSUBCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) if (N1.getNode()->hasOneUse()) if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI)) return Result; return SDValue(); } /// PerformVMULCombine /// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the /// special multiplier accumulator forwarding. 
/// vmul d3, d0, d2 /// vmla d3, d1, d2 /// is faster than /// vadd d3, d0, d1 /// vmul d3, d3, d2 // However, for (A + B) * (A + B), // vadd d2, d0, d1 // vmul d3, d0, d2 // vmla d3, d1, d2 // is slower than // vadd d2, d0, d1 // vmul d3, d2, d2 static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { if (!Subtarget->hasVMLxForwarding()) return SDValue(); SelectionDAG &DAG = DCI.DAG; SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); unsigned Opcode = N0.getOpcode(); if (Opcode != ISD::ADD && Opcode != ISD::SUB && Opcode != ISD::FADD && Opcode != ISD::FSUB) { Opcode = N1.getOpcode(); if (Opcode != ISD::ADD && Opcode != ISD::SUB && Opcode != ISD::FADD && Opcode != ISD::FSUB) return SDValue(); std::swap(N0, N1); } if (N0 == N1) return SDValue(); EVT VT = N->getValueType(0); SDLoc DL(N); SDValue N00 = N0->getOperand(0); SDValue N01 = N0->getOperand(1); return DAG.getNode(Opcode, DL, VT, DAG.getNode(ISD::MUL, DL, VT, N00, N1), DAG.getNode(ISD::MUL, DL, VT, N01, N1)); } static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { SelectionDAG &DAG = DCI.DAG; if (Subtarget->isThumb1Only()) return SDValue(); if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) return SDValue(); EVT VT = N->getValueType(0); if (VT.is64BitVector() || VT.is128BitVector()) return PerformVMULCombine(N, DCI, Subtarget); if (VT != MVT::i32) return SDValue(); ConstantSDNode *C = dyn_cast(N->getOperand(1)); if (!C) return SDValue(); int64_t MulAmt = C->getSExtValue(); unsigned ShiftAmt = countTrailingZeros(MulAmt); ShiftAmt = ShiftAmt & (32 - 1); SDValue V = N->getOperand(0); SDLoc DL(N); SDValue Res; MulAmt >>= ShiftAmt; if (MulAmt >= 0) { if (isPowerOf2_32(MulAmt - 1)) { // (mul x, 2^N + 1) => (add (shl x, N), x) Res = DAG.getNode(ISD::ADD, DL, VT, V, DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Log2_32(MulAmt - 1), DL, MVT::i32))); } else if (isPowerOf2_32(MulAmt + 1)) { // (mul x, 2^N - 1) => (sub (shl x, N), x) Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Log2_32(MulAmt + 1), DL, MVT::i32)), V); } else return SDValue(); } else { uint64_t MulAmtAbs = -MulAmt; if (isPowerOf2_32(MulAmtAbs + 1)) { // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) Res = DAG.getNode(ISD::SUB, DL, VT, V, DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Log2_32(MulAmtAbs + 1), DL, MVT::i32))); } else if (isPowerOf2_32(MulAmtAbs - 1)) { // (mul x, -(2^N + 1)) => - (add (shl x, N), x) Res = DAG.getNode(ISD::ADD, DL, VT, V, DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(Log2_32(MulAmtAbs - 1), DL, MVT::i32))); Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, MVT::i32), Res); } else return SDValue(); } if (ShiftAmt != 0) Res = DAG.getNode(ISD::SHL, DL, VT, Res, DAG.getConstant(ShiftAmt, DL, MVT::i32)); // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); return SDValue(); } static SDValue CombineANDShift(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Allow DAGCombine to pattern-match before we touch the canonical form. if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) return SDValue(); if (N->getValueType(0) != MVT::i32) return SDValue(); ConstantSDNode *N1C = dyn_cast(N->getOperand(1)); if (!N1C) return SDValue(); uint32_t C1 = (uint32_t)N1C->getZExtValue(); // Don't transform uxtb/uxth. 
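  // (Illustrative note: an AND with 255 or 65535 normally selects to a single
  // uxtb/uxth, so rewriting it into the shift pairs below would be a
  // pessimization.)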
  if (C1 == 255 || C1 == 65535)
    return SDValue();

  SDNode *N0 = N->getOperand(0).getNode();
  if (!N0->hasOneUse())
    return SDValue();

  if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
    return SDValue();

  bool LeftShift = N0->getOpcode() == ISD::SHL;

  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
  if (!N01C)
    return SDValue();

  uint32_t C2 = (uint32_t)N01C->getZExtValue();
  if (!C2 || C2 >= 32)
    return SDValue();

  // Clear irrelevant bits in the mask.
  if (LeftShift)
    C1 &= (-1U << C2);
  else
    C1 &= (-1U >> C2);

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  // We have a pattern of the form "(and (shl x, c2) c1)" or
  // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
  // transform to a pair of shifts, to save materializing c1.

  // First pattern: right shift, then mask off leading bits.
  // FIXME: Use demanded bits?
  if (!LeftShift && isMask_32(C1)) {
    uint32_t C3 = countLeadingZeros(C1);
    if (C2 < C3) {
      SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
                                DAG.getConstant(C3 - C2, DL, MVT::i32));
      return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
                         DAG.getConstant(C3, DL, MVT::i32));
    }
  }

  // First pattern, reversed: left shift, then mask off trailing bits.
  if (LeftShift && isMask_32(~C1)) {
    uint32_t C3 = countTrailingZeros(C1);
    if (C2 < C3) {
      SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
                                DAG.getConstant(C3 - C2, DL, MVT::i32));
      return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
                         DAG.getConstant(C3, DL, MVT::i32));
    }
  }

  // Second pattern: left shift, then mask off leading bits.
  // FIXME: Use demanded bits?
  if (LeftShift && isShiftedMask_32(C1)) {
    uint32_t Trailing = countTrailingZeros(C1);
    uint32_t C3 = countLeadingZeros(C1);
    if (Trailing == C2 && C2 + C3 < 32) {
      SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
                                DAG.getConstant(C2 + C3, DL, MVT::i32));
      return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
                         DAG.getConstant(C3, DL, MVT::i32));
    }
  }

  // Second pattern, reversed: right shift, then mask off trailing bits.
  // FIXME: Handle other patterns of known/demanded bits.
  if (!LeftShift && isShiftedMask_32(C1)) {
    uint32_t Leading = countLeadingZeros(C1);
    uint32_t C3 = countTrailingZeros(C1);
    if (Leading == C2 && C2 + C3 < 32) {
      SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
                                DAG.getConstant(C2 + C3, DL, MVT::i32));
      return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
                         DAG.getConstant(C3, DL, MVT::i32));
    }
  }

  // FIXME: Transform "(and (shl x, c2) c1)" ->
  // "(shl (and x, c1>>c2), c2)" if "c1 >> c2" is a cheaper immediate than
  // c1.
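  // (For example, assuming c1 == 0xff00 and c2 == 8: c1 >> c2 == 0xff fits an
  // 8-bit immediate while 0xff00 does not, so the rewritten form would be
  // cheaper to materialize.)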
return SDValue(); } static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Attempt to use immediate-form VBIC BuildVectorSDNode *BVN = dyn_cast(N->getOperand(1)); SDLoc dl(N); EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; if (BVN && BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { EVT VbicVT; SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(), SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VbicVT, VT.is128BitVector(), OtherModImm); if (Val.getNode()) { SDValue Input = DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0)); SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val); return DAG.getNode(ISD::BITCAST, dl, VT, Vbic); } } } if (!Subtarget->isThumb1Only()) { // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI)) return Result; if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget)) return Result; } if (Subtarget->isThumb1Only()) if (SDValue Result = CombineANDShift(N, DCI, Subtarget)) return Result; return SDValue(); } // Try combining OR nodes to SMULWB, SMULWT. static SDValue PerformORCombineToSMULWBT(SDNode *OR, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { if (!Subtarget->hasV6Ops() || (Subtarget->isThumb() && (!Subtarget->hasThumb2() || !Subtarget->hasDSP()))) return SDValue(); SDValue SRL = OR->getOperand(0); SDValue SHL = OR->getOperand(1); if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) { SRL = OR->getOperand(1); SHL = OR->getOperand(0); } if (!isSRL16(SRL) || !isSHL16(SHL)) return SDValue(); // The first operands to the shifts need to be the two results from the // same smul_lohi node. if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) || SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI) return SDValue(); SDNode *SMULLOHI = SRL.getOperand(0).getNode(); if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) || SHL.getOperand(0) != SDValue(SMULLOHI, 1)) return SDValue(); // Now we have: // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16))) // For SMUL[B|T] smul_lohi will take a 32-bit and a 16-bit arguments. // For SMUWB the 16-bit value will signed extended somehow. // For SMULWT only the SRA is required. 
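  // (Rough reference semantics: smulwb rd, rn, rm computes
  //  (i64)rn * sext(rm[15:0]) and keeps bits [47:16] of the 48-bit product,
  //  and smulwt does the same with the top halfword of rm. That is what the
  //  (lo >> 16) | (hi << 16) combination above reconstructs.)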
// Check both sides of SMUL_LOHI SDValue OpS16 = SMULLOHI->getOperand(0); SDValue OpS32 = SMULLOHI->getOperand(1); SelectionDAG &DAG = DCI.DAG; if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) { OpS16 = OpS32; OpS32 = SMULLOHI->getOperand(0); } SDLoc dl(OR); unsigned Opcode = 0; if (isS16(OpS16, DAG)) Opcode = ARMISD::SMULWB; else if (isSRA16(OpS16)) { Opcode = ARMISD::SMULWT; OpS16 = OpS16->getOperand(0); } else return SDValue(); SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16); DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res); return SDValue(OR, 0); } static SDValue PerformORCombineToBFI(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // BFI is only available on V6T2+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) return SDValue(); EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SelectionDAG &DAG = DCI.DAG; SDLoc DL(N); // 1) or (and A, mask), val => ARMbfi A, val, mask // iff (val & mask) == val // // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) // && mask == ~mask2 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) // && ~mask == mask2 // (i.e., copy a bitfield value into another bitfield of the same width) if (VT != MVT::i32) return SDValue(); SDValue N00 = N0.getOperand(0); // The value and the mask need to be constants so we can verify this is // actually a bitfield set. If the mask is 0xffff, we can do better // via a movt instruction, so don't use BFI in that case. SDValue MaskOp = N0.getOperand(1); ConstantSDNode *MaskC = dyn_cast(MaskOp); if (!MaskC) return SDValue(); unsigned Mask = MaskC->getZExtValue(); if (Mask == 0xffff) return SDValue(); SDValue Res; // Case (1): or (and A, mask), val => ARMbfi A, val, mask ConstantSDNode *N1C = dyn_cast(N1); if (N1C) { unsigned Val = N1C->getZExtValue(); if ((Val & ~Mask) != Val) return SDValue(); if (ARM::isBitFieldInvertedMask(Mask)) { Val >>= countTrailingZeros(~Mask); Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, DAG.getConstant(Val, DL, MVT::i32), DAG.getConstant(Mask, DL, MVT::i32)); DCI.CombineTo(N, Res, false); // Return value from the original node to inform the combiner than N is // now dead. return SDValue(N, 0); } } else if (N1.getOpcode() == ISD::AND) { // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask ConstantSDNode *N11C = dyn_cast(N1.getOperand(1)); if (!N11C) return SDValue(); unsigned Mask2 = N11C->getZExtValue(); // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern // as is to match. if (ARM::isBitFieldInvertedMask(Mask) && (Mask == ~Mask2)) { // The pack halfword instruction works better for masks that fit it, // so use that when it's available. if (Subtarget->hasDSP() && (Mask == 0xffff || Mask == 0xffff0000)) return SDValue(); // 2a unsigned amt = countTrailingZeros(Mask2); Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), DAG.getConstant(amt, DL, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, DAG.getConstant(Mask, DL, MVT::i32)); DCI.CombineTo(N, Res, false); // Return value from the original node to inform the combiner than N is // now dead. return SDValue(N, 0); } else if (ARM::isBitFieldInvertedMask(~Mask) && (~Mask == Mask2)) { // The pack halfword instruction works better for masks that fit it, // so use that when it's available. 
if (Subtarget->hasDSP() && (Mask2 == 0xffff || Mask2 == 0xffff0000)) return SDValue(); // 2b unsigned lsb = countTrailingZeros(Mask); Res = DAG.getNode(ISD::SRL, DL, VT, N00, DAG.getConstant(lsb, DL, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, DAG.getConstant(Mask2, DL, MVT::i32)); DCI.CombineTo(N, Res, false); // Return value from the original node to inform the combiner than N is // now dead. return SDValue(N, 0); } } if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) && N00.getOpcode() == ISD::SHL && isa(N00.getOperand(1)) && ARM::isBitFieldInvertedMask(~Mask)) { // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask // where lsb(mask) == #shamt and masked bits of B are known zero. SDValue ShAmt = N00.getOperand(1); unsigned ShAmtC = cast(ShAmt)->getZExtValue(); unsigned LSB = countTrailingZeros(Mask); if (ShAmtC != LSB) return SDValue(); Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0), DAG.getConstant(~Mask, DL, MVT::i32)); DCI.CombineTo(N, Res, false); // Return value from the original node to inform the combiner than N is // now dead. return SDValue(N, 0); } return SDValue(); } /// PerformORCombine - Target-specific dag combine xforms for ISD::OR static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Attempt to use immediate-form VORR BuildVectorSDNode *BVN = dyn_cast(N->getOperand(1)); SDLoc dl(N); EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; if (BVN && Subtarget->hasNEON() && BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { EVT VorrVT; SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VorrVT, VT.is128BitVector(), OtherModImm); if (Val.getNode()) { SDValue Input = DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); } } } if (!Subtarget->isThumb1Only()) { // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI)) return Result; if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget)) return Result; } SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT)) { // The code below optimizes (or (and X, Y), Z). // The AND operand needs to have a single user to make these optimizations // profitable. if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) return SDValue(); APInt SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; APInt SplatBits0, SplatBits1; BuildVectorSDNode *BVN0 = dyn_cast(N0->getOperand(1)); BuildVectorSDNode *BVN1 = dyn_cast(N1->getOperand(1)); // Ensure that the second operand of both ands are constants if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, HasAnyUndefs) && !HasAnyUndefs) { if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, HasAnyUndefs) && !HasAnyUndefs) { // Ensure that the bit width of the constants are the same and that // the splat arguments are logical inverses as per the pattern we // are trying to simplify. 
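        // (For example, splats of 0x00ff00ff and 0xff00ff00 are bitwise
        //  inverses, so (or (and B, 0x00ff00ff), (and C, 0xff00ff00)) picks
        //  every byte from either B or C and can be selected as a single
        //  vbsl with the constant as the selector.)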
if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() && SplatBits0 == ~SplatBits1) { // Canonicalize the vector type to make instruction selection // simpler. EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, N0->getOperand(1), N0->getOperand(0), N1->getOperand(0)); return DAG.getNode(ISD::BITCAST, dl, VT, Result); } } } } // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when // reasonable. if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) { if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget)) return Res; } if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget)) return Result; return SDValue(); } static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); if (!Subtarget->isThumb1Only()) { // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI)) return Result; if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget)) return Result; } return SDValue(); } // ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it, // and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and // their position in "to" (Rd). static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) { assert(N->getOpcode() == ARMISD::BFI); SDValue From = N->getOperand(1); ToMask = ~cast(N->getOperand(2))->getAPIntValue(); FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation()); // If the Base came from a SHR #C, we can deduce that it is really testing bit // #C in the base of the SHR. if (From->getOpcode() == ISD::SRL && isa(From->getOperand(1))) { APInt Shift = cast(From->getOperand(1))->getAPIntValue(); assert(Shift.getLimitedValue() < 32 && "Shift too large!"); FromMask <<= Shift.getLimitedValue(31); From = From->getOperand(0); } return From; } // If A and B contain one contiguous set of bits, does A | B == A . B? // // Neither A nor B must be zero. static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) { unsigned LastActiveBitInA = A.countTrailingZeros(); unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1; return LastActiveBitInA - 1 == FirstActiveBitInB; } static SDValue FindBFIToCombineWith(SDNode *N) { // We have a BFI in N. Follow a possible chain of BFIs and find a BFI it can combine with, // if one exists. APInt ToMask, FromMask; SDValue From = ParseBFI(N, ToMask, FromMask); SDValue To = N->getOperand(0); // Now check for a compatible BFI to merge with. We can pass through BFIs that // aren't compatible, but not if they set the same bit in their destination as // we do (or that of any BFI we're going to combine with). SDValue V = To; APInt CombinedToMask = ToMask; while (V.getOpcode() == ARMISD::BFI) { APInt NewToMask, NewFromMask; SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask); if (NewFrom != From) { // This BFI has a different base. Keep going. CombinedToMask |= NewToMask; V = V.getOperand(0); continue; } // Do the written bits conflict with any we've seen so far? if ((NewToMask & CombinedToMask).getBoolValue()) // Conflicting bits - bail out because going further is unsafe. return SDValue(); // Are the new bits contiguous when combined with the old bits? 
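    // (Illustrative example: ToMask 0x00000f00 and NewToMask 0x000000f0 abut
    //  and can be folded into one BFI, whereas 0x00000f00 and 0x0000000f
    //  leave a gap between them and cannot be merged here.)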
    if (BitsProperlyConcatenate(ToMask, NewToMask) &&
        BitsProperlyConcatenate(FromMask, NewFromMask))
      return V;
    if (BitsProperlyConcatenate(NewToMask, ToMask) &&
        BitsProperlyConcatenate(NewFromMask, FromMask))
      return V;

    // We've seen a write to some bits, so track it.
    CombinedToMask |= NewToMask;
    // Keep going...
    V = V.getOperand(0);
  }

  return SDValue();
}

static SDValue PerformBFICombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  SDValue N1 = N->getOperand(1);
  if (N1.getOpcode() == ISD::AND) {
    // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
    // the bits being cleared by the AND are not demanded by the BFI.
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C)
      return SDValue();
    unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
    unsigned LSB = countTrailingZeros(~InvMask);
    unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB;
    assert(Width <
               static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
           "undefined behavior");
    unsigned Mask = (1u << Width) - 1;
    unsigned Mask2 = N11C->getZExtValue();
    if ((Mask & (~Mask2)) == 0)
      return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
                             N->getOperand(0), N1.getOperand(0),
                             N->getOperand(2));
  } else if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
    // We have a BFI of a BFI. Walk up the BFI chain to see how long it goes.
    // Keep track of any consecutive bits set that all come from the same base
    // value. We can combine these together into a single BFI.
    SDValue CombineBFI = FindBFIToCombineWith(N);
    if (CombineBFI == SDValue())
      return SDValue();

    // We've found a BFI.
    APInt ToMask1, FromMask1;
    SDValue From1 = ParseBFI(N, ToMask1, FromMask1);

    APInt ToMask2, FromMask2;
    SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
    assert(From1 == From2);
    (void)From2;

    // First, unlink CombineBFI.
    DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0));
    // Then create a new BFI, combining the two together.
    APInt NewFromMask = FromMask1 | FromMask2;
    APInt NewToMask = ToMask1 | ToMask2;
    EVT VT = N->getValueType(0);
    SDLoc dl(N);

    if (NewFromMask[0] == 0)
      From1 = DCI.DAG.getNode(
          ISD::SRL, dl, VT, From1,
          DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT));
    return DCI.DAG.getNode(ARMISD::BFI, dl, VT, N->getOperand(0), From1,
                           DCI.DAG.getConstant(~NewToMask, dl, VT));
  }
  return SDValue();
}

/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     const ARMSubtarget *Subtarget) {
  // vmovrrd(vmovdrr x, y) -> x,y
  SDValue InDouble = N->getOperand(0);
  if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP())
    return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));

  // vmovrrd(load f64) -> (load i32), (load i32)
  SDNode *InNode = InDouble.getNode();
  if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
      InNode->getValueType(0) == MVT::f64 &&
      InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
      !cast<LoadSDNode>(InNode)->isVolatile()) {
    // TODO: Should this be done for non-FrameIndex operands?
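    // (What follows, roughly: the f64 frame load is split into two i32 loads
    //  at offsets 0 and 4 so that both halves land directly in GPRs instead
    //  of round-tripping through a VFP register; the pair is swapped for
    //  big-endian targets.)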
LoadSDNode *LD = cast(InNode); SelectionDAG &DAG = DCI.DAG; SDLoc DL(LD); SDValue BasePtr = LD->getBasePtr(); SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(), LD->getAlignment(), LD->getMemOperand()->getFlags()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(4, DL, MVT::i32)); SDValue NewLD2 = DAG.getLoad( MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, LD->getPointerInfo(), std::min(4U, LD->getAlignment() / 2), LD->getMemOperand()->getFlags()); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); if (DCI.DAG.getDataLayout().isBigEndian()) std::swap (NewLD1, NewLD2); SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); return Result; } return SDValue(); } /// PerformVMOVDRRCombine - Target-specific dag combine xforms for /// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands. static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X) SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); if (Op0.getOpcode() == ISD::BITCAST) Op0 = Op0.getOperand(0); if (Op1.getOpcode() == ISD::BITCAST) Op1 = Op1.getOperand(0); if (Op0.getOpcode() == ARMISD::VMOVRRD && Op0.getNode() == Op1.getNode() && Op0.getResNo() == 0 && Op1.getResNo() == 1) return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0.getOperand(0)); return SDValue(); } /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node /// are normal, non-volatile loads. If so, it is profitable to bitcast an /// i64 vector to have f64 elements, since the value can then be loaded /// directly into a VFP register. static bool hasNormalLoadOperand(SDNode *N) { unsigned NumElts = N->getValueType(0).getVectorNumElements(); for (unsigned i = 0; i < NumElts; ++i) { SDNode *Elt = N->getOperand(i).getNode(); if (ISD::isNormalLoad(Elt) && !cast(Elt)->isVolatile()) return true; } return false; } /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for /// ISD::BUILD_VECTOR. static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X): // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value // into a pair of GPRs, which is fine when the value is used as a scalar, // but if the i64 value is converted to a vector, we need to undo the VMOVRRD. SelectionDAG &DAG = DCI.DAG; if (N->getNumOperands() == 2) if (SDValue RV = PerformVMOVDRRCombine(N, DAG)) return RV; // Load i64 elements as f64 values so that type legalization does not split // them up into i32 values. EVT VT = N->getValueType(0); if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N)) return SDValue(); SDLoc dl(N); SmallVector Ops; unsigned NumElts = VT.getVectorNumElements(); for (unsigned i = 0; i < NumElts; ++i) { SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i)); Ops.push_back(V); // Make the DAGCombiner fold the bitcast. DCI.AddToWorklist(V.getNode()); } EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts); SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, BV); } /// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR. static SDValue PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR. 
// At that time, we may have inserted bitcasts from integer to float. // If these bitcasts have survived DAGCombine, change the lowering of this // BUILD_VECTOR in something more vector friendly, i.e., that does not // force to use floating point types. // Make sure we can change the type of the vector. // This is possible iff: // 1. The vector is only used in a bitcast to a integer type. I.e., // 1.1. Vector is used only once. // 1.2. Use is a bit convert to an integer type. // 2. The size of its operands are 32-bits (64-bits are not legal). EVT VT = N->getValueType(0); EVT EltVT = VT.getVectorElementType(); // Check 1.1. and 2. if (EltVT.getSizeInBits() != 32 || !N->hasOneUse()) return SDValue(); // By construction, the input type must be float. assert(EltVT == MVT::f32 && "Unexpected type!"); // Check 1.2. SDNode *Use = *N->use_begin(); if (Use->getOpcode() != ISD::BITCAST || Use->getValueType(0).isFloatingPoint()) return SDValue(); // Check profitability. // Model is, if more than half of the relevant operands are bitcast from // i32, turn the build_vector into a sequence of insert_vector_elt. // Relevant operands are everything that is not statically // (i.e., at compile time) bitcasted. unsigned NumOfBitCastedElts = 0; unsigned NumElts = VT.getVectorNumElements(); unsigned NumOfRelevantElts = NumElts; for (unsigned Idx = 0; Idx < NumElts; ++Idx) { SDValue Elt = N->getOperand(Idx); if (Elt->getOpcode() == ISD::BITCAST) { // Assume only bit cast to i32 will go away. if (Elt->getOperand(0).getValueType() == MVT::i32) ++NumOfBitCastedElts; } else if (Elt.isUndef() || isa(Elt)) // Constants are statically casted, thus do not count them as // relevant operands. --NumOfRelevantElts; } // Check if more than half of the elements require a non-free bitcast. if (NumOfBitCastedElts <= NumOfRelevantElts / 2) return SDValue(); SelectionDAG &DAG = DCI.DAG; // Create the new vector type. EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); // Check if the type is legal. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.isTypeLegal(VecVT)) return SDValue(); // Combine: // ARMISD::BUILD_VECTOR E1, E2, ..., EN. // => BITCAST INSERT_VECTOR_ELT // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1), // (BITCAST EN), N. SDValue Vec = DAG.getUNDEF(VecVT); SDLoc dl(N); for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) { SDValue V = N->getOperand(Idx); if (V.isUndef()) continue; if (V.getOpcode() == ISD::BITCAST && V->getOperand(0).getValueType() == MVT::i32) // Fold obvious case. V = V.getOperand(0); else { V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); // Make the DAGCombiner fold the bitcasts. DCI.AddToWorklist(V.getNode()); } SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32); Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx); } Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec); // Make the DAGCombiner fold the bitcasts. DCI.AddToWorklist(Vec.getNode()); return Vec; } /// PerformInsertEltCombine - Target-specific dag combine xforms for /// ISD::INSERT_VECTOR_ELT. static SDValue PerformInsertEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // Bitcast an i64 load inserted into a vector to f64. // Otherwise, the i64 value will be legalized to a pair of i32 values. 
EVT VT = N->getValueType(0); SDNode *Elt = N->getOperand(1).getNode(); if (VT.getVectorElementType() != MVT::i64 || !ISD::isNormalLoad(Elt) || cast(Elt)->isVolatile()) return SDValue(); SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, VT.getVectorNumElements()); SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0)); SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1)); // Make the DAGCombiner fold the bitcasts. DCI.AddToWorklist(Vec.getNode()); DCI.AddToWorklist(V.getNode()); SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT, Vec, V, N->getOperand(2)); return DAG.getNode(ISD::BITCAST, dl, VT, InsElt); } /// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for /// ISD::VECTOR_SHUFFLE. static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { // The LLVM shufflevector instruction does not require the shuffle mask // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the // operands do not match the mask length, they are extended by concatenating // them with undef vectors. That is probably the right thing for other // targets, but for NEON it is better to concatenate two double-register // size vector operands into a single quad-register size vector. Do that // transformation here: // shuffle(concat(v1, undef), concat(v2, undef)) -> // shuffle(concat(v1, v2), undef) SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); if (Op0.getOpcode() != ISD::CONCAT_VECTORS || Op1.getOpcode() != ISD::CONCAT_VECTORS || Op0.getNumOperands() != 2 || Op1.getNumOperands() != 2) return SDValue(); SDValue Concat0Op1 = Op0.getOperand(1); SDValue Concat1Op1 = Op1.getOperand(1); if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef()) return SDValue(); // Skip the transformation if any of the types are illegal. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = N->getValueType(0); if (!TLI.isTypeLegal(VT) || !TLI.isTypeLegal(Concat0Op1.getValueType()) || !TLI.isTypeLegal(Concat1Op1.getValueType())) return SDValue(); SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Op0.getOperand(0), Op1.getOperand(0)); // Translate the shuffle mask. SmallVector NewMask; unsigned NumElts = VT.getVectorNumElements(); unsigned HalfElts = NumElts/2; ShuffleVectorSDNode *SVN = cast(N); for (unsigned n = 0; n < NumElts; ++n) { int MaskElt = SVN->getMaskElt(n); int NewElt = -1; if (MaskElt < (int)HalfElts) NewElt = MaskElt; else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts)) NewElt = HalfElts + MaskElt - NumElts; NewMask.push_back(NewElt); } return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat, DAG.getUNDEF(VT), NewMask); } /// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, /// NEON load/store intrinsics, and generic vector load/stores, to merge /// base address updates. /// For generic load/stores, the memory type is assumed to be a vector. /// The caller is assumed to have checked legality. static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || N->getOpcode() == ISD::INTRINSIC_W_CHAIN); const bool isStore = N->getOpcode() == ISD::STORE; const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 
2 : 1); SDValue Addr = N->getOperand(AddrOpIdx); MemSDNode *MemN = cast(N); SDLoc dl(N); // Search for a use of the address operand that is an increment. for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE = Addr.getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User->getOpcode() != ISD::ADD || UI.getUse().getResNo() != Addr.getResNo()) continue; // Check that the add is independent of the load/store. Otherwise, folding // it would create a cycle. We can avoid searching through Addr as it's a // predecessor to both. SmallPtrSet Visited; SmallVector Worklist; Visited.insert(Addr.getNode()); Worklist.push_back(N); Worklist.push_back(User); if (SDNode::hasPredecessorHelper(N, Visited, Worklist) || SDNode::hasPredecessorHelper(User, Visited, Worklist)) continue; // Find the new opcode for the updating load/store. bool isLoadOp = true; bool isLaneOp = false; unsigned NewOpc = 0; unsigned NumVecs = 0; if (isIntrinsic) { unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); switch (IntNo) { default: llvm_unreachable("unexpected intrinsic for Neon base update"); case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD; NumVecs = 1; break; case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD; NumVecs = 2; break; case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD; NumVecs = 3; break; case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD; NumVecs = 4; break; case Intrinsic::arm_neon_vld2dup: case Intrinsic::arm_neon_vld3dup: case Intrinsic::arm_neon_vld4dup: // TODO: Support updating VLDxDUP nodes. For now, we just skip // combining base updates for such intrinsics. continue; case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD; NumVecs = 2; isLaneOp = true; break; case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD; NumVecs = 3; isLaneOp = true; break; case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD; NumVecs = 4; isLaneOp = true; break; case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD; NumVecs = 1; isLoadOp = false; break; case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD; NumVecs = 2; isLoadOp = false; break; case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD; NumVecs = 3; isLoadOp = false; break; case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD; NumVecs = 4; isLoadOp = false; break; case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD; NumVecs = 2; isLoadOp = false; isLaneOp = true; break; case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD; NumVecs = 3; isLoadOp = false; isLaneOp = true; break; case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD; NumVecs = 4; isLoadOp = false; isLaneOp = true; break; } } else { isLaneOp = true; switch (N->getOpcode()) { default: llvm_unreachable("unexpected opcode for Neon base update"); case ARMISD::VLD1DUP: NewOpc = ARMISD::VLD1DUP_UPD; NumVecs = 1; break; case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD; NumVecs = 1; isLaneOp = false; break; case ISD::STORE: NewOpc = ARMISD::VST1_UPD; NumVecs = 1; isLaneOp = false; isLoadOp = false; break; } } // Find the size of memory referenced by the load/store. 
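    // (For example, assuming a vld2.32 of two v4i32 vectors: NumVecs == 2 and
    //  VecTy is v4i32, so NumBytes == 32 and only a constant increment of 32,
    //  or a register increment, can be folded into the updating form.)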
EVT VecTy; if (isLoadOp) { VecTy = N->getValueType(0); } else if (isIntrinsic) { VecTy = N->getOperand(AddrOpIdx+1).getValueType(); } else { assert(isStore && "Node has to be a load, a store, or an intrinsic!"); VecTy = N->getOperand(1).getValueType(); } unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; if (isLaneOp) NumBytes /= VecTy.getVectorNumElements(); // If the increment is a constant, it must match the memory ref size. SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); ConstantSDNode *CInc = dyn_cast(Inc.getNode()); if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) { // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two // separate instructions that make it harder to use a non-constant update. continue; } // OK, we found an ADD we can fold into the base update. // Now, create a _UPD node, taking care of not breaking alignment. EVT AlignedVecTy = VecTy; unsigned Alignment = MemN->getAlignment(); // If this is a less-than-standard-aligned load/store, change the type to // match the standard alignment. // The alignment is overlooked when selecting _UPD variants; and it's // easier to introduce bitcasts here than fix that. // There are 3 ways to get to this base-update combine: // - intrinsics: they are assumed to be properly aligned (to the standard // alignment of the memory type), so we don't need to do anything. // - ARMISD::VLDx nodes: they are only generated from the aforementioned // intrinsics, so, likewise, there's nothing to do. // - generic load/store instructions: the alignment is specified as an // explicit operand, rather than implicitly as the standard alignment // of the memory type (like the intrisics). We need to change the // memory type to match the explicit alignment. That way, we don't // generate non-standard-aligned ARMISD::VLDx nodes. if (isa(N)) { if (Alignment == 0) Alignment = 1; if (Alignment < VecTy.getScalarSizeInBits() / 8) { MVT EltTy = MVT::getIntegerVT(Alignment * 8); assert(NumVecs == 1 && "Unexpected multi-element generic load/store."); assert(!isLaneOp && "Unexpected generic load/store lane."); unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8); AlignedVecTy = MVT::getVectorVT(EltTy, NumElts); } // Don't set an explicit alignment on regular load/stores that we want // to transform to VLD/VST 1_UPD nodes. // This matches the behavior of regular load/stores, which only get an // explicit alignment if the MMO alignment is larger than the standard // alignment of the memory type. // Intrinsics, however, always get an explicit alignment, set to the // alignment of the MMO. Alignment = 1; } // Create the new updating load/store node. // First, create an SDVTList for the new updating node's results. EVT Tys[6]; unsigned NumResultVecs = (isLoadOp ? NumVecs : 0); unsigned n; for (n = 0; n < NumResultVecs; ++n) Tys[n] = AlignedVecTy; Tys[n++] = MVT::i32; Tys[n] = MVT::Other; SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2)); // Then, gather the new node's operands. SmallVector Ops; Ops.push_back(N->getOperand(0)); // incoming chain Ops.push_back(N->getOperand(AddrOpIdx)); Ops.push_back(Inc); if (StoreSDNode *StN = dyn_cast(N)) { // Try to match the intrinsic's signature Ops.push_back(StN->getValue()); } else { // Loads (and of course intrinsics) match the intrinsics' signature, // so just add all but the alignment operand. 
for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands() - 1; ++i) Ops.push_back(N->getOperand(i)); } // For all node types, the alignment operand is always the last one. Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32)); // If this is a non-standard-aligned STORE, the penultimate operand is the // stored value. Bitcast it to the aligned type. if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) { SDValue &StVal = Ops[Ops.size()-2]; StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal); } EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy; SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT, MemN->getMemOperand()); // Update the uses. SmallVector NewResults; for (unsigned i = 0; i < NumResultVecs; ++i) NewResults.push_back(SDValue(UpdN.getNode(), i)); // If this is an non-standard-aligned LOAD, the first result is the loaded // value. Bitcast it to the expected result type. if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) { SDValue &LdVal = NewResults[0]; LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal); } NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain DCI.CombineTo(N, NewResults); DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); break; } return SDValue(); } static SDValue PerformVLDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) return SDValue(); return CombineBaseUpdate(N, DCI); } /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic /// are also VDUPLANEs. If so, combine them to a vldN-dup operation and /// return true. static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); // vldN-dup instructions only support 64-bit vectors for N > 1. if (!VT.is64BitVector()) return false; // Check if the VDUPLANE operand is a vldN-dup intrinsic. SDNode *VLD = N->getOperand(0).getNode(); if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN) return false; unsigned NumVecs = 0; unsigned NewOpc = 0; unsigned IntNo = cast(VLD->getOperand(1))->getZExtValue(); if (IntNo == Intrinsic::arm_neon_vld2lane) { NumVecs = 2; NewOpc = ARMISD::VLD2DUP; } else if (IntNo == Intrinsic::arm_neon_vld3lane) { NumVecs = 3; NewOpc = ARMISD::VLD3DUP; } else if (IntNo == Intrinsic::arm_neon_vld4lane) { NumVecs = 4; NewOpc = ARMISD::VLD4DUP; } else { return false; } // First check that all the vldN-lane uses are VDUPLANEs and that the lane // numbers match the load. unsigned VLDLaneNo = cast(VLD->getOperand(NumVecs+3))->getZExtValue(); for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); UI != UE; ++UI) { // Ignore uses of the chain result. if (UI.getUse().getResNo() == NumVecs) continue; SDNode *User = *UI; if (User->getOpcode() != ARMISD::VDUPLANE || VLDLaneNo != cast(User->getOperand(1))->getZExtValue()) return false; } // Create the vldN-dup node. EVT Tys[5]; unsigned n; for (n = 0; n < NumVecs; ++n) Tys[n] = VT; Tys[n] = MVT::Other; SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1)); SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; MemIntrinsicSDNode *VLDMemInt = cast(VLD); SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, Ops, VLDMemInt->getMemoryVT(), VLDMemInt->getMemOperand()); // Update the uses. 
for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); UI != UE; ++UI) { unsigned ResNo = UI.getUse().getResNo(); // Ignore uses of the chain result. if (ResNo == NumVecs) continue; SDNode *User = *UI; DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo)); } // Now the vldN-lane intrinsic is dead except for its chain result. // Update uses of the chain. std::vector VLDDupResults; for (unsigned n = 0; n < NumVecs; ++n) VLDDupResults.push_back(SDValue(VLDDup.getNode(), n)); VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs)); DCI.CombineTo(VLD, VLDDupResults); return true; } /// PerformVDUPLANECombine - Target-specific dag combine xforms for /// ARMISD::VDUPLANE. static SDValue PerformVDUPLANECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SDValue Op = N->getOperand(0); // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation. if (CombineVLDDUP(N, DCI)) return SDValue(N, 0); // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is // redundant. Ignore bit_converts for now; element sizes are checked below. while (Op.getOpcode() == ISD::BITCAST) Op = Op.getOperand(0); if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM) return SDValue(); // Make sure the VMOV element size is not bigger than the VDUPLANE elements. unsigned EltSize = Op.getScalarValueSizeInBits(); // The canonical VMOV for a zero vector uses a 32-bit element size. unsigned Imm = cast(Op.getOperand(0))->getZExtValue(); unsigned EltBits; if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) EltSize = 8; EVT VT = N->getValueType(0); if (EltSize > VT.getScalarSizeInBits()) return SDValue(); return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); } /// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP. static SDValue PerformVDUPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; SDValue Op = N->getOperand(0); // Match VDUP(LOAD) -> VLD1DUP. // We match this pattern here rather than waiting for isel because the // transform is only legal for unindexed loads. LoadSDNode *LD = dyn_cast(Op.getNode()); if (LD && Op.hasOneUse() && LD->isUnindexed() && LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) { SDValue Ops[] = { LD->getOperand(0), LD->getOperand(1), DAG.getConstant(LD->getAlignment(), SDLoc(N), MVT::i32) }; SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other); SDValue VLDDup = DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, Ops, LD->getMemoryVT(), LD->getMemOperand()); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1)); return VLDDup; } return SDValue(); } static SDValue PerformLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { EVT VT = N->getValueType(0); // If this is a legal vector load, try to combine it into a VLD1_UPD. if (ISD::isNormalLoad(N) && VT.isVector() && DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT)) return CombineBaseUpdate(N, DCI); return SDValue(); } /// PerformSTORECombine - Target-specific dag combine xforms for /// ISD::STORE. static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { StoreSDNode *St = cast(N); if (St->isVolatile()) return SDValue(); // Optimize trunc store (of multiple scalars) to shuffle and store. First, // pack all of the elements in one place. Next, store to memory in fewer // chunks. 
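  // (Sketch of the idea, assuming a v4i32 value truncated to v4i8: bitcast to
  //  v16i8, shuffle the four meaningful bytes down into the low lanes, then
  //  emit a single i32 store instead of storing each truncated element
  //  separately.)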
SDValue StVal = St->getValue(); EVT VT = StVal.getValueType(); if (St->isTruncatingStore() && VT.isVector()) { SelectionDAG &DAG = DCI.DAG; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT StVT = St->getMemoryVT(); unsigned NumElems = VT.getVectorNumElements(); assert(StVT != VT && "Cannot truncate to the same type"); unsigned FromEltSz = VT.getScalarSizeInBits(); unsigned ToEltSz = StVT.getScalarSizeInBits(); // From, To sizes and ElemCount must be pow of two if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); // We are going to use the original vector elt for storing. // Accumulated smaller vector elements must be a multiple of the store size. if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue(); unsigned SizeRatio = FromEltSz / ToEltSz; assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); // Create a type on which we perform the shuffle. EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), NumElems*SizeRatio); assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); SDLoc DL(St); SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); SmallVector ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1 : i * SizeRatio; // Can't shuffle using an illegal type. if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec); // At this point all of the data is stored at the bottom of the // register. We now need to save it to mem. // Find the largest store unit MVT StoreType = MVT::i8; for (MVT Tp : MVT::integer_valuetypes()) { if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) StoreType = Tp; } // Didn't find a legal store type. if (!TLI.isTypeLegal(StoreType)) return SDValue(); // Bitcast the original vector into a vector of store-size units EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); SmallVector Chains; SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL, TLI.getPointerTy(DAG.getDataLayout())); SDValue BasePtr = St->getBasePtr(); // Perform one or more big stores into memory. unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); for (unsigned I = 0; I < E; I++) { SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType, ShuffWide, DAG.getIntPtrConstant(I, DL)); SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment); Chains.push_back(Ch); } return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } if (!ISD::isNormalStore(St)) return SDValue(); // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and // ARM stores of arguments in the same cache line. if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && StVal.getNode()->hasOneUse()) { SelectionDAG &DAG = DCI.DAG; bool isBigEndian = DAG.getDataLayout().isBigEndian(); SDLoc DL(St); SDValue BasePtr = St->getBasePtr(); SDValue NewST1 = DAG.getStore( St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 
1 : 0), BasePtr, St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(4, DL, MVT::i32)); return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(isBigEndian ? 0 : 1), OffsetPtr, St->getPointerInfo(), std::min(4U, St->getAlignment() / 2), St->getMemOperand()->getFlags()); } if (StVal.getValueType() == MVT::i64 && StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { // Bitcast an i64 store extracted from a vector to f64. // Otherwise, the i64 value will be legalized to a pair of i32 values. SelectionDAG &DAG = DCI.DAG; SDLoc dl(StVal); SDValue IntVec = StVal.getOperand(0); EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, IntVec.getValueType().getVectorNumElements()); SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Vec, StVal.getOperand(1)); dl = SDLoc(N); SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); // Make the DAGCombiner fold the bitcasts. DCI.AddToWorklist(Vec.getNode()); DCI.AddToWorklist(ExtElt.getNode()); DCI.AddToWorklist(V.getNode()); return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags(), St->getAAInfo()); } // If this is a legal vector store, try to combine it into a VST1_UPD. if (ISD::isNormalStore(N) && VT.isVector() && DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT)) return CombineBaseUpdate(N, DCI); return SDValue(); } /// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) /// can replace combinations of VMUL and VCVT (floating-point to integer) /// when the VMUL has a constant operand that is a power of 2. /// /// Example (assume d17 = ): /// vmul.f32 d16, d17, d16 /// vcvt.s32.f32 d16, d16 /// becomes: /// vcvt.s32.f32 d16, d16, #3 static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { if (!Subtarget->hasNEON()) return SDValue(); SDValue Op = N->getOperand(0); if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() || Op.getOpcode() != ISD::FMUL) return SDValue(); SDValue ConstVec = Op->getOperand(1); if (!isa(ConstVec)) return SDValue(); MVT FloatTy = Op.getSimpleValueType().getVectorElementType(); uint32_t FloatBits = FloatTy.getSizeInBits(); MVT IntTy = N->getSimpleValueType(0).getVectorElementType(); uint32_t IntBits = IntTy.getSizeInBits(); unsigned NumLanes = Op.getValueType().getVectorNumElements(); if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) { // These instructions only exist converting from f32 to i32. We can handle // smaller integers by generating an extra truncate, but larger ones would // be lossy. We also can't handle more then 4 lanes, since these intructions // only support v2i32/v4i32 types. return SDValue(); } BitVector UndefElements; BuildVectorSDNode *BV = cast(ConstVec); int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33); if (C == -1 || C == 0 || C > 32) return SDValue(); SDLoc dl(N); bool isSigned = N->getOpcode() == ISD::FP_TO_SINT; unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs : Intrinsic::arm_neon_vcvtfp2fxu; SDValue FixConv = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? 
                      MVT::v2i32 : MVT::v4i32,
      DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
      DAG.getConstant(C, dl, MVT::i32));

  if (IntBits < FloatBits)
    FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);

  return FixConv;
}

/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
/// can replace combinations of VCVT (integer to floating-point) and VDIV
/// when the VDIV has a constant operand that is a power of 2.
///
/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
///  vcvt.f32.s32    d16, d16
///  vdiv.f32        d16, d17, d16
/// becomes:
///  vcvt.f32.s32    d16, d16, #3
static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
                                  const ARMSubtarget *Subtarget) {
  if (!Subtarget->hasNEON())
    return SDValue();

  SDValue Op = N->getOperand(0);
  unsigned OpOpcode = Op.getNode()->getOpcode();
  if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
      (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
    return SDValue();

  SDValue ConstVec = N->getOperand(1);
  if (!isa<BuildVectorSDNode>(ConstVec))
    return SDValue();

  MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
  uint32_t FloatBits = FloatTy.getSizeInBits();
  MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
  uint32_t IntBits = IntTy.getSizeInBits();
  unsigned NumLanes = Op.getValueType().getVectorNumElements();
  if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
    // These instructions only exist converting from i32 to f32. We can handle
    // smaller integers by generating an extra extend, but larger ones would
    // be lossy. We also can't handle more than 4 lanes, since these
    // instructions only support v2i32/v4i32 types.
    return SDValue();
  }

  BitVector UndefElements;
  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
  if (C == -1 || C == 0 || C > 32)
    return SDValue();

  SDLoc dl(N);
  bool isSigned = OpOpcode == ISD::SINT_TO_FP;
  SDValue ConvInput = Op.getOperand(0);
  if (IntBits < FloatBits)
    ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
                            NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, ConvInput);

  unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp
                                      : Intrinsic::arm_neon_vcvtfxu2fp;
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
                     DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), ConvInput,
                     DAG.getConstant(C, dl, MVT::i32));
}

/// getVShiftImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift operation, where all the elements of the
/// build_vector must have the same constant integer value.
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
  // Ignore bit_converts.
  while (Op.getOpcode() == ISD::BITCAST)
    Op = Op.getOperand(0);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (!BVN ||
      !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
                            ElementBits) ||
      SplatBitSize > ElementBits)
    return false;
  Cnt = SplatBits.getSExtValue();
  return true;
}

/// isVShiftLImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift left operation. That value must be in the range:
///   0 <= Value < ElementBits for a left shift; or
///   0 <= Value <= ElementBits for a long left shift.
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
  assert(VT.isVector() && "vector shift count is not a vector type");
  int64_t ElementBits = VT.getScalarSizeInBits();
  if (!
getVShiftImm(Op, ElementBits, Cnt)) return false; return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits); } /// isVShiftRImm - Check if this is a valid build_vector for the immediate /// operand of a vector shift right operation. For a shift opcode, the value /// is positive, but for an intrinsic the value count must be negative. The /// absolute value must be in the range: /// 1 <= |Value| <= ElementBits for a right shift; or /// 1 <= |Value| <= ElementBits/2 for a narrow right shift. static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, int64_t &Cnt) { assert(VT.isVector() && "vector shift count is not a vector type"); int64_t ElementBits = VT.getScalarSizeInBits(); if (! getVShiftImm(Op, ElementBits, Cnt)) return false; if (!isIntrinsic) return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); if (Cnt >= -(isNarrow ? ElementBits/2 : ElementBits) && Cnt <= -1) { Cnt = -Cnt; return true; } return false; } /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics. static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { unsigned IntNo = cast(N->getOperand(0))->getZExtValue(); switch (IntNo) { default: // Don't do anything for most intrinsics. break; // Vector shifts: check for immediate versions and lower them. // Note: This is done during DAG combining instead of DAG legalizing because // the build_vectors for 64-bit vector element shift counts are generally // not legal, and it is hard to see their values after they get legalized to // loads from a constant pool. case Intrinsic::arm_neon_vshifts: case Intrinsic::arm_neon_vshiftu: case Intrinsic::arm_neon_vrshifts: case Intrinsic::arm_neon_vrshiftu: case Intrinsic::arm_neon_vrshiftn: case Intrinsic::arm_neon_vqshifts: case Intrinsic::arm_neon_vqshiftu: case Intrinsic::arm_neon_vqshiftsu: case Intrinsic::arm_neon_vqshiftns: case Intrinsic::arm_neon_vqshiftnu: case Intrinsic::arm_neon_vqshiftnsu: case Intrinsic::arm_neon_vqrshiftns: case Intrinsic::arm_neon_vqrshiftnu: case Intrinsic::arm_neon_vqrshiftnsu: { EVT VT = N->getOperand(1).getValueType(); int64_t Cnt; unsigned VShiftOpc = 0; switch (IntNo) { case Intrinsic::arm_neon_vshifts: case Intrinsic::arm_neon_vshiftu: if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) { VShiftOpc = ARMISD::VSHL; break; } if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) { VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRs : ARMISD::VSHRu); break; } return SDValue(); case Intrinsic::arm_neon_vrshifts: case Intrinsic::arm_neon_vrshiftu: if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) break; return SDValue(); case Intrinsic::arm_neon_vqshifts: case Intrinsic::arm_neon_vqshiftu: if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) break; return SDValue(); case Intrinsic::arm_neon_vqshiftsu: if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) break; llvm_unreachable("invalid shift count for vqshlu intrinsic"); case Intrinsic::arm_neon_vrshiftn: case Intrinsic::arm_neon_vqshiftns: case Intrinsic::arm_neon_vqshiftnu: case Intrinsic::arm_neon_vqshiftnsu: case Intrinsic::arm_neon_vqrshiftns: case Intrinsic::arm_neon_vqrshiftnu: case Intrinsic::arm_neon_vqrshiftnsu: // Narrowing shifts require an immediate right shift. 
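      // A count that does not fit the immediate form has no other lowering
      // here, so it is treated as an internal error (the llvm_unreachable
      // below) rather than being silently ignored.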
if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) break; llvm_unreachable("invalid shift count for narrowing vector shift " "intrinsic"); default: llvm_unreachable("unhandled vector shift"); } switch (IntNo) { case Intrinsic::arm_neon_vshifts: case Intrinsic::arm_neon_vshiftu: // Opcode already set above. break; case Intrinsic::arm_neon_vrshifts: VShiftOpc = ARMISD::VRSHRs; break; case Intrinsic::arm_neon_vrshiftu: VShiftOpc = ARMISD::VRSHRu; break; case Intrinsic::arm_neon_vrshiftn: VShiftOpc = ARMISD::VRSHRN; break; case Intrinsic::arm_neon_vqshifts: VShiftOpc = ARMISD::VQSHLs; break; case Intrinsic::arm_neon_vqshiftu: VShiftOpc = ARMISD::VQSHLu; break; case Intrinsic::arm_neon_vqshiftsu: VShiftOpc = ARMISD::VQSHLsu; break; case Intrinsic::arm_neon_vqshiftns: VShiftOpc = ARMISD::VQSHRNs; break; case Intrinsic::arm_neon_vqshiftnu: VShiftOpc = ARMISD::VQSHRNu; break; case Intrinsic::arm_neon_vqshiftnsu: VShiftOpc = ARMISD::VQSHRNsu; break; case Intrinsic::arm_neon_vqrshiftns: VShiftOpc = ARMISD::VQRSHRNs; break; case Intrinsic::arm_neon_vqrshiftnu: VShiftOpc = ARMISD::VQRSHRNu; break; case Intrinsic::arm_neon_vqrshiftnsu: VShiftOpc = ARMISD::VQRSHRNsu; break; } SDLoc dl(N); return DAG.getNode(VShiftOpc, dl, N->getValueType(0), N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32)); } case Intrinsic::arm_neon_vshiftins: { EVT VT = N->getOperand(1).getValueType(); int64_t Cnt; unsigned VShiftOpc = 0; if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) VShiftOpc = ARMISD::VSLI; else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) VShiftOpc = ARMISD::VSRI; else { llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); } SDLoc dl(N); return DAG.getNode(VShiftOpc, dl, N->getValueType(0), N->getOperand(1), N->getOperand(2), DAG.getConstant(Cnt, dl, MVT::i32)); } case Intrinsic::arm_neon_vqrshifts: case Intrinsic::arm_neon_vqrshiftu: // No immediate versions of these to check for. break; } return SDValue(); } /// PerformShiftCombine - Checks for immediate versions of vector shifts and /// lowers them. As with the vector shift intrinsics, this is done during DAG /// combining instead of DAG legalizing because the build_vectors for 64-bit /// vector element shift counts are generally not legal, and it is hard to see /// their values after they get legalized to loads from a constant pool. static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) { // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16. SDValue N1 = N->getOperand(1); if (ConstantSDNode *C = dyn_cast(N1)) { SDValue N0 = N->getOperand(0); if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP && DAG.MaskedValueIsZero(N0.getOperand(0), APInt::getHighBitsSet(32, 16))) return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1); } } // Nothing to be done for scalar shifts. 
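  // Only legal NEON vector types are handled from here on; scalar shifts and
  // illegal vector types are left to the generic DAG combiner.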
const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!VT.isVector() || !TLI.isTypeLegal(VT)) return SDValue(); assert(ST->hasNEON() && "unexpected vector shift"); int64_t Cnt; switch (N->getOpcode()) { default: llvm_unreachable("unexpected shift opcode"); case ISD::SHL: if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) { SDLoc dl(N); return DAG.getNode(ARMISD::VSHL, dl, VT, N->getOperand(0), DAG.getConstant(Cnt, dl, MVT::i32)); } break; case ISD::SRA: case ISD::SRL: if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? ARMISD::VSHRs : ARMISD::VSHRu); SDLoc dl(N); return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), DAG.getConstant(Cnt, dl, MVT::i32)); } } return SDValue(); } /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { SDValue N0 = N->getOperand(0); // Check for sign- and zero-extensions of vector extract operations of 8- // and 16-bit vector elements. NEON supports these directly. They are // handled during DAG combining because type legalization will promote them // to 32-bit types and it is messy to recognize the operations after that. if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { SDValue Vec = N0.getOperand(0); SDValue Lane = N0.getOperand(1); EVT VT = N->getValueType(0); EVT EltVT = N0.getValueType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (VT == MVT::i32 && (EltVT == MVT::i8 || EltVT == MVT::i16) && TLI.isTypeLegal(Vec.getValueType()) && isa(Lane)) { unsigned Opc = 0; switch (N->getOpcode()) { default: llvm_unreachable("unexpected opcode"); case ISD::SIGN_EXTEND: Opc = ARMISD::VGETLANEs; break; case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Opc = ARMISD::VGETLANEu; break; } return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane); } } return SDValue(); } static const APInt *isPowerOf2Constant(SDValue V) { ConstantSDNode *C = dyn_cast(V); if (!C) return nullptr; const APInt *CV = &C->getAPIntValue(); return CV->isPowerOf2() ? CV : nullptr; } SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const { // If we have a CMOV, OR and AND combination such as: // if (x & CN) // y |= CM; // // And: // * CN is a single bit; // * All bits covered by CM are known zero in y // // Then we can convert this into a sequence of BFI instructions. This will // always be a win if CM is a single bit, will always be no worse than the // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is // three bits (due to the extra IT instruction). SDValue Op0 = CMOV->getOperand(0); SDValue Op1 = CMOV->getOperand(1); auto CCNode = cast(CMOV->getOperand(2)); auto CC = CCNode->getAPIntValue().getLimitedValue(); SDValue CmpZ = CMOV->getOperand(4); // The compare must be against zero. if (!isNullConstant(CmpZ->getOperand(1))) return SDValue(); assert(CmpZ->getOpcode() == ARMISD::CMPZ); SDValue And = CmpZ->getOperand(0); if (And->getOpcode() != ISD::AND) return SDValue(); const APInt *AndC = isPowerOf2Constant(And->getOperand(1)); if (!AndC) return SDValue(); SDValue X = And->getOperand(0); if (CC == ARMCC::EQ) { // We're performing an "equal to zero" compare. Swap the operands so we // canonicalize on a "not equal to zero" compare. 
std::swap(Op0, Op1); } else { assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?"); } if (Op1->getOpcode() != ISD::OR) return SDValue(); ConstantSDNode *OrC = dyn_cast(Op1->getOperand(1)); if (!OrC) return SDValue(); SDValue Y = Op1->getOperand(0); if (Op0 != Y) return SDValue(); // Now, is it profitable to continue? APInt OrCI = OrC->getAPIntValue(); unsigned Heuristic = Subtarget->isThumb() ? 3 : 2; if (OrCI.countPopulation() > Heuristic) return SDValue(); // Lastly, can we determine that the bits defined by OrCI // are zero in Y? KnownBits Known = DAG.computeKnownBits(Y); if ((OrCI & Known.Zero) != OrCI) return SDValue(); // OK, we can do the combine. SDValue V = Y; SDLoc dl(X); EVT VT = X.getValueType(); unsigned BitInX = AndC->logBase2(); if (BitInX != 0) { // We must shift X first. X = DAG.getNode(ISD::SRL, dl, VT, X, DAG.getConstant(BitInX, dl, VT)); } for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits(); BitInY < NumActiveBits; ++BitInY) { if (OrCI[BitInY] == 0) continue; APInt Mask(VT.getSizeInBits(), 0); Mask.setBit(BitInY); V = DAG.getNode(ARMISD::BFI, dl, VT, V, X, // Confusingly, the operand is an *inverted* mask. DAG.getConstant(~Mask, dl, VT)); } return V; } /// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND. SDValue ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const { SDValue Cmp = N->getOperand(4); if (Cmp.getOpcode() != ARMISD::CMPZ) // Only looking at NE cases. return SDValue(); EVT VT = N->getValueType(0); SDLoc dl(N); SDValue LHS = Cmp.getOperand(0); SDValue RHS = Cmp.getOperand(1); SDValue Chain = N->getOperand(0); SDValue BB = N->getOperand(1); SDValue ARMcc = N->getOperand(2); ARMCC::CondCodes CC = (ARMCC::CondCodes)cast(ARMcc)->getZExtValue(); // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0)) // -> (brcond Chain BB CC CPSR Cmp) if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() && LHS->getOperand(0)->getOpcode() == ARMISD::CMOV && LHS->getOperand(0)->hasOneUse()) { auto *LHS00C = dyn_cast(LHS->getOperand(0)->getOperand(0)); auto *LHS01C = dyn_cast(LHS->getOperand(0)->getOperand(1)); auto *LHS1C = dyn_cast(LHS->getOperand(1)); auto *RHSC = dyn_cast(RHS); if ((LHS00C && LHS00C->getZExtValue() == 0) && (LHS01C && LHS01C->getZExtValue() == 1) && (LHS1C && LHS1C->getZExtValue() == 1) && (RHSC && RHSC->getZExtValue() == 0)) { return DAG.getNode( ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2), LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4)); } } return SDValue(); } /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. SDValue ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { SDValue Cmp = N->getOperand(4); if (Cmp.getOpcode() != ARMISD::CMPZ) // Only looking at EQ and NE cases. return SDValue(); EVT VT = N->getValueType(0); SDLoc dl(N); SDValue LHS = Cmp.getOperand(0); SDValue RHS = Cmp.getOperand(1); SDValue FalseVal = N->getOperand(0); SDValue TrueVal = N->getOperand(1); SDValue ARMcc = N->getOperand(2); ARMCC::CondCodes CC = (ARMCC::CondCodes)cast(ARMcc)->getZExtValue(); // BFI is only available on V6T2+. 
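  // PerformCMOVToBFICombine rewrites the CMOV/AND/OR pattern produced by
  // 'if (x & CN) y |= CM' into BFI instructions. For example (hypothetical
  // values), with CN == 0x10 and CM == 0x6 the tested bit 4 of x is shifted
  // down and then inserted into bits 1 and 2 of y with two BFIs.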
if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) { SDValue R = PerformCMOVToBFICombine(N, DAG); if (R) return R; } // Simplify // mov r1, r0 // cmp r1, x // mov r0, y // moveq r0, x // to // cmp r0, x // movne r0, y // // mov r1, r0 // cmp r1, x // mov r0, x // movne r0, y // to // cmp r0, x // movne r0, y /// FIXME: Turn this into a target neutral optimization? SDValue Res; if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) { Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, N->getOperand(3), Cmp); } else if (CC == ARMCC::EQ && TrueVal == RHS) { SDValue ARMcc; SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl); Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, N->getOperand(3), NewCmp); } // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0)) // -> (cmov F T CC CPSR Cmp) if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse()) { auto *LHS0C = dyn_cast(LHS->getOperand(0)); auto *LHS1C = dyn_cast(LHS->getOperand(1)); auto *RHSC = dyn_cast(RHS); if ((LHS0C && LHS0C->getZExtValue() == 0) && (LHS1C && LHS1C->getZExtValue() == 1) && (RHSC && RHSC->getZExtValue() == 0)) { return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, LHS->getOperand(2), LHS->getOperand(3), LHS->getOperand(4)); } } if (!VT.isInteger()) return SDValue(); // Materialize a boolean comparison for integers so we can avoid branching. if (isNullConstant(FalseVal)) { if (CC == ARMCC::EQ && isOneConstant(TrueVal)) { if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) { // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it // right 5 bits will make that 32 be 1, otherwise it will be 0. // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS); Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub), DAG.getConstant(5, dl, MVT::i32)); } else { // CMOV 0, 1, ==, (CMPZ x, y) -> // (ADDCARRY (SUB x, y), t:0, t:1) // where t = (SUBCARRY 0, (SUB x, y), 0) // // The SUBCARRY computes 0 - (x - y) and this will give a borrow when // x != y. In other words, a carry C == 1 when x == y, C == 0 // otherwise. // The final ADDCARRY computes // x - y + (0 - (x - y)) + C == C SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS); SDVTList VTs = DAG.getVTList(VT, MVT::i32); SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub); // ISD::SUBCARRY returns a borrow but we want the carry here // actually. SDValue Carry = DAG.getNode(ISD::SUB, dl, MVT::i32, DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1)); Res = DAG.getNode(ISD::ADDCARRY, dl, VTs, Sub, Neg, Carry); } } else if (CC == ARMCC::NE && !isNullConstant(RHS) && (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) { // This seems pointless but will allow us to combine it further below. // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1 SDValue Sub = DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS); SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR, Sub.getValue(1), SDValue()); Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc, N->getOperand(3), CPSRGlue.getValue(1)); FalseVal = Sub; } } else if (isNullConstant(TrueVal)) { if (CC == ARMCC::EQ && !isNullConstant(RHS) && (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) { // This seems pointless but will allow us to combine it further below // Note that we change == for != as this is the dual for the case above. 
// CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1 SDValue Sub = DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS); SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR, Sub.getValue(1), SDValue()); Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal, DAG.getConstant(ARMCC::NE, dl, MVT::i32), N->getOperand(3), CPSRGlue.getValue(1)); FalseVal = Sub; } } // On Thumb1, the DAG above may be further combined if z is a power of 2 // (z == 2 ^ K). // CMOV (SUBS x, y), z, !=, (SUBS x, y):1 -> // merge t3, t4 // where t1 = (SUBCARRY (SUB x, y), z, 0) // t2 = (SUBCARRY (SUB x, y), t1:0, t1:1) // t3 = if K != 0 then (SHL t2:0, K) else t2:0 // t4 = (SUB 1, t2:1) [ we want a carry, not a borrow ] const APInt *TrueConst; if (Subtarget->isThumb1Only() && CC == ARMCC::NE && (FalseVal.getOpcode() == ARMISD::SUBS) && (FalseVal.getOperand(0) == LHS) && (FalseVal.getOperand(1) == RHS) && (TrueConst = isPowerOf2Constant(TrueVal))) { SDVTList VTs = DAG.getVTList(VT, MVT::i32); unsigned ShiftAmount = TrueConst->logBase2(); if (ShiftAmount) TrueVal = DAG.getConstant(1, dl, VT); SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal); Res = DAG.getNode(ISD::SUBCARRY, dl, VTs, FalseVal, Subc, Subc.getValue(1)); // Make it a carry, not a borrow. SDValue Carry = DAG.getNode( ISD::SUB, dl, VT, DAG.getConstant(1, dl, MVT::i32), Res.getValue(1)); Res = DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Res, Carry); if (ShiftAmount) Res = DAG.getNode(ISD::SHL, dl, VT, Res, DAG.getConstant(ShiftAmount, dl, MVT::i32)); } if (Res.getNode()) { KnownBits Known = DAG.computeKnownBits(SDValue(N,0)); // Capture demanded bits information that would be otherwise lost. if (Known.Zero == 0xfffffffe) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, DAG.getValueType(MVT::i1)); else if (Known.Zero == 0xffffff00) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, DAG.getValueType(MVT::i8)); else if (Known.Zero == 0xffff0000) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, DAG.getValueType(MVT::i16)); } return Res; } SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget); case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget); case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); case ISD::OR: return PerformORCombine(N, DCI, Subtarget); case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); case ARMISD::ADDC: case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget); case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget); case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); case ISD::STORE: return PerformSTORECombine(N, DCI); case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget); case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); case ARMISD::VDUP: return PerformVDUPCombine(N, DCI); case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI.DAG, Subtarget); case ISD::FDIV: return 
PerformVDIVCombine(N, DCI.DAG, Subtarget); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: case ISD::SRA: case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG); case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG); case ISD::LOAD: return PerformLOADCombine(N, DCI); case ARMISD::VLD1DUP: case ARMISD::VLD2DUP: case ARMISD::VLD3DUP: case ARMISD::VLD4DUP: return PerformVLDCombine(N, DCI); case ARMISD::BUILD_VECTOR: return PerformARMBUILD_VECTORCombine(N, DCI); case ARMISD::SMULWB: { unsigned BitWidth = N->getValueType(0).getSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16); if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)) return SDValue(); break; } case ARMISD::SMULWT: { unsigned BitWidth = N->getValueType(0).getSizeInBits(); APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16); if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)) return SDValue(); break; } case ARMISD::SMLALBB: { unsigned BitWidth = N->getValueType(0).getSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16); if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) || (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))) return SDValue(); break; } case ARMISD::SMLALBT: { unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits(); APInt LowMask = APInt::getLowBitsSet(LowWidth, 16); unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits(); APInt HighMask = APInt::getHighBitsSet(HighWidth, 16); if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) || (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI))) return SDValue(); break; } case ARMISD::SMLALTB: { unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits(); APInt HighMask = APInt::getHighBitsSet(HighWidth, 16); unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits(); APInt LowMask = APInt::getLowBitsSet(LowWidth, 16); if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) || (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI))) return SDValue(); break; } case ARMISD::SMLALTT: { unsigned BitWidth = N->getValueType(0).getSizeInBits(); APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16); if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) || (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))) return SDValue(); break; } case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (cast(N->getOperand(1))->getZExtValue()) { case Intrinsic::arm_neon_vld1: case Intrinsic::arm_neon_vld1x2: case Intrinsic::arm_neon_vld1x3: case Intrinsic::arm_neon_vld1x4: case Intrinsic::arm_neon_vld2: case Intrinsic::arm_neon_vld3: case Intrinsic::arm_neon_vld4: case Intrinsic::arm_neon_vld2lane: case Intrinsic::arm_neon_vld3lane: case Intrinsic::arm_neon_vld4lane: case Intrinsic::arm_neon_vld2dup: case Intrinsic::arm_neon_vld3dup: case Intrinsic::arm_neon_vld4dup: case Intrinsic::arm_neon_vst1: case Intrinsic::arm_neon_vst1x2: case Intrinsic::arm_neon_vst1x3: case Intrinsic::arm_neon_vst1x4: case Intrinsic::arm_neon_vst2: case Intrinsic::arm_neon_vst3: case Intrinsic::arm_neon_vst4: case Intrinsic::arm_neon_vst2lane: case Intrinsic::arm_neon_vst3lane: case Intrinsic::arm_neon_vst4lane: return PerformVLDCombine(N, DCI); default: break; } break; } return SDValue(); } bool 
ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const { return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); } bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, bool *Fast) const { // Depends what it gets converted into if the type is weird. if (!VT.isSimple()) return false; // The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus bool AllowsUnaligned = Subtarget->allowsUnalignedMem(); switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i8: case MVT::i16: case MVT::i32: { // Unaligned access can use (for example) LRDB, LRDH, LDR if (AllowsUnaligned) { if (Fast) *Fast = Subtarget->hasV7Ops(); return true; } return false; } case MVT::f64: case MVT::v2f64: { // For any little-endian targets with neon, we can support unaligned ld/st // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8. // A big-endian target may also explicitly support unaligned accesses if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) { if (Fast) *Fast = true; return true; } return false; } } } static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, unsigned AlignCheck) { return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) && (DstAlign == 0 || DstAlign % AlignCheck == 0)); } EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { const Function &F = MF.getFunction(); // See if we can use NEON instructions for this... if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() && !F.hasFnAttribute(Attribute::NoImplicitFloat)) { bool Fast; if (Size >= 16 && (memOpAlign(SrcAlign, DstAlign, 16) || (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) { return MVT::v2f64; } else if (Size >= 8 && (memOpAlign(SrcAlign, DstAlign, 8) || (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) && Fast))) { return MVT::f64; } } // Let the target-independent logic figure it out. return MVT::Other; } // 64-bit integers are split into their high and low parts and held in two // different registers, so the trunc is free since the low register can just // be used. bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) return false; unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); unsigned DestBits = DstTy->getPrimitiveSizeInBits(); return (SrcBits == 64 && DestBits == 32); } bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() || !DstVT.isInteger()) return false; unsigned SrcBits = SrcVT.getSizeInBits(); unsigned DestBits = DstVT.getSizeInBits(); return (SrcBits == 64 && DestBits == 32); } bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { if (Val.getOpcode() != ISD::LOAD) return false; EVT VT1 = Val.getValueType(); if (!VT1.isSimple() || !VT1.isInteger() || !VT2.isSimple() || !VT2.isInteger()) return false; switch (VT1.getSimpleVT().SimpleTy) { default: break; case MVT::i1: case MVT::i8: case MVT::i16: // 8-bit and 16-bit loads implicitly zero-extend to 32-bits. return true; } return false; } bool ARMTargetLowering::isFNegFree(EVT VT) const { if (!VT.isSimple()) return false; // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that // negate values directly (fneg is free). So, we don't want to let the DAG // combiner rewrite fneg into xors and some other instructions. 
For f16 and // FullFP16 argument passing, some bitcast nodes may be introduced, // triggering this DAG combine rewrite, so we are avoiding that with this. switch (VT.getSimpleVT().SimpleTy) { default: break; case MVT::f16: return Subtarget->hasFullFP16(); } return false; } bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { EVT VT = ExtVal.getValueType(); if (!isTypeLegal(VT)) return false; // Don't create a loadext if we can fold the extension into a wide/long // instruction. // If there's more than one user instruction, the loadext is desirable no // matter what. There can be two uses by the same instruction. if (ExtVal->use_empty() || !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode())) return true; SDNode *U = *ExtVal->use_begin(); if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB || U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL)) return false; return true; } bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; if (!isTypeLegal(EVT::getEVT(Ty1))) return false; assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop"); // Assuming the caller doesn't have a zeroext or signext return parameter, // truncation all the way down to i1 is valid. return true; } int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const { if (isLegalAddressingMode(DL, AM, Ty, AS)) { if (Subtarget->hasFPAO()) return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster return 0; } return -1; } static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { if (V < 0) return false; unsigned Scale = 1; switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i1: case MVT::i8: // Scale == 1; break; case MVT::i16: // Scale == 2; Scale = 2; break; case MVT::i32: // Scale == 4; Scale = 4; break; } if ((V & (Scale - 1)) != 0) return false; V /= Scale; return V == (V & ((1LL << 5) - 1)); } static bool isLegalT2AddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget) { bool isNeg = false; if (V < 0) { isNeg = true; V = - V; } switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: // + imm12 or - imm8 if (isNeg) return V == (V & ((1LL << 8) - 1)); return V == (V & ((1LL << 12) - 1)); case MVT::f32: case MVT::f64: // Same as ARM mode. FIXME: NEON? if (!Subtarget->hasVFP2()) return false; if ((V & 3) != 0) return false; V >>= 2; return V == (V & ((1LL << 8) - 1)); } } /// isLegalAddressImmediate - Return true if the integer value can be used /// as the offset of the target addressing mode for load / store of the /// given type. static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget) { if (V == 0) return true; if (!VT.isSimple()) return false; if (Subtarget->isThumb1Only()) return isLegalT1AddressImmediate(V, VT); else if (Subtarget->isThumb2()) return isLegalT2AddressImmediate(V, VT, Subtarget); // ARM mode. if (V < 0) V = - V; switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i1: case MVT::i8: case MVT::i32: // +- imm12 return V == (V & ((1LL << 12) - 1)); case MVT::i16: // +- imm8 return V == (V & ((1LL << 8) - 1)); case MVT::f32: case MVT::f64: if (!Subtarget->hasVFP2()) // FIXME: NEON? 
return false; if ((V & 3) != 0) return false; V >>= 2; return V == (V & ((1LL << 8) - 1)); } } bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const { int Scale = AM.Scale; if (Scale < 0) return false; switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: if (Scale == 1) return true; // r + r << imm Scale = Scale & ~1; return Scale == 2 || Scale == 4 || Scale == 8; case MVT::i64: // FIXME: What are we trying to model here? ldrd doesn't have an r + r // version in Thumb mode. // r + r if (Scale == 1) return true; // r * 2 (this can be lowered to r + r). if (!AM.HasBaseReg && Scale == 2) return true; return false; case MVT::isVoid: // Note, we allow "void" uses (basically, uses that aren't loads or // stores), because arm allows folding a scale into many arithmetic // operations. This should be made more precise and revisited later. // Allow r << imm, but the imm has to be a multiple of two. if (Scale & 1) return false; return isPowerOf2_32(Scale); } } bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const { const int Scale = AM.Scale; // Negative scales are not supported in Thumb1. if (Scale < 0) return false; // Thumb1 addressing modes do not support register scaling excepting the // following cases: // 1. Scale == 1 means no scaling. // 2. Scale == 2 this can be lowered to r + r if there is no base register. return (Scale == 1) || (!AM.HasBaseReg && Scale == 2); } /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const { EVT VT = getValueType(DL, Ty, true); if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) return false; // Can never fold addr of global into load/store. if (AM.BaseGV) return false; switch (AM.Scale) { case 0: // no scale reg, must be "r+i" or "r", or "i". break; default: // ARM doesn't support any R+R*scale+imm addr modes. if (AM.BaseOffs) return false; if (!VT.isSimple()) return false; if (Subtarget->isThumb1Only()) return isLegalT1ScaledAddressingMode(AM, VT); if (Subtarget->isThumb2()) return isLegalT2ScaledAddressingMode(AM, VT); int Scale = AM.Scale; switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i1: case MVT::i8: case MVT::i32: if (Scale < 0) Scale = -Scale; if (Scale == 1) return true; // r + r << imm return isPowerOf2_32(Scale & ~1); case MVT::i16: case MVT::i64: // r +/- r if (Scale == 1 || (AM.HasBaseReg && Scale == -1)) return true; // r * 2 (this can be lowered to r + r). if (!AM.HasBaseReg && Scale == 2) return true; return false; case MVT::isVoid: // Note, we allow "void" uses (basically, uses that aren't loads or // stores), because arm allows folding a scale into many arithmetic // operations. This should be made more precise and revisited later. // Allow r << imm, but the imm has to be a multiple of two. if (Scale & 1) return false; return isPowerOf2_32(Scale); } } return true; } /// isLegalICmpImmediate - Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can compare /// a register against the immediate without having to materialize the /// immediate into a register. bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { // Thumb2 and ARM modes can use cmn for negative immediates. 
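  // A compare against +Imm can also be done as a CMN against -Imm, so an
  // immediate is legal if either the value or its negation has a valid
  // modified-immediate encoding for the current mode.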
if (!Subtarget->isThumb()) return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 || ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1; if (Subtarget->isThumb2()) return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 || ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1; // Thumb1 doesn't have cmn, and only 8-bit immediates. return Imm >= 0 && Imm <= 255; } /// isLegalAddImmediate - Return true if the specified immediate is a legal add /// *or sub* immediate, that is the target has add or sub instructions which can /// add a register with the immediate without having to materialize the /// immediate into a register. bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const { // Same encoding for add/sub, just flip the sign. int64_t AbsImm = std::abs(Imm); if (!Subtarget->isThumb()) return ARM_AM::getSOImmVal(AbsImm) != -1; if (Subtarget->isThumb2()) return ARM_AM::getT2SOImmVal(AbsImm) != -1; // Thumb1 only has 8-bit unsigned immediate. return AbsImm >= 0 && AbsImm <= 255; } static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG) { if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) return false; if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { // AddressingMode 3 Base = Ptr->getOperand(0); if (ConstantSDNode *RHS = dyn_cast(Ptr->getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); if (RHSC < 0 && RHSC > -256) { assert(Ptr->getOpcode() == ISD::ADD); isInc = false; Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0)); return true; } } isInc = (Ptr->getOpcode() == ISD::ADD); Offset = Ptr->getOperand(1); return true; } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { // AddressingMode 2 if (ConstantSDNode *RHS = dyn_cast(Ptr->getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); if (RHSC < 0 && RHSC > -0x1000) { assert(Ptr->getOpcode() == ISD::ADD); isInc = false; Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0)); Base = Ptr->getOperand(0); return true; } } if (Ptr->getOpcode() == ISD::ADD) { isInc = true; ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode()); if (ShOpcVal != ARM_AM::no_shift) { Base = Ptr->getOperand(1); Offset = Ptr->getOperand(0); } else { Base = Ptr->getOperand(0); Offset = Ptr->getOperand(1); } return true; } isInc = (Ptr->getOpcode() == ISD::ADD); Base = Ptr->getOperand(0); Offset = Ptr->getOperand(1); return true; } // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. return false; } static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG) { if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) return false; Base = Ptr->getOperand(0); if (ConstantSDNode *RHS = dyn_cast(Ptr->getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); if (RHSC < 0 && RHSC > -0x100) { // 8 bits. assert(Ptr->getOpcode() == ISD::ADD); isInc = false; Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0)); return true; } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. isInc = Ptr->getOpcode() == ISD::ADD; Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0)); return true; } } return false; } /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. 
bool ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { if (Subtarget->isThumb1Only()) return false; EVT VT; SDValue Ptr; bool isSEXTLoad = false; if (LoadSDNode *LD = dyn_cast(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; } else if (StoreSDNode *ST = dyn_cast(N)) { Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); } else return false; bool isInc; bool isLegal = false; if (Subtarget->isThumb2()) isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, Offset, isInc, DAG); else isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, Offset, isInc, DAG); if (!isLegal) return false; AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; return true; } /// getPostIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if this node can be /// combined with a load / store to form a post-indexed load / store. bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { EVT VT; SDValue Ptr; bool isSEXTLoad = false, isNonExt; if (LoadSDNode *LD = dyn_cast(N)) { VT = LD->getMemoryVT(); Ptr = LD->getBasePtr(); isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD; } else if (StoreSDNode *ST = dyn_cast(N)) { VT = ST->getMemoryVT(); Ptr = ST->getBasePtr(); isNonExt = !ST->isTruncatingStore(); } else return false; if (Subtarget->isThumb1Only()) { // Thumb-1 can do a limited post-inc load or store as an updating LDM. It // must be non-extending/truncating, i32, with an offset of 4. assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!"); if (Op->getOpcode() != ISD::ADD || !isNonExt) return false; auto *RHS = dyn_cast(Op->getOperand(1)); if (!RHS || RHS->getZExtValue() != 4) return false; Offset = Op->getOperand(1); Base = Op->getOperand(0); AM = ISD::POST_INC; return true; } bool isInc; bool isLegal = false; if (Subtarget->isThumb2()) isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, isInc, DAG); else isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, isInc, DAG); if (!isLegal) return false; if (Ptr != Base) { // Swap base ptr and offset to catch more post-index load / store when // it's legal. In Thumb2 mode, offset must be an immediate. if (Ptr == Offset && Op->getOpcode() == ISD::ADD && !Subtarget->isThumb2()) std::swap(Base, Offset); // Post-indexed load / store update the base pointer. if (Ptr != Base) return false; } AM = isInc ? ISD::POST_INC : ISD::POST_DEC; return true; } void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { unsigned BitWidth = Known.getBitWidth(); Known.resetAll(); switch (Op.getOpcode()) { default: break; case ARMISD::ADDC: case ARMISD::ADDE: case ARMISD::SUBC: case ARMISD::SUBE: // Special cases when we convert a carry to a boolean. if (Op.getResNo() == 0) { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); // (ADDE 0, 0, C) will give us a single bit. if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) && isNullConstant(RHS)) { Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); return; } } break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. 
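    // The CMOV result is one of its first two operands, so only bits that are
    // known to have the same value in both operands are known in the result.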
Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1); if (Known.isUnknown()) return; KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1); Known.Zero &= KnownRHS.Zero; Known.One &= KnownRHS.One; return; } case ISD::INTRINSIC_W_CHAIN: { ConstantSDNode *CN = cast(Op->getOperand(1)); Intrinsic::ID IntID = static_cast(CN->getZExtValue()); switch (IntID) { default: return; case Intrinsic::arm_ldaex: case Intrinsic::arm_ldrex: { EVT VT = cast(Op)->getMemoryVT(); unsigned MemBits = VT.getScalarSizeInBits(); Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); return; } } } case ARMISD::BFI: { // Conservatively, we can recurse down the first operand // and just mask out all affected bits. Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); // The operand to BFI is already a mask suitable for removing the bits it // sets. ConstantSDNode *CI = cast(Op.getOperand(2)); const APInt &Mask = CI->getAPIntValue(); Known.Zero &= Mask; Known.One &= Mask; return; } case ARMISD::VGETLANEs: case ARMISD::VGETLANEu: { const SDValue &SrcSV = Op.getOperand(0); EVT VecVT = SrcSV.getValueType(); assert(VecVT.isVector() && "VGETLANE expected a vector type"); const unsigned NumSrcElts = VecVT.getVectorNumElements(); ConstantSDNode *Pos = cast(Op.getOperand(1).getNode()); assert(Pos->getAPIntValue().ult(NumSrcElts) && "VGETLANE index out of bounds"); unsigned Idx = Pos->getZExtValue(); APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx); Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1); EVT VT = Op.getValueType(); const unsigned DstSz = VT.getScalarSizeInBits(); const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits(); assert(SrcSz == Known.getBitWidth()); assert(DstSz > SrcSz); if (Op.getOpcode() == ARMISD::VGETLANEs) Known = Known.sext(DstSz); else { Known = Known.zext(DstSz); Known.Zero.setBitsFrom(SrcSz); } assert(DstSz == Known.getBitWidth()); break; } } } bool ARMTargetLowering::targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedAPInt, TargetLoweringOpt &TLO) const { // Delay optimization, so we don't have to deal with illegal types, or block // optimizations. if (!TLO.LegalOps) return false; // Only optimize AND for now. if (Op.getOpcode() != ISD::AND) return false; EVT VT = Op.getValueType(); // Ignore vectors. if (VT.isVector()) return false; assert(VT == MVT::i32 && "Unexpected integer type"); // Make sure the RHS really is a constant. ConstantSDNode *C = dyn_cast(Op.getOperand(1)); if (!C) return false; unsigned Mask = C->getZExtValue(); unsigned Demanded = DemandedAPInt.getZExtValue(); unsigned ShrunkMask = Mask & Demanded; unsigned ExpandedMask = Mask | ~Demanded; // If the mask is all zeros, let the target-independent code replace the // result with zero. if (ShrunkMask == 0) return false; // If the mask is all ones, erase the AND. (Currently, the target-independent // code won't do this, so we have to do it explicitly to avoid an infinite // loop in obscure cases.) if (ExpandedMask == ~0U) return TLO.CombineTo(Op, Op.getOperand(0)); auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool { return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0; }; auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool { if (NewMask == Mask) return true; SDLoc DL(Op); SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT); SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); return TLO.CombineTo(Op, NewOp); }; // Prefer uxtb mask. 
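  // Masks equivalent to an 8-bit or 16-bit zero-extend can be selected as
  // UXTB/UXTH without materializing a constant, so those are tried first.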
if (IsLegalMask(0xFF)) return UseMask(0xFF); // Prefer uxth mask. if (IsLegalMask(0xFFFF)) return UseMask(0xFFFF); // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2. // FIXME: Prefer a contiguous sequence of bits for other optimizations. if (ShrunkMask < 256) return UseMask(ShrunkMask); // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2. // FIXME: Prefer a contiguous sequence of bits for other optimizations. if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256) return UseMask(ExpandedMask); // Potential improvements: // // We could try to recognize lsls+lsrs or lsrs+lsls pairs here. // We could try to prefer Thumb1 immediates which can be lowered to a // two-instruction sequence. // We could try to recognize more legal ARM/Thumb2 immediates here. return false; } //===----------------------------------------------------------------------===// // ARM Inline Assembly Support //===----------------------------------------------------------------------===// bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { // Looking for "rev" which is V6+. if (!Subtarget->hasV6Ops()) return false; InlineAsm *IA = cast(CI->getCalledValue()); std::string AsmStr = IA->getAsmString(); SmallVector AsmPieces; SplitString(AsmStr, AsmPieces, ";\n"); switch (AsmPieces.size()) { default: return false; case 1: AsmStr = AsmPieces[0]; AsmPieces.clear(); SplitString(AsmStr, AsmPieces, " \t,"); // rev $0, $1 if (AsmPieces.size() == 3 && AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" && IA->getConstraintString().compare(0, 4, "=l,l") == 0) { IntegerType *Ty = dyn_cast(CI->getType()); if (Ty && Ty->getBitWidth() == 32) return IntrinsicLowering::LowerToByteSwap(CI); } break; } return false; } const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const { // At this point, we have to lower this constraint to something else, so we // lower it to an "r" or "w". However, by doing this we will force the result // to be in register, while the X constraint is much more permissive. // // Although we are correct (we are free to emit anything, without // constraints), we might break use cases that would expect us to be more // efficient and emit something else. if (!Subtarget->hasVFP2()) return "r"; if (ConstraintVT.isFloatingPoint()) return "w"; if (ConstraintVT.isVector() && Subtarget->hasNEON() && (ConstraintVT.getSizeInBits() == 64 || ConstraintVT.getSizeInBits() == 128)) return "w"; return "r"; } /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. ARMTargetLowering::ConstraintType ARMTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; case 'l': return C_RegisterClass; case 'w': return C_RegisterClass; case 'h': return C_RegisterClass; case 'x': return C_RegisterClass; case 't': return C_RegisterClass; case 'j': return C_Other; // Constant for movw. // An address with a single base register. Due to the way we // currently handle addresses it is the same as an 'r' memory constraint. case 'Q': return C_Memory; } } else if (Constraint.size() == 2) { switch (Constraint[0]) { default: break; // All 'U+' constraints are addresses. case 'U': return C_Memory; } } return TargetLowering::getConstraintType(Constraint); } /// Examine constraint type and operand type and determine a weight value. /// This object must already have been set up with the operand type /// and the current alternative constraint selected. 
TargetLowering::ConstraintWeight ARMTargetLowering::getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const { ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); break; case 'l': if (type->isIntegerTy()) { if (Subtarget->isThumb()) weight = CW_SpecificReg; else weight = CW_Register; } break; case 'w': if (type->isFloatingPointTy()) weight = CW_Register; break; } return weight; } using RCPair = std::pair; RCPair ARMTargetLowering::getRegForInlineAsmConstraint( const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC ARM Constraint Letters switch (Constraint[0]) { case 'l': // Low regs or general regs. if (Subtarget->isThumb()) return RCPair(0U, &ARM::tGPRRegClass); return RCPair(0U, &ARM::GPRRegClass); case 'h': // High regs or no regs. if (Subtarget->isThumb()) return RCPair(0U, &ARM::hGPRRegClass); break; case 'r': if (Subtarget->isThumb1Only()) return RCPair(0U, &ARM::tGPRRegClass); return RCPair(0U, &ARM::GPRRegClass); case 'w': if (VT == MVT::Other) break; if (VT == MVT::f32) return RCPair(0U, &ARM::SPRRegClass); if (VT.getSizeInBits() == 64) return RCPair(0U, &ARM::DPRRegClass); if (VT.getSizeInBits() == 128) return RCPair(0U, &ARM::QPRRegClass); break; case 'x': if (VT == MVT::Other) break; if (VT == MVT::f32) return RCPair(0U, &ARM::SPR_8RegClass); if (VT.getSizeInBits() == 64) return RCPair(0U, &ARM::DPR_8RegClass); if (VT.getSizeInBits() == 128) return RCPair(0U, &ARM::QPR_8RegClass); break; case 't': if (VT == MVT::Other) break; if (VT == MVT::f32 || VT == MVT::i32) return RCPair(0U, &ARM::SPRRegClass); if (VT.getSizeInBits() == 64) return RCPair(0U, &ARM::DPR_VFP2RegClass); if (VT.getSizeInBits() == 128) return RCPair(0U, &ARM::QPR_VFP2RegClass); break; } } if (StringRef("{cc}").equals_lower(Constraint)) return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass); return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector&Ops, SelectionDAG &DAG) const { SDValue Result; // Currently only support length 1 constraints. if (Constraint.length() != 1) return; char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { default: break; case 'j': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': ConstantSDNode *C = dyn_cast(Op); if (!C) return; int64_t CVal64 = C->getSExtValue(); int CVal = (int) CVal64; // None of these constraints allow values larger than 32 bits. Check // that the value fits in an int. if (CVal != CVal64) return; switch (ConstraintLetter) { case 'j': // Constant suitable for movw, must be between 0 and // 65535. if (Subtarget->hasV6T2Ops()) if (CVal >= 0 && CVal <= 65535) break; return; case 'I': if (Subtarget->isThumb1Only()) { // This must be a constant between 0 and 255, for ADD // immediates. if (CVal >= 0 && CVal <= 255) break; } else if (Subtarget->isThumb2()) { // A constant that can be used as an immediate value in a // data-processing instruction. 
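// Illustrative examples (assumed encodings; double-check against the ARM ARM):
// Thumb-2 modified immediates accept an 8-bit value optionally rotated, plus
// the replicated patterns 0x00XY00XY, 0xXY00XY00 and 0xXYXYXYXY, so e.g.
// 0x00FF00FF satisfies getT2SOImmVal(). Classic ARM immediates are an 8-bit
// value rotated right by an even amount, so e.g. 0xFF0 (0xFF << 4) satisfies
// getSOImmVal() while 0x00FF00FF does not.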
if (ARM_AM::getT2SOImmVal(CVal) != -1) break; } else { // A constant that can be used as an immediate value in a // data-processing instruction. if (ARM_AM::getSOImmVal(CVal) != -1) break; } return; case 'J': if (Subtarget->isThumb1Only()) { // This must be a constant between -255 and -1, for negated ADD // immediates. This can be used in GCC with an "n" modifier that // prints the negated value, for use with SUB instructions. It is // not useful otherwise but is implemented for compatibility. if (CVal >= -255 && CVal <= -1) break; } else { // This must be a constant between -4095 and 4095. It is not clear // what this constraint is intended for. Implemented for // compatibility with GCC. if (CVal >= -4095 && CVal <= 4095) break; } return; case 'K': if (Subtarget->isThumb1Only()) { // A 32-bit value where only one byte has a nonzero value. Exclude // zero to match GCC. This constraint is used by GCC internally for // constants that can be loaded with a move/shift combination. // It is not useful otherwise but is implemented for compatibility. if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) break; } else if (Subtarget->isThumb2()) { // A constant whose bitwise inverse can be used as an immediate // value in a data-processing instruction. This can be used in GCC // with a "B" modifier that prints the inverted value, for use with // BIC and MVN instructions. It is not useful otherwise but is // implemented for compatibility. if (ARM_AM::getT2SOImmVal(~CVal) != -1) break; } else { // A constant whose bitwise inverse can be used as an immediate // value in a data-processing instruction. This can be used in GCC // with a "B" modifier that prints the inverted value, for use with // BIC and MVN instructions. It is not useful otherwise but is // implemented for compatibility. if (ARM_AM::getSOImmVal(~CVal) != -1) break; } return; case 'L': if (Subtarget->isThumb1Only()) { // This must be a constant between -7 and 7, // for 3-operand ADD/SUB immediate instructions. if (CVal >= -7 && CVal < 7) break; } else if (Subtarget->isThumb2()) { // A constant whose negation can be used as an immediate value in a // data-processing instruction. This can be used in GCC with an "n" // modifier that prints the negated value, for use with SUB // instructions. It is not useful otherwise but is implemented for // compatibility. if (ARM_AM::getT2SOImmVal(-CVal) != -1) break; } else { // A constant whose negation can be used as an immediate value in a // data-processing instruction. This can be used in GCC with an "n" // modifier that prints the negated value, for use with SUB // instructions. It is not useful otherwise but is implemented for // compatibility. if (ARM_AM::getSOImmVal(-CVal) != -1) break; } return; case 'M': if (Subtarget->isThumb1Only()) { // This must be a multiple of 4 between 0 and 1020, for // ADD sp + immediate. if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) break; } else { // A power of two or a constant between 0 and 32. This is used in // GCC for the shift amount on shifted register operands, but it is // useful in general for any shift amounts. if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0)) break; } return; case 'N': if (Subtarget->isThumb()) { // FIXME thumb2 // This must be a constant between 0 and 31, for shift amounts. if (CVal >= 0 && CVal <= 31) break; } return; case 'O': if (Subtarget->isThumb()) { // FIXME thumb2 // This must be a multiple of 4 between -508 and 508, for // ADD/SUB sp = sp + immediate. 
if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0)) break; } return; } Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType()); break; } if (Result.getNode()) { Ops.push_back(Result); return; } return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } static RTLIB::Libcall getDivRemLibcall( const SDNode *N, MVT::SimpleValueType SVT) { assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM || N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) && "Unhandled Opcode in getDivRemLibcall"); bool isSigned = N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::SREM; RTLIB::Libcall LC; switch (SVT) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; } return LC; } static TargetLowering::ArgListTy getDivRemArgList( const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) { assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM || N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) && "Unhandled Opcode in getDivRemArgList"); bool isSigned = N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::SREM; TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { EVT ArgVT = N->getOperand(i).getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*Context); Entry.Node = N->getOperand(i); Entry.Ty = ArgTy; Entry.IsSExt = isSigned; Entry.IsZExt = !isSigned; Args.push_back(Entry); } if (Subtarget->isTargetWindows() && Args.size() >= 2) std::swap(Args[0], Args[1]); return Args; } SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() || Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() || Subtarget->isTargetWindows()) && "Register-based DivRem lowering only"); unsigned Opcode = Op->getOpcode(); assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) && "Invalid opcode for Div/Rem lowering"); bool isSigned = (Opcode == ISD::SDIVREM); EVT VT = Op->getValueType(0); Type *Ty = VT.getTypeForEVT(*DAG.getContext()); SDLoc dl(Op); // If the target has hardware divide, use divide + multiply + subtract: // div = a / b // rem = a - b * div // return {div, rem} // This should be lowered into UDIV/SDIV + MLS later on. bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode() : Subtarget->hasDivideInARMMode(); if (hasDivide && Op->getValueType(0).isSimple() && Op->getSimpleValueType(0) == MVT::i32) { unsigned DivOpcode = isSigned ? 
ISD::SDIV : ISD::UDIV; const SDValue Dividend = Op->getOperand(0); const SDValue Divisor = Op->getOperand(1); SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor); SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor); SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul); SDValue Values[2] = {Div, Rem}; return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values); } RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(), VT.getSimpleVT().SimpleTy); SDValue InChain = DAG.getEntryNode(); TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(), DAG.getContext(), Subtarget); SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy(DAG.getDataLayout())); Type *RetTy = StructType::get(Ty, Ty); if (Subtarget->isTargetWindows()) InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(InChain) .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned); std::pair CallInfo = LowerCallTo(CLI); return CallInfo.first; } // Lowers REM using divmod helpers // see RTABI section 4.2/4.3 SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const { // Build return types (div and rem) std::vector RetTyParams; Type *RetTyElement; switch (N->getValueType(0).getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break; case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break; case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break; case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break; } RetTyParams.push_back(RetTyElement); RetTyParams.push_back(RetTyElement); ArrayRef ret = ArrayRef(RetTyParams); Type *RetTy = StructType::get(*DAG.getContext(), ret); RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT(). SimpleTy); SDValue InChain = DAG.getEntryNode(); TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(), Subtarget); bool isSigned = N->getOpcode() == ISD::SREM; SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy(DAG.getDataLayout())); if (Subtarget->isTargetWindows()) InChain = WinDBZCheckDenominator(DAG, N, InChain); // Lower call CallLoweringInfo CLI(DAG); CLI.setChain(InChain) .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args)) .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N)); std::pair CallResult = LowerCallTo(CLI); // Return second (rem) result operand (first contains div) SDNode *ResNode = CallResult.first.getNode(); assert(ResNode->getNumOperands() == 2 && "divmod should return two operands"); return ResNode->getOperand(1); } SDValue ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetWindows() && "unsupported target platform"); SDLoc DL(Op); // Get the inputs. 
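// Note on the Windows path below (assumed __chkstk contract): the probe
// helper takes the allocation size as a count of 4-byte words in r4, which
// is why Size is shifted right by 2 and copied into R4 before the
// ARMISD::WIN__CHKSTK node, and the adjusted stack pointer is then read
// back from SP.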
SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); if (DAG.getMachineFunction().getFunction().hasFnAttribute( "no-stack-arg-probe")) { unsigned Align = cast(Op.getOperand(2))->getZExtValue(); SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32); Chain = SP.getValue(1); SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size); if (Align) SP = DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0), DAG.getConstant(-(uint64_t)Align, DL, MVT::i32)); Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP); SDValue Ops[2] = { SP, Chain }; return DAG.getMergeValues(Ops, DL); } SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size, DAG.getConstant(2, DL, MVT::i32)); SDValue Flag; Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag); Flag = Chain.getValue(1); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag); SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32); Chain = NewSP.getValue(1); SDValue Ops[2] = { NewSP, Chain }; return DAG.getMergeValues(Ops, DL); } SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() && "Unexpected type for custom-lowering FP_EXTEND"); RTLIB::Libcall LC; LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); SDValue SrcVal = Op.getOperand(0); return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false, SDLoc(Op)).first; } SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOperand(0).getValueType() == MVT::f64 && Subtarget->isFPOnlySP() && "Unexpected type for custom-lowering FP_ROUND"); RTLIB::Libcall LC; LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); SDValue SrcVal = Op.getOperand(0); return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false, SDLoc(Op)).first; } bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. return false; } bool ARM::isBitFieldInvertedMask(unsigned v) { if (v == 0xffffffff) return false; // there can be 1's on either or both "outsides", all the "inside" // bits must be 0's return isShiftedMask_32(~v); } /// isFPImmLegal - Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { if (!Subtarget->hasVFP3()) return false; if (VT == MVT::f16 && Subtarget->hasFullFP16()) return ARM_AM::getFP16Imm(Imm) != -1; if (VT == MVT::f32) return ARM_AM::getFP32Imm(Imm) != -1; if (VT == MVT::f64 && !Subtarget->isFPOnlySP()) return ARM_AM::getFP64Imm(Imm) != -1; return false; } /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment /// specified in the intrinsic calls. 
bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { case Intrinsic::arm_neon_vld1: case Intrinsic::arm_neon_vld2: case Intrinsic::arm_neon_vld3: case Intrinsic::arm_neon_vld4: case Intrinsic::arm_neon_vld2lane: case Intrinsic::arm_neon_vld3lane: case Intrinsic::arm_neon_vld4lane: case Intrinsic::arm_neon_vld2dup: case Intrinsic::arm_neon_vld3dup: case Intrinsic::arm_neon_vld4dup: { Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64; Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); Info.align = cast(AlignArg)->getZExtValue(); // volatile loads with NEON intrinsics not supported Info.flags = MachineMemOperand::MOLoad; return true; } case Intrinsic::arm_neon_vld1x2: case Intrinsic::arm_neon_vld1x3: case Intrinsic::arm_neon_vld1x4: { Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64; Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); Info.offset = 0; Info.align = 0; // volatile loads with NEON intrinsics not supported Info.flags = MachineMemOperand::MOLoad; return true; } case Intrinsic::arm_neon_vst1: case Intrinsic::arm_neon_vst2: case Intrinsic::arm_neon_vst3: case Intrinsic::arm_neon_vst4: case Intrinsic::arm_neon_vst2lane: case Intrinsic::arm_neon_vst3lane: case Intrinsic::arm_neon_vst4lane: { Info.opc = ISD::INTRINSIC_VOID; // Conservatively set memVT to the entire set of vectors stored. auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); unsigned NumElts = 0; for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; NumElts += DL.getTypeSizeInBits(ArgTy) / 64; } Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); Info.align = cast(AlignArg)->getZExtValue(); // volatile stores with NEON intrinsics not supported Info.flags = MachineMemOperand::MOStore; return true; } case Intrinsic::arm_neon_vst1x2: case Intrinsic::arm_neon_vst1x3: case Intrinsic::arm_neon_vst1x4: { Info.opc = ISD::INTRINSIC_VOID; // Conservatively set memVT to the entire set of vectors stored. 
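// Worked example (illustrative): a vst1x3 of three <4 x i32> vectors stores
// 3 * 128 = 384 bits, so NumElts = 384 / 64 = 6 and memVT becomes v6i64,
// i.e. the MachineMemOperand covers the whole 48-byte access.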
auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); unsigned NumElts = 0; for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; NumElts += DL.getTypeSizeInBits(ArgTy) / 64; } Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = 0; // volatile stores with NEON intrinsics not supported Info.flags = MachineMemOperand::MOStore; return true; } case Intrinsic::arm_ldaex: case Intrinsic::arm_ldrex: { auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); PointerType *PtrTy = cast(I.getArgOperand(0)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; return true; } case Intrinsic::arm_stlex: case Intrinsic::arm_strex: { auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); PointerType *PtrTy = cast(I.getArgOperand(1)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(1); Info.offset = 0; Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; } case Intrinsic::arm_stlexd: case Intrinsic::arm_strexd: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; Info.ptrVal = I.getArgOperand(2); Info.offset = 0; Info.align = 8; Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; case Intrinsic::arm_ldaexd: case Intrinsic::arm_ldrexd: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = 8; Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; return true; default: break; } return false; } /// Returns true if it is beneficial to convert a load of a constant /// to just the constant itself. bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned Bits = Ty->getPrimitiveSizeInBits(); if (Bits == 0 || Bits > 32) return false; return true; } bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const { if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) return false; return (Index == 0 || Index == ResVT.getVectorNumElements()); } Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); // First, if the target has no DMB, see what fallback we can use. if (!Subtarget->hasDataBarrier()) { // Some ARMv6 cpus can support data barriers with an mcr instruction. // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get // here. if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) { Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr); Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0), Builder.getInt32(0), Builder.getInt32(7), Builder.getInt32(10), Builder.getInt32(5)}; return Builder.CreateCall(MCR, args); } else { // Instead of using barriers, atomic accesses on these subtargets use // libcalls. 
llvm_unreachable("makeDMB on a target so old that it has no barriers"); } } else { Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb); // Only a full system barrier exists in the M-class architectures. Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain; Constant *CDomain = Builder.getInt32(Domain); return Builder.CreateCall(DMB, CDomain); } } // Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html Instruction *ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const { switch (Ord) { case AtomicOrdering::NotAtomic: case AtomicOrdering::Unordered: llvm_unreachable("Invalid fence: unordered/non-atomic"); case AtomicOrdering::Monotonic: case AtomicOrdering::Acquire: return nullptr; // Nothing to do case AtomicOrdering::SequentiallyConsistent: if (!Inst->hasAtomicStore()) return nullptr; // Nothing to do LLVM_FALLTHROUGH; case AtomicOrdering::Release: case AtomicOrdering::AcquireRelease: if (Subtarget->preferISHSTBarriers()) return makeDMB(Builder, ARM_MB::ISHST); // FIXME: add a comment with a link to documentation justifying this. else return makeDMB(Builder, ARM_MB::ISH); } llvm_unreachable("Unknown fence ordering in emitLeadingFence"); } Instruction *ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const { switch (Ord) { case AtomicOrdering::NotAtomic: case AtomicOrdering::Unordered: llvm_unreachable("Invalid fence: unordered/not-atomic"); case AtomicOrdering::Monotonic: case AtomicOrdering::Release: return nullptr; // Nothing to do case AtomicOrdering::Acquire: case AtomicOrdering::AcquireRelease: case AtomicOrdering::SequentiallyConsistent: return makeDMB(Builder, ARM_MB::ISH); } llvm_unreachable("Unknown fence ordering in emitTrailingFence"); } // Loads and stores less than 64-bits are already atomic; ones above that // are doomed anyway, so defer to the default libcall and blame the OS when // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit // anything for those. bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); return (Size == 64) && !Subtarget->isMClass(); } // Loads and stores less than 64-bits are already atomic; ones above that // are doomed anyway, so defer to the default libcall and blame the OS when // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit // anything for those. // FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that // guarantee, see DDI0406C ARM architecture reference manual, // sections A8.8.72-74 LDRD) TargetLowering::AtomicExpansionKind ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { unsigned Size = LI->getType()->getPrimitiveSizeInBits(); return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly : AtomicExpansionKind::None; } // For the real atomic operations, we have ldrex/strex up to 32 bits, // and up to 64 bits on the non-M profiles TargetLowering::AtomicExpansionKind ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) ? 
AtomicExpansionKind::LLSC : AtomicExpansionKind::None; } TargetLowering::AtomicExpansionKind ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { // At -O0, fast-regalloc cannot cope with the live vregs necessary to // implement cmpxchg without spilling. If the address being exchanged is also // on the stack and close enough to the spill slot, this can lead to a // situation where the monitor always gets cleared and the atomic operation // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. bool HasAtomicCmpXchg = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg) return AtomicExpansionKind::LLSC; return AtomicExpansionKind::None; } bool ARMTargetLowering::shouldInsertFencesForAtomic( const Instruction *I) const { return InsertFencesForAtomic; } // This has so far only been implemented for MachO. bool ARMTargetLowering::useLoadStackGuardNode() const { return Subtarget->isTargetMachO(); } bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const { // If we do not have NEON, vector types are not natively supported. if (!Subtarget->hasNEON()) return false; // Floating point values and vector values map to the same register file. // Therefore, although we could do a store extract of a vector type, this is // better to leave at float as we have more freedom in the addressing mode for // those. if (VectorTy->isFPOrFPVectorTy()) return false; // If the index is unknown at compile time, this is very expensive to lower // and it is not possible to combine the store with the extract. if (!isa(Idx)) return false; assert(VectorTy->isVectorTy() && "VectorTy is not a vector type"); unsigned BitWidth = cast(VectorTy)->getBitWidth(); // We can do a store + vector extract on any vector that fits perfectly in a D // or Q register. if (BitWidth == 64 || BitWidth == 128) { Cost = 0; return true; } return false; } bool ARMTargetLowering::isCheapToSpeculateCttz() const { return Subtarget->hasV6T2Ops(); } bool ARMTargetLowering::isCheapToSpeculateCtlz() const { return Subtarget->hasV6T2Ops(); } Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Type *ValTy = cast(Addr->getType())->getElementType(); bool IsAcquire = isAcquireOrStronger(Ord); // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd // intrinsic must return {i32, i32} and we have to recombine them into a // single i64 here. if (ValTy->getPrimitiveSizeInBits() == 64) { Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd; Function *Ldrex = Intrinsic::getDeclaration(M, Int); Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi"); Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo"); Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi"); if (!Subtarget->isLittle()) std::swap (Lo, Hi); Lo = Builder.CreateZExt(Lo, ValTy, "lo64"); Hi = Builder.CreateZExt(Hi, ValTy, "hi64"); return Builder.CreateOr( Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64"); } Type *Tys[] = { Addr->getType() }; Intrinsic::ID Int = IsAcquire ? 
Intrinsic::arm_ldaex : Intrinsic::arm_ldrex; Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys); return Builder.CreateTruncOrBitCast( Builder.CreateCall(Ldrex, Addr), cast(Addr->getType())->getElementType()); } void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance( IRBuilder<> &Builder) const { if (!Subtarget->hasV7Ops()) return; Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex)); } Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); bool IsRelease = isReleaseOrStronger(Ord); // Since the intrinsics must have legal type, the i64 intrinsics take two // parameters: "i32, i32". We must marshal Val into the appropriate form // before the call. if (Val->getType()->getPrimitiveSizeInBits() == 64) { Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd; Function *Strex = Intrinsic::getDeclaration(M, Int); Type *Int32Ty = Type::getInt32Ty(M->getContext()); Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo"); Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi"); if (!Subtarget->isLittle()) std::swap(Lo, Hi); Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); return Builder.CreateCall(Strex, {Lo, Hi, Addr}); } Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex; Type *Tys[] = { Addr->getType() }; Function *Strex = Intrinsic::getDeclaration(M, Int, Tys); return Builder.CreateCall( Strex, {Builder.CreateZExtOrBitCast( Val, Strex->getFunctionType()->getParamType(0)), Addr}); } bool ARMTargetLowering::alignLoopsWithOptSize() const { return Subtarget->isMClass(); } /// A helper function for determining the number of interleaved accesses we /// will generate when lowering accesses of the given type. unsigned ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const { return (DL.getTypeSizeInBits(VecTy) + 127) / 128; } bool ARMTargetLowering::isLegalInterleavedAccessType( VectorType *VecTy, const DataLayout &DL) const { unsigned VecSize = DL.getTypeSizeInBits(VecTy); unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType()); // Ensure the vector doesn't have f16 elements. Even though we could do an // i16 vldN, we can't hold the f16 vectors and will end up converting via // f32. if (VecTy->getElementType()->isHalfTy()) return false; // Ensure the number of vector elements is greater than 1. if (VecTy->getNumElements() < 2) return false; // Ensure the element type is legal. if (ElSize != 8 && ElSize != 16 && ElSize != 32) return false; // Ensure the total vector size is 64 or a multiple of 128. Types larger than // 128 will be split into multiple interleaved accesses. return VecSize == 64 || VecSize % 128 == 0; } /// Lower an interleaved load into a vldN intrinsic. /// /// E.g. 
Lower an interleaved load (Factor = 2): /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements /// /// Into: /// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4) /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1 bool ARMTargetLowering::lowerInterleavedLoad( LoadInst *LI, ArrayRef Shuffles, ArrayRef Indices, unsigned Factor) const { assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && "Invalid interleave factor"); assert(!Shuffles.empty() && "Empty shufflevector input"); assert(Shuffles.size() == Indices.size() && "Unmatched number of shufflevectors and indices"); VectorType *VecTy = Shuffles[0]->getType(); Type *EltTy = VecTy->getVectorElementType(); const DataLayout &DL = LI->getModule()->getDataLayout(); // Skip if we do not have NEON and skip illegal vector types. We can // "legalize" wide vector types into multiple interleaved accesses as long as // the vector types are divisible by 128. if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL)) return false; unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL); // A pointer vector can not be the return type of the ldN intrinsics. Need to // load integer vectors first and then convert to pointer vectors. if (EltTy->isPointerTy()) VecTy = VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements()); IRBuilder<> Builder(LI); // The base address of the load. Value *BaseAddr = LI->getPointerOperand(); if (NumLoads > 1) { // If we're going to generate more than one load, reset the sub-vector type // to something legal. VecTy = VectorType::get(VecTy->getVectorElementType(), VecTy->getVectorNumElements() / NumLoads); // We will compute the pointer operand of each load from the original base // address using GEPs. Cast the base address to a pointer to the scalar // element type. BaseAddr = Builder.CreateBitCast( BaseAddr, VecTy->getVectorElementType()->getPointerTo( LI->getPointerAddressSpace())); } assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!"); Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace()); Type *Tys[] = {VecTy, Int8Ptr}; static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2, Intrinsic::arm_neon_vld3, Intrinsic::arm_neon_vld4}; Function *VldnFunc = Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys); // Holds sub-vectors extracted from the load intrinsic return values. The // sub-vectors are associated with the shufflevector instructions they will // replace. DenseMap> SubVecs; for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) { // If we're generating more than one load, compute the base address of // subsequent loads as an offset from the previous. if (LoadCount > 0) BaseAddr = Builder.CreateConstGEP1_32( BaseAddr, VecTy->getVectorNumElements() * Factor); SmallVector Ops; Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr)); Ops.push_back(Builder.getInt32(LI->getAlignment())); CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN"); // Replace uses of each shufflevector with the corresponding vector loaded // by ldN. for (unsigned i = 0; i < Shuffles.size(); i++) { ShuffleVectorInst *SV = Shuffles[i]; unsigned Index = Indices[i]; Value *SubVec = Builder.CreateExtractValue(VldN, Index); // Convert the integer vector to pointer vector if the element is pointer. 
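// Illustrative case (assuming 32-bit pointers on ARM): for an interleaved
// load whose lanes are pointer vectors such as <4 x i8*>, the vld2 above is
// issued on the equivalent <4 x i32> type, and each extracted sub-vector is
// converted back with an inttoptr below.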
if (EltTy->isPointerTy()) SubVec = Builder.CreateIntToPtr( SubVec, VectorType::get(SV->getType()->getVectorElementType(), VecTy->getVectorNumElements())); SubVecs[SV].push_back(SubVec); } } // Replace uses of the shufflevector instructions with the sub-vectors // returned by the load intrinsic. If a shufflevector instruction is // associated with more than one sub-vector, those sub-vectors will be // concatenated into a single wide vector. for (ShuffleVectorInst *SVI : Shuffles) { auto &SubVec = SubVecs[SVI]; auto *WideVec = SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0]; SVI->replaceAllUsesWith(WideVec); } return true; } /// Lower an interleaved store into a vstN intrinsic. /// /// E.g. Lower an interleaved store (Factor = 3): /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> /// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4 /// /// Into: /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3> /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7> /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11> /// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4) /// /// Note that the new shufflevectors will be removed and we'll only generate one /// vst3 instruction in CodeGen. /// /// Example for a more general valid mask (Factor 3). Lower: /// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1, /// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19> /// store <12 x i32> %i.vec, <12 x i32>* %ptr /// /// Into: /// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7> /// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35> /// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19> /// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4) bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const { assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && "Invalid interleave factor"); VectorType *VecTy = SVI->getType(); assert(VecTy->getVectorNumElements() % Factor == 0 && "Invalid interleaved store"); unsigned LaneLen = VecTy->getVectorNumElements() / Factor; Type *EltTy = VecTy->getVectorElementType(); VectorType *SubVecTy = VectorType::get(EltTy, LaneLen); const DataLayout &DL = SI->getModule()->getDataLayout(); // Skip if we do not have NEON and skip illegal vector types. We can // "legalize" wide vector types into multiple interleaved accesses as long as // the vector types are divisible by 128. if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL)) return false; unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL); Value *Op0 = SVI->getOperand(0); Value *Op1 = SVI->getOperand(1); IRBuilder<> Builder(SI); // StN intrinsics don't support pointer vectors as arguments. Convert pointer // vectors to integer vectors. if (EltTy->isPointerTy()) { Type *IntTy = DL.getIntPtrType(EltTy); // Convert to the corresponding integer vector. Type *IntVecTy = VectorType::get(IntTy, Op0->getType()->getVectorNumElements()); Op0 = Builder.CreatePtrToInt(Op0, IntVecTy); Op1 = Builder.CreatePtrToInt(Op1, IntVecTy); SubVecTy = VectorType::get(IntTy, LaneLen); } // The base address of the store. Value *BaseAddr = SI->getPointerOperand(); if (NumStores > 1) { // If we're going to generate more than one store, reset the lane length // and sub-vector type to something legal. 
LaneLen /= NumStores; SubVecTy = VectorType::get(SubVecTy->getVectorElementType(), LaneLen); // We will compute the pointer operand of each store from the original base // address using GEPs. Cast the base address to a pointer to the scalar // element type. BaseAddr = Builder.CreateBitCast( BaseAddr, SubVecTy->getVectorElementType()->getPointerTo( SI->getPointerAddressSpace())); } assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!"); auto Mask = SVI->getShuffleMask(); Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace()); Type *Tys[] = {Int8Ptr, SubVecTy}; static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2, Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4}; for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) { // If we generating more than one store, we compute the base address of // subsequent stores as an offset from the previous. if (StoreCount > 0) BaseAddr = Builder.CreateConstGEP1_32(BaseAddr, LaneLen * Factor); SmallVector Ops; Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr)); Function *VstNFunc = Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys); // Split the shufflevector operands into sub vectors for the new vstN call. for (unsigned i = 0; i < Factor; i++) { unsigned IdxI = StoreCount * LaneLen * Factor + i; if (Mask[IdxI] >= 0) { Ops.push_back(Builder.CreateShuffleVector( Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0))); } else { unsigned StartMask = 0; for (unsigned j = 1; j < LaneLen; j++) { unsigned IdxJ = StoreCount * LaneLen * Factor + j; if (Mask[IdxJ * Factor + IdxI] >= 0) { StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ; break; } } // Note: If all elements in a chunk are undefs, StartMask=0! // Note: Filling undef gaps with random elements is ok, since // those elements were being written anyway (with undefs). // In the case of all undefs we're defaulting to using elems from 0 // Note: StartMask cannot be negative, it's checked in // isReInterleaveMask Ops.push_back(Builder.CreateShuffleVector( Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0))); } } Ops.push_back(Builder.getInt32(SI->getAlignment())); Builder.CreateCall(VstNFunc, Ops); } return true; } enum HABaseType { HA_UNKNOWN = 0, HA_FLOAT, HA_DOUBLE, HA_VECT64, HA_VECT128 }; static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members) { if (auto *ST = dyn_cast(Ty)) { for (unsigned i = 0; i < ST->getNumElements(); ++i) { uint64_t SubMembers = 0; if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers)) return false; Members += SubMembers; } } else if (auto *AT = dyn_cast(Ty)) { uint64_t SubMembers = 0; if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers)) return false; Members += SubMembers * AT->getNumElements(); } else if (Ty->isFloatTy()) { if (Base != HA_UNKNOWN && Base != HA_FLOAT) return false; Members = 1; Base = HA_FLOAT; } else if (Ty->isDoubleTy()) { if (Base != HA_UNKNOWN && Base != HA_DOUBLE) return false; Members = 1; Base = HA_DOUBLE; } else if (auto *VT = dyn_cast(Ty)) { Members = 1; switch (Base) { case HA_FLOAT: case HA_DOUBLE: return false; case HA_VECT64: return VT->getBitWidth() == 64; case HA_VECT128: return VT->getBitWidth() == 128; case HA_UNKNOWN: switch (VT->getBitWidth()) { case 64: Base = HA_VECT64; return true; case 128: Base = HA_VECT128; return true; default: return false; } } } return (Members > 0 && Members <= 4); } /// Return the correct alignment for the current calling convention. 
unsigned ARMTargetLowering::getABIAlignmentForCallingConv(Type *ArgTy, DataLayout DL) const { if (!ArgTy->isVectorTy()) return DL.getABITypeAlignment(ArgTy); // Avoid over-aligning vector parameters. It would require realigning the // stack and waste space for no real benefit. return std::min(DL.getABITypeAlignment(ArgTy), DL.getStackAlignment()); } /// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of /// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when /// passing according to AAPCS rules. bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { if (getEffectiveCallingConv(CallConv, isVarArg) != CallingConv::ARM_AAPCS_VFP) return false; HABaseType Base = HA_UNKNOWN; uint64_t Members = 0; bool IsHA = isHomogeneousAggregate(Ty, Base, Members); LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump()); bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy(); return IsHA || IsIntArray; } unsigned ARMTargetLowering::getExceptionPointerRegister( const Constant *PersonalityFn) const { // Platforms which do not use SjLj EH may return values in these registers // via the personality function. return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R0; } unsigned ARMTargetLowering::getExceptionSelectorRegister( const Constant *PersonalityFn) const { // Platforms which do not use SjLj EH may return values in these registers // via the personality function. return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1; } void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { // Update IsSplitCSR in ARMFunctionInfo. ARMFunctionInfo *AFI = Entry->getParent()->getInfo(); AFI->setIsSplitCSR(true); } void ARMTargetLowering::insertCopiesSplitCSR( MachineBasicBlock *Entry, const SmallVectorImpl &Exits) const { const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); if (!IStart) return; const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); MachineBasicBlock::iterator MBBI = Entry->begin(); for (const MCPhysReg *I = IStart; *I; ++I) { const TargetRegisterClass *RC = nullptr; if (ARM::GPRRegClass.contains(*I)) RC = &ARM::GPRRegClass; else if (ARM::DPRRegClass.contains(*I)) RC = &ARM::DPRRegClass; else llvm_unreachable("Unexpected register class in CSRsViaCopy!"); unsigned NewVR = MRI->createVirtualRegister(RC); // Create copy from CSR to a virtual register. // FIXME: this currently does not emit CFI pseudo-instructions, it works // fine for CXX_FAST_TLS since the C++-style TLS access functions should be // nounwind. If we want to generalize this later, we may need to emit // CFI pseudo-instructions. assert(Entry->getParent()->getFunction().hasFnAttribute( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(*I); // Insert the copy-back instructions right before the terminator. 
for (auto *Exit : Exits) BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), TII->get(TargetOpcode::COPY), *I) .addReg(NewVR); } } void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const { MF.getFrameInfo().computeMaxCallFrameSize(MF); TargetLoweringBase::finalizeLowering(MF); } Index: vendor/llvm/dist-release_80/lib/Target/AVR/AVRISelLowering.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/Target/AVR/AVRISelLowering.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/AVR/AVRISelLowering.cpp (revision 348932) @@ -1,2038 +1,2044 @@ //===-- AVRISelLowering.cpp - AVR DAG Lowering Implementation -------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines the interfaces that AVR uses to lower LLVM code into a // selection DAG. // //===----------------------------------------------------------------------===// #include "AVRISelLowering.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/Function.h" #include "llvm/Support/ErrorHandling.h" #include "AVR.h" #include "AVRMachineFunctionInfo.h" +#include "AVRSubtarget.h" #include "AVRTargetMachine.h" #include "MCTargetDesc/AVRMCTargetDesc.h" namespace llvm { -AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm) - : TargetLowering(tm) { +AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM, + const AVRSubtarget &STI) + : TargetLowering(TM), Subtarget(STI) { // Set up the register classes. addRegisterClass(MVT::i8, &AVR::GPR8RegClass); addRegisterClass(MVT::i16, &AVR::DREGSRegClass); // Compute derived properties from the register classes. - computeRegisterProperties(tm.getSubtargetImpl()->getRegisterInfo()); + computeRegisterProperties(Subtarget.getRegisterInfo()); setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(AVR::SP); setSupportsUnalignedAtomics(true); setOperationAction(ISD::GlobalAddress, MVT::i16, Custom); setOperationAction(ISD::BlockAddress, MVT::i16, Custom); setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i8, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i16, Expand); for (MVT VT : MVT::integer_valuetypes()) { for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) { setLoadExtAction(N, VT, MVT::i1, Promote); setLoadExtAction(N, VT, MVT::i8, Expand); } } setTruncStoreAction(MVT::i16, MVT::i8, Expand); for (MVT VT : MVT::integer_valuetypes()) { setOperationAction(ISD::ADDC, VT, Legal); setOperationAction(ISD::SUBC, VT, Legal); setOperationAction(ISD::ADDE, VT, Legal); setOperationAction(ISD::SUBE, VT, Legal); } // sub (x, imm) gets canonicalized to add (x, -imm), so for illegal types // revert into a sub since we don't have an add with immediate instruction. 
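// Illustrative example: "sub i32 %x, 5" reaches the DAG as "add %x, -5";
// because AVR has no 32-bit add-with-immediate, the custom hooks below turn
// it back into a subtraction of the positive constant.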
setOperationAction(ISD::ADD, MVT::i32, Custom); setOperationAction(ISD::ADD, MVT::i64, Custom); // our shift instructions are only able to shift 1 bit at a time, so handle // this in a custom way. setOperationAction(ISD::SRA, MVT::i8, Custom); setOperationAction(ISD::SHL, MVT::i8, Custom); setOperationAction(ISD::SRL, MVT::i8, Custom); setOperationAction(ISD::SRA, MVT::i16, Custom); setOperationAction(ISD::SHL, MVT::i16, Custom); setOperationAction(ISD::SRL, MVT::i16, Custom); setOperationAction(ISD::SHL_PARTS, MVT::i16, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand); setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand); setOperationAction(ISD::ROTL, MVT::i8, Custom); setOperationAction(ISD::ROTL, MVT::i16, Custom); setOperationAction(ISD::ROTR, MVT::i8, Custom); setOperationAction(ISD::ROTR, MVT::i16, Custom); setOperationAction(ISD::BR_CC, MVT::i8, Custom); setOperationAction(ISD::BR_CC, MVT::i16, Custom); setOperationAction(ISD::BR_CC, MVT::i32, Custom); setOperationAction(ISD::BR_CC, MVT::i64, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::SELECT_CC, MVT::i8, Custom); setOperationAction(ISD::SELECT_CC, MVT::i16, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); setOperationAction(ISD::SETCC, MVT::i8, Custom); setOperationAction(ISD::SETCC, MVT::i16, Custom); setOperationAction(ISD::SETCC, MVT::i32, Custom); setOperationAction(ISD::SETCC, MVT::i64, Custom); setOperationAction(ISD::SELECT, MVT::i8, Expand); setOperationAction(ISD::SELECT, MVT::i16, Expand); setOperationAction(ISD::BSWAP, MVT::i16, Expand); // Add support for postincrement and predecrement load/stores. setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal); setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal); setIndexedLoadAction(ISD::PRE_DEC, MVT::i8, Legal); setIndexedLoadAction(ISD::PRE_DEC, MVT::i16, Legal); setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal); setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal); setIndexedStoreAction(ISD::PRE_DEC, MVT::i8, Legal); setIndexedStoreAction(ISD::PRE_DEC, MVT::i16, Legal); setOperationAction(ISD::BR_JT, MVT::Other, Expand); setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VAEND, MVT::Other, Expand); setOperationAction(ISD::VAARG, MVT::Other, Expand); setOperationAction(ISD::VACOPY, MVT::Other, Expand); // Atomic operations which must be lowered to rtlib calls for (MVT VT : MVT::integer_valuetypes()) { setOperationAction(ISD::ATOMIC_SWAP, VT, Expand); setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Expand); setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand); setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand); setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand); setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand); setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand); } // Division/remainder setOperationAction(ISD::UDIV, MVT::i8, Expand); setOperationAction(ISD::UDIV, MVT::i16, Expand); setOperationAction(ISD::UREM, MVT::i8, Expand); setOperationAction(ISD::UREM, MVT::i16, Expand); setOperationAction(ISD::SDIV, MVT::i8, Expand); setOperationAction(ISD::SDIV, MVT::i16, Expand); setOperationAction(ISD::SREM, MVT::i8, Expand); setOperationAction(ISD::SREM, MVT::i16, Expand); // Make division and modulus custom for (MVT VT : MVT::integer_valuetypes()) { setOperationAction(ISD::UDIVREM, VT, Custom); setOperationAction(ISD::SDIVREM, VT, Custom); } // Do not use MUL. The AVR instructions are closer to SMUL_LOHI &co. 
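// Background (assumed AVR semantics): the hardware MUL/MULS instructions
// always produce the full 16-bit product in R1:R0, i.e. both halves at once,
// which is why plain MUL nodes are expanded and only the *MUL_LOHI forms map
// naturally onto the ISA.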
setOperationAction(ISD::MUL, MVT::i8, Expand); setOperationAction(ISD::MUL, MVT::i16, Expand); // Expand 16 bit multiplications. setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand); + // Expand multiplications to libcalls when there is + // no hardware MUL. + if (!Subtarget.supportsMultiplication()) { + setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); + } + for (MVT VT : MVT::integer_valuetypes()) { setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); } for (MVT VT : MVT::integer_valuetypes()) { setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTLZ, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); } for (MVT VT : MVT::integer_valuetypes()) { setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); // TODO: The generated code is pretty poor. Investigate using the // same "shift and subtract with carry" trick that we do for // extending 8-bit to 16-bit. This may require infrastructure // improvements in how we treat 16-bit "registers" to be feasible. } // Division rtlib functions (not supported) setLibcallName(RTLIB::SDIV_I8, nullptr); setLibcallName(RTLIB::SDIV_I16, nullptr); setLibcallName(RTLIB::SDIV_I32, nullptr); setLibcallName(RTLIB::SDIV_I64, nullptr); setLibcallName(RTLIB::SDIV_I128, nullptr); setLibcallName(RTLIB::UDIV_I8, nullptr); setLibcallName(RTLIB::UDIV_I16, nullptr); setLibcallName(RTLIB::UDIV_I32, nullptr); setLibcallName(RTLIB::UDIV_I64, nullptr); setLibcallName(RTLIB::UDIV_I128, nullptr); // Modulus rtlib functions (not supported) setLibcallName(RTLIB::SREM_I8, nullptr); setLibcallName(RTLIB::SREM_I16, nullptr); setLibcallName(RTLIB::SREM_I32, nullptr); setLibcallName(RTLIB::SREM_I64, nullptr); setLibcallName(RTLIB::SREM_I128, nullptr); setLibcallName(RTLIB::UREM_I8, nullptr); setLibcallName(RTLIB::UREM_I16, nullptr); setLibcallName(RTLIB::UREM_I32, nullptr); setLibcallName(RTLIB::UREM_I64, nullptr); setLibcallName(RTLIB::UREM_I128, nullptr); // Division and modulus rtlib functions setLibcallName(RTLIB::SDIVREM_I8, "__divmodqi4"); setLibcallName(RTLIB::SDIVREM_I16, "__divmodhi4"); setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); setLibcallName(RTLIB::SDIVREM_I64, "__divmoddi4"); setLibcallName(RTLIB::SDIVREM_I128, "__divmodti4"); setLibcallName(RTLIB::UDIVREM_I8, "__udivmodqi4"); setLibcallName(RTLIB::UDIVREM_I16, "__udivmodhi4"); setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); setLibcallName(RTLIB::UDIVREM_I64, "__udivmoddi4"); setLibcallName(RTLIB::UDIVREM_I128, "__udivmodti4"); // Several of the runtime library functions use a special calling conv setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::AVR_BUILTIN); setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::AVR_BUILTIN); setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::AVR_BUILTIN); setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::AVR_BUILTIN); // Trigonometric rtlib functions setLibcallName(RTLIB::SIN_F32, "sin"); setLibcallName(RTLIB::COS_F32, "cos"); setMinFunctionAlignment(1); setMinimumJumpTableEntries(INT_MAX); } const char *AVRTargetLowering::getTargetNodeName(unsigned Opcode) const { #define NODE(name) \ case AVRISD::name: \ return #name switch (Opcode) { default: return nullptr; NODE(RET_FLAG); NODE(RETI_FLAG); NODE(CALL); NODE(WRAPPER); NODE(LSL); NODE(LSR); NODE(ROL); NODE(ROR); NODE(ASR); NODE(LSLLOOP); NODE(LSRLOOP); NODE(ASRLOOP); NODE(BRCOND); NODE(CMP); NODE(CMPC); NODE(TST); NODE(SELECT_CC); #undef NODE } 
} EVT AVRTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const { assert(!VT.isVector() && "No AVR SetCC type for vectors!"); return MVT::i8; } SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const { //:TODO: this function has to be completely rewritten to produce optimal // code, for now it's producing very long but correct code. unsigned Opc8; const SDNode *N = Op.getNode(); EVT VT = Op.getValueType(); SDLoc dl(N); // Expand non-constant shifts to loops. if (!isa(N->getOperand(1))) { switch (Op.getOpcode()) { default: llvm_unreachable("Invalid shift opcode!"); case ISD::SHL: return DAG.getNode(AVRISD::LSLLOOP, dl, VT, N->getOperand(0), N->getOperand(1)); case ISD::SRL: return DAG.getNode(AVRISD::LSRLOOP, dl, VT, N->getOperand(0), N->getOperand(1)); case ISD::ROTL: return DAG.getNode(AVRISD::ROLLOOP, dl, VT, N->getOperand(0), N->getOperand(1)); case ISD::ROTR: return DAG.getNode(AVRISD::RORLOOP, dl, VT, N->getOperand(0), N->getOperand(1)); case ISD::SRA: return DAG.getNode(AVRISD::ASRLOOP, dl, VT, N->getOperand(0), N->getOperand(1)); } } uint64_t ShiftAmount = cast(N->getOperand(1))->getZExtValue(); SDValue Victim = N->getOperand(0); switch (Op.getOpcode()) { case ISD::SRA: Opc8 = AVRISD::ASR; break; case ISD::ROTL: Opc8 = AVRISD::ROL; break; case ISD::ROTR: Opc8 = AVRISD::ROR; break; case ISD::SRL: Opc8 = AVRISD::LSR; break; case ISD::SHL: Opc8 = AVRISD::LSL; break; default: llvm_unreachable("Invalid shift opcode"); } while (ShiftAmount--) { Victim = DAG.getNode(Opc8, dl, VT, Victim); } return Victim; } SDValue AVRTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { unsigned Opcode = Op->getOpcode(); assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) && "Invalid opcode for Div/Rem lowering"); bool IsSigned = (Opcode == ISD::SDIVREM); EVT VT = Op->getValueType(0); Type *Ty = VT.getTypeForEVT(*DAG.getContext()); RTLIB::Libcall LC; switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC = IsSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC = IsSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC = IsSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; case MVT::i64: LC = IsSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; case MVT::i128: LC = IsSigned ? RTLIB::SDIVREM_I128 : RTLIB::UDIVREM_I128; break; } SDValue InChain = DAG.getEntryNode(); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (SDValue const &Value : Op->op_values()) { Entry.Node = Value; Entry.Ty = Value.getValueType().getTypeForEVT(*DAG.getContext()); Entry.IsSExt = IsSigned; Entry.IsZExt = !IsSigned; Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy(DAG.getDataLayout())); Type *RetTy = (Type *)StructType::get(Ty, Ty); SDLoc dl(Op); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(InChain) .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setInRegister() .setSExtResult(IsSigned) .setZExtResult(!IsSigned); std::pair CallInfo = LowerCallTo(CLI); return CallInfo.first; } SDValue AVRTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { auto DL = DAG.getDataLayout(); const GlobalValue *GV = cast(Op)->getGlobal(); int64_t Offset = cast(Op)->getOffset(); // Create the TargetGlobalAddress node, folding in the constant offset. 
SDValue Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op), getPointerTy(DL), Offset); return DAG.getNode(AVRISD::WRAPPER, SDLoc(Op), getPointerTy(DL), Result); } SDValue AVRTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { auto DL = DAG.getDataLayout(); const BlockAddress *BA = cast(Op)->getBlockAddress(); SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy(DL)); return DAG.getNode(AVRISD::WRAPPER, SDLoc(Op), getPointerTy(DL), Result); } /// IntCCToAVRCC - Convert a DAG integer condition code to an AVR CC. static AVRCC::CondCodes intCCToAVRCC(ISD::CondCode CC) { switch (CC) { default: llvm_unreachable("Unknown condition code!"); case ISD::SETEQ: return AVRCC::COND_EQ; case ISD::SETNE: return AVRCC::COND_NE; case ISD::SETGE: return AVRCC::COND_GE; case ISD::SETLT: return AVRCC::COND_LT; case ISD::SETUGE: return AVRCC::COND_SH; case ISD::SETULT: return AVRCC::COND_LO; } } /// Returns appropriate AVR CMP/CMPC nodes and corresponding condition code for /// the given operands. SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AVRcc, SelectionDAG &DAG, SDLoc DL) const { SDValue Cmp; EVT VT = LHS.getValueType(); bool UseTest = false; switch (CC) { default: break; case ISD::SETLE: { // Swap operands and reverse the branching condition. std::swap(LHS, RHS); CC = ISD::SETGE; break; } case ISD::SETGT: { if (const ConstantSDNode *C = dyn_cast(RHS)) { switch (C->getSExtValue()) { case -1: { // When doing lhs > -1 use a tst instruction on the top part of lhs // and use brpl instead of using a chain of cp/cpc. UseTest = true; AVRcc = DAG.getConstant(AVRCC::COND_PL, DL, MVT::i8); break; } case 0: { // Turn lhs > 0 into 0 < lhs since 0 can be materialized with // __zero_reg__ in lhs. RHS = LHS; LHS = DAG.getConstant(0, DL, VT); CC = ISD::SETLT; break; } default: { // Turn lhs < rhs with lhs constant into rhs >= lhs+1, this allows // us to fold the constant into the cmp instruction. RHS = DAG.getConstant(C->getSExtValue() + 1, DL, VT); CC = ISD::SETGE; break; } } break; } // Swap operands and reverse the branching condition. std::swap(LHS, RHS); CC = ISD::SETLT; break; } case ISD::SETLT: { if (const ConstantSDNode *C = dyn_cast(RHS)) { switch (C->getSExtValue()) { case 1: { // Turn lhs < 1 into 0 >= lhs since 0 can be materialized with // __zero_reg__ in lhs. RHS = LHS; LHS = DAG.getConstant(0, DL, VT); CC = ISD::SETGE; break; } case 0: { // When doing lhs < 0 use a tst instruction on the top part of lhs // and use brmi instead of using a chain of cp/cpc. UseTest = true; AVRcc = DAG.getConstant(AVRCC::COND_MI, DL, MVT::i8); break; } } } break; } case ISD::SETULE: { // Swap operands and reverse the branching condition. std::swap(LHS, RHS); CC = ISD::SETUGE; break; } case ISD::SETUGT: { // Turn lhs < rhs with lhs constant into rhs >= lhs+1, this allows us to // fold the constant into the cmp instruction. if (const ConstantSDNode *C = dyn_cast(RHS)) { RHS = DAG.getConstant(C->getSExtValue() + 1, DL, VT); CC = ISD::SETUGE; break; } // Swap operands and reverse the branching condition. std::swap(LHS, RHS); CC = ISD::SETULT; break; } } // Expand 32 and 64 bit comparisons with custom CMP and CMPC nodes instead of // using the default and/or/xor expansion code which is much longer. 
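Most of getAVRCmp above is condition-code canonicalization: SETLE/SETULE swap operands, lhs > -1 and lhs < 0 turn into TST with brpl/brmi, lhs > 0 becomes 0 < lhs, and the generic constant case rewrites lhs > C as lhs >= C+1 so the constant folds into the compare instruction. A small standalone check of that last rewrite (invented function name; the wrap-around constant is excluded, as noted in the comment):

#include <cassert>
#include <cstdint>

// "lhs > C" rewritten as "lhs >= C + 1", the SETGT/SETUGT constant case above.
bool gtViaGe(int8_t Lhs, int8_t C) { return Lhs >= static_cast<int8_t>(C + 1); }

int main() {
  // Exhaustive over i8, skipping C == 127 where C + 1 would wrap.
  for (int c = -128; c <= 126; ++c)
    for (int lhs = -128; lhs <= 127; ++lhs)
      assert(gtViaGe(static_cast<int8_t>(lhs), static_cast<int8_t>(c)) == (lhs > c));
  return 0;
}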
if (VT == MVT::i32) { SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS, DAG.getIntPtrConstant(0, DL)); SDValue LHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS, DAG.getIntPtrConstant(1, DL)); SDValue RHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS, DAG.getIntPtrConstant(0, DL)); SDValue RHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS, DAG.getIntPtrConstant(1, DL)); if (UseTest) { // When using tst we only care about the highest part. SDValue Top = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHShi, DAG.getIntPtrConstant(1, DL)); Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top); } else { Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo); Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp); } } else if (VT == MVT::i64) { SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, DAG.getIntPtrConstant(0, DL)); SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, DAG.getIntPtrConstant(1, DL)); SDValue LHS0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_0, DAG.getIntPtrConstant(0, DL)); SDValue LHS1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_0, DAG.getIntPtrConstant(1, DL)); SDValue LHS2 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_1, DAG.getIntPtrConstant(0, DL)); SDValue LHS3 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_1, DAG.getIntPtrConstant(1, DL)); SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, DAG.getIntPtrConstant(0, DL)); SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, DAG.getIntPtrConstant(1, DL)); SDValue RHS0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_0, DAG.getIntPtrConstant(0, DL)); SDValue RHS1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_0, DAG.getIntPtrConstant(1, DL)); SDValue RHS2 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_1, DAG.getIntPtrConstant(0, DL)); SDValue RHS3 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_1, DAG.getIntPtrConstant(1, DL)); if (UseTest) { // When using tst we only care about the highest part. SDValue Top = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS3, DAG.getIntPtrConstant(1, DL)); Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top); } else { Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHS0, RHS0); Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS1, RHS1, Cmp); Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS2, RHS2, Cmp); Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS3, RHS3, Cmp); } } else if (VT == MVT::i8 || VT == MVT::i16) { if (UseTest) { // When using tst we only care about the highest part. Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, (VT == MVT::i8) ? LHS : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS, DAG.getIntPtrConstant(1, DL))); } else { Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHS, RHS); } } else { llvm_unreachable("Invalid comparison size"); } // When using a test instruction AVRcc is already set. 
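The i32/i64 branches above split both operands into 16-bit pieces and chain AVRISD::CMP followed by AVRISD::CMPC, mirroring AVR's cp/cpc pair: compare the lowest piece, then propagate the borrow and a cumulative zero flag upward. A standalone model with invented names and simplified flags, checking that the chain answers unsigned-less-than and equality the same way a full-width compare does:

#include <cassert>
#include <cstdint>

struct Flags { bool Carry; bool Zero; };

Flags cp(uint16_t A, uint16_t B) {            // AVRISD::CMP on the low piece
  return {A < B, A == B};
}

Flags cpc(uint16_t A, uint16_t B, Flags F) {  // AVRISD::CMPC on a higher piece
  uint32_t Rhs = static_cast<uint32_t>(B) + (F.Carry ? 1 : 0);
  bool Carry = static_cast<uint32_t>(A) < Rhs;
  uint32_t Diff = (static_cast<uint32_t>(A) - Rhs) & 0xFFFF;   // modular result, like the Z flag
  return {Carry, F.Zero && Diff == 0};
}

int main() {
  auto cmp32 = [](uint32_t A, uint32_t B) {
    Flags F = cp(A & 0xFFFF, B & 0xFFFF);     // LHSlo vs RHSlo
    return cpc(A >> 16, B >> 16, F);          // LHShi vs RHShi, borrow chained
  };
  for (uint64_t a : {0u, 1u, 0xFFFFu, 0x10000u, 0x12345678u, 0xFFFFFFFFu})
    for (uint64_t b : {0u, 1u, 0xFFFFu, 0x10000u, 0x87654321u, 0xFFFFFFFFu}) {
      Flags F = cmp32(static_cast<uint32_t>(a), static_cast<uint32_t>(b));
      assert(F.Carry == (a < b));   // unsigned less-than after the chain
      assert(F.Zero == (a == b));   // equality after the chain
    }
  return 0;
}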
if (!UseTest) { AVRcc = DAG.getConstant(intCCToAVRCC(CC), DL, MVT::i8); } return Cmp; } SDValue AVRTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); SDValue RHS = Op.getOperand(3); SDValue Dest = Op.getOperand(4); SDLoc dl(Op); SDValue TargetCC; SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, dl); return DAG.getNode(AVRISD::BRCOND, dl, MVT::Other, Chain, Dest, TargetCC, Cmp); } SDValue AVRTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue TrueV = Op.getOperand(2); SDValue FalseV = Op.getOperand(3); ISD::CondCode CC = cast(Op.getOperand(4))->get(); SDLoc dl(Op); SDValue TargetCC; SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, dl); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); SDValue Ops[] = {TrueV, FalseV, TargetCC, Cmp}; return DAG.getNode(AVRISD::SELECT_CC, dl, VTs, Ops); } SDValue AVRTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); ISD::CondCode CC = cast(Op.getOperand(2))->get(); SDLoc DL(Op); SDValue TargetCC; SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, DL); SDValue TrueV = DAG.getConstant(1, DL, Op.getValueType()); SDValue FalseV = DAG.getConstant(0, DL, Op.getValueType()); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); SDValue Ops[] = {TrueV, FalseV, TargetCC, Cmp}; return DAG.getNode(AVRISD::SELECT_CC, DL, VTs, Ops); } SDValue AVRTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { const MachineFunction &MF = DAG.getMachineFunction(); const AVRMachineFunctionInfo *AFI = MF.getInfo(); const Value *SV = cast(Op.getOperand(2))->getValue(); auto DL = DAG.getDataLayout(); SDLoc dl(Op); // Vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDValue FI = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), getPointerTy(DL)); return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), MachinePointerInfo(SV), 0); } SDValue AVRTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::SHL: case ISD::SRA: case ISD::SRL: case ISD::ROTL: case ISD::ROTR: return LowerShifts(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::SDIVREM: case ISD::UDIVREM: return LowerDivRem(Op, DAG); } return SDValue(); } /// Replace a node with an illegal result type /// with a new node built out of custom code. void AVRTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { SDLoc DL(N); switch (N->getOpcode()) { case ISD::ADD: { // Convert add (x, imm) into sub (x, -imm). 
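LowerSETCC above reuses the same machinery as LowerSELECT_CC: the boolean result of a comparison is simply a select between the constants 1 and 0 on the same compare, so both node kinds funnel into the single AVRISD::SELECT_CC pseudo. A trivial standalone restatement (invented names):

#include <cassert>

int selectCC(bool CmpResult, int TrueV, int FalseV) {
  return CmpResult ? TrueV : FalseV;          // what the SELECT_CC pseudo computes
}

int setCC(bool CmpResult) {
  return selectCC(CmpResult, /*TrueV=*/1, /*FalseV=*/0);   // LowerSETCC's constants
}

int main() {
  assert(setCC(5 > 3) == 1);
  assert(setCC(2 > 3) == 0);
  return 0;
}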
if (const ConstantSDNode *C = dyn_cast(N->getOperand(1))) { SDValue Sub = DAG.getNode( ISD::SUB, DL, N->getValueType(0), N->getOperand(0), DAG.getConstant(-C->getAPIntValue(), DL, C->getValueType(0))); Results.push_back(Sub); } break; } default: { SDValue Res = LowerOperation(SDValue(N, 0), DAG); for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I) Results.push_back(Res.getValue(I)); break; } } } /// Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. bool AVRTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const { int64_t Offs = AM.BaseOffs; // Allow absolute addresses. if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && Offs == 0) { return true; } // Flash memory instructions only allow zero offsets. if (isa(Ty) && AS == AVR::ProgramMemory) { return false; } // Allow reg+<6bit> offset. if (Offs < 0) Offs = -Offs; if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 0 && isUInt<6>(Offs)) { return true; } return false; } /// Returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. bool AVRTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { EVT VT; const SDNode *Op; SDLoc DL(N); if (const LoadSDNode *LD = dyn_cast(N)) { VT = LD->getMemoryVT(); Op = LD->getBasePtr().getNode(); if (LD->getExtensionType() != ISD::NON_EXTLOAD) return false; if (AVR::isProgramMemoryAccess(LD)) { return false; } } else if (const StoreSDNode *ST = dyn_cast(N)) { VT = ST->getMemoryVT(); Op = ST->getBasePtr().getNode(); if (AVR::isProgramMemoryAccess(ST)) { return false; } } else { return false; } if (VT != MVT::i8 && VT != MVT::i16) { return false; } if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) { return false; } if (const ConstantSDNode *RHS = dyn_cast(Op->getOperand(1))) { int RHSC = RHS->getSExtValue(); if (Op->getOpcode() == ISD::SUB) RHSC = -RHSC; if ((VT == MVT::i16 && RHSC != -2) || (VT == MVT::i8 && RHSC != -1)) { return false; } Base = Op->getOperand(0); Offset = DAG.getConstant(RHSC, DL, MVT::i8); AM = ISD::PRE_DEC; return true; } return false; } /// Returns true by value, base pointer and /// offset pointer and addressing mode by reference if this node can be /// combined with a load / store to form a post-indexed load / store. 
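isLegalAddressingMode above admits two shapes: an absolute global address with no base register and no offset, and a base register plus a displacement whose magnitude fits in 6 bits (the ldd/std Y+q and Z+q forms), while program-memory accesses get no displacement at all. A standalone restatement with invented field names; the Scale == 0 requirement and the pointer-type check are left out of the sketch for brevity:

#include <cassert>
#include <cstdint>

struct Addr {
  bool HasGlobalBase;   // AM.BaseGV
  bool HasBaseReg;      // AM.HasBaseReg
  int64_t Offset;       // AM.BaseOffs
  bool IsProgramMemory; // address-space check in the real code
};

bool isLegalAVRAddress(const Addr &A) {
  // Absolute addresses (lds/sts) need no register and no offset.
  if (A.HasGlobalBase && !A.HasBaseReg && A.Offset == 0)
    return true;
  // Flash (program memory) accesses only support a zero displacement.
  if (A.IsProgramMemory)
    return false;
  // Otherwise: base register plus an unsigned 6-bit displacement.
  int64_t Offs = A.Offset < 0 ? -A.Offset : A.Offset;
  return !A.HasGlobalBase && A.HasBaseReg && Offs <= 63;
}

int main() {
  assert(isLegalAVRAddress({true, false, 0, false}));    // lds r24, symbol
  assert(isLegalAVRAddress({false, true, 63, false}));   // ldd r24, Y+63
  assert(!isLegalAVRAddress({false, true, 64, false}));  // displacement too big
  assert(!isLegalAVRAddress({false, true, 1, true}));    // flash: no offset
  return 0;
}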
bool AVRTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { EVT VT; SDLoc DL(N); if (const LoadSDNode *LD = dyn_cast(N)) { VT = LD->getMemoryVT(); if (LD->getExtensionType() != ISD::NON_EXTLOAD) return false; } else if (const StoreSDNode *ST = dyn_cast(N)) { VT = ST->getMemoryVT(); if (AVR::isProgramMemoryAccess(ST)) { return false; } } else { return false; } if (VT != MVT::i8 && VT != MVT::i16) { return false; } if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) { return false; } if (const ConstantSDNode *RHS = dyn_cast(Op->getOperand(1))) { int RHSC = RHS->getSExtValue(); if (Op->getOpcode() == ISD::SUB) RHSC = -RHSC; if ((VT == MVT::i16 && RHSC != 2) || (VT == MVT::i8 && RHSC != 1)) { return false; } Base = Op->getOperand(0); Offset = DAG.getConstant(RHSC, DL, MVT::i8); AM = ISD::POST_INC; return true; } return false; } bool AVRTargetLowering::isOffsetFoldingLegal( const GlobalAddressSDNode *GA) const { return true; } //===----------------------------------------------------------------------===// // Formal Arguments Calling Convention Implementation //===----------------------------------------------------------------------===// #include "AVRGenCallingConv.inc" /// For each argument in a function store the number of pieces it is composed /// of. static void parseFunctionArgs(const SmallVectorImpl &Ins, SmallVectorImpl &Out) { for (const ISD::InputArg &Arg : Ins) { if(Arg.PartOffset > 0) continue; unsigned Bytes = ((Arg.ArgVT.getSizeInBits()) + 7) / 8; Out.push_back((Bytes + 1) / 2); } } /// For external symbols there is no function prototype information so we /// have to rely directly on argument sizes. static void parseExternFuncCallArgs(const SmallVectorImpl &In, SmallVectorImpl &Out) { for (unsigned i = 0, e = In.size(); i != e;) { unsigned Size = 0; unsigned Offset = 0; while ((i != e) && (In[i].PartOffset == Offset)) { Offset += In[i].VT.getStoreSize(); ++i; ++Size; } Out.push_back(Size); } } static StringRef getFunctionName(TargetLowering::CallLoweringInfo &CLI) { SDValue Callee = CLI.Callee; if (const ExternalSymbolSDNode *G = dyn_cast(Callee)) { return G->getSymbol(); } if (const GlobalAddressSDNode *G = dyn_cast(Callee)) { return G->getGlobal()->getName(); } llvm_unreachable("don't know how to get the name for this callee"); } /// Analyze incoming and outgoing function arguments. We need custom C++ code /// to handle special constraints in the ABI like reversing the order of the /// pieces of splitted arguments. In addition, all pieces of a certain argument /// have to be passed either using registers or the stack but never mixing both. static void analyzeStandardArguments(TargetLowering::CallLoweringInfo *CLI, const Function *F, const DataLayout *TD, const SmallVectorImpl *Outs, const SmallVectorImpl *Ins, CallingConv::ID CallConv, SmallVectorImpl &ArgLocs, CCState &CCInfo, bool IsCall, bool IsVarArg) { static const MCPhysReg RegList8[] = {AVR::R24, AVR::R22, AVR::R20, AVR::R18, AVR::R16, AVR::R14, AVR::R12, AVR::R10, AVR::R8}; static const MCPhysReg RegList16[] = {AVR::R25R24, AVR::R23R22, AVR::R21R20, AVR::R19R18, AVR::R17R16, AVR::R15R14, AVR::R13R12, AVR::R11R10, AVR::R9R8}; if (IsVarArg) { // Variadic functions do not need all the analisys below. if (IsCall) { CCInfo.AnalyzeCallOperands(*Outs, ArgCC_AVR_Vararg); } else { CCInfo.AnalyzeFormalArguments(*Ins, ArgCC_AVR_Vararg); } return; } // Fill in the Args array which will contain original argument sizes. 
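getPostIndexedAddressParts above only folds a pointer update into the load/store when the update equals the access size, which is exactly what the X+ post-increment forms provide: +1 for an i8 access, +2 for an i16 access (the pre-indexed variant earlier accepts -1/-2 for PRE_DEC). A small standalone restatement with invented names:

#include <cassert>
#include <cstdint>

bool canFoldPostIncrement(unsigned AccessBits, bool IsSub, int64_t Step) {
  if (IsSub)
    Step = -Step;                          // the code negates RHSC for ISD::SUB
  if (AccessBits == 8)
    return Step == 1;
  if (AccessBits == 16)
    return Step == 2;
  return false;                            // only i8 and i16 accesses qualify
}

int main() {
  assert(canFoldPostIncrement(8, /*IsSub=*/false, 1));    // ld r24, X+
  assert(canFoldPostIncrement(16, /*IsSub=*/false, 2));   // two-byte access
  assert(!canFoldPostIncrement(16, /*IsSub=*/false, 1));  // wrong step size
  assert(!canFoldPostIncrement(8, /*IsSub=*/true, 1));    // p = p - 1 belongs to PRE_DEC
  return 0;
}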
SmallVector Args; if (IsCall) { parseExternFuncCallArgs(*Outs, Args); } else { assert(F != nullptr && "function should not be null"); parseFunctionArgs(*Ins, Args); } unsigned RegsLeft = array_lengthof(RegList8), ValNo = 0; // Variadic functions always use the stack. bool UsesStack = false; for (unsigned i = 0, pos = 0, e = Args.size(); i != e; ++i) { unsigned Size = Args[i]; // If we have a zero-sized argument, don't attempt to lower it. // AVR-GCC does not support zero-sized arguments and so we need not // worry about ABI compatibility. if (Size == 0) continue; MVT LocVT = (IsCall) ? (*Outs)[pos].VT : (*Ins)[pos].VT; // If we have plenty of regs to pass the whole argument do it. if (!UsesStack && (Size <= RegsLeft)) { const MCPhysReg *RegList = (LocVT == MVT::i16) ? RegList16 : RegList8; for (unsigned j = 0; j != Size; ++j) { unsigned Reg = CCInfo.AllocateReg( ArrayRef(RegList, array_lengthof(RegList8))); CCInfo.addLoc( CCValAssign::getReg(ValNo++, LocVT, Reg, LocVT, CCValAssign::Full)); --RegsLeft; } // Reverse the order of the pieces to agree with the "big endian" format // required in the calling convention ABI. std::reverse(ArgLocs.begin() + pos, ArgLocs.begin() + pos + Size); } else { // Pass the rest of arguments using the stack. UsesStack = true; for (unsigned j = 0; j != Size; ++j) { unsigned Offset = CCInfo.AllocateStack( TD->getTypeAllocSize(EVT(LocVT).getTypeForEVT(CCInfo.getContext())), TD->getABITypeAlignment( EVT(LocVT).getTypeForEVT(CCInfo.getContext()))); CCInfo.addLoc(CCValAssign::getMem(ValNo++, LocVT, Offset, LocVT, CCValAssign::Full)); } } pos += Size; } } static void analyzeBuiltinArguments(TargetLowering::CallLoweringInfo &CLI, const Function *F, const DataLayout *TD, const SmallVectorImpl *Outs, const SmallVectorImpl *Ins, CallingConv::ID CallConv, SmallVectorImpl &ArgLocs, CCState &CCInfo, bool IsCall, bool IsVarArg) { StringRef FuncName = getFunctionName(CLI); if (FuncName.startswith("__udivmod") || FuncName.startswith("__divmod")) { CCInfo.AnalyzeCallOperands(*Outs, ArgCC_AVR_BUILTIN_DIV); } else { analyzeStandardArguments(&CLI, F, TD, Outs, Ins, CallConv, ArgLocs, CCInfo, IsCall, IsVarArg); } } static void analyzeArguments(TargetLowering::CallLoweringInfo *CLI, const Function *F, const DataLayout *TD, const SmallVectorImpl *Outs, const SmallVectorImpl *Ins, CallingConv::ID CallConv, SmallVectorImpl &ArgLocs, CCState &CCInfo, bool IsCall, bool IsVarArg) { switch (CallConv) { case CallingConv::AVR_BUILTIN: { analyzeBuiltinArguments(*CLI, F, TD, Outs, Ins, CallConv, ArgLocs, CCInfo, IsCall, IsVarArg); return; } default: { analyzeStandardArguments(CLI, F, TD, Outs, Ins, CallConv, ArgLocs, CCInfo, IsCall, IsVarArg); return; } } } SDValue AVRTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); auto DL = DAG.getDataLayout(); // Assign locations to all of the incoming arguments. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); analyzeArguments(nullptr, &MF.getFunction(), &DL, 0, &Ins, CallConv, ArgLocs, CCInfo, false, isVarArg); SDValue ArgValue; for (CCValAssign &VA : ArgLocs) { // Arguments stored on registers. 
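analyzeStandardArguments above measures each argument in 16-bit pieces, hands out nine register slots (r25:r24 down to r9:r8), requires an argument to fit entirely in the remaining slots, and sends everything after the first spill to the stack. A standalone model of that register/stack split with invented names; the per-piece register numbering and the big-endian piece reversal are not modeled:

#include <cassert>
#include <vector>

struct ArgPlacement { bool InRegisters; };

std::vector<ArgPlacement> placeArguments(const std::vector<unsigned> &PieceCounts) {
  unsigned RegsLeft = 9;          // array_lengthof(RegList8) in the real code
  bool UsesStack = false;
  std::vector<ArgPlacement> Out;
  for (unsigned Size : PieceCounts) {
    if (!UsesStack && Size <= RegsLeft) {
      RegsLeft -= Size;           // whole argument assigned to registers
      Out.push_back({true});
    } else {
      UsesStack = true;           // this and every later argument uses the stack
      Out.push_back({false});
    }
  }
  return Out;
}

int main() {
  // e.g. f(int32_t, int32_t, int32_t, int32_t, int32_t): two pieces each.
  auto P = placeArguments({2, 2, 2, 2, 2});
  assert(P[0].InRegisters && P[1].InRegisters && P[2].InRegisters && P[3].InRegisters);
  assert(!P[4].InRegisters);      // only one slot left, so the fifth i32 spills
  return 0;
}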
if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); const TargetRegisterClass *RC; if (RegVT == MVT::i8) { RC = &AVR::GPR8RegClass; } else if (RegVT == MVT::i16) { RC = &AVR::DREGSRegClass; } else { llvm_unreachable("Unknown argument type!"); } unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); // :NOTE: Clang should not promote any i8 into i16 but for safety the // following code will handle zexts or sexts generated by other // front ends. Otherwise: // If this is an 8 bit value, it is really passed promoted // to 16 bits. Insert an assert[sz]ext to capture this, then // truncate to the right size. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); break; case CCValAssign::SExt: ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); break; case CCValAssign::ZExt: ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); break; } InVals.push_back(ArgValue); } else { // Sanity check. assert(VA.isMemLoc()); EVT LocVT = VA.getLocVT(); // Create the frame index object for this incoming parameter. int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, VA.getLocMemOffset(), true); // Create the SelectionDAG nodes corresponding to a load // from this parameter. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DL)); InVals.push_back(DAG.getLoad(LocVT, dl, Chain, FIN, MachinePointerInfo::getFixedStack(MF, FI), 0)); } } // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { unsigned StackSize = CCInfo.getNextStackOffset(); AVRMachineFunctionInfo *AFI = MF.getInfo(); AFI->setVarArgsFrameIndex(MFI.CreateFixedObject(2, StackSize, true)); } return Chain; } //===----------------------------------------------------------------------===// // Call Calling Convention Implementation //===----------------------------------------------------------------------===// SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &DL = CLI.DL; SmallVectorImpl &Outs = CLI.Outs; SmallVectorImpl &OutVals = CLI.OutVals; SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool isVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); // AVR does not yet support tail call optimization. isTailCall = false; // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. 
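The AssertSext/AssertZext plus TRUNCATE pair above records that a promoted i8 argument really does carry its extension in the upper half of the 16-bit location, so truncating back to 8 bits is lossless. A standalone sketch of that invariant (invented function name):

#include <cassert>
#include <cstdint>

int8_t readPromotedI8(int16_t LocValue) {
  // AssertSext: the value is claimed to be a sign-extended i8.
  assert(LocValue == static_cast<int16_t>(static_cast<int8_t>(LocValue)));
  return static_cast<int8_t>(LocValue);   // the TRUNCATE node
}

int main() {
  assert(readPromotedI8(-5) == -5);
  assert(readPromotedI8(100) == 100);
  return 0;
}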
const Function *F = nullptr; if (const GlobalAddressSDNode *G = dyn_cast(Callee)) { const GlobalValue *GV = G->getGlobal(); F = cast(GV); Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(DAG.getDataLayout())); } else if (const ExternalSymbolSDNode *ES = dyn_cast(Callee)) { Callee = DAG.getTargetExternalSymbol(ES->getSymbol(), getPointerTy(DAG.getDataLayout())); } analyzeArguments(&CLI, F, &DAG.getDataLayout(), &Outs, 0, CallConv, ArgLocs, CCInfo, true, isVarArg); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL); SmallVector, 8> RegsToPass; // First, walk the register assignments, inserting copies. unsigned AI, AE; bool HasStackArgs = false; for (AI = 0, AE = ArgLocs.size(); AI != AE; ++AI) { CCValAssign &VA = ArgLocs[AI]; EVT RegVT = VA.getLocVT(); SDValue Arg = OutVals[AI]; // Promote the value if needed. With Clang this should not happen. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, RegVT, Arg); break; case CCValAssign::ZExt: Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, RegVT, Arg); break; case CCValAssign::AExt: Arg = DAG.getNode(ISD::ANY_EXTEND, DL, RegVT, Arg); break; case CCValAssign::BCvt: Arg = DAG.getNode(ISD::BITCAST, DL, RegVT, Arg); break; } // Stop when we encounter a stack argument, we need to process them // in reverse order in the loop below. if (VA.isMemLoc()) { HasStackArgs = true; break; } // Arguments that can be passed on registers must be kept in the RegsToPass // vector. RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } // Second, stack arguments have to walked in reverse order by inserting // chained stores, this ensures their order is not changed by the scheduler // and that the push instruction sequence generated is correct, otherwise they // can be freely intermixed. if (HasStackArgs) { for (AE = AI, AI = ArgLocs.size(); AI != AE; --AI) { unsigned Loc = AI - 1; CCValAssign &VA = ArgLocs[Loc]; SDValue Arg = OutVals[Loc]; assert(VA.isMemLoc()); // SP points to one stack slot further so add one to adjust it. SDValue PtrOff = DAG.getNode( ISD::ADD, DL, getPointerTy(DAG.getDataLayout()), DAG.getRegister(AVR::SP, getPointerTy(DAG.getDataLayout())), DAG.getIntPtrConstant(VA.getLocMemOffset() + 1, DL)); Chain = DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo::getStack(MF, VA.getLocMemOffset()), 0); } } // Build a sequence of copy-to-reg nodes chained together with token chain and // flag operands which copy the outgoing args into registers. The InFlag in // necessary since all emited instructions must be stuck together. SDValue InFlag; for (auto Reg : RegsToPass) { Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, InFlag); InFlag = Chain.getValue(1); } // Returns a chain & a flag for retval copy to use. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector Ops; Ops.push_back(Chain); Ops.push_back(Callee); // Add argument registers to the end of the list so that they are known live // into the call. for (auto Reg : RegsToPass) { Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); } // Add a register mask operand representing the call-preserved registers. 
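The stack-argument loop in LowerCall above walks the remaining locations in reverse so the chained stores keep the push order, and each store address is SP plus the location offset plus one, per the "SP points to one stack slot further" comment. A standalone model of the address computation (invented names, arbitrary example SP value):

#include <cassert>
#include <cstdint>
#include <vector>

struct StackStore { unsigned ArgIndex; uint16_t Address; };

std::vector<StackStore> buildStackStores(uint16_t SP,
                                         const std::vector<uint16_t> &Offsets) {
  std::vector<StackStore> Stores;
  for (unsigned I = Offsets.size(); I-- > 0;)          // reverse walk, as above
    Stores.push_back({I, static_cast<uint16_t>(SP + Offsets[I] + 1)});
  return Stores;
}

int main() {
  // Two stack arguments at call-frame offsets 0 and 2, with SP == 0x08F0.
  auto Stores = buildStackStores(0x08F0, {0, 2});
  assert(Stores[0].ArgIndex == 1 && Stores[0].Address == 0x08F3);
  assert(Stores[1].ArgIndex == 0 && Stores[1].Address == 0x08F1);
  return 0;
}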
const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine(); - const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); if (InFlag.getNode()) { Ops.push_back(InFlag); } Chain = DAG.getNode(AVRISD::CALL, DL, NodeTys, Ops); InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true), DAG.getIntPtrConstant(0, DL, true), InFlag, DL); if (!Ins.empty()) { InFlag = Chain.getValue(1); } // Handle result values, copying them out of physregs into vregs that we // return. return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, DL, DAG, InVals); } /// Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. /// SDValue AVRTargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // Assign locations to each value returned by this call. SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); // Handle runtime calling convs. auto CCFunction = CCAssignFnForReturn(CallConv); CCInfo.AnalyzeCallResult(Ins, CCFunction); if (CallConv != CallingConv::AVR_BUILTIN && RVLocs.size() > 1) { // Reverse splitted return values to get the "big endian" format required // to agree with the calling convention ABI. std::reverse(RVLocs.begin(), RVLocs.end()); } // Copy all of the result registers out of their specified physreg. for (CCValAssign const &RVLoc : RVLocs) { Chain = DAG.getCopyFromReg(Chain, dl, RVLoc.getLocReg(), RVLoc.getValVT(), InFlag) .getValue(1); InFlag = Chain.getValue(2); InVals.push_back(Chain.getValue(0)); } return Chain; } //===----------------------------------------------------------------------===// // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// CCAssignFn *AVRTargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const { switch (CC) { case CallingConv::AVR_BUILTIN: return RetCC_AVR_BUILTIN; default: return RetCC_AVR; } } bool AVRTargetLowering::CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); auto CCFunction = CCAssignFnForReturn(CallConv); return CCInfo.CheckReturn(Outs, CCFunction); } SDValue AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to locations. SmallVector RVLocs; // CCState - Info about the registers and stack slot. CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); // Analyze return values. auto CCFunction = CCAssignFnForReturn(CallConv); CCInfo.AnalyzeReturn(Outs, CCFunction); // If this is the first return lowered for this function, add the regs to // the liveout set for the function. 
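The functional change in these hunks swaps TM.getSubtargetImpl()->getRegisterInfo() (and getInstrInfo()) for queries on the Subtarget reference the header now stores as a protected member. A generic sketch of that shape with invented class names; the stated motivation, avoiding the extra indirection through the target machine, is an inference rather than something the patch says:

struct RegisterInfo {};

struct Subtarget {
  RegisterInfo RI;
  const RegisterInfo *getRegisterInfo() const { return &RI; }
};

class Lowering {
public:
  explicit Lowering(const Subtarget &STI) : STI(STI) {}
  const RegisterInfo *regInfoForCall() const {
    return STI.getRegisterInfo();   // was: TM.getSubtargetImpl()->getRegisterInfo()
  }
private:
  const Subtarget &STI;             // mirrors the protected member added in the header below
};

int main() {
  Subtarget ST;
  Lowering L(ST);
  return L.regInfoForCall() == ST.getRegisterInfo() ? 0 : 1;
}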
MachineFunction &MF = DAG.getMachineFunction(); unsigned e = RVLocs.size(); // Reverse splitted return values to get the "big endian" format required // to agree with the calling convention ABI. if (e > 1) { std::reverse(RVLocs.begin(), RVLocs.end()); } SDValue Flag; SmallVector RetOps(1, Chain); // Copy the result values into the output registers. for (unsigned i = 0; i != e; ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag); // Guarantee that all emitted copies are stuck together with flags. Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } // Don't emit the ret/reti instruction when the naked attribute is present in // the function being compiled. if (MF.getFunction().getAttributes().hasAttribute( AttributeList::FunctionIndex, Attribute::Naked)) { return Chain; } unsigned RetOpc = (CallConv == CallingConv::AVR_INTR || CallConv == CallingConv::AVR_SIGNAL) ? AVRISD::RETI_FLAG : AVRISD::RET_FLAG; RetOps[0] = Chain; // Update chain. if (Flag.getNode()) { RetOps.push_back(Flag); } return DAG.getNode(RetOpc, dl, MVT::Other, RetOps); } //===----------------------------------------------------------------------===// // Custom Inserters //===----------------------------------------------------------------------===// MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI, MachineBasicBlock *BB) const { unsigned Opc; const TargetRegisterClass *RC; bool HasRepeatedOperand = false; MachineFunction *F = BB->getParent(); MachineRegisterInfo &RI = F->getRegInfo(); const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine(); - const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo(); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); switch (MI.getOpcode()) { default: llvm_unreachable("Invalid shift opcode!"); case AVR::Lsl8: Opc = AVR::ADDRdRr; // LSL is an alias of ADD Rd, Rd RC = &AVR::GPR8RegClass; HasRepeatedOperand = true; break; case AVR::Lsl16: Opc = AVR::LSLWRd; RC = &AVR::DREGSRegClass; break; case AVR::Asr8: Opc = AVR::ASRRd; RC = &AVR::GPR8RegClass; break; case AVR::Asr16: Opc = AVR::ASRWRd; RC = &AVR::DREGSRegClass; break; case AVR::Lsr8: Opc = AVR::LSRRd; RC = &AVR::GPR8RegClass; break; case AVR::Lsr16: Opc = AVR::LSRWRd; RC = &AVR::DREGSRegClass; break; case AVR::Rol8: Opc = AVR::ADCRdRr; // ROL is an alias of ADC Rd, Rd RC = &AVR::GPR8RegClass; HasRepeatedOperand = true; break; case AVR::Rol16: Opc = AVR::ROLWRd; RC = &AVR::DREGSRegClass; break; case AVR::Ror8: Opc = AVR::RORRd; RC = &AVR::GPR8RegClass; break; case AVR::Ror16: Opc = AVR::RORWRd; RC = &AVR::DREGSRegClass; break; } const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator I; for (I = BB->getIterator(); I != F->end() && &(*I) != BB; ++I); if (I != F->end()) ++I; // Create loop block. MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(I, LoopBB); F->insert(I, RemBB); // Update machine-CFG edges by transferring all successors of the current // block to the block containing instructions after shift. RemBB->splice(RemBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); RemBB->transferSuccessorsAndUpdatePHIs(BB); // Add adges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB. 
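insertShift above stitches together three blocks for a non-constant shift amount: the original block tests the count and can branch straight past the loop, LoopBB shifts one bit and decrements until the count reaches zero, and RemBB merges the two paths with a PHI. A standalone model of that control flow (invented function name, left shift only):

#include <cassert>
#include <cstdint>

uint8_t shiftLeftLoop(uint8_t Src, uint8_t N) {
  uint8_t Value = Src;
  if (N == 0)                 // BB: cpi N, 0 ; breq RemBB
    return Src;               // RemBB PHI takes the unshifted source
  do {
    Value = static_cast<uint8_t>(Value << 1);  // LoopBB: the single-bit shift
  } while (--N != 0);         // subi ShiftAmt, 1 ; brne LoopBB
  return Value;               // RemBB PHI takes the looped value
}

int main() {
  assert(shiftLeftLoop(0x01, 0) == 0x01);
  assert(shiftLeftLoop(0x01, 4) == 0x10);
  assert(shiftLeftLoop(0x81, 1) == 0x02);
  return 0;
}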
BB->addSuccessor(LoopBB); BB->addSuccessor(RemBB); LoopBB->addSuccessor(RemBB); LoopBB->addSuccessor(LoopBB); unsigned ShiftAmtReg = RI.createVirtualRegister(&AVR::LD8RegClass); unsigned ShiftAmtReg2 = RI.createVirtualRegister(&AVR::LD8RegClass); unsigned ShiftReg = RI.createVirtualRegister(RC); unsigned ShiftReg2 = RI.createVirtualRegister(RC); unsigned ShiftAmtSrcReg = MI.getOperand(2).getReg(); unsigned SrcReg = MI.getOperand(1).getReg(); unsigned DstReg = MI.getOperand(0).getReg(); // BB: // cpi N, 0 // breq RemBB BuildMI(BB, dl, TII.get(AVR::CPIRdK)).addReg(ShiftAmtSrcReg).addImm(0); BuildMI(BB, dl, TII.get(AVR::BREQk)).addMBB(RemBB); // LoopBB: // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB] // ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB] // ShiftReg2 = shift ShiftReg // ShiftAmt2 = ShiftAmt - 1; BuildMI(LoopBB, dl, TII.get(AVR::PHI), ShiftReg) .addReg(SrcReg) .addMBB(BB) .addReg(ShiftReg2) .addMBB(LoopBB); BuildMI(LoopBB, dl, TII.get(AVR::PHI), ShiftAmtReg) .addReg(ShiftAmtSrcReg) .addMBB(BB) .addReg(ShiftAmtReg2) .addMBB(LoopBB); auto ShiftMI = BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2).addReg(ShiftReg); if (HasRepeatedOperand) ShiftMI.addReg(ShiftReg); BuildMI(LoopBB, dl, TII.get(AVR::SUBIRdK), ShiftAmtReg2) .addReg(ShiftAmtReg) .addImm(1); BuildMI(LoopBB, dl, TII.get(AVR::BRNEk)).addMBB(LoopBB); // RemBB: // DestReg = phi [%SrcReg, BB], [%ShiftReg, LoopBB] BuildMI(*RemBB, RemBB->begin(), dl, TII.get(AVR::PHI), DstReg) .addReg(SrcReg) .addMBB(BB) .addReg(ShiftReg2) .addMBB(LoopBB); MI.eraseFromParent(); // The pseudo instruction is gone now. return RemBB; } static bool isCopyMulResult(MachineBasicBlock::iterator const &I) { if (I->getOpcode() == AVR::COPY) { unsigned SrcReg = I->getOperand(1).getReg(); return (SrcReg == AVR::R0 || SrcReg == AVR::R1); } return false; } // The mul instructions wreak havock on our zero_reg R1. We need to clear it // after the result has been evacuated. This is probably not the best way to do // it, but it works for now. MachineBasicBlock *AVRTargetLowering::insertMul(MachineInstr &MI, MachineBasicBlock *BB) const { const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine(); - const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo(); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); MachineBasicBlock::iterator I(MI); ++I; // in any case insert *after* the mul instruction if (isCopyMulResult(I)) ++I; if (isCopyMulResult(I)) ++I; BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::EORRdRr), AVR::R1) .addReg(AVR::R1) .addReg(AVR::R1); return BB; } MachineBasicBlock * AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const { int Opc = MI.getOpcode(); // Pseudo shift instructions with a non constant shift amount are expanded // into a loop. switch (Opc) { case AVR::Lsl8: case AVR::Lsl16: case AVR::Lsr8: case AVR::Lsr16: case AVR::Rol8: case AVR::Rol16: case AVR::Ror8: case AVR::Ror16: case AVR::Asr8: case AVR::Asr16: return insertShift(MI, MBB); case AVR::MULRdRr: case AVR::MULSRdRr: return insertMul(MI, MBB); } assert((Opc == AVR::Select16 || Opc == AVR::Select8) && "Unexpected instr type to insert"); const AVRInstrInfo &TII = (const AVRInstrInfo &)*MI.getParent() ->getParent() ->getSubtarget() .getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); // To "insert" a SELECT instruction, we insert the diamond // control-flow pattern. 
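insertMul above clears R1 with an EOR after the multiply because, as its comment says, MUL leaves the 16-bit product in R1:R0 while the rest of the generated code treats R1 as the always-zero register. A standalone sketch of why the clear has to come after the result is copied out (invented struct and function names):

#include <cassert>
#include <cstdint>

struct AvrRegs { uint8_t r0 = 0, r1 = 0; };

void mul(AvrRegs &Regs, uint8_t A, uint8_t B) {
  uint16_t P = static_cast<uint16_t>(A) * static_cast<uint16_t>(B);
  Regs.r0 = static_cast<uint8_t>(P);        // low byte of the product
  Regs.r1 = static_cast<uint8_t>(P >> 8);   // high byte clobbers the "zero register"
}

int main() {
  AvrRegs Regs;
  mul(Regs, 200, 3);                        // 600 = 0x0258
  uint8_t ResultLo = Regs.r0;               // the COPY out of R0 that isCopyMulResult skips past
  (void)ResultLo;
  Regs.r1 = 0;                              // the EOR R1, R1 that insertMul emits afterwards
  assert(Regs.r1 == 0 && "later code assumes R1 is always zero");
  return 0;
}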
The incoming instruction knows the // destination vreg to set, the condition code register to branch // on, the true/false values to select between, and a branch opcode // to use. MachineFunction *MF = MBB->getParent(); const BasicBlock *LLVM_BB = MBB->getBasicBlock(); MachineBasicBlock *trueMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *falseMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator I; for (I = MF->begin(); I != MF->end() && &(*I) != MBB; ++I); if (I != MF->end()) ++I; MF->insert(I, trueMBB); MF->insert(I, falseMBB); // Transfer remaining instructions and all successors of the current // block to the block which will contain the Phi node for the // select. trueMBB->splice(trueMBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), MBB->end()); trueMBB->transferSuccessorsAndUpdatePHIs(MBB); AVRCC::CondCodes CC = (AVRCC::CondCodes)MI.getOperand(3).getImm(); BuildMI(MBB, dl, TII.getBrCond(CC)).addMBB(trueMBB); BuildMI(MBB, dl, TII.get(AVR::RJMPk)).addMBB(falseMBB); MBB->addSuccessor(falseMBB); MBB->addSuccessor(trueMBB); // Unconditionally flow back to the true block BuildMI(falseMBB, dl, TII.get(AVR::RJMPk)).addMBB(trueMBB); falseMBB->addSuccessor(trueMBB); // Set up the Phi node to determine where we came from BuildMI(*trueMBB, trueMBB->begin(), dl, TII.get(AVR::PHI), MI.getOperand(0).getReg()) .addReg(MI.getOperand(1).getReg()) .addMBB(MBB) .addReg(MI.getOperand(2).getReg()) .addMBB(falseMBB) ; MI.eraseFromParent(); // The pseudo instruction is gone now. return trueMBB; } //===----------------------------------------------------------------------===// // Inline Asm Support //===----------------------------------------------------------------------===// AVRTargetLowering::ConstraintType AVRTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { // See http://www.nongnu.org/avr-libc/user-manual/inline_asm.html switch (Constraint[0]) { case 'a': // Simple upper registers case 'b': // Base pointer registers pairs case 'd': // Upper register case 'l': // Lower registers case 'e': // Pointer register pairs case 'q': // Stack pointer register case 'r': // Any register case 'w': // Special upper register pairs return C_RegisterClass; case 't': // Temporary register case 'x': case 'X': // Pointer register pair X case 'y': case 'Y': // Pointer register pair Y case 'z': case 'Z': // Pointer register pair Z return C_Register; case 'Q': // A memory address based on Y or Z pointer with displacement. 
return C_Memory; case 'G': // Floating point constant case 'I': // 6-bit positive integer constant case 'J': // 6-bit negative integer constant case 'K': // Integer constant (Range: 2) case 'L': // Integer constant (Range: 0) case 'M': // 8-bit integer constant case 'N': // Integer constant (Range: -1) case 'O': // Integer constant (Range: 8, 16, 24) case 'P': // Integer constant (Range: 1) case 'R': // Integer constant (Range: -6 to 5)x return C_Other; default: break; } } return TargetLowering::getConstraintType(Constraint); } unsigned AVRTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const { // Not sure if this is actually the right thing to do, but we got to do // *something* [agnat] switch (ConstraintCode[0]) { case 'Q': return InlineAsm::Constraint_Q; } return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); } AVRTargetLowering::ConstraintWeight AVRTargetLowering::getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const { ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. // (this behaviour has been copied from the ARM backend) if (!CallOperandVal) { return CW_Default; } // Look at the constraint type. switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); break; case 'd': case 'r': case 'l': weight = CW_Register; break; case 'a': case 'b': case 'e': case 'q': case 't': case 'w': case 'x': case 'X': case 'y': case 'Y': case 'z': case 'Z': weight = CW_SpecificReg; break; case 'G': if (const ConstantFP *C = dyn_cast(CallOperandVal)) { if (C->isZero()) { weight = CW_Constant; } } break; case 'I': if (const ConstantInt *C = dyn_cast(CallOperandVal)) { if (isUInt<6>(C->getZExtValue())) { weight = CW_Constant; } } break; case 'J': if (const ConstantInt *C = dyn_cast(CallOperandVal)) { if ((C->getSExtValue() >= -63) && (C->getSExtValue() <= 0)) { weight = CW_Constant; } } break; case 'K': if (const ConstantInt *C = dyn_cast(CallOperandVal)) { if (C->getZExtValue() == 2) { weight = CW_Constant; } } break; case 'L': if (const ConstantInt *C = dyn_cast(CallOperandVal)) { if (C->getZExtValue() == 0) { weight = CW_Constant; } } break; case 'M': if (const ConstantInt *C = dyn_cast(CallOperandVal)) { if (isUInt<8>(C->getZExtValue())) { weight = CW_Constant; } } break; case 'N': if (const ConstantInt *C = dyn_cast(CallOperandVal)) { if (C->getSExtValue() == -1) { weight = CW_Constant; } } break; case 'O': if (const ConstantInt *C = dyn_cast(CallOperandVal)) { if ((C->getZExtValue() == 8) || (C->getZExtValue() == 16) || (C->getZExtValue() == 24)) { weight = CW_Constant; } } break; case 'P': if (const ConstantInt *C = dyn_cast(CallOperandVal)) { if (C->getZExtValue() == 1) { weight = CW_Constant; } } break; case 'R': if (const ConstantInt *C = dyn_cast(CallOperandVal)) { if ((C->getSExtValue() >= -6) && (C->getSExtValue() <= 5)) { weight = CW_Constant; } } break; case 'Q': weight = CW_Memory; break; } return weight; } std::pair AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { - auto STI = static_cast(this->getTargetMachine()) - .getSubtargetImpl(); - // We only support i8 and i16. 
// //:FIXME: remove this assert for now since it gets sometimes executed // assert((VT == MVT::i16 || VT == MVT::i8) && "Wrong operand type."); if (Constraint.size() == 1) { switch (Constraint[0]) { case 'a': // Simple upper registers r16..r23. return std::make_pair(0U, &AVR::LD8loRegClass); case 'b': // Base pointer registers: y, z. return std::make_pair(0U, &AVR::PTRDISPREGSRegClass); case 'd': // Upper registers r16..r31. return std::make_pair(0U, &AVR::LD8RegClass); case 'l': // Lower registers r0..r15. return std::make_pair(0U, &AVR::GPR8loRegClass); case 'e': // Pointer register pairs: x, y, z. return std::make_pair(0U, &AVR::PTRREGSRegClass); case 'q': // Stack pointer register: SPH:SPL. return std::make_pair(0U, &AVR::GPRSPRegClass); case 'r': // Any register: r0..r31. if (VT == MVT::i8) return std::make_pair(0U, &AVR::GPR8RegClass); assert(VT == MVT::i16 && "inline asm constraint too large"); return std::make_pair(0U, &AVR::DREGSRegClass); case 't': // Temporary register: r0. return std::make_pair(unsigned(AVR::R0), &AVR::GPR8RegClass); case 'w': // Special upper register pairs: r24, r26, r28, r30. return std::make_pair(0U, &AVR::IWREGSRegClass); case 'x': // Pointer register pair X: r27:r26. case 'X': return std::make_pair(unsigned(AVR::R27R26), &AVR::PTRREGSRegClass); case 'y': // Pointer register pair Y: r29:r28. case 'Y': return std::make_pair(unsigned(AVR::R29R28), &AVR::PTRREGSRegClass); case 'z': // Pointer register pair Z: r31:r30. case 'Z': return std::make_pair(unsigned(AVR::R31R30), &AVR::PTRREGSRegClass); default: break; } } - return TargetLowering::getRegForInlineAsmConstraint(STI->getRegisterInfo(), - Constraint, VT); + return TargetLowering::getRegForInlineAsmConstraint( + Subtarget.getRegisterInfo(), Constraint, VT); } void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector &Ops, SelectionDAG &DAG) const { SDValue Result(0, 0); SDLoc DL(Op); EVT Ty = Op.getValueType(); // Currently only support length 1 constraints. if (Constraint.length() != 1) { return; } char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { default: break; // Deal with integers first: case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'R': { const ConstantSDNode *C = dyn_cast(Op); if (!C) { return; } int64_t CVal64 = C->getSExtValue(); uint64_t CUVal64 = C->getZExtValue(); switch (ConstraintLetter) { case 'I': // 0..63 if (!isUInt<6>(CUVal64)) return; Result = DAG.getTargetConstant(CUVal64, DL, Ty); break; case 'J': // -63..0 if (CVal64 < -63 || CVal64 > 0) return; Result = DAG.getTargetConstant(CVal64, DL, Ty); break; case 'K': // 2 if (CUVal64 != 2) return; Result = DAG.getTargetConstant(CUVal64, DL, Ty); break; case 'L': // 0 if (CUVal64 != 0) return; Result = DAG.getTargetConstant(CUVal64, DL, Ty); break; case 'M': // 0..255 if (!isUInt<8>(CUVal64)) return; // i8 type may be printed as a negative number, // e.g. 254 would be printed as -2, // so we force it to i16 at least. 
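For context on where these constraint letters come from: they reach the backend through GNU inline asm in user code. A hedged, illustrative example (not from the patch, and it only compiles for an AVR target): "d" asks for an upper register r16..r31, which subi requires, and "e" would request one of the pointer pairs X/Y/Z handled by the 'e' case above.

#include <stdint.h>

static inline uint8_t add_five(uint8_t v) {
  __asm__("subi %0, -5"      // subi with a negative constant is the usual add-immediate idiom
          : "+d"(v));        // '+d': read-write operand in an upper register
  return v;
}

int main(void) {
  return add_five(7) == 12 ? 0 : 1;
}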
if (Ty.getSimpleVT() == MVT::i8) { Ty = MVT::i16; } Result = DAG.getTargetConstant(CUVal64, DL, Ty); break; case 'N': // -1 if (CVal64 != -1) return; Result = DAG.getTargetConstant(CVal64, DL, Ty); break; case 'O': // 8, 16, 24 if (CUVal64 != 8 && CUVal64 != 16 && CUVal64 != 24) return; Result = DAG.getTargetConstant(CUVal64, DL, Ty); break; case 'P': // 1 if (CUVal64 != 1) return; Result = DAG.getTargetConstant(CUVal64, DL, Ty); break; case 'R': // -6..5 if (CVal64 < -6 || CVal64 > 5) return; Result = DAG.getTargetConstant(CVal64, DL, Ty); break; } break; } case 'G': const ConstantFPSDNode *FC = dyn_cast(Op); if (!FC || !FC->isZero()) return; // Soften float to i8 0 Result = DAG.getTargetConstant(0, DL, MVT::i8); break; } if (Result.getNode()) { Ops.push_back(Result); return; } return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } unsigned AVRTargetLowering::getRegisterByName(const char *RegName, EVT VT, SelectionDAG &DAG) const { unsigned Reg; if (VT == MVT::i8) { Reg = StringSwitch(RegName) .Case("r0", AVR::R0).Case("r1", AVR::R1).Case("r2", AVR::R2) .Case("r3", AVR::R3).Case("r4", AVR::R4).Case("r5", AVR::R5) .Case("r6", AVR::R6).Case("r7", AVR::R7).Case("r8", AVR::R8) .Case("r9", AVR::R9).Case("r10", AVR::R10).Case("r11", AVR::R11) .Case("r12", AVR::R12).Case("r13", AVR::R13).Case("r14", AVR::R14) .Case("r15", AVR::R15).Case("r16", AVR::R16).Case("r17", AVR::R17) .Case("r18", AVR::R18).Case("r19", AVR::R19).Case("r20", AVR::R20) .Case("r21", AVR::R21).Case("r22", AVR::R22).Case("r23", AVR::R23) .Case("r24", AVR::R24).Case("r25", AVR::R25).Case("r26", AVR::R26) .Case("r27", AVR::R27).Case("r28", AVR::R28).Case("r29", AVR::R29) .Case("r30", AVR::R30).Case("r31", AVR::R31) .Case("X", AVR::R27R26).Case("Y", AVR::R29R28).Case("Z", AVR::R31R30) .Default(0); } else { Reg = StringSwitch(RegName) .Case("r0", AVR::R1R0).Case("r2", AVR::R3R2) .Case("r4", AVR::R5R4).Case("r6", AVR::R7R6) .Case("r8", AVR::R9R8).Case("r10", AVR::R11R10) .Case("r12", AVR::R13R12).Case("r14", AVR::R15R14) .Case("r16", AVR::R17R16).Case("r18", AVR::R19R18) .Case("r20", AVR::R21R20).Case("r22", AVR::R23R22) .Case("r24", AVR::R25R24).Case("r26", AVR::R27R26) .Case("r28", AVR::R29R28).Case("r30", AVR::R31R30) .Case("X", AVR::R27R26).Case("Y", AVR::R29R28).Case("Z", AVR::R31R30) .Default(0); } if (Reg) return Reg; report_fatal_error("Invalid register name global variable"); } } // end of namespace llvm Index: vendor/llvm/dist-release_80/lib/Target/AVR/AVRISelLowering.h =================================================================== --- vendor/llvm/dist-release_80/lib/Target/AVR/AVRISelLowering.h (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/AVR/AVRISelLowering.h (revision 348932) @@ -1,174 +1,180 @@ //===-- AVRISelLowering.h - AVR DAG Lowering Interface ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines the interfaces that AVR uses to lower LLVM code into a // selection DAG. // //===----------------------------------------------------------------------===// #ifndef LLVM_AVR_ISEL_LOWERING_H #define LLVM_AVR_ISEL_LOWERING_H #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/TargetLowering.h" namespace llvm { namespace AVRISD { /// AVR Specific DAG Nodes enum NodeType { /// Start the numbering where the builtin ops leave off. 
FIRST_NUMBER = ISD::BUILTIN_OP_END, /// Return from subroutine. RET_FLAG, /// Return from ISR. RETI_FLAG, /// Represents an abstract call instruction, /// which includes a bunch of information. CALL, /// A wrapper node for TargetConstantPool, /// TargetExternalSymbol, and TargetGlobalAddress. WRAPPER, LSL, ///< Logical shift left. LSR, ///< Logical shift right. ASR, ///< Arithmetic shift right. ROR, ///< Bit rotate right. ROL, ///< Bit rotate left. LSLLOOP, ///< A loop of single logical shift left instructions. LSRLOOP, ///< A loop of single logical shift right instructions. ROLLOOP, ///< A loop of single left bit rotate instructions. RORLOOP, ///< A loop of single right bit rotate instructions. ASRLOOP, ///< A loop of single arithmetic shift right instructions. /// AVR conditional branches. Operand 0 is the chain operand, operand 1 /// is the block to branch if condition is true, operand 2 is the /// condition code, and operand 3 is the flag operand produced by a CMP /// or TEST instruction. BRCOND, /// Compare instruction. CMP, /// Compare with carry instruction. CMPC, /// Test for zero or minus instruction. TST, /// Operand 0 and operand 1 are selection variable, operand 2 /// is condition code and operand 3 is flag operand. SELECT_CC }; } // end of namespace AVRISD +class AVRSubtarget; class AVRTargetMachine; /// Performs target lowering for the AVR. class AVRTargetLowering : public TargetLowering { public: - explicit AVRTargetLowering(AVRTargetMachine &TM); + explicit AVRTargetLowering(const AVRTargetMachine &TM, + const AVRSubtarget &STI); public: MVT getScalarShiftAmountTy(const DataLayout &, EVT LHSTy) const override { return MVT::i8; } MVT::SimpleValueType getCmpLibcallReturnType() const override { return MVT::i8; } const char *getTargetNodeName(unsigned Opcode) const override; SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const override; bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I = nullptr) const override; bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override; bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override; ConstraintType getConstraintType(StringRef Constraint) const override; ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override; std::pair getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override; void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector &Ops, SelectionDAG &DAG) const override; unsigned getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const override; private: SDValue getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AVRcc, SelectionDAG &DAG, SDLoc dl) const; SDValue LowerShifts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) 
const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const; bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const override; SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &dl, SelectionDAG &DAG) const override; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const override; SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const override; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; + +protected: + + const AVRSubtarget &Subtarget; private: MachineBasicBlock *insertShift(MachineInstr &MI, MachineBasicBlock *BB) const; MachineBasicBlock *insertMul(MachineInstr &MI, MachineBasicBlock *BB) const; }; } // end namespace llvm #endif // LLVM_AVR_ISEL_LOWERING_H Index: vendor/llvm/dist-release_80/lib/Target/AVR/AVRSubtarget.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/Target/AVR/AVRSubtarget.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/AVR/AVRSubtarget.cpp (revision 348932) @@ -1,47 +1,55 @@ //===-- AVRSubtarget.cpp - AVR Subtarget Information ----------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the AVR specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #include "AVRSubtarget.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Support/TargetRegistry.h" #include "AVR.h" #include "AVRTargetMachine.h" #include "MCTargetDesc/AVRMCTargetDesc.h" #define DEBUG_TYPE "avr-subtarget" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "AVRGenSubtargetInfo.inc" namespace llvm { AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU, - const std::string &FS, AVRTargetMachine &TM) + const std::string &FS, const AVRTargetMachine &TM) : AVRGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(), - TLInfo(TM), TSInfo(), + TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)), TSInfo(), // Subtarget features m_hasSRAM(false), m_hasJMPCALL(false), m_hasIJMPCALL(false), m_hasEIJMPCALL(false), m_hasADDSUBIW(false), m_hasSmallStack(false), m_hasMOVW(false), m_hasLPM(false), m_hasLPMX(false), m_hasELPM(false), m_hasELPMX(false), m_hasSPM(false), m_hasSPMX(false), m_hasDES(false), m_supportsRMW(false), m_supportsMultiplication(false), m_hasBREAK(false), m_hasTinyEncoding(false), ELFArch(false), m_FeatureSetDummy(false) { // Parse features string. 
ParseSubtargetFeatures(CPU, FS); +} + +AVRSubtarget & +AVRSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS, + const TargetMachine &TM) { + // Parse features string. + ParseSubtargetFeatures(CPU, FS); + return *this; } } // end of namespace llvm Index: vendor/llvm/dist-release_80/lib/Target/AVR/AVRSubtarget.h =================================================================== --- vendor/llvm/dist-release_80/lib/Target/AVR/AVRSubtarget.h (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/AVR/AVRSubtarget.h (revision 348932) @@ -1,118 +1,121 @@ //===-- AVRSubtarget.h - Define Subtarget for the AVR -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file declares the AVR specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #ifndef LLVM_AVR_SUBTARGET_H #define LLVM_AVR_SUBTARGET_H #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "AVRFrameLowering.h" #include "AVRISelLowering.h" #include "AVRInstrInfo.h" #include "AVRSelectionDAGInfo.h" #define GET_SUBTARGETINFO_HEADER #include "AVRGenSubtargetInfo.inc" namespace llvm { /// A specific AVR target MCU. class AVRSubtarget : public AVRGenSubtargetInfo { public: //! Creates an AVR subtarget. //! \param TT The target triple. //! \param CPU The CPU to target. //! \param FS The feature string. //! \param TM The target machine. AVRSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, - AVRTargetMachine &TM); + const AVRTargetMachine &TM); const AVRInstrInfo *getInstrInfo() const override { return &InstrInfo; } const TargetFrameLowering *getFrameLowering() const override { return &FrameLowering; } const AVRTargetLowering *getTargetLowering() const override { return &TLInfo; } const AVRSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } const AVRRegisterInfo *getRegisterInfo() const override { return &InstrInfo.getRegisterInfo(); } /// Parses a subtarget feature string, setting appropriate options. /// \note Definition of function is auto generated by `tblgen`. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + + AVRSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS, + const TargetMachine &TM); // Subtarget feature getters. // See AVR.td for details. bool hasSRAM() const { return m_hasSRAM; } bool hasJMPCALL() const { return m_hasJMPCALL; } bool hasIJMPCALL() const { return m_hasIJMPCALL; } bool hasEIJMPCALL() const { return m_hasEIJMPCALL; } bool hasADDSUBIW() const { return m_hasADDSUBIW; } bool hasSmallStack() const { return m_hasSmallStack; } bool hasMOVW() const { return m_hasMOVW; } bool hasLPM() const { return m_hasLPM; } bool hasLPMX() const { return m_hasLPMX; } bool hasELPM() const { return m_hasELPM; } bool hasELPMX() const { return m_hasELPMX; } bool hasSPM() const { return m_hasSPM; } bool hasSPMX() const { return m_hasSPMX; } bool hasDES() const { return m_hasDES; } bool supportsRMW() const { return m_supportsRMW; } bool supportsMultiplication() const { return m_supportsMultiplication; } bool hasBREAK() const { return m_hasBREAK; } bool hasTinyEncoding() const { return m_hasTinyEncoding; } /// Gets the ELF architecture for the e_flags field /// of an ELF object file. 
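The subtarget change above exists because TLInfo is built in the member initializer list and, with the new constructor signature, reads feature bits at construction time, so the feature string has to be parsed before that member is constructed; initializeSubtargetDependencies does the parsing and returns *this (note the constructor body above still calls ParseSubtargetFeatures a second time). A standalone model of the idiom with invented names:

#include <cassert>
#include <string>

class Subtarget;

class Lowering {
public:
  explicit Lowering(const Subtarget &STI);
  bool expandsMul() const { return ExpandMul; }
private:
  bool ExpandMul;
};

class Subtarget {
public:
  explicit Subtarget(const std::string &FS)
      : TLInfo(initializeDependencies(FS)) {}  // parse first, then build TLInfo
  bool supportsMultiplication() const { return HasMul; }
  const Lowering &getLowering() const { return TLInfo; }
private:
  Subtarget &initializeDependencies(const std::string &FS) {
    HasMul = FS.find("+mul") != std::string::npos;  // stand-in for ParseSubtargetFeatures
    return *this;
  }
  bool HasMul = false;   // declared, and so initialized, before TLInfo
  Lowering TLInfo;
};

Lowering::Lowering(const Subtarget &STI) : ExpandMul(!STI.supportsMultiplication()) {}

int main() {
  assert(!Subtarget("avr5,+mul").getLowering().expandsMul());
  assert(Subtarget("avr2").getLowering().expandsMul());
  return 0;
}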
unsigned getELFArch() const { assert(ELFArch != 0 && "every device must have an associate ELF architecture"); return ELFArch; } private: AVRInstrInfo InstrInfo; AVRFrameLowering FrameLowering; AVRTargetLowering TLInfo; AVRSelectionDAGInfo TSInfo; // Subtarget feature settings // See AVR.td for details. bool m_hasSRAM; bool m_hasJMPCALL; bool m_hasIJMPCALL; bool m_hasEIJMPCALL; bool m_hasADDSUBIW; bool m_hasSmallStack; bool m_hasMOVW; bool m_hasLPM; bool m_hasLPMX; bool m_hasELPM; bool m_hasELPMX; bool m_hasSPM; bool m_hasSPMX; bool m_hasDES; bool m_supportsRMW; bool m_supportsMultiplication; bool m_hasBREAK; bool m_hasTinyEncoding; /// The ELF e_flags architecture. unsigned ELFArch; // Dummy member, used by FeatureSet's. We cannot have a SubtargetFeature with // no variable, so we instead bind pseudo features to this variable. bool m_FeatureSetDummy; }; } // end namespace llvm #endif // LLVM_AVR_SUBTARGET_H Index: vendor/llvm/dist-release_80/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp (revision 348932) @@ -1,1284 +1,1287 @@ //===-- MipsTargetStreamer.cpp - Mips Target Streamer Methods -------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file provides Mips specific target streamer methods. // //===----------------------------------------------------------------------===// #include "MipsTargetStreamer.h" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsABIInfo.h" #include "MipsELFStreamer.h" #include "MipsMCExpr.h" #include "MipsMCTargetDesc.h" #include "MipsTargetObjectFile.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" using namespace llvm; namespace { static cl::opt RoundSectionSizes( "mips-round-section-sizes", cl::init(false), cl::desc("Round section sizes up to the section alignment"), cl::Hidden); } // end anonymous namespace MipsTargetStreamer::MipsTargetStreamer(MCStreamer &S) : MCTargetStreamer(S), ModuleDirectiveAllowed(true) { GPRInfoSet = FPRInfoSet = FrameInfoSet = false; } void MipsTargetStreamer::emitDirectiveSetMicroMips() {} void MipsTargetStreamer::emitDirectiveSetNoMicroMips() {} void MipsTargetStreamer::setUsesMicroMips() {} void MipsTargetStreamer::emitDirectiveSetMips16() {} void MipsTargetStreamer::emitDirectiveSetNoMips16() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetReorder() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoReorder() {} void MipsTargetStreamer::emitDirectiveSetMacro() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoMacro() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMsa() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoMsa() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMt() {} void MipsTargetStreamer::emitDirectiveSetNoMt() { forbidModuleDirective(); } void 
MipsTargetStreamer::emitDirectiveSetCRC() {} void MipsTargetStreamer::emitDirectiveSetNoCRC() {} void MipsTargetStreamer::emitDirectiveSetVirt() {} void MipsTargetStreamer::emitDirectiveSetNoVirt() {} void MipsTargetStreamer::emitDirectiveSetGINV() {} void MipsTargetStreamer::emitDirectiveSetNoGINV() {} void MipsTargetStreamer::emitDirectiveSetAt() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetAtWithArg(unsigned RegNo) { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoAt() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveEnd(StringRef Name) {} void MipsTargetStreamer::emitDirectiveEnt(const MCSymbol &Symbol) {} void MipsTargetStreamer::emitDirectiveAbiCalls() {} void MipsTargetStreamer::emitDirectiveNaN2008() {} void MipsTargetStreamer::emitDirectiveNaNLegacy() {} void MipsTargetStreamer::emitDirectiveOptionPic0() {} void MipsTargetStreamer::emitDirectiveOptionPic2() {} void MipsTargetStreamer::emitDirectiveInsn() { forbidModuleDirective(); } void MipsTargetStreamer::emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg) {} void MipsTargetStreamer::emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) {} void MipsTargetStreamer::emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) { } void MipsTargetStreamer::emitDirectiveSetArch(StringRef Arch) { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips0() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips1() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips2() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips3() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips4() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips5() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips32() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips32R2() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips32R3() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips32R5() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips32R6() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips64() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips64R2() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips64R3() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips64R5() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMips64R6() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetPop() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetPush() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetSoftFloat() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetHardFloat() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetDsp() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetDspr2() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {} bool MipsTargetStreamer::emitDirectiveCpRestore( int Offset, function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI) { forbidModuleDirective(); return true; } void MipsTargetStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, const MCSymbol &Sym, bool IsReg) { } void 
MipsTargetStreamer::emitDirectiveCpreturn(unsigned SaveLocation, bool SaveLocationIsRegister) {} void MipsTargetStreamer::emitDirectiveModuleFP() {} void MipsTargetStreamer::emitDirectiveModuleOddSPReg() { if (!ABIFlagsSection.OddSPReg && !ABIFlagsSection.Is32BitABI) report_fatal_error("+nooddspreg is only valid for O32"); } void MipsTargetStreamer::emitDirectiveModuleSoftFloat() {} void MipsTargetStreamer::emitDirectiveModuleHardFloat() {} void MipsTargetStreamer::emitDirectiveModuleMT() {} void MipsTargetStreamer::emitDirectiveModuleCRC() {} void MipsTargetStreamer::emitDirectiveModuleNoCRC() {} void MipsTargetStreamer::emitDirectiveModuleVirt() {} void MipsTargetStreamer::emitDirectiveModuleNoVirt() {} void MipsTargetStreamer::emitDirectiveModuleGINV() {} void MipsTargetStreamer::emitDirectiveModuleNoGINV() {} void MipsTargetStreamer::emitDirectiveSetFp( MipsABIFlagsSection::FpABIKind Value) { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetOddSPReg() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoOddSPReg() { forbidModuleDirective(); } void MipsTargetStreamer::emitR(unsigned Opcode, unsigned Reg0, SMLoc IDLoc, const MCSubtargetInfo *STI) { MCInst TmpInst; TmpInst.setOpcode(Opcode); TmpInst.addOperand(MCOperand::createReg(Reg0)); TmpInst.setLoc(IDLoc); getStreamer().EmitInstruction(TmpInst, *STI); } void MipsTargetStreamer::emitRX(unsigned Opcode, unsigned Reg0, MCOperand Op1, SMLoc IDLoc, const MCSubtargetInfo *STI) { MCInst TmpInst; TmpInst.setOpcode(Opcode); TmpInst.addOperand(MCOperand::createReg(Reg0)); TmpInst.addOperand(Op1); TmpInst.setLoc(IDLoc); getStreamer().EmitInstruction(TmpInst, *STI); } void MipsTargetStreamer::emitRI(unsigned Opcode, unsigned Reg0, int32_t Imm, SMLoc IDLoc, const MCSubtargetInfo *STI) { emitRX(Opcode, Reg0, MCOperand::createImm(Imm), IDLoc, STI); } void MipsTargetStreamer::emitRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, SMLoc IDLoc, const MCSubtargetInfo *STI) { emitRX(Opcode, Reg0, MCOperand::createReg(Reg1), IDLoc, STI); } void MipsTargetStreamer::emitII(unsigned Opcode, int16_t Imm1, int16_t Imm2, SMLoc IDLoc, const MCSubtargetInfo *STI) { MCInst TmpInst; TmpInst.setOpcode(Opcode); TmpInst.addOperand(MCOperand::createImm(Imm1)); TmpInst.addOperand(MCOperand::createImm(Imm2)); TmpInst.setLoc(IDLoc); getStreamer().EmitInstruction(TmpInst, *STI); } void MipsTargetStreamer::emitRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1, MCOperand Op2, SMLoc IDLoc, const MCSubtargetInfo *STI) { MCInst TmpInst; TmpInst.setOpcode(Opcode); TmpInst.addOperand(MCOperand::createReg(Reg0)); TmpInst.addOperand(MCOperand::createReg(Reg1)); TmpInst.addOperand(Op2); TmpInst.setLoc(IDLoc); getStreamer().EmitInstruction(TmpInst, *STI); } void MipsTargetStreamer::emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, unsigned Reg2, SMLoc IDLoc, const MCSubtargetInfo *STI) { emitRRX(Opcode, Reg0, Reg1, MCOperand::createReg(Reg2), IDLoc, STI); } void MipsTargetStreamer::emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm, SMLoc IDLoc, const MCSubtargetInfo *STI) { emitRRX(Opcode, Reg0, Reg1, MCOperand::createImm(Imm), IDLoc, STI); } void MipsTargetStreamer::emitRRIII(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm0, int16_t Imm1, int16_t Imm2, SMLoc IDLoc, const MCSubtargetInfo *STI) { MCInst TmpInst; TmpInst.setOpcode(Opcode); TmpInst.addOperand(MCOperand::createReg(Reg0)); TmpInst.addOperand(MCOperand::createReg(Reg1)); TmpInst.addOperand(MCOperand::createImm(Imm0)); 
TmpInst.addOperand(MCOperand::createImm(Imm1)); TmpInst.addOperand(MCOperand::createImm(Imm2)); TmpInst.setLoc(IDLoc); getStreamer().EmitInstruction(TmpInst, *STI); } void MipsTargetStreamer::emitAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit, const MCSubtargetInfo *STI) { emitRRR(Is64Bit ? Mips::DADDu : Mips::ADDu, DstReg, SrcReg, TrgReg, SMLoc(), STI); } void MipsTargetStreamer::emitDSLL(unsigned DstReg, unsigned SrcReg, int16_t ShiftAmount, SMLoc IDLoc, const MCSubtargetInfo *STI) { if (ShiftAmount >= 32) { emitRRI(Mips::DSLL32, DstReg, SrcReg, ShiftAmount - 32, IDLoc, STI); return; } emitRRI(Mips::DSLL, DstReg, SrcReg, ShiftAmount, IDLoc, STI); } void MipsTargetStreamer::emitEmptyDelaySlot(bool hasShortDelaySlot, SMLoc IDLoc, const MCSubtargetInfo *STI) { if (hasShortDelaySlot) emitRR(Mips::MOVE16_MM, Mips::ZERO, Mips::ZERO, IDLoc, STI); else emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, STI); } void MipsTargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) { const FeatureBitset &Features = STI->getFeatureBits(); if (Features[Mips::FeatureMicroMips]) emitRR(Mips::MOVE16_MM, Mips::ZERO, Mips::ZERO, IDLoc, STI); else emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, STI); } /// Emit the $gp restore operation for .cprestore. void MipsTargetStreamer::emitGPRestore(int Offset, SMLoc IDLoc, const MCSubtargetInfo *STI) { emitLoadWithImmOffset(Mips::LW, Mips::GP, Mips::SP, Offset, Mips::GP, IDLoc, STI); } /// Emit a store instruction with an immediate offset. void MipsTargetStreamer::emitStoreWithImmOffset( unsigned Opcode, unsigned SrcReg, unsigned BaseReg, int64_t Offset, function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI) { if (isInt<16>(Offset)) { emitRRI(Opcode, SrcReg, BaseReg, Offset, IDLoc, STI); return; } // sw $8, offset($8) => lui $at, %hi(offset) // add $at, $at, $8 // sw $8, %lo(offset)($at) unsigned ATReg = GetATReg(); if (!ATReg) return; unsigned LoOffset = Offset & 0x0000ffff; unsigned HiOffset = (Offset & 0xffff0000) >> 16; // If msb of LoOffset is 1(negative number) we must increment HiOffset // to account for the sign-extension of the low part. if (LoOffset & 0x8000) HiOffset++; // Generate the base address in ATReg. emitRI(Mips::LUi, ATReg, HiOffset, IDLoc, STI); if (BaseReg != Mips::ZERO) emitRRR(Mips::ADDu, ATReg, ATReg, BaseReg, IDLoc, STI); // Emit the store with the adjusted base and offset. emitRRI(Opcode, SrcReg, ATReg, LoOffset, IDLoc, STI); } /// Emit a store instruction with an symbol offset. Symbols are assumed to be /// out of range for a simm16 will be expanded to appropriate instructions. void MipsTargetStreamer::emitStoreWithSymOffset( unsigned Opcode, unsigned SrcReg, unsigned BaseReg, MCOperand &HiOperand, MCOperand &LoOperand, unsigned ATReg, SMLoc IDLoc, const MCSubtargetInfo *STI) { // sw $8, sym => lui $at, %hi(sym) // sw $8, %lo(sym)($at) // Generate the base address in ATReg. emitRX(Mips::LUi, ATReg, HiOperand, IDLoc, STI); if (BaseReg != Mips::ZERO) emitRRR(Mips::ADDu, ATReg, ATReg, BaseReg, IDLoc, STI); // Emit the store with the adjusted base and offset. emitRRX(Opcode, SrcReg, ATReg, LoOperand, IDLoc, STI); } /// Emit a load instruction with an immediate offset. DstReg and TmpReg are /// permitted to be the same register iff DstReg is distinct from BaseReg and /// DstReg is a GPR. It is the callers responsibility to identify such cases /// and pass the appropriate register in TmpReg. 
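Both emitStoreWithImmOffset above and emitLoadWithImmOffset below split an out-of-range offset into a %hi/%lo pair and bump the high half when bit 15 of the low half is set, because the 16-bit immediate of the final memory instruction is sign-extended. A minimal, self-contained sketch of just that arithmetic (not the streamer API):

#include <cassert>
#include <cstdint>

// Split a 32-bit offset into the %hi/%lo halves used by the lui/addu/load
// expansion. The low half is sign-extended when used as a 16-bit immediate,
// so if bit 15 is set the high half is incremented to compensate.
static void splitOffset(int32_t Offset, uint16_t &Hi, uint16_t &Lo) {
  Lo = static_cast<uint16_t>(Offset & 0xffff);
  Hi = static_cast<uint16_t>(static_cast<uint32_t>(Offset) >> 16);
  if (Lo & 0x8000) // low part will sign-extend to a negative value
    ++Hi;          // adding 1 here adds 0x10000 once Hi is shifted back up
}

int main() {
  uint16_t Hi, Lo;
  splitOffset(0x18000, Hi, Lo);
  // Hi == 0x0002, Lo == 0x8000: 0x20000 + (-0x8000) == 0x18000.
  assert(static_cast<int32_t>(Hi) * 0x10000 + static_cast<int16_t>(Lo) ==
         0x18000);
}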
void MipsTargetStreamer::emitLoadWithImmOffset(unsigned Opcode, unsigned DstReg,
                                               unsigned BaseReg, int64_t Offset,
                                               unsigned TmpReg, SMLoc IDLoc,
                                               const MCSubtargetInfo *STI) {
  if (isInt<16>(Offset)) {
    emitRRI(Opcode, DstReg, BaseReg, Offset, IDLoc, STI);
    return;
  }

  // 1) lw $8, offset($9) => lui $8, %hi(offset)
  //                         add $8, $8, $9
  //                         lw $8, %lo(offset)($9)
  // 2) lw $8, offset($8) => lui $at, %hi(offset)
  //                         add $at, $at, $8
  //                         lw $8, %lo(offset)($at)

  unsigned LoOffset = Offset & 0x0000ffff;
  unsigned HiOffset = (Offset & 0xffff0000) >> 16;

  // If the msb of LoOffset is 1 (a negative number), we must increment
  // HiOffset to account for the sign-extension of the low part.
  if (LoOffset & 0x8000)
    HiOffset++;

  // Generate the base address in TmpReg.
  emitRI(Mips::LUi, TmpReg, HiOffset, IDLoc, STI);
  if (BaseReg != Mips::ZERO)
    emitRRR(Mips::ADDu, TmpReg, TmpReg, BaseReg, IDLoc, STI);
  // Emit the load with the adjusted base and offset.
  emitRRI(Opcode, DstReg, TmpReg, LoOffset, IDLoc, STI);
}

/// Emit a load instruction with a symbol offset. Symbols that are assumed to
/// be out of range for a simm16 are expanded to the appropriate instruction
/// sequence. DstReg and TmpReg are permitted to be the same register iff
/// DstReg is a GPR. It is the caller's responsibility to identify such cases
/// and pass the appropriate register in TmpReg.
void MipsTargetStreamer::emitLoadWithSymOffset(unsigned Opcode, unsigned DstReg,
                                               unsigned BaseReg,
                                               MCOperand &HiOperand,
                                               MCOperand &LoOperand,
                                               unsigned TmpReg, SMLoc IDLoc,
                                               const MCSubtargetInfo *STI) {
  // 1) lw $8, sym      => lui $8, %hi(sym)
  //                       lw $8, %lo(sym)($8)
  // 2) ldc1 $f0, sym   => lui $at, %hi(sym)
  //                       ldc1 $f0, %lo(sym)($at)

  // Generate the base address in TmpReg.
  emitRX(Mips::LUi, TmpReg, HiOperand, IDLoc, STI);
  if (BaseReg != Mips::ZERO)
    emitRRR(Mips::ADDu, TmpReg, TmpReg, BaseReg, IDLoc, STI);
  // Emit the load with the adjusted base and offset.
emitRRX(Opcode, DstReg, TmpReg, LoOperand, IDLoc, STI); } MipsTargetAsmStreamer::MipsTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS) : MipsTargetStreamer(S), OS(OS) {} void MipsTargetAsmStreamer::emitDirectiveSetMicroMips() { OS << "\t.set\tmicromips\n"; forbidModuleDirective(); } void MipsTargetAsmStreamer::emitDirectiveSetNoMicroMips() { OS << "\t.set\tnomicromips\n"; forbidModuleDirective(); } void MipsTargetAsmStreamer::emitDirectiveSetMips16() { OS << "\t.set\tmips16\n"; forbidModuleDirective(); } void MipsTargetAsmStreamer::emitDirectiveSetNoMips16() { OS << "\t.set\tnomips16\n"; MipsTargetStreamer::emitDirectiveSetNoMips16(); } void MipsTargetAsmStreamer::emitDirectiveSetReorder() { OS << "\t.set\treorder\n"; MipsTargetStreamer::emitDirectiveSetReorder(); } void MipsTargetAsmStreamer::emitDirectiveSetNoReorder() { OS << "\t.set\tnoreorder\n"; forbidModuleDirective(); } void MipsTargetAsmStreamer::emitDirectiveSetMacro() { OS << "\t.set\tmacro\n"; MipsTargetStreamer::emitDirectiveSetMacro(); } void MipsTargetAsmStreamer::emitDirectiveSetNoMacro() { OS << "\t.set\tnomacro\n"; MipsTargetStreamer::emitDirectiveSetNoMacro(); } void MipsTargetAsmStreamer::emitDirectiveSetMsa() { OS << "\t.set\tmsa\n"; MipsTargetStreamer::emitDirectiveSetMsa(); } void MipsTargetAsmStreamer::emitDirectiveSetNoMsa() { OS << "\t.set\tnomsa\n"; MipsTargetStreamer::emitDirectiveSetNoMsa(); } void MipsTargetAsmStreamer::emitDirectiveSetMt() { OS << "\t.set\tmt\n"; MipsTargetStreamer::emitDirectiveSetMt(); } void MipsTargetAsmStreamer::emitDirectiveSetNoMt() { OS << "\t.set\tnomt\n"; MipsTargetStreamer::emitDirectiveSetNoMt(); } void MipsTargetAsmStreamer::emitDirectiveSetCRC() { OS << "\t.set\tcrc\n"; MipsTargetStreamer::emitDirectiveSetCRC(); } void MipsTargetAsmStreamer::emitDirectiveSetNoCRC() { OS << "\t.set\tnocrc\n"; MipsTargetStreamer::emitDirectiveSetNoCRC(); } void MipsTargetAsmStreamer::emitDirectiveSetVirt() { OS << "\t.set\tvirt\n"; MipsTargetStreamer::emitDirectiveSetVirt(); } void MipsTargetAsmStreamer::emitDirectiveSetNoVirt() { OS << "\t.set\tnovirt\n"; MipsTargetStreamer::emitDirectiveSetNoVirt(); } void MipsTargetAsmStreamer::emitDirectiveSetGINV() { OS << "\t.set\tginv\n"; MipsTargetStreamer::emitDirectiveSetGINV(); } void MipsTargetAsmStreamer::emitDirectiveSetNoGINV() { OS << "\t.set\tnoginv\n"; MipsTargetStreamer::emitDirectiveSetNoGINV(); } void MipsTargetAsmStreamer::emitDirectiveSetAt() { OS << "\t.set\tat\n"; MipsTargetStreamer::emitDirectiveSetAt(); } void MipsTargetAsmStreamer::emitDirectiveSetAtWithArg(unsigned RegNo) { OS << "\t.set\tat=$" << Twine(RegNo) << "\n"; MipsTargetStreamer::emitDirectiveSetAtWithArg(RegNo); } void MipsTargetAsmStreamer::emitDirectiveSetNoAt() { OS << "\t.set\tnoat\n"; MipsTargetStreamer::emitDirectiveSetNoAt(); } void MipsTargetAsmStreamer::emitDirectiveEnd(StringRef Name) { OS << "\t.end\t" << Name << '\n'; } void MipsTargetAsmStreamer::emitDirectiveEnt(const MCSymbol &Symbol) { OS << "\t.ent\t" << Symbol.getName() << '\n'; } void MipsTargetAsmStreamer::emitDirectiveAbiCalls() { OS << "\t.abicalls\n"; } void MipsTargetAsmStreamer::emitDirectiveNaN2008() { OS << "\t.nan\t2008\n"; } void MipsTargetAsmStreamer::emitDirectiveNaNLegacy() { OS << "\t.nan\tlegacy\n"; } void MipsTargetAsmStreamer::emitDirectiveOptionPic0() { OS << "\t.option\tpic0\n"; } void MipsTargetAsmStreamer::emitDirectiveOptionPic2() { OS << "\t.option\tpic2\n"; } void MipsTargetAsmStreamer::emitDirectiveInsn() { MipsTargetStreamer::emitDirectiveInsn(); OS << "\t.insn\n"; } void 
MipsTargetAsmStreamer::emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg) { OS << "\t.frame\t$" << StringRef(MipsInstPrinter::getRegisterName(StackReg)).lower() << "," << StackSize << ",$" << StringRef(MipsInstPrinter::getRegisterName(ReturnReg)).lower() << '\n'; } void MipsTargetAsmStreamer::emitDirectiveSetArch(StringRef Arch) { OS << "\t.set arch=" << Arch << "\n"; MipsTargetStreamer::emitDirectiveSetArch(Arch); } void MipsTargetAsmStreamer::emitDirectiveSetMips0() { OS << "\t.set\tmips0\n"; MipsTargetStreamer::emitDirectiveSetMips0(); } void MipsTargetAsmStreamer::emitDirectiveSetMips1() { OS << "\t.set\tmips1\n"; MipsTargetStreamer::emitDirectiveSetMips1(); } void MipsTargetAsmStreamer::emitDirectiveSetMips2() { OS << "\t.set\tmips2\n"; MipsTargetStreamer::emitDirectiveSetMips2(); } void MipsTargetAsmStreamer::emitDirectiveSetMips3() { OS << "\t.set\tmips3\n"; MipsTargetStreamer::emitDirectiveSetMips3(); } void MipsTargetAsmStreamer::emitDirectiveSetMips4() { OS << "\t.set\tmips4\n"; MipsTargetStreamer::emitDirectiveSetMips4(); } void MipsTargetAsmStreamer::emitDirectiveSetMips5() { OS << "\t.set\tmips5\n"; MipsTargetStreamer::emitDirectiveSetMips5(); } void MipsTargetAsmStreamer::emitDirectiveSetMips32() { OS << "\t.set\tmips32\n"; MipsTargetStreamer::emitDirectiveSetMips32(); } void MipsTargetAsmStreamer::emitDirectiveSetMips32R2() { OS << "\t.set\tmips32r2\n"; MipsTargetStreamer::emitDirectiveSetMips32R2(); } void MipsTargetAsmStreamer::emitDirectiveSetMips32R3() { OS << "\t.set\tmips32r3\n"; MipsTargetStreamer::emitDirectiveSetMips32R3(); } void MipsTargetAsmStreamer::emitDirectiveSetMips32R5() { OS << "\t.set\tmips32r5\n"; MipsTargetStreamer::emitDirectiveSetMips32R5(); } void MipsTargetAsmStreamer::emitDirectiveSetMips32R6() { OS << "\t.set\tmips32r6\n"; MipsTargetStreamer::emitDirectiveSetMips32R6(); } void MipsTargetAsmStreamer::emitDirectiveSetMips64() { OS << "\t.set\tmips64\n"; MipsTargetStreamer::emitDirectiveSetMips64(); } void MipsTargetAsmStreamer::emitDirectiveSetMips64R2() { OS << "\t.set\tmips64r2\n"; MipsTargetStreamer::emitDirectiveSetMips64R2(); } void MipsTargetAsmStreamer::emitDirectiveSetMips64R3() { OS << "\t.set\tmips64r3\n"; MipsTargetStreamer::emitDirectiveSetMips64R3(); } void MipsTargetAsmStreamer::emitDirectiveSetMips64R5() { OS << "\t.set\tmips64r5\n"; MipsTargetStreamer::emitDirectiveSetMips64R5(); } void MipsTargetAsmStreamer::emitDirectiveSetMips64R6() { OS << "\t.set\tmips64r6\n"; MipsTargetStreamer::emitDirectiveSetMips64R6(); } void MipsTargetAsmStreamer::emitDirectiveSetDsp() { OS << "\t.set\tdsp\n"; MipsTargetStreamer::emitDirectiveSetDsp(); } void MipsTargetAsmStreamer::emitDirectiveSetDspr2() { OS << "\t.set\tdspr2\n"; MipsTargetStreamer::emitDirectiveSetDspr2(); } void MipsTargetAsmStreamer::emitDirectiveSetNoDsp() { OS << "\t.set\tnodsp\n"; MipsTargetStreamer::emitDirectiveSetNoDsp(); } void MipsTargetAsmStreamer::emitDirectiveSetPop() { OS << "\t.set\tpop\n"; MipsTargetStreamer::emitDirectiveSetPop(); } void MipsTargetAsmStreamer::emitDirectiveSetPush() { OS << "\t.set\tpush\n"; MipsTargetStreamer::emitDirectiveSetPush(); } void MipsTargetAsmStreamer::emitDirectiveSetSoftFloat() { OS << "\t.set\tsoftfloat\n"; MipsTargetStreamer::emitDirectiveSetSoftFloat(); } void MipsTargetAsmStreamer::emitDirectiveSetHardFloat() { OS << "\t.set\thardfloat\n"; MipsTargetStreamer::emitDirectiveSetHardFloat(); } // Print a 32 bit hex number with all numbers. 
static void printHex32(unsigned Value, raw_ostream &OS) { OS << "0x"; for (int i = 7; i >= 0; i--) OS.write_hex((Value & (0xF << (i * 4))) >> (i * 4)); } void MipsTargetAsmStreamer::emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) { OS << "\t.mask \t"; printHex32(CPUBitmask, OS); OS << ',' << CPUTopSavedRegOff << '\n'; } void MipsTargetAsmStreamer::emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) { OS << "\t.fmask\t"; printHex32(FPUBitmask, OS); OS << "," << FPUTopSavedRegOff << '\n'; } void MipsTargetAsmStreamer::emitDirectiveCpLoad(unsigned RegNo) { OS << "\t.cpload\t$" << StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << "\n"; forbidModuleDirective(); } bool MipsTargetAsmStreamer::emitDirectiveCpRestore( int Offset, function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI) { MipsTargetStreamer::emitDirectiveCpRestore(Offset, GetATReg, IDLoc, STI); OS << "\t.cprestore\t" << Offset << "\n"; return true; } void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, const MCSymbol &Sym, bool IsReg) { OS << "\t.cpsetup\t$" << StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << ", "; if (IsReg) OS << "$" << StringRef(MipsInstPrinter::getRegisterName(RegOrOffset)).lower(); else OS << RegOrOffset; OS << ", "; OS << Sym.getName(); forbidModuleDirective(); } void MipsTargetAsmStreamer::emitDirectiveCpreturn(unsigned SaveLocation, bool SaveLocationIsRegister) { OS << "\t.cpreturn"; forbidModuleDirective(); } void MipsTargetAsmStreamer::emitDirectiveModuleFP() { - OS << "\t.module\tfp="; - OS << ABIFlagsSection.getFpABIString(ABIFlagsSection.getFpABI()) << "\n"; + MipsABIFlagsSection::FpABIKind FpABI = ABIFlagsSection.getFpABI(); + if (FpABI == MipsABIFlagsSection::FpABIKind::SOFT) + OS << "\t.module\tsoftfloat\n"; + else + OS << "\t.module\tfp=" << ABIFlagsSection.getFpABIString(FpABI) << "\n"; } void MipsTargetAsmStreamer::emitDirectiveSetFp( MipsABIFlagsSection::FpABIKind Value) { MipsTargetStreamer::emitDirectiveSetFp(Value); OS << "\t.set\tfp="; OS << ABIFlagsSection.getFpABIString(Value) << "\n"; } void MipsTargetAsmStreamer::emitDirectiveModuleOddSPReg() { MipsTargetStreamer::emitDirectiveModuleOddSPReg(); OS << "\t.module\t" << (ABIFlagsSection.OddSPReg ? "" : "no") << "oddspreg\n"; } void MipsTargetAsmStreamer::emitDirectiveSetOddSPReg() { MipsTargetStreamer::emitDirectiveSetOddSPReg(); OS << "\t.set\toddspreg\n"; } void MipsTargetAsmStreamer::emitDirectiveSetNoOddSPReg() { MipsTargetStreamer::emitDirectiveSetNoOddSPReg(); OS << "\t.set\tnooddspreg\n"; } void MipsTargetAsmStreamer::emitDirectiveModuleSoftFloat() { OS << "\t.module\tsoftfloat\n"; } void MipsTargetAsmStreamer::emitDirectiveModuleHardFloat() { OS << "\t.module\thardfloat\n"; } void MipsTargetAsmStreamer::emitDirectiveModuleMT() { OS << "\t.module\tmt\n"; } void MipsTargetAsmStreamer::emitDirectiveModuleCRC() { OS << "\t.module\tcrc\n"; } void MipsTargetAsmStreamer::emitDirectiveModuleNoCRC() { OS << "\t.module\tnocrc\n"; } void MipsTargetAsmStreamer::emitDirectiveModuleVirt() { OS << "\t.module\tvirt\n"; } void MipsTargetAsmStreamer::emitDirectiveModuleNoVirt() { OS << "\t.module\tnovirt\n"; } void MipsTargetAsmStreamer::emitDirectiveModuleGINV() { OS << "\t.module\tginv\n"; } void MipsTargetAsmStreamer::emitDirectiveModuleNoGINV() { OS << "\t.module\tnoginv\n"; } // This part is for ELF object output. 
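The change to emitDirectiveModuleFP above makes the soft-float case print ".module softfloat" instead of a ".module fp=" line; all other FP ABI kinds keep the old form. A minimal sketch of that dispatch, with a hypothetical FpABIKind enum standing in for MipsABIFlagsSection::FpABIKind and a plain string in place of the streamer:

#include <iostream>
#include <string>

// Hypothetical stand-in for MipsABIFlagsSection::FpABIKind.
enum class FpABIKind { ANY, XX, S32, S64, SOFT };

static std::string moduleFpDirective(FpABIKind FpABI, const std::string &FpStr) {
  if (FpABI == FpABIKind::SOFT)
    return "\t.module\tsoftfloat\n";       // new spelling for soft-float
  return "\t.module\tfp=" + FpStr + "\n";  // unchanged for the other kinds
}

int main() {
  std::cout << moduleFpDirective(FpABIKind::S64, "64"); // .module fp=64
  std::cout << moduleFpDirective(FpABIKind::SOFT, "");  // .module softfloat
}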
MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI) : MipsTargetStreamer(S), MicroMipsEnabled(false), STI(STI) { MCAssembler &MCA = getStreamer().getAssembler(); // It's possible that MCObjectFileInfo isn't fully initialized at this point // due to an initialization order problem where LLVMTargetMachine creates the // target streamer before TargetLoweringObjectFile calls // InitializeMCObjectFileInfo. There doesn't seem to be a single place that // covers all cases so this statement covers most cases and direct object // emission must call setPic() once MCObjectFileInfo has been initialized. The // cases we don't handle here are covered by MipsAsmPrinter. Pic = MCA.getContext().getObjectFileInfo()->isPositionIndependent(); const FeatureBitset &Features = STI.getFeatureBits(); // Set the header flags that we can in the constructor. // FIXME: This is a fairly terrible hack. We set the rest // of these in the destructor. The problem here is two-fold: // // a: Some of the eflags can be set/reset by directives. // b: There aren't any usage paths that initialize the ABI // pointer until after we initialize either an assembler // or the target machine. // We can fix this by making the target streamer construct // the ABI, but this is fraught with wide ranging dependency // issues as well. unsigned EFlags = MCA.getELFHeaderEFlags(); // FIXME: Fix a dependency issue by instantiating the ABI object to some // default based off the triple. The triple doesn't describe the target // fully, but any external user of the API that uses the MCTargetStreamer // would otherwise crash on assertion failure. ABI = MipsABIInfo( STI.getTargetTriple().getArch() == Triple::ArchType::mipsel || STI.getTargetTriple().getArch() == Triple::ArchType::mips ? MipsABIInfo::O32() : MipsABIInfo::N64()); // Architecture if (Features[Mips::FeatureMips64r6]) EFlags |= ELF::EF_MIPS_ARCH_64R6; else if (Features[Mips::FeatureMips64r2] || Features[Mips::FeatureMips64r3] || Features[Mips::FeatureMips64r5]) EFlags |= ELF::EF_MIPS_ARCH_64R2; else if (Features[Mips::FeatureMips64]) EFlags |= ELF::EF_MIPS_ARCH_64; else if (Features[Mips::FeatureMips5]) EFlags |= ELF::EF_MIPS_ARCH_5; else if (Features[Mips::FeatureMips4]) EFlags |= ELF::EF_MIPS_ARCH_4; else if (Features[Mips::FeatureMips3]) EFlags |= ELF::EF_MIPS_ARCH_3; else if (Features[Mips::FeatureMips32r6]) EFlags |= ELF::EF_MIPS_ARCH_32R6; else if (Features[Mips::FeatureMips32r2] || Features[Mips::FeatureMips32r3] || Features[Mips::FeatureMips32r5]) EFlags |= ELF::EF_MIPS_ARCH_32R2; else if (Features[Mips::FeatureMips32]) EFlags |= ELF::EF_MIPS_ARCH_32; else if (Features[Mips::FeatureMips2]) EFlags |= ELF::EF_MIPS_ARCH_2; else EFlags |= ELF::EF_MIPS_ARCH_1; // Machine if (Features[Mips::FeatureCnMips]) EFlags |= ELF::EF_MIPS_MACH_OCTEON; // Other options. if (Features[Mips::FeatureNaN2008]) EFlags |= ELF::EF_MIPS_NAN2008; MCA.setELFHeaderEFlags(EFlags); } void MipsTargetELFStreamer::emitLabel(MCSymbol *S) { auto *Symbol = cast(S); getStreamer().getAssembler().registerSymbol(*Symbol); uint8_t Type = Symbol->getType(); if (Type != ELF::STT_FUNC) return; if (isMicroMipsEnabled()) Symbol->setOther(ELF::STO_MIPS_MICROMIPS); } void MipsTargetELFStreamer::finish() { MCAssembler &MCA = getStreamer().getAssembler(); const MCObjectFileInfo &OFI = *MCA.getContext().getObjectFileInfo(); // .bss, .text and .data are always at least 16-byte aligned. 
MCSection &TextSection = *OFI.getTextSection(); MCA.registerSection(TextSection); MCSection &DataSection = *OFI.getDataSection(); MCA.registerSection(DataSection); MCSection &BSSSection = *OFI.getBSSSection(); MCA.registerSection(BSSSection); TextSection.setAlignment(std::max(16u, TextSection.getAlignment())); DataSection.setAlignment(std::max(16u, DataSection.getAlignment())); BSSSection.setAlignment(std::max(16u, BSSSection.getAlignment())); if (RoundSectionSizes) { // Make sections sizes a multiple of the alignment. This is useful for // verifying the output of IAS against the output of other assemblers but // it's not necessary to produce a correct object and increases section // size. MCStreamer &OS = getStreamer(); for (MCSection &S : MCA) { MCSectionELF &Section = static_cast(S); unsigned Alignment = Section.getAlignment(); if (Alignment) { OS.SwitchSection(&Section); if (Section.UseCodeAlign()) OS.EmitCodeAlignment(Alignment, Alignment); else OS.EmitValueToAlignment(Alignment, 0, 1, Alignment); } } } const FeatureBitset &Features = STI.getFeatureBits(); // Update e_header flags. See the FIXME and comment above in // the constructor for a full rundown on this. unsigned EFlags = MCA.getELFHeaderEFlags(); // ABI // N64 does not require any ABI bits. if (getABI().IsO32()) EFlags |= ELF::EF_MIPS_ABI_O32; else if (getABI().IsN32()) EFlags |= ELF::EF_MIPS_ABI2; if (Features[Mips::FeatureGP64Bit]) { if (getABI().IsO32()) EFlags |= ELF::EF_MIPS_32BITMODE; /* Compatibility Mode */ } else if (Features[Mips::FeatureMips64r2] || Features[Mips::FeatureMips64]) EFlags |= ELF::EF_MIPS_32BITMODE; // -mplt is not implemented but we should act as if it was // given. if (!Features[Mips::FeatureNoABICalls]) EFlags |= ELF::EF_MIPS_CPIC; if (Pic) EFlags |= ELF::EF_MIPS_PIC | ELF::EF_MIPS_CPIC; MCA.setELFHeaderEFlags(EFlags); // Emit all the option records. // At the moment we are only emitting .Mips.options (ODK_REGINFO) and // .reginfo. MipsELFStreamer &MEF = static_cast(Streamer); MEF.EmitMipsOptionRecords(); emitMipsAbiFlags(); } void MipsTargetELFStreamer::emitAssignment(MCSymbol *S, const MCExpr *Value) { auto *Symbol = cast(S); // If on rhs is micromips symbol then mark Symbol as microMips. 
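The RoundSectionSizes handling above pads each section so its size becomes a multiple of its alignment, which helps when comparing IAS output against other assemblers. A minimal sketch of the equivalent align-up arithmetic, assuming a power-of-two alignment (the streamer achieves the same effect through its alignment-emission calls rather than this helper):

#include <cassert>
#include <cstdint>

// Round Size up to the next multiple of a power-of-two Alignment.
static uint64_t alignTo(uint64_t Size, uint64_t Alignment) {
  assert(Alignment != 0 && (Alignment & (Alignment - 1)) == 0 &&
         "alignment must be a power of two");
  return (Size + Alignment - 1) & ~(Alignment - 1);
}

int main() {
  assert(alignTo(0, 16) == 0);
  assert(alignTo(1, 16) == 16);
  assert(alignTo(17, 16) == 32); // a 17-byte section is padded to 32 bytes
}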
if (Value->getKind() != MCExpr::SymbolRef) return; const auto &RhsSym = cast( static_cast(Value)->getSymbol()); if (!(RhsSym.getOther() & ELF::STO_MIPS_MICROMIPS)) return; Symbol->setOther(ELF::STO_MIPS_MICROMIPS); } MCELFStreamer &MipsTargetELFStreamer::getStreamer() { return static_cast(Streamer); } void MipsTargetELFStreamer::emitDirectiveSetMicroMips() { MicroMipsEnabled = true; forbidModuleDirective(); } void MipsTargetELFStreamer::emitDirectiveSetNoMicroMips() { MicroMipsEnabled = false; forbidModuleDirective(); } void MipsTargetELFStreamer::setUsesMicroMips() { MCAssembler &MCA = getStreamer().getAssembler(); unsigned Flags = MCA.getELFHeaderEFlags(); Flags |= ELF::EF_MIPS_MICROMIPS; MCA.setELFHeaderEFlags(Flags); } void MipsTargetELFStreamer::emitDirectiveSetMips16() { MCAssembler &MCA = getStreamer().getAssembler(); unsigned Flags = MCA.getELFHeaderEFlags(); Flags |= ELF::EF_MIPS_ARCH_ASE_M16; MCA.setELFHeaderEFlags(Flags); forbidModuleDirective(); } void MipsTargetELFStreamer::emitDirectiveSetNoReorder() { MCAssembler &MCA = getStreamer().getAssembler(); unsigned Flags = MCA.getELFHeaderEFlags(); Flags |= ELF::EF_MIPS_NOREORDER; MCA.setELFHeaderEFlags(Flags); forbidModuleDirective(); } void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) { MCAssembler &MCA = getStreamer().getAssembler(); MCContext &Context = MCA.getContext(); MCStreamer &OS = getStreamer(); MCSectionELF *Sec = Context.getELFSection(".pdr", ELF::SHT_PROGBITS, 0); MCSymbol *Sym = Context.getOrCreateSymbol(Name); const MCSymbolRefExpr *ExprRef = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Context); MCA.registerSection(*Sec); Sec->setAlignment(4); OS.PushSection(); OS.SwitchSection(Sec); OS.EmitValueImpl(ExprRef, 4); OS.EmitIntValue(GPRInfoSet ? GPRBitMask : 0, 4); // reg_mask OS.EmitIntValue(GPRInfoSet ? GPROffset : 0, 4); // reg_offset OS.EmitIntValue(FPRInfoSet ? FPRBitMask : 0, 4); // fpreg_mask OS.EmitIntValue(FPRInfoSet ? FPROffset : 0, 4); // fpreg_offset OS.EmitIntValue(FrameInfoSet ? FrameOffset : 0, 4); // frame_offset OS.EmitIntValue(FrameInfoSet ? FrameReg : 0, 4); // frame_reg OS.EmitIntValue(FrameInfoSet ? ReturnReg : 0, 4); // return_reg // The .end directive marks the end of a procedure. Invalidate // the information gathered up until this point. GPRInfoSet = FPRInfoSet = FrameInfoSet = false; OS.PopSection(); // .end also implicitly sets the size. MCSymbol *CurPCSym = Context.createTempSymbol(); OS.EmitLabel(CurPCSym); const MCExpr *Size = MCBinaryExpr::createSub( MCSymbolRefExpr::create(CurPCSym, MCSymbolRefExpr::VK_None, Context), ExprRef, Context); // The ELFObjectWriter can determine the absolute size as it has access to // the layout information of the assembly file, so a size expression rather // than an absolute value is ok here. 
static_cast(Sym)->setSize(Size); } void MipsTargetELFStreamer::emitDirectiveEnt(const MCSymbol &Symbol) { GPRInfoSet = FPRInfoSet = FrameInfoSet = false; // .ent also acts like an implicit '.type symbol, STT_FUNC' static_cast(Symbol).setType(ELF::STT_FUNC); } void MipsTargetELFStreamer::emitDirectiveAbiCalls() { MCAssembler &MCA = getStreamer().getAssembler(); unsigned Flags = MCA.getELFHeaderEFlags(); Flags |= ELF::EF_MIPS_CPIC | ELF::EF_MIPS_PIC; MCA.setELFHeaderEFlags(Flags); } void MipsTargetELFStreamer::emitDirectiveNaN2008() { MCAssembler &MCA = getStreamer().getAssembler(); unsigned Flags = MCA.getELFHeaderEFlags(); Flags |= ELF::EF_MIPS_NAN2008; MCA.setELFHeaderEFlags(Flags); } void MipsTargetELFStreamer::emitDirectiveNaNLegacy() { MCAssembler &MCA = getStreamer().getAssembler(); unsigned Flags = MCA.getELFHeaderEFlags(); Flags &= ~ELF::EF_MIPS_NAN2008; MCA.setELFHeaderEFlags(Flags); } void MipsTargetELFStreamer::emitDirectiveOptionPic0() { MCAssembler &MCA = getStreamer().getAssembler(); unsigned Flags = MCA.getELFHeaderEFlags(); // This option overrides other PIC options like -KPIC. Pic = false; Flags &= ~ELF::EF_MIPS_PIC; MCA.setELFHeaderEFlags(Flags); } void MipsTargetELFStreamer::emitDirectiveOptionPic2() { MCAssembler &MCA = getStreamer().getAssembler(); unsigned Flags = MCA.getELFHeaderEFlags(); Pic = true; // NOTE: We are following the GAS behaviour here which means the directive // 'pic2' also sets the CPIC bit in the ELF header. This is different from // what is stated in the SYSV ABI which consider the bits EF_MIPS_PIC and // EF_MIPS_CPIC to be mutually exclusive. Flags |= ELF::EF_MIPS_PIC | ELF::EF_MIPS_CPIC; MCA.setELFHeaderEFlags(Flags); } void MipsTargetELFStreamer::emitDirectiveInsn() { MipsTargetStreamer::emitDirectiveInsn(); MipsELFStreamer &MEF = static_cast(Streamer); MEF.createPendingLabelRelocs(); } void MipsTargetELFStreamer::emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg_) { MCContext &Context = getStreamer().getAssembler().getContext(); const MCRegisterInfo *RegInfo = Context.getRegisterInfo(); FrameInfoSet = true; FrameReg = RegInfo->getEncodingValue(StackReg); FrameOffset = StackSize; ReturnReg = RegInfo->getEncodingValue(ReturnReg_); } void MipsTargetELFStreamer::emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) { GPRInfoSet = true; GPRBitMask = CPUBitmask; GPROffset = CPUTopSavedRegOff; } void MipsTargetELFStreamer::emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) { FPRInfoSet = true; FPRBitMask = FPUBitmask; FPROffset = FPUTopSavedRegOff; } void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) { // .cpload $reg // This directive expands to: // lui $gp, %hi(_gp_disp) // addui $gp, $gp, %lo(_gp_disp) // addu $gp, $gp, $reg // when support for position independent code is enabled. if (!Pic || (getABI().IsN32() || getABI().IsN64())) return; // There's a GNU extension controlled by -mno-shared that allows // locally-binding symbols to be accessed using absolute addresses. // This is currently not supported. 
When supported -mno-shared makes // .cpload expand to: // lui $gp, %hi(__gnu_local_gp) // addiu $gp, $gp, %lo(__gnu_local_gp) StringRef SymName("_gp_disp"); MCAssembler &MCA = getStreamer().getAssembler(); MCSymbol *GP_Disp = MCA.getContext().getOrCreateSymbol(SymName); MCA.registerSymbol(*GP_Disp); MCInst TmpInst; TmpInst.setOpcode(Mips::LUi); TmpInst.addOperand(MCOperand::createReg(Mips::GP)); const MCExpr *HiSym = MipsMCExpr::create( MipsMCExpr::MEK_HI, MCSymbolRefExpr::create("_gp_disp", MCSymbolRefExpr::VK_None, MCA.getContext()), MCA.getContext()); TmpInst.addOperand(MCOperand::createExpr(HiSym)); getStreamer().EmitInstruction(TmpInst, STI); TmpInst.clear(); TmpInst.setOpcode(Mips::ADDiu); TmpInst.addOperand(MCOperand::createReg(Mips::GP)); TmpInst.addOperand(MCOperand::createReg(Mips::GP)); const MCExpr *LoSym = MipsMCExpr::create( MipsMCExpr::MEK_LO, MCSymbolRefExpr::create("_gp_disp", MCSymbolRefExpr::VK_None, MCA.getContext()), MCA.getContext()); TmpInst.addOperand(MCOperand::createExpr(LoSym)); getStreamer().EmitInstruction(TmpInst, STI); TmpInst.clear(); TmpInst.setOpcode(Mips::ADDu); TmpInst.addOperand(MCOperand::createReg(Mips::GP)); TmpInst.addOperand(MCOperand::createReg(Mips::GP)); TmpInst.addOperand(MCOperand::createReg(RegNo)); getStreamer().EmitInstruction(TmpInst, STI); forbidModuleDirective(); } bool MipsTargetELFStreamer::emitDirectiveCpRestore( int Offset, function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI) { MipsTargetStreamer::emitDirectiveCpRestore(Offset, GetATReg, IDLoc, STI); // .cprestore offset // When PIC mode is enabled and the O32 ABI is used, this directive expands // to: // sw $gp, offset($sp) // and adds a corresponding LW after every JAL. // Note that .cprestore is ignored if used with the N32 and N64 ABIs or if it // is used in non-PIC mode. if (!Pic || (getABI().IsN32() || getABI().IsN64())) return true; // Store the $gp on the stack. emitStoreWithImmOffset(Mips::SW, Mips::GP, Mips::SP, Offset, GetATReg, IDLoc, STI); return true; } void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, const MCSymbol &Sym, bool IsReg) { // Only N32 and N64 emit anything for .cpsetup iff PIC is set. 
if (!Pic || !(getABI().IsN32() || getABI().IsN64())) return; forbidModuleDirective(); MCAssembler &MCA = getStreamer().getAssembler(); MCInst Inst; // Either store the old $gp in a register or on the stack if (IsReg) { // move $save, $gpreg emitRRR(Mips::OR64, RegOrOffset, Mips::GP, Mips::ZERO, SMLoc(), &STI); } else { // sd $gpreg, offset($sp) emitRRI(Mips::SD, Mips::GP, Mips::SP, RegOrOffset, SMLoc(), &STI); } if (getABI().IsN32()) { MCSymbol *GPSym = MCA.getContext().getOrCreateSymbol("__gnu_local_gp"); const MipsMCExpr *HiExpr = MipsMCExpr::create( MipsMCExpr::MEK_HI, MCSymbolRefExpr::create(GPSym, MCA.getContext()), MCA.getContext()); const MipsMCExpr *LoExpr = MipsMCExpr::create( MipsMCExpr::MEK_LO, MCSymbolRefExpr::create(GPSym, MCA.getContext()), MCA.getContext()); // lui $gp, %hi(__gnu_local_gp) emitRX(Mips::LUi, Mips::GP, MCOperand::createExpr(HiExpr), SMLoc(), &STI); // addiu $gp, $gp, %lo(__gnu_local_gp) emitRRX(Mips::ADDiu, Mips::GP, Mips::GP, MCOperand::createExpr(LoExpr), SMLoc(), &STI); return; } const MipsMCExpr *HiExpr = MipsMCExpr::createGpOff( MipsMCExpr::MEK_HI, MCSymbolRefExpr::create(&Sym, MCA.getContext()), MCA.getContext()); const MipsMCExpr *LoExpr = MipsMCExpr::createGpOff( MipsMCExpr::MEK_LO, MCSymbolRefExpr::create(&Sym, MCA.getContext()), MCA.getContext()); // lui $gp, %hi(%neg(%gp_rel(funcSym))) emitRX(Mips::LUi, Mips::GP, MCOperand::createExpr(HiExpr), SMLoc(), &STI); // addiu $gp, $gp, %lo(%neg(%gp_rel(funcSym))) emitRRX(Mips::ADDiu, Mips::GP, Mips::GP, MCOperand::createExpr(LoExpr), SMLoc(), &STI); // daddu $gp, $gp, $funcreg emitRRR(Mips::DADDu, Mips::GP, Mips::GP, RegNo, SMLoc(), &STI); } void MipsTargetELFStreamer::emitDirectiveCpreturn(unsigned SaveLocation, bool SaveLocationIsRegister) { // Only N32 and N64 emit anything for .cpreturn iff PIC is set. if (!Pic || !(getABI().IsN32() || getABI().IsN64())) return; MCInst Inst; // Either restore the old $gp from a register or on the stack if (SaveLocationIsRegister) { Inst.setOpcode(Mips::OR); Inst.addOperand(MCOperand::createReg(Mips::GP)); Inst.addOperand(MCOperand::createReg(SaveLocation)); Inst.addOperand(MCOperand::createReg(Mips::ZERO)); } else { Inst.setOpcode(Mips::LD); Inst.addOperand(MCOperand::createReg(Mips::GP)); Inst.addOperand(MCOperand::createReg(Mips::SP)); Inst.addOperand(MCOperand::createImm(SaveLocation)); } getStreamer().EmitInstruction(Inst, STI); forbidModuleDirective(); } void MipsTargetELFStreamer::emitMipsAbiFlags() { MCAssembler &MCA = getStreamer().getAssembler(); MCContext &Context = MCA.getContext(); MCStreamer &OS = getStreamer(); MCSectionELF *Sec = Context.getELFSection( ".MIPS.abiflags", ELF::SHT_MIPS_ABIFLAGS, ELF::SHF_ALLOC, 24, ""); MCA.registerSection(*Sec); Sec->setAlignment(8); OS.SwitchSection(Sec); OS << ABIFlagsSection; } Index: vendor/llvm/dist-release_80/lib/Target/Mips/MicroMips32r6InstrInfo.td =================================================================== --- vendor/llvm/dist-release_80/lib/Target/Mips/MicroMips32r6InstrInfo.td (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/Mips/MicroMips32r6InstrInfo.td (revision 348932) @@ -1,1818 +1,1820 @@ //=- MicroMips32r6InstrInfo.td - MicroMips r6 Instruction Information -*- tablegen -*-=// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file describes microMIPSr6 instructions. 
// //===----------------------------------------------------------------------===// def brtarget21_mm : Operand { let EncoderMethod = "getBranchTarget21OpValueMM"; let OperandType = "OPERAND_PCREL"; let DecoderMethod = "DecodeBranchTarget21MM"; let ParserMatchClass = MipsJumpTargetAsmOperand; } def brtarget26_mm : Operand { let EncoderMethod = "getBranchTarget26OpValueMM"; let OperandType = "OPERAND_PCREL"; let DecoderMethod = "DecodeBranchTarget26MM"; let ParserMatchClass = MipsJumpTargetAsmOperand; } def brtargetr6 : Operand { let EncoderMethod = "getBranchTargetOpValueMMR6"; let OperandType = "OPERAND_PCREL"; let DecoderMethod = "DecodeBranchTargetMM"; let ParserMatchClass = MipsJumpTargetAsmOperand; } def brtarget_lsl2_mm : Operand { let EncoderMethod = "getBranchTargetOpValueLsl2MMR6"; let OperandType = "OPERAND_PCREL"; // Instructions that use this operand have their decoder method // set with DecodeDisambiguates let DecoderMethod = ""; let ParserMatchClass = MipsJumpTargetAsmOperand; } //===----------------------------------------------------------------------===// // // Instruction Encodings // //===----------------------------------------------------------------------===// class ADD_MMR6_ENC : ARITH_FM_MMR6<"add", 0x110>; class ADDIU_MMR6_ENC : ADDI_FM_MMR6<"addiu", 0xc>; class ADDU_MMR6_ENC : ARITH_FM_MMR6<"addu", 0x150>; class ADDIUPC_MMR6_ENC : PCREL19_FM_MMR6<0b00>; class ALUIPC_MMR6_ENC : PCREL16_FM_MMR6<0b11111>; class AND_MMR6_ENC : ARITH_FM_MMR6<"and", 0x250>; class ANDI_MMR6_ENC : ADDI_FM_MMR6<"andi", 0x34>; class AUIPC_MMR6_ENC : PCREL16_FM_MMR6<0b11110>; class ALIGN_MMR6_ENC : POOL32A_ALIGN_FM_MMR6<0b011111>; class AUI_MMR6_ENC : AUI_FM_MMR6; class BALC_MMR6_ENC : BRANCH_OFF26_FM<0b101101>; class BC_MMR6_ENC : BRANCH_OFF26_FM<0b100101>; class BC16_MMR6_ENC : BC16_FM_MM16R6; class BEQZC16_MMR6_ENC : BEQZC_BNEZC_FM_MM16R6<0x23>; class BNEZC16_MMR6_ENC : BEQZC_BNEZC_FM_MM16R6<0x2b>; class BITSWAP_MMR6_ENC : POOL32A_BITSWAP_FM_MMR6<0b101100>; class BRK_MMR6_ENC : BREAK_MMR6_ENC<"break">; class BEQZC_MMR6_ENC : CMP_BRANCH_OFF21_FM_MMR6<"beqzc", 0b100000>; class BNEZC_MMR6_ENC : CMP_BRANCH_OFF21_FM_MMR6<"bnezc", 0b101000>; class BGEC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bgec", 0b111101>, DecodeDisambiguates<"POP75GroupBranchMMR6">; class BGEUC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bgeuc", 0b110000>, DecodeDisambiguates<"BlezGroupBranchMMR6">; class BLTC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bltc", 0b110101>, DecodeDisambiguates<"POP65GroupBranchMMR6">; class BLTUC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bltuc", 0b111000>, DecodeDisambiguates<"BgtzGroupBranchMMR6">; class BEQC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"beqc", 0b011101>; class BNEC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bnec", 0b011111>; class BLTZC_MMR6_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM_MMR6<"bltzc", 0b110101>, DecodeDisambiguates<"POP65GroupBranchMMR6">; class BLEZC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<"blezc", 0b111101>, DecodeDisambiguates<"POP75GroupBranchMMR6">; class BGEZC_MMR6_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM_MMR6<"bgezc", 0b111101>, DecodeDisambiguates<"POP75GroupBranchMMR6">; class BGTZC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<"bgtzc", 0b110101>, DecodeDisambiguates<"POP65GroupBranchMMR6">; class BEQZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<"beqzalc", 0b011101>, DecodeDisambiguates<"POP35GroupBranchMMR6">; class BNEZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<"bnezalc", 0b011111>, DecodeDisambiguates<"POP37GroupBranchMMR6">; class BGTZALC_MMR6_ENC : 
CMP_BRANCH_1R_RT_OFF16_FM_MMR6<"bgtzalc", 0b111000>, MMDecodeDisambiguatedBy<"BgtzGroupBranchMMR6">; class BLTZALC_MMR6_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM_MMR6<"bltzalc", 0b111000>, MMDecodeDisambiguatedBy<"BgtzGroupBranchMMR6">; class BGEZALC_MMR6_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM_MMR6<"bgezalc", 0b110000>, MMDecodeDisambiguatedBy<"BlezGroupBranchMMR6">; class BLEZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<"blezalc", 0b110000>, MMDecodeDisambiguatedBy<"BlezGroupBranchMMR6">; class CACHE_MMR6_ENC : CACHE_PREF_FM_MMR6<0b001000, 0b0110>; class CLO_MMR6_ENC : POOL32A_2R_FM_MMR6<0b0100101100>; class CLZ_MMR6_ENC : SPECIAL_2R_FM_MMR6<0b010000>; class DIV_MMR6_ENC : ARITH_FM_MMR6<"div", 0x118>; class DIVU_MMR6_ENC : ARITH_FM_MMR6<"divu", 0x198>; class EHB_MMR6_ENC : BARRIER_MMR6_ENC<"ehb", 0x3>; class EI_MMR6_ENC : POOL32A_EIDI_MMR6_ENC<"ei", 0x15d>; class DI_MMR6_ENC : POOL32A_EIDI_MMR6_ENC<"di", 0b0100011101>; class ERET_MMR6_ENC : POOL32A_ERET_FM_MMR6<"eret", 0x3cd>; class DERET_MMR6_ENC : POOL32A_ERET_FM_MMR6<"eret", 0b1110001101>; class ERETNC_MMR6_ENC : ERETNC_FM_MMR6<"eretnc">; class GINVI_MMR6_ENC : POOL32A_GINV_FM_MMR6<"ginvi", 0b00>; class GINVT_MMR6_ENC : POOL32A_GINV_FM_MMR6<"ginvt", 0b10>; class JALRC16_MMR6_ENC : POOL16C_JALRC_FM_MM16R6<0xb>; class JIALC_MMR6_ENC : JMP_IDX_COMPACT_FM<0b100000>; class JIC_MMR6_ENC : JMP_IDX_COMPACT_FM<0b101000>; class JRC16_MMR6_ENC: POOL16C_JALRC_FM_MM16R6<0x3>; class JRCADDIUSP_MMR6_ENC : POOL16C_JRCADDIUSP_FM_MM16R6<0x13>; class LSA_MMR6_ENC : POOL32A_LSA_FM<0b001111>; class LWPC_MMR6_ENC : PCREL19_FM_MMR6<0b01>; class LWM16_MMR6_ENC : POOL16C_LWM_SWM_FM_MM16R6<0x2>; class MFC0_MMR6_ENC : POOL32A_MFTC0_FM_MMR6<"mfc0", 0b00011, 0b111100>; class MFC1_MMR6_ENC : POOL32F_MFTC1_FM_MMR6<"mfc1", 0b10000000>; class MFC2_MMR6_ENC : POOL32A_MFTC2_FM_MMR6<"mfc2", 0b0100110100>; class MFHC0_MMR6_ENC : POOL32A_MFTC0_FM_MMR6<"mfhc0", 0b00011, 0b110100>; class MFHC2_MMR6_ENC : POOL32A_MFTC2_FM_MMR6<"mfhc2", 0b1000110100>; class MOD_MMR6_ENC : ARITH_FM_MMR6<"mod", 0x158>; class MODU_MMR6_ENC : ARITH_FM_MMR6<"modu", 0x1d8>; class MUL_MMR6_ENC : ARITH_FM_MMR6<"mul", 0x18>; class MUH_MMR6_ENC : ARITH_FM_MMR6<"muh", 0x58>; class MULU_MMR6_ENC : ARITH_FM_MMR6<"mulu", 0x98>; class MUHU_MMR6_ENC : ARITH_FM_MMR6<"muhu", 0xd8>; class MTC0_MMR6_ENC : POOL32A_MFTC0_FM_MMR6<"mtc0", 0b01011, 0b111100>; class MTC1_MMR6_ENC : POOL32F_MFTC1_FM_MMR6<"mtc1", 0b10100000>; class MTC2_MMR6_ENC : POOL32A_MFTC2_FM_MMR6<"mtc2", 0b0101110100>; class MTHC0_MMR6_ENC : POOL32A_MFTC0_FM_MMR6<"mthc0", 0b01011, 0b110100>; class MTHC2_MMR6_ENC : POOL32A_MFTC2_FM_MMR6<"mthc2", 0b1001110100>; class NOR_MMR6_ENC : ARITH_FM_MMR6<"nor", 0x2d0>; class OR_MMR6_ENC : ARITH_FM_MMR6<"or", 0x290>; class ORI_MMR6_ENC : ADDI_FM_MMR6<"ori", 0x14>; class PREF_MMR6_ENC : CACHE_PREF_FM_MMR6<0b011000, 0b0010>; class SB16_MMR6_ENC : LOAD_STORE_FM_MM16<0x22>; class SELEQZ_MMR6_ENC : POOL32A_FM_MMR6<0b0101000000>; class SELNEZ_MMR6_ENC : POOL32A_FM_MMR6<0b0110000000>; class SH16_MMR6_ENC : LOAD_STORE_FM_MM16<0x2a>; class SLL_MMR6_ENC : SHIFT_MMR6_ENC<"sll", 0x00, 0b0>; class SUB_MMR6_ENC : ARITH_FM_MMR6<"sub", 0x190>; class SUBU_MMR6_ENC : ARITH_FM_MMR6<"subu", 0x1d0>; class SW_MMR6_ENC : SW32_FM_MMR6<"sw", 0x3e>; class SW16_MMR6_ENC : LOAD_STORE_FM_MM16<0x3a>; class SWM16_MMR6_ENC : POOL16C_LWM_SWM_FM_MM16R6<0xa>; class SWSP_MMR6_ENC : LOAD_STORE_SP_FM_MM16<0x32>; class WRPGPR_MMR6_ENC : POOL32A_WRPGPR_WSBH_FM_MMR6<"wrpgpr", 0x3c5>; class WSBH_MMR6_ENC : POOL32A_WRPGPR_WSBH_FM_MMR6<"wsbh", 0x1ec>; class 
LB_MMR6_ENC : LB32_FM_MMR6; class LBU_MMR6_ENC : LBU32_FM_MMR6; class PAUSE_MMR6_ENC : POOL32A_PAUSE_FM_MMR6<"pause", 0b00101>; class RDHWR_MMR6_ENC : POOL32A_RDHWR_FM_MMR6; class WAIT_MMR6_ENC : WAIT_FM_MM, MMR6Arch<"wait">; class SSNOP_MMR6_ENC : BARRIER_FM_MM<0x1>, MMR6Arch<"ssnop">; class SYNC_MMR6_ENC : POOL32A_SYNC_FM_MMR6; class SYNCI_MMR6_ENC : POOL32I_SYNCI_FM_MMR6, MMR6Arch<"synci">; class RDPGPR_MMR6_ENC : POOL32A_RDPGPR_FM_MMR6<0b1110000101>; class SDBBP_MMR6_ENC : SDBBP_FM_MM, MMR6Arch<"sdbbp">; class SIGRIE_MMR6_ENC : SIGRIE_FM_MM, MMR6Arch<"sigrie">; class XOR_MMR6_ENC : ARITH_FM_MMR6<"xor", 0x310>; class XORI_MMR6_ENC : ADDI_FM_MMR6<"xori", 0x1c>; class ABS_S_MMR6_ENC : POOL32F_ABS_FM_MMR6<"abs.s", 0, 0b0001101>; class ABS_D_MMR6_ENC : POOL32F_ABS_FM_MMR6<"abs.d", 1, 0b0001101>; class FLOOR_L_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.l.s", 0, 0b00001100>; class FLOOR_L_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.l.d", 1, 0b00001100>; class FLOOR_W_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.w.s", 0, 0b00101100>; class FLOOR_W_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.w.d", 1, 0b00101100>; class CEIL_L_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.l.s", 0, 0b01001100>; class CEIL_L_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.l.d", 1, 0b01001100>; class CEIL_W_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.w.s", 0, 0b01101100>; class CEIL_W_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.w.d", 1, 0b01101100>; class TRUNC_L_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.l.s", 0, 0b10001100>; class TRUNC_L_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.l.d", 1, 0b10001100>; class TRUNC_W_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.w.s", 0, 0b10101100>; class TRUNC_W_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.w.d", 1, 0b10101100>; class SB_MMR6_ENC : SB32_SH32_STORE_FM_MMR6<0b000110>; class SH_MMR6_ENC : SB32_SH32_STORE_FM_MMR6<0b001110>; class LW_MMR6_ENC : LOAD_WORD_FM_MMR6; class LUI_MMR6_ENC : LOAD_UPPER_IMM_FM_MMR6; class JALRC_HB_MMR6_ENC : POOL32A_JALRC_FM_MMR6<"jalrc.hb", 0b0001111100>; class RINT_S_MMR6_ENC : POOL32F_RINT_FM_MMR6<"rint.s", 0>; class RINT_D_MMR6_ENC : POOL32F_RINT_FM_MMR6<"rint.d", 1>; class ROUND_L_S_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.l.s", 0, 0b11001100>; class ROUND_L_D_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.l.d", 1, 0b11001100>; class ROUND_W_S_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.w.s", 0, 0b11101100>; class ROUND_W_D_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.w.d", 1, 0b11101100>; class SEL_S_MMR6_ENC : POOL32F_SEL_FM_MMR6<"sel.s", 0, 0b010111000>; class SEL_D_MMR6_ENC : POOL32F_SEL_FM_MMR6<"sel.d", 1, 0b010111000>; class SELEQZ_S_MMR6_ENC : POOL32F_SEL_FM_MMR6<"seleqz.s", 0, 0b000111000>; class SELEQZ_D_MMR6_ENC : POOL32F_SEL_FM_MMR6<"seleqz.d", 1, 0b000111000>; class SELNEZ_S_MMR6_ENC : POOL32F_SEL_FM_MMR6<"selnez.s", 0, 0b001111000>; class SELNEZ_D_MMR6_ENC : POOL32F_SEL_FM_MMR6<"selnez.d", 1, 0b001111000>; class CLASS_S_MMR6_ENC : POOL32F_CLASS_FM_MMR6<"class.s", 0, 0b001100000>; class CLASS_D_MMR6_ENC : POOL32F_CLASS_FM_MMR6<"class.d", 1, 0b001100000>; class EXT_MMR6_ENC : POOL32A_EXT_INS_FM_MMR6<"ext", 0b101100>; class INS_MMR6_ENC : POOL32A_EXT_INS_FM_MMR6<"ins", 0b001100>; class JALRC_MMR6_ENC : POOL32A_JALRC_FM_MMR6<"jalrc", 0b0000111100>; class BOVC_MMR6_ENC : POP35_BOVC_FM_MMR6<"bovc">; class BNVC_MMR6_ENC : POP37_BNVC_FM_MMR6<"bnvc">; class ADDU16_MMR6_ENC : POOL16A_ADDU16_FM_MMR6; class AND16_MMR6_ENC : POOL16C_AND16_FM_MMR6; class ANDI16_MMR6_ENC : ANDI_FM_MM16<0b001011>; class NOT16_MMR6_ENC : POOL16C_NOT16_FM_MMR6; class OR16_MMR6_ENC : 
POOL16C_OR16_XOR16_FM_MMR6<0b1001>; class SLL16_MMR6_ENC : SHIFT_FM_MM16<0>; class SRL16_MMR6_ENC : SHIFT_FM_MM16<1>; class BREAK16_MMR6_ENC : POOL16C_BREAKPOINT_FM_MMR6<0b011011>; class LI16_MMR6_ENC : LI_FM_MM16; class MOVE16_MMR6_ENC : MOVE_FM_MM16<0b000011>; class MOVEP_MMR6_ENC : POOL16C_MOVEP16_FM_MMR6; class SDBBP16_MMR6_ENC : POOL16C_BREAKPOINT_FM_MMR6<0b111011>; class SUBU16_MMR6_ENC : POOL16A_SUBU16_FM_MMR6; class XOR16_MMR6_ENC : POOL16C_OR16_XOR16_FM_MMR6<0b1000>; class TLBINV_MMR6_ENC : POOL32A_TLBINV_FM_MMR6<"tlbinv", 0x10d>; class TLBINVF_MMR6_ENC : POOL32A_TLBINV_FM_MMR6<"tlbinvf", 0x14d>; class DVP_MMR6_ENC : POOL32A_DVPEVP_FM_MMR6<"dvp", 0b0001100101>; class EVP_MMR6_ENC : POOL32A_DVPEVP_FM_MMR6<"evp", 0b0011100101>; class BC1EQZC_MMR6_ENC : POOL32I_BRANCH_COP_1_2_FM_MMR6<"bc1eqzc", 0b01000>; class BC1NEZC_MMR6_ENC : POOL32I_BRANCH_COP_1_2_FM_MMR6<"bc1nezc", 0b01001>; class BC2EQZC_MMR6_ENC : POOL32I_BRANCH_COP_1_2_FM_MMR6<"bc2eqzc", 0b01010>; class BC2NEZC_MMR6_ENC : POOL32I_BRANCH_COP_1_2_FM_MMR6<"bc2nezc", 0b01011>; class LDC1_MMR6_ENC : LDWC1_SDWC1_FM_MMR6<"ldc1", 0b101111>; class SDC1_MMR6_ENC : LDWC1_SDWC1_FM_MMR6<"sdc1", 0b101110>; class LDC2_MMR6_ENC : POOL32B_LDWC2_SDWC2_FM_MMR6<"ldc2", 0b0010>; class SDC2_MMR6_ENC : POOL32B_LDWC2_SDWC2_FM_MMR6<"sdc2", 0b1010>; class LWC2_MMR6_ENC : POOL32B_LDWC2_SDWC2_FM_MMR6<"lwc2", 0b0000>; class SWC2_MMR6_ENC : POOL32B_LDWC2_SDWC2_FM_MMR6<"swc2", 0b1000>; class LL_MMR6_ENC : POOL32C_LL_E_SC_E_FM_MMR6<"ll", 0b0011, 0b000>; class SC_MMR6_ENC : POOL32C_LL_E_SC_E_FM_MMR6<"sc", 0b1011, 0b000>; /// Floating Point Instructions class FADD_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"add.s", 0, 0b00110000>; class FSUB_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"sub.s", 0, 0b01110000>; class FMUL_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"mul.s", 0, 0b10110000>; class FDIV_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"div.s", 0, 0b11110000>; class MADDF_S_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"maddf.s", 0, 0b110111000>; class MADDF_D_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"maddf.d", 1, 0b110111000>; class MSUBF_S_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"msubf.s", 0, 0b111111000>; class MSUBF_D_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"msubf.d", 1, 0b111111000>; class FMOV_S_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"mov.s", 0, 0b0000001>; class FNEG_S_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"neg.s", 0, 0b0101101>; class MAX_S_MMR6_ENC : POOL32F_MINMAX_FM<"max.s", 0, 0b000001011>; class MAX_D_MMR6_ENC : POOL32F_MINMAX_FM<"max.d", 1, 0b000001011>; class MAXA_S_MMR6_ENC : POOL32F_MINMAX_FM<"maxa.s", 0, 0b000101011>; class MAXA_D_MMR6_ENC : POOL32F_MINMAX_FM<"maxa.d", 1, 0b000101011>; class MIN_S_MMR6_ENC : POOL32F_MINMAX_FM<"min.s", 0, 0b000000011>; class MIN_D_MMR6_ENC : POOL32F_MINMAX_FM<"min.d", 1, 0b000000011>; class MINA_S_MMR6_ENC : POOL32F_MINMAX_FM<"mina.s", 0, 0b000100011>; class MINA_D_MMR6_ENC : POOL32F_MINMAX_FM<"mina.d", 1, 0b000100011>; class CVT_L_S_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.l.s", 0, 0b00000100>; class CVT_L_D_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.l.d", 1, 0b00000100>; class CVT_W_S_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.w.s", 0, 0b00100100>; class CVT_D_L_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.d.l", 2, 0b1001101>; class CVT_S_W_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.w", 1, 0b1101101>; class CVT_S_L_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.l", 2, 0b1101101>; //===----------------------------------------------------------------------===// // // Instruction Descriptions // //===----------------------------------------------------------------------===// class CMP_CBR_RT_Z_MMR6_DESC_BASE : BRANCH_DESC_BASE { 
dag InOperandList = (ins GPROpnd:$rt, opnd:$offset); dag OutOperandList = (outs); string AsmString = !strconcat(instr_asm, "\t$rt, $offset"); list Defs = [AT]; InstrItinClass Itinerary = II_BCCZC; } class BEQZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"beqzalc", brtarget_mm, GPR32Opnd> { list Defs = [RA]; } class BGEZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bgezalc", brtarget_mm, GPR32Opnd> { list Defs = [RA]; } class BGTZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bgtzalc", brtarget_mm, GPR32Opnd> { list Defs = [RA]; } class BLEZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"blezalc", brtarget_mm, GPR32Opnd> { list Defs = [RA]; } class BLTZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bltzalc", brtarget_mm, GPR32Opnd> { list Defs = [RA]; } class BNEZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bnezalc", brtarget_mm, GPR32Opnd> { list Defs = [RA]; } class BLTZC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bltzc", brtarget_lsl2_mm, GPR32Opnd>; class BLEZC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"blezc", brtarget_lsl2_mm, GPR32Opnd>; class BGEZC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bgezc", brtarget_lsl2_mm, GPR32Opnd>; class BGTZC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bgtzc", brtarget_lsl2_mm, GPR32Opnd>; class CMP_CBR_2R_MMR6_DESC_BASE : BRANCH_DESC_BASE { dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, opnd:$offset); dag OutOperandList = (outs); string AsmString = !strconcat(instr_asm, "\t$rs, $rt, $offset"); list Defs = [AT]; InstrItinClass Itinerary = II_BCCC; } class BGEC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bgec", brtarget_lsl2_mm, GPR32Opnd>; class BGEUC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bgeuc", brtarget_lsl2_mm, GPR32Opnd>; class BLTC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bltc", brtarget_lsl2_mm, GPR32Opnd>; class BLTUC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bltuc", brtarget_lsl2_mm, GPR32Opnd>; class BEQC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"beqc", brtarget_lsl2_mm, GPR32Opnd>; class BNEC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bnec", brtarget_lsl2_mm, GPR32Opnd>; class ADD_MMR6_DESC : ArithLogicR<"add", GPR32Opnd, 1, II_ADD>; class ADDIU_MMR6_DESC : ArithLogicI<"addiu", simm16, GPR32Opnd, II_ADDIU, immSExt16, add>; class ADDU_MMR6_DESC : ArithLogicR<"addu", GPR32Opnd, 1, II_ADDU>; class MUL_MMR6_DESC : ArithLogicR<"mul", GPR32Opnd, 1, II_MUL, mul>; class MUH_MMR6_DESC : ArithLogicR<"muh", GPR32Opnd, 1, II_MUH, mulhs>; class MULU_MMR6_DESC : ArithLogicR<"mulu", GPR32Opnd, 1, II_MULU>; class MUHU_MMR6_DESC : ArithLogicR<"muhu", GPR32Opnd, 1, II_MUHU, mulhu>; class BC_MMR6_DESC_BASE : BRANCH_DESC_BASE, MMR6Arch { dag InOperandList = (ins opnd:$offset); dag OutOperandList = (outs); string AsmString = !strconcat(instr_asm, "\t$offset"); bit isBarrier = 1; InstrItinClass Itinerary = Itin; } class BALC_MMR6_DESC : BC_MMR6_DESC_BASE<"balc", brtarget26_mm, II_BALC> { bit isCall = 1; list Defs = [RA]; } class BC_MMR6_DESC : BC_MMR6_DESC_BASE<"bc", brtarget26_mm, II_BC> { list Pattern = [(br bb:$offset)]; } class BC16_MMR6_DESC : MicroMipsInst16<(outs), (ins brtarget10_mm:$offset), !strconcat("bc16", "\t$offset"), [], II_BC, FrmI>, MMR6Arch<"bc16"> { let isBranch = 1; let isTerminator = 1; let isBarrier = 1; let hasDelaySlot = 0; let AdditionalPredicates = [RelocPIC]; let Defs = [AT]; } class BEQZC_BNEZC_MM16R6_DESC_BASE : CBranchZeroMM, MMR6Arch { let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 0; let Defs = [AT]; } class BEQZC16_MMR6_DESC : BEQZC_BNEZC_MM16R6_DESC_BASE<"beqzc16">; class BNEZC16_MMR6_DESC : BEQZC_BNEZC_MM16R6_DESC_BASE<"bnezc16">; class 
SUB_MMR6_DESC : ArithLogicR<"sub", GPR32Opnd, 0, II_SUB>; class SUBU_MMR6_DESC : ArithLogicR<"subu", GPR32Opnd, 0,II_SUBU>; class BITSWAP_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rd); dag InOperandList = (ins GPROpnd:$rt); string AsmString = !strconcat(instr_asm, "\t$rd, $rt"); list Pattern = []; InstrItinClass Itinerary = II_BITSWAP; } class BITSWAP_MMR6_DESC : BITSWAP_MMR6_DESC_BASE<"bitswap", GPR32Opnd>; class BRK_MMR6_DESC : BRK_FT<"break">; class CACHE_HINT_MMR6_DESC : MMR6Arch { dag OutOperandList = (outs); dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint); string AsmString = !strconcat(instr_asm, "\t$hint, $addr"); list Pattern = []; string DecoderMethod = "DecodeCacheOpMM"; InstrItinClass Itinerary = Itin; } class CACHE_MMR6_DESC : CACHE_HINT_MMR6_DESC<"cache", mem_mm_12, GPR32Opnd, II_CACHE>; class PREF_MMR6_DESC : CACHE_HINT_MMR6_DESC<"pref", mem_mm_12, GPR32Opnd, II_PREF>; class LB_LBU_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rt); dag InOperandList = (ins MemOpnd:$addr); string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); string DecoderMethod = "DecodeLoadByte15"; bit mayLoad = 1; InstrItinClass Itinerary = Itin; } class LB_MMR6_DESC : LB_LBU_MMR6_DESC_BASE<"lb", mem_mm_16, GPR32Opnd, II_LB>; class LBU_MMR6_DESC : LB_LBU_MMR6_DESC_BASE<"lbu", mem_mm_16, GPR32Opnd, II_LBU>; class CLO_CLZ_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rt); dag InOperandList = (ins GPROpnd:$rs); string AsmString = !strconcat(instr_asm, "\t$rt, $rs"); InstrItinClass Itinerary = Itin; } class CLO_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clo", GPR32Opnd, II_CLO>; class CLZ_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clz", GPR32Opnd, II_CLZ>; class EHB_MMR6_DESC : Barrier<"ehb", II_EHB>; class EI_MMR6_DESC : DEI_FT<"ei", GPR32Opnd, II_EI>; class DI_MMR6_DESC : DEI_FT<"di", GPR32Opnd, II_DI>; class ERET_MMR6_DESC : ER_FT<"eret", II_ERET>; class DERET_MMR6_DESC : ER_FT<"deret", II_DERET>; class ERETNC_MMR6_DESC : ER_FT<"eretnc", II_ERETNC>; class JALRC16_MMR6_DESC_BASE : MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), [(MipsJmpLink RO:$rs)], II_JALR, FrmR>, MMR6Arch { let isCall = 1; let hasDelaySlot = 0; let Defs = [RA]; let hasPostISelHook = 1; } class JALRC16_MMR6_DESC : JALRC16_MMR6_DESC_BASE<"jalr", GPR32Opnd>; class JMP_MMR6_IDX_COMPACT_DESC_BASE : MMR6Arch { dag InOperandList = (ins GPROpnd:$rt, opnd:$offset); string AsmString = !strconcat(opstr, "\t$rt, $offset"); list Pattern = []; bit isTerminator = 1; bit hasDelaySlot = 0; InstrItinClass Itinerary = Itin; } class JIALC_MMR6_DESC : JMP_MMR6_IDX_COMPACT_DESC_BASE<"jialc", calloffset16, GPR32Opnd, II_JIALC> { bit isCall = 1; list Defs = [RA]; } class JIC_MMR6_DESC : JMP_MMR6_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16, GPR32Opnd, II_JIC> { bit isBarrier = 1; list Defs = [AT]; } class JRC16_MMR6_DESC_BASE : MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), [], II_JR, FrmR>, MMR6Arch { let hasDelaySlot = 0; let isBranch = 1; let isIndirectBranch = 1; } class JRC16_MMR6_DESC : JRC16_MMR6_DESC_BASE<"jrc16", GPR32Opnd>; class JRCADDIUSP_MMR6_DESC : MicroMipsInst16<(outs), (ins uimm5_lsl2:$imm), "jrcaddiusp\t$imm", [], II_JRADDIUSP, FrmR>, MMR6Arch<"jrcaddiusp"> { let hasDelaySlot = 0; let isTerminator = 1; let isBarrier = 1; let isBranch = 1; let isIndirectBranch = 1; } class ALIGN_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rd); dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$bp); string AsmString = !strconcat(instr_asm, "\t$rd, 
$rs, $rt, $bp"); list Pattern = []; InstrItinClass Itinerary = Itin; } class ALIGN_MMR6_DESC : ALIGN_MMR6_DESC_BASE<"align", GPR32Opnd, uimm2, II_ALIGN>; class AUI_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rt); dag InOperandList = (ins GPROpnd:$rs, uimm16:$imm); string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $imm"); list Pattern = []; InstrItinClass Itinerary = Itin; } class AUI_MMR6_DESC : AUI_MMR6_DESC_BASE<"aui", GPR32Opnd, II_AUI>; class ALUIPC_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rt); dag InOperandList = (ins simm16:$imm); string AsmString = !strconcat(instr_asm, "\t$rt, $imm"); list Pattern = []; InstrItinClass Itinerary = Itin; } class ALUIPC_MMR6_DESC : ALUIPC_MMR6_DESC_BASE<"aluipc", GPR32Opnd, II_ALUIPC>; class AUIPC_MMR6_DESC : ALUIPC_MMR6_DESC_BASE<"auipc", GPR32Opnd, II_AUIPC>; class LSA_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rd); dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$imm2); string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $rd, $imm2"); list Pattern = []; InstrItinClass Itinerary = Itin; } class LSA_MMR6_DESC : LSA_MMR6_DESC_BASE<"lsa", GPR32Opnd, uimm2_plus1, II_LSA>; class PCREL_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rt); dag InOperandList = (ins ImmOpnd:$imm); string AsmString = !strconcat(instr_asm, "\t$rt, $imm"); list Pattern = []; InstrItinClass Itinerary = Itin; } class ADDIUPC_MMR6_DESC : PCREL_MMR6_DESC_BASE<"addiupc", GPR32Opnd, simm19_lsl2, II_ADDIUPC>; class LWPC_MMR6_DESC: PCREL_MMR6_DESC_BASE<"lwpc", GPR32Opnd, simm19_lsl2, II_LWPC>; class SELEQNE_Z_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rd); dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt); string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); list Pattern = []; InstrItinClass Itinerary = Itin; } class SELEQZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"seleqz", GPR32Opnd, II_SELCCZ>; class SELNEZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"selnez", GPR32Opnd, II_SELCCZ>; class PAUSE_MMR6_DESC : Barrier<"pause", II_PAUSE>; class RDHWR_MMR6_DESC : MMR6Arch<"rdhwr">, MipsR6Inst { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins HWRegsOpnd:$rs, uimm3:$sel); string AsmString = !strconcat("rdhwr", "\t$rt, $rs, $sel"); list Pattern = []; InstrItinClass Itinerary = II_RDHWR; Format Form = FrmR; } class WAIT_MMR6_DESC : WaitMM<"wait">; // FIXME: ssnop should not be defined for R6. Per MD000582 microMIPS32 6.03: // Assemblers targeting specifically Release 6 should reject the SSNOP // instruction with an error. class SSNOP_MMR6_DESC : Barrier<"ssnop", II_SSNOP>; class SLL_MMR6_DESC : shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL>; class DIVMOD_MMR6_DESC_BASE : MipsR6Inst { dag OutOperandList = (outs GPROpnd:$rd); dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt); string AsmString = !strconcat(opstr, "\t$rd, $rs, $rt"); list Pattern = [(set GPROpnd:$rd, (OpNode GPROpnd:$rs, GPROpnd:$rt))]; string BaseOpcode = opstr; Format f = FrmR; let isCommutable = 0; let isReMaterializable = 1; InstrItinClass Itinerary = Itin; // This instruction doesn't trap division by zero itself. We must insert // teq instructions as well. 
bit usesCustomInserter = 1; } class DIV_MMR6_DESC : DIVMOD_MMR6_DESC_BASE<"div", GPR32Opnd, II_DIV, sdiv>; class DIVU_MMR6_DESC : DIVMOD_MMR6_DESC_BASE<"divu", GPR32Opnd, II_DIVU, udiv>; class MOD_MMR6_DESC : DIVMOD_MMR6_DESC_BASE<"mod", GPR32Opnd, II_MOD, srem>; class MODU_MMR6_DESC : DIVMOD_MMR6_DESC_BASE<"modu", GPR32Opnd, II_MODU, urem>; class AND_MMR6_DESC : ArithLogicR<"and", GPR32Opnd, 1, II_AND, and>; class ANDI_MMR6_DESC : ArithLogicI<"andi", uimm16, GPR32Opnd, II_ANDI>; class NOR_MMR6_DESC : LogicNOR<"nor", GPR32Opnd>; class OR_MMR6_DESC : ArithLogicR<"or", GPR32Opnd, 1, II_OR, or>; class ORI_MMR6_DESC : ArithLogicI<"ori", uimm16, GPR32Opnd, II_ORI, immZExt16, or> { int AddedComplexity = 1; } class XOR_MMR6_DESC : ArithLogicR<"xor", GPR32Opnd, 1, II_XOR, xor>; class XORI_MMR6_DESC : ArithLogicI<"xori", uimm16, GPR32Opnd, II_XORI, immZExt16, xor>; class SW_MMR6_DESC : Store<"sw", GPR32Opnd> { InstrItinClass Itinerary = II_SW; } class WRPGPR_WSBH_MMR6_DESC_BASE { dag InOperandList = (ins RO:$rs); dag OutOperandList = (outs RO:$rt); string AsmString = !strconcat(instr_asm, "\t$rt, $rs"); list Pattern = []; Format f = FrmR; string BaseOpcode = instr_asm; bit hasSideEffects = 0; InstrItinClass Itinerary = Itin; } class WRPGPR_MMR6_DESC : WRPGPR_WSBH_MMR6_DESC_BASE<"wrpgpr", GPR32Opnd, II_WRPGPR>; class WSBH_MMR6_DESC : WRPGPR_WSBH_MMR6_DESC_BASE<"wsbh", GPR32Opnd, II_WSBH>; class MTC0_MMR6_DESC_BASE { dag InOperandList = (ins SrcRC:$rt, uimm3:$sel); dag OutOperandList = (outs DstRC:$rs); string AsmString = !strconcat(opstr, "\t$rt, $rs, $sel"); list Pattern = []; Format f = FrmFR; string BaseOpcode = opstr; InstrItinClass Itinerary = Itin; } class MTC1_MMR6_DESC_BASE< string opstr, RegisterOperand DstRC, RegisterOperand SrcRC, InstrItinClass Itin = NoItinerary, SDPatternOperator OpNode = null_frag> : MipsR6Inst { dag InOperandList = (ins SrcRC:$rt); dag OutOperandList = (outs DstRC:$fs); string AsmString = !strconcat(opstr, "\t$rt, $fs"); list Pattern = [(set DstRC:$fs, (OpNode SrcRC:$rt))]; Format f = FrmFR; InstrItinClass Itinerary = Itin; string BaseOpcode = opstr; } class MTC1_64_MMR6_DESC_BASE< string opstr, RegisterOperand DstRC, RegisterOperand SrcRC, InstrItinClass Itin = NoItinerary> : MipsR6Inst { dag InOperandList = (ins DstRC:$fs_in, SrcRC:$rt); dag OutOperandList = (outs DstRC:$fs); string AsmString = !strconcat(opstr, "\t$rt, $fs"); list Pattern = []; Format f = FrmFR; InstrItinClass Itinerary = Itin; string BaseOpcode = opstr; // $fs_in is part of a white lie to work around a widespread bug in the FPU // implementation. See expandBuildPairF64 for details. 
let Constraints = "$fs = $fs_in"; } class MTC2_MMR6_DESC_BASE { dag InOperandList = (ins SrcRC:$rt); dag OutOperandList = (outs DstRC:$impl); string AsmString = !strconcat(opstr, "\t$rt, $impl"); list Pattern = []; Format f = FrmFR; string BaseOpcode = opstr; InstrItinClass Itinerary = Itin; } class MTC0_MMR6_DESC : MTC0_MMR6_DESC_BASE<"mtc0", COP0Opnd, GPR32Opnd, II_MTC0>; class MTC1_MMR6_DESC : MTC1_MMR6_DESC_BASE<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1, bitconvert>, HARDFLOAT; class MTC2_MMR6_DESC : MTC2_MMR6_DESC_BASE<"mtc2", COP2Opnd, GPR32Opnd, II_MTC2>; class MTHC0_MMR6_DESC : MTC0_MMR6_DESC_BASE<"mthc0", COP0Opnd, GPR32Opnd, II_MTHC0>; class MTHC2_MMR6_DESC : MTC2_MMR6_DESC_BASE<"mthc2", COP2Opnd, GPR32Opnd, II_MTC2>; class MFC0_MMR6_DESC_BASE { dag InOperandList = (ins SrcRC:$rs, uimm3:$sel); dag OutOperandList = (outs DstRC:$rt); string AsmString = !strconcat(opstr, "\t$rt, $rs, $sel"); list Pattern = []; Format f = FrmFR; string BaseOpcode = opstr; InstrItinClass Itinerary = Itin; } class MFC1_MMR6_DESC_BASE : MipsR6Inst { dag InOperandList = (ins SrcRC:$fs); dag OutOperandList = (outs DstRC:$rt); string AsmString = !strconcat(opstr, "\t$rt, $fs"); list Pattern = [(set DstRC:$rt, (OpNode SrcRC:$fs))]; Format f = FrmFR; InstrItinClass Itinerary = Itin; string BaseOpcode = opstr; } class MFC2_MMR6_DESC_BASE { dag InOperandList = (ins SrcRC:$impl); dag OutOperandList = (outs DstRC:$rt); string AsmString = !strconcat(opstr, "\t$rt, $impl"); list Pattern = []; Format f = FrmFR; string BaseOpcode = opstr; InstrItinClass Itinerary = Itin; } class MFC0_MMR6_DESC : MFC0_MMR6_DESC_BASE<"mfc0", GPR32Opnd, COP0Opnd, II_MFC0>; class MFC1_MMR6_DESC : MFC1_MMR6_DESC_BASE<"mfc1", GPR32Opnd, FGR32Opnd, II_MFC1, bitconvert>, HARDFLOAT; class MFC2_MMR6_DESC : MFC2_MMR6_DESC_BASE<"mfc2", GPR32Opnd, COP2Opnd, II_MFC2>; class MFHC0_MMR6_DESC : MFC0_MMR6_DESC_BASE<"mfhc0", GPR32Opnd, COP0Opnd, II_MFHC0>; class MFHC2_MMR6_DESC : MFC2_MMR6_DESC_BASE<"mfhc2", GPR32Opnd, COP2Opnd, II_MFC2>; class LDC1_D64_MMR6_DESC : MipsR6Inst, HARDFLOAT, FGR_64 { dag InOperandList = (ins mem_mm_16:$addr); dag OutOperandList = (outs FGR64Opnd:$ft); string AsmString = !strconcat("ldc1", "\t$ft, $addr"); list Pattern = [(set FGR64Opnd:$ft, (load addrimm16:$addr))]; Format f = FrmFI; InstrItinClass Itinerary = II_LDC1; string BaseOpcode = "ldc1"; bit mayLoad = 1; let DecoderMethod = "DecodeFMemMMR2"; } class SDC1_D64_MMR6_DESC : MipsR6Inst, HARDFLOAT, FGR_64 { dag InOperandList = (ins FGR64Opnd:$ft, mem_mm_16:$addr); dag OutOperandList = (outs); string AsmString = !strconcat("sdc1", "\t$ft, $addr"); list Pattern = [(store FGR64Opnd:$ft, addrimm16:$addr)]; Format f = FrmFI; InstrItinClass Itinerary = II_SDC1; string BaseOpcode = "sdc1"; bit mayStore = 1; let DecoderMethod = "DecodeFMemMMR2"; } class LDC2_LWC2_MMR6_DESC_BASE { dag OutOperandList = (outs COP2Opnd:$rt); dag InOperandList = (ins mem_mm_11:$addr); string AsmString = !strconcat(opstr, "\t$rt, $addr"); list Pattern = [(set COP2Opnd:$rt, (load addrimm11:$addr))]; Format f = FrmFI; InstrItinClass Itinerary = itin; string BaseOpcode = opstr; bit mayLoad = 1; string DecoderMethod = "DecodeFMemCop2MMR6"; } class LDC2_MMR6_DESC : LDC2_LWC2_MMR6_DESC_BASE<"ldc2", II_LDC2>; class LWC2_MMR6_DESC : LDC2_LWC2_MMR6_DESC_BASE<"lwc2", II_LWC2>; class SDC2_SWC2_MMR6_DESC_BASE { dag OutOperandList = (outs); dag InOperandList = (ins COP2Opnd:$rt, mem_mm_11:$addr); string AsmString = !strconcat(opstr, "\t$rt, $addr"); list Pattern = [(store COP2Opnd:$rt, addrimm11:$addr)]; Format f = 
FrmFI; InstrItinClass Itinerary = itin; string BaseOpcode = opstr; bit mayStore = 1; string DecoderMethod = "DecodeFMemCop2MMR6"; } class SDC2_MMR6_DESC : SDC2_SWC2_MMR6_DESC_BASE<"sdc2", II_SDC2>; class SWC2_MMR6_DESC : SDC2_SWC2_MMR6_DESC_BASE<"swc2", II_SWC2>; class GINV_MMR6_DESC_BASE { dag InOperandList = (ins SrcRC:$rs, uimm2:$type); dag OutOperandList = (outs); string AsmString = !strconcat(opstr, "\t$rs, $type"); list Pattern = []; Format f = FrmFR; string BaseOpcode = opstr; InstrItinClass Itinerary = Itin; } class GINVI_MMR6_DESC : GINV_MMR6_DESC_BASE<"ginvi", GPR32Opnd, II_GINVI> { dag InOperandList = (ins GPR32Opnd:$rs); string AsmString = "ginvi\t$rs"; } class GINVT_MMR6_DESC : GINV_MMR6_DESC_BASE<"ginvt", GPR32Opnd, II_GINVT>; class SC_MMR6_DESC_BASE { dag OutOperandList = (outs GPR32Opnd:$dst); dag InOperandList = (ins GPR32Opnd:$rt, mem_mm_9:$addr); string AsmString = !strconcat(opstr, "\t$rt, $addr"); InstrItinClass Itinerary = itin; string BaseOpcode = opstr; bit mayStore = 1; string Constraints = "$rt = $dst"; string DecoderMethod = "DecodeMemMMImm9"; } class LL_MMR6_DESC_BASE { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins mem_mm_9:$addr); string AsmString = !strconcat(opstr, "\t$rt, $addr"); InstrItinClass Itinerary = itin; string BaseOpcode = opstr; bit mayLoad = 1; string DecoderMethod = "DecodeMemMMImm9"; } class SC_MMR6_DESC : SC_MMR6_DESC_BASE<"sc", II_SC>; class LL_MMR6_DESC : LL_MMR6_DESC_BASE<"ll", II_LL>; /// Floating Point Instructions class FARITH_MMR6_DESC_BASE : HARDFLOAT { dag OutOperandList = (outs RC:$fd); dag InOperandList = (ins RC:$ft, RC:$fs); string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft"); list Pattern = [(set RC:$fd, (OpNode RC:$fs, RC:$ft))]; InstrItinClass Itinerary = Itin; bit isCommutable = isComm; } class FADD_S_MMR6_DESC : FARITH_MMR6_DESC_BASE<"add.s", FGR32Opnd, II_ADD_S, 1, fadd>; class FSUB_S_MMR6_DESC : FARITH_MMR6_DESC_BASE<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>; class FMUL_S_MMR6_DESC : FARITH_MMR6_DESC_BASE<"mul.s", FGR32Opnd, II_MUL_S, 1, fmul>; class FDIV_S_MMR6_DESC : FARITH_MMR6_DESC_BASE<"div.s", FGR32Opnd, II_DIV_S, 0, fdiv>; class MADDF_S_MMR6_DESC : COP1_4R_DESC_BASE<"maddf.s", FGR32Opnd, II_MADDF_S>, HARDFLOAT; class MADDF_D_MMR6_DESC : COP1_4R_DESC_BASE<"maddf.d", FGR64Opnd, II_MADDF_D>, HARDFLOAT; class MSUBF_S_MMR6_DESC : COP1_4R_DESC_BASE<"msubf.s", FGR32Opnd, II_MSUBF_S>, HARDFLOAT; class MSUBF_D_MMR6_DESC : COP1_4R_DESC_BASE<"msubf.d", FGR64Opnd, II_MSUBF_D>, HARDFLOAT; class FMOV_FNEG_MMR6_DESC_BASE : HARDFLOAT, NeverHasSideEffects { dag OutOperandList = (outs DstRC:$ft); dag InOperandList = (ins SrcRC:$fs); string AsmString = !strconcat(instr_asm, "\t$ft, $fs"); list Pattern = [(set DstRC:$ft, (OpNode SrcRC:$fs))]; InstrItinClass Itinerary = Itin; Format Form = FrmFR; } class FMOV_S_MMR6_DESC : FMOV_FNEG_MMR6_DESC_BASE<"mov.s", FGR32Opnd, FGR32Opnd, II_MOV_S>; class FNEG_S_MMR6_DESC : FMOV_FNEG_MMR6_DESC_BASE<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>; class MAX_S_MMR6_DESC : MAX_MIN_DESC_BASE<"max.s", FGR32Opnd, II_MAX_S>, HARDFLOAT; class MAX_D_MMR6_DESC : MAX_MIN_DESC_BASE<"max.d", FGR64Opnd, II_MAX_D>, HARDFLOAT; class MIN_S_MMR6_DESC : MAX_MIN_DESC_BASE<"min.s", FGR32Opnd, II_MIN_S>, HARDFLOAT; class MIN_D_MMR6_DESC : MAX_MIN_DESC_BASE<"min.d", FGR64Opnd, II_MIN_D>, HARDFLOAT; class MAXA_S_MMR6_DESC : MAX_MIN_DESC_BASE<"maxa.s", FGR32Opnd, II_MAXA_S>, HARDFLOAT; class MAXA_D_MMR6_DESC : MAX_MIN_DESC_BASE<"maxa.d", FGR64Opnd, II_MAXA_D>, HARDFLOAT; class MINA_S_MMR6_DESC : 
MAX_MIN_DESC_BASE<"mina.s", FGR32Opnd, II_MINA_S>, HARDFLOAT; class MINA_D_MMR6_DESC : MAX_MIN_DESC_BASE<"mina.d", FGR64Opnd, II_MINA_D>, HARDFLOAT; class CVT_MMR6_DESC_BASE< string instr_asm, RegisterOperand DstRC, RegisterOperand SrcRC, InstrItinClass Itin, SDPatternOperator OpNode = null_frag> : HARDFLOAT, NeverHasSideEffects { dag OutOperandList = (outs DstRC:$ft); dag InOperandList = (ins SrcRC:$fs); string AsmString = !strconcat(instr_asm, "\t$ft, $fs"); list Pattern = [(set DstRC:$ft, (OpNode SrcRC:$fs))]; InstrItinClass Itinerary = Itin; Format Form = FrmFR; } class CVT_L_S_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.l.s", FGR64Opnd, FGR32Opnd, II_CVT>; class CVT_L_D_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.l.d", FGR64Opnd, FGR64Opnd, II_CVT>; class CVT_W_S_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.w.s", FGR32Opnd, FGR32Opnd, II_CVT>; class CVT_D_L_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.d.l", FGR64Opnd, FGR64Opnd, II_CVT>, FGR_64; class CVT_S_W_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.s.w", FGR32Opnd, FGR32Opnd, II_CVT>; class CVT_S_L_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.s.l", FGR64Opnd, FGR32Opnd, II_CVT>, FGR_64; multiclass CMP_CC_MMR6 format, string Typestr, RegisterOperand FGROpnd, InstrItinClass Itin> { def CMP_AF_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.af.", Typestr), format, FIELD_CMP_COND_AF>, CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd, Itin>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_UN_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.un.", Typestr), format, FIELD_CMP_COND_UN>, CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd, Itin, setuo>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_EQ_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.eq.", Typestr), format, FIELD_CMP_COND_EQ>, CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd, Itin, setoeq>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_UEQ_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.ueq.", Typestr), format, FIELD_CMP_COND_UEQ>, CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd, Itin, setueq>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_LT_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.lt.", Typestr), format, FIELD_CMP_COND_LT>, CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd, Itin, setolt>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_ULT_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.ult.", Typestr), format, FIELD_CMP_COND_ULT>, CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd, Itin, setult>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_LE_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.le.", Typestr), format, FIELD_CMP_COND_LE>, CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd, Itin, setole>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_ULE_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.ule.", Typestr), format, FIELD_CMP_COND_ULE>, CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd, Itin, setule>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_SAF_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.saf.", Typestr), format, FIELD_CMP_COND_SAF>, CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd, Itin>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_SUN_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.sun.", Typestr), format, FIELD_CMP_COND_SUN>, CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd, Itin>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_SEQ_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.seq.", Typestr), format, FIELD_CMP_COND_SEQ>, CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd, Itin>, HARDFLOAT, ISA_MICROMIPS32R6; def CMP_SUEQ_#NAME : R6MMR6Rel, POOL32F_CMP_FM< !strconcat("cmp.sueq.", Typestr), format, FIELD_CMP_COND_SUEQ>, CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd, Itin>, HARDFLOAT, ISA_MICROMIPS32R6; def 
CMP_SLT_#NAME : R6MMR6Rel, POOL32F_CMP_FM<
      !strconcat("cmp.slt.", Typestr), format, FIELD_CMP_COND_SLT>,
      CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd, Itin>, HARDFLOAT,
      ISA_MICROMIPS32R6;
  def CMP_SULT_#NAME : R6MMR6Rel, POOL32F_CMP_FM<
      !strconcat("cmp.sult.", Typestr), format, FIELD_CMP_COND_SULT>,
      CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd, Itin>, HARDFLOAT,
      ISA_MICROMIPS32R6;
  def CMP_SLE_#NAME : R6MMR6Rel, POOL32F_CMP_FM<
      !strconcat("cmp.sle.", Typestr), format, FIELD_CMP_COND_SLE>,
      CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd, Itin>, HARDFLOAT,
      ISA_MICROMIPS32R6;
  def CMP_SULE_#NAME : R6MMR6Rel, POOL32F_CMP_FM<
      !strconcat("cmp.sule.", Typestr), format, FIELD_CMP_COND_SULE>,
      CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd, Itin>, HARDFLOAT,
      ISA_MICROMIPS32R6;
}
class ABSS_FT_MMR6_DESC_BASE : HARDFLOAT, NeverHasSideEffects {
  dag OutOperandList = (outs DstRC:$ft);
  dag InOperandList = (ins SrcRC:$fs);
  string AsmString = !strconcat(instr_asm, "\t$ft, $fs");
  list Pattern = [(set DstRC:$ft, (OpNode SrcRC:$fs))];
  InstrItinClass Itinerary = Itin;
  Format Form = FrmFR;
  list EncodingPredicates = [HasStdEnc];
}
class FLOOR_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.l.s", FGR64Opnd,
                                                   FGR32Opnd, II_FLOOR>;
class FLOOR_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.l.d", FGR64Opnd,
                                                   FGR64Opnd, II_FLOOR>;
class FLOOR_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.w.s", FGR32Opnd,
                                                   FGR32Opnd, II_FLOOR>;
class FLOOR_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.w.d", FGR32Opnd,
                                                   AFGR64Opnd, II_FLOOR>;
class CEIL_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.l.s", FGR64Opnd,
                                                  FGR32Opnd, II_CEIL>;
class CEIL_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.l.d", FGR64Opnd,
                                                  FGR64Opnd, II_CEIL>;
class CEIL_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.w.s", FGR32Opnd,
                                                  FGR32Opnd, II_CEIL>;
class CEIL_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.w.d", FGR32Opnd,
                                                  AFGR64Opnd, II_CEIL>;
class TRUNC_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.l.s", FGR64Opnd,
                                                   FGR32Opnd, II_TRUNC>;
class TRUNC_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.l.d", FGR64Opnd,
                                                   FGR64Opnd, II_TRUNC>;
class TRUNC_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.s", FGR32Opnd,
                                                   FGR32Opnd, II_TRUNC>;
class TRUNC_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.d", FGR32Opnd,
-                                                   AFGR64Opnd, II_TRUNC>;
+                                                   FGR64Opnd, II_TRUNC>;
class SQRT_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.s", FGR32Opnd, FGR32Opnd,
                                                II_SQRT_S, fsqrt>;
class SQRT_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.d", AFGR64Opnd, AFGR64Opnd,
                                                II_SQRT_D, fsqrt>;
class ROUND_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.l.s", FGR64Opnd,
                                                   FGR32Opnd, II_ROUND>;
class ROUND_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.l.d", FGR64Opnd,
                                                   FGR64Opnd, II_ROUND>;
class ROUND_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.w.s", FGR32Opnd,
                                                   FGR32Opnd, II_ROUND>;
class ROUND_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.w.d", FGR64Opnd,
                                                   FGR64Opnd, II_ROUND>;
class SEL_S_MMR6_DESC : COP1_SEL_DESC_BASE<"sel.s", FGR32Opnd, II_SEL_S>;
class SEL_D_MMR6_DESC : COP1_SEL_D_DESC_BASE<"sel.d", FGR64Opnd, II_SEL_D>;
class SELEQZ_S_MMR6_DESC : SELEQNEZ_DESC_BASE<"seleqz.s", FGR32Opnd,
                                              II_SELCCZ_S>;
class SELEQZ_D_MMR6_DESC : SELEQNEZ_DESC_BASE<"seleqz.d", FGR64Opnd,
                                              II_SELCCZ_D>;
class SELNEZ_S_MMR6_DESC : SELEQNEZ_DESC_BASE<"selnez.s", FGR32Opnd,
                                              II_SELCCZ_S>;
class SELNEZ_D_MMR6_DESC : SELEQNEZ_DESC_BASE<"selnez.d", FGR64Opnd,
                                              II_SELCCZ_D>;
class RINT_S_MMR6_DESC : CLASS_RINT_DESC_BASE<"rint.s", FGR32Opnd, II_RINT_S>;
class RINT_D_MMR6_DESC : CLASS_RINT_DESC_BASE<"rint.d", FGR64Opnd, II_RINT_S>;
class CLASS_S_MMR6_DESC :
CLASS_RINT_DESC_BASE<"class.s", FGR32Opnd, II_CLASS_S>; class CLASS_D_MMR6_DESC : CLASS_RINT_DESC_BASE<"class.d", FGR64Opnd, II_CLASS_S>; class STORE_MMR6_DESC_BASE : Store, MMR6Arch { let DecoderMethod = "DecodeMemMMImm16"; InstrItinClass Itinerary = Itin; } class SB_MMR6_DESC : STORE_MMR6_DESC_BASE<"sb", GPR32Opnd, II_SB>; class SH_MMR6_DESC : STORE_MMR6_DESC_BASE<"sh", GPR32Opnd, II_SH>; class ADDU16_MMR6_DESC : ArithRMM16<"addu16", GPRMM16Opnd, 1, II_ADDU, add>, MMR6Arch<"addu16"> { int AddedComplexity = 1; } class AND16_MMR6_DESC : LogicRMM16<"and16", GPRMM16Opnd, II_AND>, MMR6Arch<"and16">; class ANDI16_MMR6_DESC : AndImmMM16<"andi16", GPRMM16Opnd, II_AND>, MMR6Arch<"andi16">; class NOT16_MMR6_DESC : NotMM16<"not16", GPRMM16Opnd>, MMR6Arch<"not16"> { int AddedComplexity = 1; } class OR16_MMR6_DESC : LogicRMM16<"or16", GPRMM16Opnd, II_OR>, MMR6Arch<"or16">; class SLL16_MMR6_DESC : ShiftIMM16<"sll16", uimm3_shift, GPRMM16Opnd, II_SLL>, MMR6Arch<"sll16">; class SRL16_MMR6_DESC : ShiftIMM16<"srl16", uimm3_shift, GPRMM16Opnd, II_SRL>, MMR6Arch<"srl16">; class BREAK16_MMR6_DESC : BrkSdbbp16MM<"break16", II_BREAK>, MMR6Arch<"break16">; class LI16_MMR6_DESC : LoadImmMM16<"li16", li16_imm, GPRMM16Opnd>, MMR6Arch<"li16">, IsAsCheapAsAMove; class MOVE16_MMR6_DESC : MoveMM16<"move16", GPR32Opnd>, MMR6Arch<"move16">; class MOVEP_MMR6_DESC : MovePMM16<"movep", GPRMM16OpndMovePPairFirst, GPRMM16OpndMovePPairSecond, GPRMM16OpndMoveP>, MMR6Arch<"movep">; class SDBBP16_MMR6_DESC : BrkSdbbp16MM<"sdbbp16", II_SDBBP>, MMR6Arch<"sdbbp16">; class SUBU16_MMR6_DESC : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>, MMR6Arch<"subu16"> { int AddedComplexity = 1; } class XOR16_MMR6_DESC : LogicRMM16<"xor16", GPRMM16Opnd, II_XOR>, MMR6Arch<"xor16">; class LW_MMR6_DESC : MMR6Arch<"lw">, MipsR6Inst { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins mem:$addr); string AsmString = "lw\t$rt, $addr"; let DecoderMethod = "DecodeMemMMImm16"; let canFoldAsLoad = 1; let mayLoad = 1; list Pattern = [(set GPR32Opnd:$rt, (load addrDefault:$addr))]; InstrItinClass Itinerary = II_LW; } class LUI_MMR6_DESC : IsAsCheapAsAMove, MMR6Arch<"lui">, MipsR6Inst{ dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins uimm16:$imm16); string AsmString = "lui\t$rt, $imm16"; list Pattern = []; bit hasSideEffects = 0; bit isReMaterializable = 1; InstrItinClass Itinerary = II_LUI; Format Form = FrmI; } class SYNC_MMR6_DESC : MMR6Arch<"sync">, MipsR6Inst { dag OutOperandList = (outs); dag InOperandList = (ins uimm5:$stype); string AsmString = !strconcat("sync", "\t$stype"); list Pattern = [(MipsSync immZExt5:$stype)]; InstrItinClass Itinerary = II_SYNC; bit HasSideEffects = 1; } class SYNCI_MMR6_DESC : SYNCI_FT<"synci", mem_mm_16> { let DecoderMethod = "DecodeSynciR6"; } class RDPGPR_MMR6_DESC : MMR6Arch<"rdpgpr">, MipsR6Inst { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins GPR32Opnd:$rd); string AsmString = !strconcat("rdpgpr", "\t$rt, $rd"); InstrItinClass Itinerary = II_RDPGPR; } class SDBBP_MMR6_DESC : MipsR6Inst { dag OutOperandList = (outs); dag InOperandList = (ins uimm20:$code_); string AsmString = !strconcat("sdbbp", "\t$code_"); list Pattern = []; InstrItinClass Itinerary = II_SDBBP; } class SIGRIE_MMR6_DESC : MipsR6Inst { dag OutOperandList = (outs); dag InOperandList = (ins uimm16:$code_); string AsmString = !strconcat("sigrie", "\t$code_"); list Pattern = []; InstrItinClass Itinerary = II_SIGRIE; } class LWM16_MMR6_DESC : MicroMipsInst16<(outs reglist16:$rt), (ins 
mem_mm_4sp:$addr), !strconcat("lwm16", "\t$rt, $addr"), [], II_LWM, FrmI>, MMR6Arch<"lwm16"> { let DecoderMethod = "DecodeMemMMReglistImm4Lsl2"; let mayLoad = 1; ComplexPattern Addr = addr; } class SWM16_MMR6_DESC : MicroMipsInst16<(outs), (ins reglist16:$rt, mem_mm_4sp:$addr), !strconcat("swm16", "\t$rt, $addr"), [], II_SWM, FrmI>, MMR6Arch<"swm16"> { let DecoderMethod = "DecodeMemMMReglistImm4Lsl2"; let mayStore = 1; ComplexPattern Addr = addr; } class SB16_MMR6_DESC_BASE : MicroMipsInst16<(outs), (ins RTOpnd:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI>, MMR6Arch { let DecoderMethod = "DecodeMemMMImm4"; let mayStore = 1; } class SB16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sb16", GPRMM16OpndZero, GPRMM16Opnd, truncstorei8, II_SB, mem_mm_4>; class SH16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sh16", GPRMM16OpndZero, GPRMM16Opnd, truncstorei16, II_SH, mem_mm_4_lsl1>; class SW16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sw16", GPRMM16OpndZero, GPRMM16Opnd, store, II_SW, mem_mm_4_lsl2>; class SWSP_MMR6_DESC : MicroMipsInst16<(outs), (ins GPR32Opnd:$rt, mem_mm_sp_imm5_lsl2:$offset), !strconcat("sw", "\t$rt, $offset"), [], II_SW, FrmI>, MMR6Arch<"sw"> { let DecoderMethod = "DecodeMemMMSPImm5Lsl2"; let mayStore = 1; } class JALRC_HB_MMR6_DESC { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins GPR32Opnd:$rs); string AsmString = !strconcat("jalrc.hb", "\t$rt, $rs"); list Pattern = []; InstrItinClass Itinerary = II_JALR_HB; Format Form = FrmJ; bit isIndirectBranch = 1; bit hasDelaySlot = 0; } class TLBINV_MMR6_DESC_BASE { dag OutOperandList = (outs); dag InOperandList = (ins); string AsmString = opstr; list Pattern = []; InstrItinClass Itinerary = Itin; } class TLBINV_MMR6_DESC : TLBINV_MMR6_DESC_BASE<"tlbinv", II_TLBINV>; class TLBINVF_MMR6_DESC : TLBINV_MMR6_DESC_BASE<"tlbinvf", II_TLBINVF>; class DVPEVP_MMR6_DESC_BASE { dag OutOperandList = (outs GPR32Opnd:$rs); dag InOperandList = (ins); string AsmString = !strconcat(opstr, "\t$rs"); list Pattern = []; InstrItinClass Itinerary = Itin; bit hasUnModeledSideEffects = 1; } class DVP_MMR6_DESC : DVPEVP_MMR6_DESC_BASE<"dvp", II_DVP>; class EVP_MMR6_DESC : DVPEVP_MMR6_DESC_BASE<"evp", II_EVP>; class BEQZC_MMR6_DESC : CMP_CBR_EQNE_Z_DESC_BASE<"beqzc", brtarget21_mm, GPR32Opnd>, MMR6Arch<"beqzc">; class BNEZC_MMR6_DESC : CMP_CBR_EQNE_Z_DESC_BASE<"bnezc", brtarget21_mm, GPR32Opnd>, MMR6Arch<"bnezc">; class BRANCH_COP1_MMR6_DESC_BASE : InstSE<(outs), (ins FGR64Opnd:$rt, brtarget_mm:$offset), !strconcat(opstr, "\t$rt, $offset"), [], II_BC1CCZ, FrmI>, HARDFLOAT, BRANCH_DESC_BASE { list Defs = [AT]; } class BC1EQZC_MMR6_DESC : BRANCH_COP1_MMR6_DESC_BASE<"bc1eqzc">; class BC1NEZC_MMR6_DESC : BRANCH_COP1_MMR6_DESC_BASE<"bc1nezc">; class BRANCH_COP2_MMR6_DESC_BASE : BRANCH_DESC_BASE { dag InOperandList = (ins COP2Opnd:$rt, brtarget_mm:$offset); dag OutOperandList = (outs); string AsmString = !strconcat(opstr, "\t$rt, $offset"); list Defs = [AT]; InstrItinClass Itinerary = Itin; } class BC2EQZC_MMR6_DESC : BRANCH_COP2_MMR6_DESC_BASE<"bc2eqzc", II_BC2CCZ>; class BC2NEZC_MMR6_DESC : BRANCH_COP2_MMR6_DESC_BASE<"bc2nezc", II_BC2CCZ>; class EXT_MMR6_DESC { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins GPR32Opnd:$rs, uimm5:$pos, uimm5_plus1:$size); string AsmString = !strconcat("ext", "\t$rt, $rs, $pos, $size"); list Pattern = [(set GPR32Opnd:$rt, (MipsExt GPR32Opnd:$rs, imm:$pos, imm:$size))]; InstrItinClass Itinerary = II_EXT; Format Form = FrmR; string BaseOpcode = "ext"; } class INS_MMR6_DESC { dag OutOperandList = 
(outs GPR32Opnd:$rt); dag InOperandList = (ins GPR32Opnd:$rs, uimm5:$pos, uimm5_inssize_plus1:$size, GPR32Opnd:$src); string AsmString = !strconcat("ins", "\t$rt, $rs, $pos, $size"); list Pattern = [(set GPR32Opnd:$rt, (MipsIns GPR32Opnd:$rs, imm:$pos, imm:$size, GPR32Opnd:$src))]; InstrItinClass Itinerary = II_INS; Format Form = FrmR; string BaseOpcode = "ins"; string Constraints = "$src = $rt"; } class JALRC_MMR6_DESC { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins GPR32Opnd:$rs); string AsmString = !strconcat("jalrc", "\t$rt, $rs"); list Pattern = []; InstrItinClass Itinerary = II_JALRC; bit isCall = 1; bit hasDelaySlot = 0; list Defs = [RA]; } class BOVC_BNVC_MMR6_DESC_BASE : BRANCH_DESC_BASE { dag InOperandList = (ins GPROpnd:$rt, GPROpnd:$rs, opnd:$offset); dag OutOperandList = (outs); string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $offset"); list Defs = [AT]; InstrItinClass Itinerary = II_BCCC; } class BOVC_MMR6_DESC : BOVC_BNVC_MMR6_DESC_BASE<"bovc", brtargetr6, GPR32Opnd>; class BNVC_MMR6_DESC : BOVC_BNVC_MMR6_DESC_BASE<"bnvc", brtargetr6, GPR32Opnd>; //===----------------------------------------------------------------------===// // // Instruction Definitions // //===----------------------------------------------------------------------===// let DecoderNamespace = "MicroMipsR6" in { def ADD_MMR6 : StdMMR6Rel, ADD_MMR6_DESC, ADD_MMR6_ENC, ISA_MICROMIPS32R6; def ADDIU_MMR6 : StdMMR6Rel, ADDIU_MMR6_DESC, ADDIU_MMR6_ENC, ISA_MICROMIPS32R6; def ADDU_MMR6 : StdMMR6Rel, ADDU_MMR6_DESC, ADDU_MMR6_ENC, ISA_MICROMIPS32R6; def ADDIUPC_MMR6 : R6MMR6Rel, ADDIUPC_MMR6_ENC, ADDIUPC_MMR6_DESC, ISA_MICROMIPS32R6; def ALUIPC_MMR6 : R6MMR6Rel, ALUIPC_MMR6_ENC, ALUIPC_MMR6_DESC, ISA_MICROMIPS32R6; def AND_MMR6 : StdMMR6Rel, AND_MMR6_DESC, AND_MMR6_ENC, ISA_MICROMIPS32R6; def ANDI_MMR6 : StdMMR6Rel, ANDI_MMR6_DESC, ANDI_MMR6_ENC, ISA_MICROMIPS32R6; def AUIPC_MMR6 : R6MMR6Rel, AUIPC_MMR6_ENC, AUIPC_MMR6_DESC, ISA_MICROMIPS32R6; def ALIGN_MMR6 : R6MMR6Rel, ALIGN_MMR6_ENC, ALIGN_MMR6_DESC, ISA_MICROMIPS32R6; def AUI_MMR6 : R6MMR6Rel, AUI_MMR6_ENC, AUI_MMR6_DESC, ISA_MICROMIPS32R6; def BALC_MMR6 : R6MMR6Rel, BALC_MMR6_ENC, BALC_MMR6_DESC, ISA_MICROMIPS32R6; def BC_MMR6 : R6MMR6Rel, BC_MMR6_ENC, BC_MMR6_DESC, ISA_MICROMIPS32R6; def BC16_MMR6 : StdMMR6Rel, BC16_MMR6_DESC, BC16_MMR6_ENC, ISA_MICROMIPS32R6; def BEQZC_MMR6 : R6MMR6Rel, BEQZC_MMR6_ENC, BEQZC_MMR6_DESC, ISA_MICROMIPS32R6; def BEQZC16_MMR6 : StdMMR6Rel, BEQZC16_MMR6_DESC, BEQZC16_MMR6_ENC, ISA_MICROMIPS32R6; def BNEZC_MMR6 : R6MMR6Rel, BNEZC_MMR6_ENC, BNEZC_MMR6_DESC, ISA_MICROMIPS32R6; def BNEZC16_MMR6 : StdMMR6Rel, BNEZC16_MMR6_DESC, BNEZC16_MMR6_ENC, ISA_MICROMIPS32R6; def BITSWAP_MMR6 : R6MMR6Rel, BITSWAP_MMR6_ENC, BITSWAP_MMR6_DESC, ISA_MICROMIPS32R6; def BEQZALC_MMR6 : R6MMR6Rel, BEQZALC_MMR6_ENC, BEQZALC_MMR6_DESC, ISA_MICROMIPS32R6; def BNEZALC_MMR6 : R6MMR6Rel, BNEZALC_MMR6_ENC, BNEZALC_MMR6_DESC, ISA_MICROMIPS32R6; def BREAK_MMR6 : StdMMR6Rel, BRK_MMR6_DESC, BRK_MMR6_ENC, ISA_MICROMIPS32R6; def CACHE_MMR6 : R6MMR6Rel, CACHE_MMR6_ENC, CACHE_MMR6_DESC, ISA_MICROMIPS32R6; def CLO_MMR6 : R6MMR6Rel, CLO_MMR6_ENC, CLO_MMR6_DESC, ISA_MICROMIPS32R6; def CLZ_MMR6 : R6MMR6Rel, CLZ_MMR6_ENC, CLZ_MMR6_DESC, ISA_MICROMIPS32R6; def DIV_MMR6 : R6MMR6Rel, DIV_MMR6_DESC, DIV_MMR6_ENC, ISA_MICROMIPS32R6; def DIVU_MMR6 : R6MMR6Rel, DIVU_MMR6_DESC, DIVU_MMR6_ENC, ISA_MICROMIPS32R6; def EHB_MMR6 : StdMMR6Rel, EHB_MMR6_DESC, EHB_MMR6_ENC, ISA_MICROMIPS32R6; def EI_MMR6 : StdMMR6Rel, EI_MMR6_DESC, EI_MMR6_ENC, ISA_MICROMIPS32R6; 
def DI_MMR6 : StdMMR6Rel, DI_MMR6_DESC, DI_MMR6_ENC, ISA_MICROMIPS32R6; def ERET_MMR6 : StdMMR6Rel, ERET_MMR6_DESC, ERET_MMR6_ENC, ISA_MICROMIPS32R6; def DERET_MMR6 : StdMMR6Rel, DERET_MMR6_DESC, DERET_MMR6_ENC, ISA_MICROMIPS32R6; def ERETNC_MMR6 : R6MMR6Rel, ERETNC_MMR6_DESC, ERETNC_MMR6_ENC, ISA_MICROMIPS32R6; def GINVI_MMR6 : R6MMR6Rel, GINVI_MMR6_ENC, GINVI_MMR6_DESC, ISA_MICROMIPS32R6, ASE_GINV; def GINVT_MMR6 : R6MMR6Rel, GINVT_MMR6_ENC, GINVT_MMR6_DESC, ISA_MICROMIPS32R6, ASE_GINV; let FastISelShouldIgnore = 1 in def JALRC16_MMR6 : R6MMR6Rel, JALRC16_MMR6_DESC, JALRC16_MMR6_ENC, ISA_MICROMIPS32R6; def JIALC_MMR6 : R6MMR6Rel, JIALC_MMR6_ENC, JIALC_MMR6_DESC, ISA_MICROMIPS32R6; def JIC_MMR6 : R6MMR6Rel, JIC_MMR6_ENC, JIC_MMR6_DESC, ISA_MICROMIPS32R6; def JRC16_MMR6 : R6MMR6Rel, JRC16_MMR6_DESC, JRC16_MMR6_ENC, ISA_MICROMIPS32R6; def JRCADDIUSP_MMR6 : R6MMR6Rel, JRCADDIUSP_MMR6_DESC, JRCADDIUSP_MMR6_ENC, ISA_MICROMIPS32R6; def LSA_MMR6 : R6MMR6Rel, LSA_MMR6_ENC, LSA_MMR6_DESC, ISA_MICROMIPS32R6; def LWPC_MMR6 : R6MMR6Rel, LWPC_MMR6_ENC, LWPC_MMR6_DESC, ISA_MICROMIPS32R6; def LWM16_MMR6 : StdMMR6Rel, LWM16_MMR6_DESC, LWM16_MMR6_ENC, ISA_MICROMIPS32R6; def MTC0_MMR6 : StdMMR6Rel, MTC0_MMR6_ENC, MTC0_MMR6_DESC, ISA_MICROMIPS32R6; def MTC1_MMR6 : StdMMR6Rel, MTC1_MMR6_DESC, MTC1_MMR6_ENC, ISA_MICROMIPS32R6; def MTC2_MMR6 : StdMMR6Rel, MTC2_MMR6_ENC, MTC2_MMR6_DESC, ISA_MICROMIPS32R6; def MTHC0_MMR6 : R6MMR6Rel, MTHC0_MMR6_ENC, MTHC0_MMR6_DESC, ISA_MICROMIPS32R6; def MTHC2_MMR6 : StdMMR6Rel, MTHC2_MMR6_ENC, MTHC2_MMR6_DESC, ISA_MICROMIPS32R6; def MFC0_MMR6 : StdMMR6Rel, MFC0_MMR6_ENC, MFC0_MMR6_DESC, ISA_MICROMIPS32R6; def MFC1_MMR6 : StdMMR6Rel, MFC1_MMR6_DESC, MFC1_MMR6_ENC, ISA_MICROMIPS32R6; def MFC2_MMR6 : StdMMR6Rel, MFC2_MMR6_ENC, MFC2_MMR6_DESC, ISA_MICROMIPS32R6; def MFHC0_MMR6 : R6MMR6Rel, MFHC0_MMR6_ENC, MFHC0_MMR6_DESC, ISA_MICROMIPS32R6; def MFHC2_MMR6 : StdMMR6Rel, MFHC2_MMR6_ENC, MFHC2_MMR6_DESC, ISA_MICROMIPS32R6; def MOD_MMR6 : R6MMR6Rel, MOD_MMR6_DESC, MOD_MMR6_ENC, ISA_MICROMIPS32R6; def MODU_MMR6 : R6MMR6Rel, MODU_MMR6_DESC, MODU_MMR6_ENC, ISA_MICROMIPS32R6; def MUL_MMR6 : R6MMR6Rel, MUL_MMR6_DESC, MUL_MMR6_ENC, ISA_MICROMIPS32R6; def MUH_MMR6 : R6MMR6Rel, MUH_MMR6_DESC, MUH_MMR6_ENC, ISA_MICROMIPS32R6; def MULU_MMR6 : R6MMR6Rel, MULU_MMR6_DESC, MULU_MMR6_ENC, ISA_MICROMIPS32R6; def MUHU_MMR6 : R6MMR6Rel, MUHU_MMR6_DESC, MUHU_MMR6_ENC, ISA_MICROMIPS32R6; def NOR_MMR6 : StdMMR6Rel, NOR_MMR6_DESC, NOR_MMR6_ENC, ISA_MICROMIPS32R6; def OR_MMR6 : StdMMR6Rel, OR_MMR6_DESC, OR_MMR6_ENC, ISA_MICROMIPS32R6; def ORI_MMR6 : StdMMR6Rel, ORI_MMR6_DESC, ORI_MMR6_ENC, ISA_MICROMIPS32R6; def PREF_MMR6 : R6MMR6Rel, PREF_MMR6_ENC, PREF_MMR6_DESC, ISA_MICROMIPS32R6; def SB16_MMR6 : StdMMR6Rel, SB16_MMR6_DESC, SB16_MMR6_ENC, ISA_MICROMIPS32R6; def SELEQZ_MMR6 : R6MMR6Rel, SELEQZ_MMR6_ENC, SELEQZ_MMR6_DESC, ISA_MICROMIPS32R6; def SELNEZ_MMR6 : R6MMR6Rel, SELNEZ_MMR6_ENC, SELNEZ_MMR6_DESC, ISA_MICROMIPS32R6; def SH16_MMR6 : StdMMR6Rel, SH16_MMR6_DESC, SH16_MMR6_ENC, ISA_MICROMIPS32R6; def SLL_MMR6 : StdMMR6Rel, SLL_MMR6_DESC, SLL_MMR6_ENC, ISA_MICROMIPS32R6; def SUB_MMR6 : StdMMR6Rel, SUB_MMR6_DESC, SUB_MMR6_ENC, ISA_MICROMIPS32R6; def SUBU_MMR6 : StdMMR6Rel, SUBU_MMR6_DESC, SUBU_MMR6_ENC, ISA_MICROMIPS32R6; def SW16_MMR6 : StdMMR6Rel, SW16_MMR6_DESC, SW16_MMR6_ENC, ISA_MICROMIPS32R6; def SWM16_MMR6 : StdMMR6Rel, SWM16_MMR6_DESC, SWM16_MMR6_ENC, ISA_MICROMIPS32R6; def SWSP_MMR6 : StdMMR6Rel, SWSP_MMR6_DESC, SWSP_MMR6_ENC, ISA_MICROMIPS32R6; def WRPGPR_MMR6 : StdMMR6Rel, WRPGPR_MMR6_ENC, 
WRPGPR_MMR6_DESC, ISA_MICROMIPS32R6; def WSBH_MMR6 : StdMMR6Rel, WSBH_MMR6_ENC, WSBH_MMR6_DESC, ISA_MICROMIPS32R6; def LB_MMR6 : R6MMR6Rel, LB_MMR6_ENC, LB_MMR6_DESC, ISA_MICROMIPS32R6; def LBU_MMR6 : R6MMR6Rel, LBU_MMR6_ENC, LBU_MMR6_DESC, ISA_MICROMIPS32R6; def PAUSE_MMR6 : StdMMR6Rel, PAUSE_MMR6_DESC, PAUSE_MMR6_ENC, ISA_MICROMIPS32R6; def RDHWR_MMR6 : R6MMR6Rel, RDHWR_MMR6_DESC, RDHWR_MMR6_ENC, ISA_MICROMIPS32R6; def WAIT_MMR6 : StdMMR6Rel, WAIT_MMR6_DESC, WAIT_MMR6_ENC, ISA_MICROMIPS32R6; def SSNOP_MMR6 : StdMMR6Rel, SSNOP_MMR6_DESC, SSNOP_MMR6_ENC, ISA_MICROMIPS32R6; def SYNC_MMR6 : StdMMR6Rel, SYNC_MMR6_DESC, SYNC_MMR6_ENC, ISA_MICROMIPS32R6; def SYNCI_MMR6 : StdMMR6Rel, SYNCI_MMR6_DESC, SYNCI_MMR6_ENC, ISA_MICROMIPS32R6; def RDPGPR_MMR6 : R6MMR6Rel, RDPGPR_MMR6_DESC, RDPGPR_MMR6_ENC, ISA_MICROMIPS32R6; def SDBBP_MMR6 : R6MMR6Rel, SDBBP_MMR6_DESC, SDBBP_MMR6_ENC, ISA_MICROMIPS32R6; def SIGRIE_MMR6 : R6MMR6Rel, SIGRIE_MMR6_DESC, SIGRIE_MMR6_ENC, ISA_MICROMIPS32R6; def XOR_MMR6 : StdMMR6Rel, XOR_MMR6_DESC, XOR_MMR6_ENC, ISA_MICROMIPS32R6; def XORI_MMR6 : StdMMR6Rel, XORI_MMR6_DESC, XORI_MMR6_ENC, ISA_MICROMIPS32R6; let DecoderMethod = "DecodeMemMMImm16" in { def SW_MMR6 : StdMMR6Rel, SW_MMR6_DESC, SW_MMR6_ENC, ISA_MICROMIPS32R6; } /// Floating Point Instructions def FADD_S_MMR6 : StdMMR6Rel, FADD_S_MMR6_ENC, FADD_S_MMR6_DESC, ISA_MICROMIPS32R6; def FSUB_S_MMR6 : StdMMR6Rel, FSUB_S_MMR6_ENC, FSUB_S_MMR6_DESC, ISA_MICROMIPS32R6; def FMUL_S_MMR6 : StdMMR6Rel, FMUL_S_MMR6_ENC, FMUL_S_MMR6_DESC, ISA_MICROMIPS32R6; def FDIV_S_MMR6 : StdMMR6Rel, FDIV_S_MMR6_ENC, FDIV_S_MMR6_DESC, ISA_MICROMIPS32R6; def MADDF_S_MMR6 : R6MMR6Rel, MADDF_S_MMR6_ENC, MADDF_S_MMR6_DESC, ISA_MICROMIPS32R6; def MADDF_D_MMR6 : R6MMR6Rel, MADDF_D_MMR6_ENC, MADDF_D_MMR6_DESC, ISA_MICROMIPS32R6; def MSUBF_S_MMR6 : R6MMR6Rel, MSUBF_S_MMR6_ENC, MSUBF_S_MMR6_DESC, ISA_MICROMIPS32R6; def MSUBF_D_MMR6 : R6MMR6Rel, MSUBF_D_MMR6_ENC, MSUBF_D_MMR6_DESC, ISA_MICROMIPS32R6; def FMOV_S_MMR6 : StdMMR6Rel, FMOV_S_MMR6_ENC, FMOV_S_MMR6_DESC, ISA_MICROMIPS32R6; def FNEG_S_MMR6 : StdMMR6Rel, FNEG_S_MMR6_ENC, FNEG_S_MMR6_DESC, ISA_MICROMIPS32R6; def MAX_S_MMR6 : R6MMR6Rel, MAX_S_MMR6_ENC, MAX_S_MMR6_DESC, ISA_MICROMIPS32R6; def MAX_D_MMR6 : R6MMR6Rel, MAX_D_MMR6_ENC, MAX_D_MMR6_DESC, ISA_MICROMIPS32R6; def MIN_S_MMR6 : R6MMR6Rel, MIN_S_MMR6_ENC, MIN_S_MMR6_DESC, ISA_MICROMIPS32R6; def MIN_D_MMR6 : R6MMR6Rel, MIN_D_MMR6_ENC, MIN_D_MMR6_DESC, ISA_MICROMIPS32R6; def MAXA_S_MMR6 : R6MMR6Rel, MAXA_S_MMR6_ENC, MAXA_S_MMR6_DESC, ISA_MICROMIPS32R6; def MAXA_D_MMR6 : R6MMR6Rel, MAXA_D_MMR6_ENC, MAXA_D_MMR6_DESC, ISA_MICROMIPS32R6; def MINA_S_MMR6 : R6MMR6Rel, MINA_S_MMR6_ENC, MINA_S_MMR6_DESC, ISA_MICROMIPS32R6; def MINA_D_MMR6 : R6MMR6Rel, MINA_D_MMR6_ENC, MINA_D_MMR6_DESC, ISA_MICROMIPS32R6; def CVT_L_S_MMR6 : StdMMR6Rel, CVT_L_S_MMR6_ENC, CVT_L_S_MMR6_DESC, ISA_MICROMIPS32R6; def CVT_L_D_MMR6 : StdMMR6Rel, CVT_L_D_MMR6_ENC, CVT_L_D_MMR6_DESC, ISA_MICROMIPS32R6; def CVT_W_S_MMR6 : StdMMR6Rel, CVT_W_S_MMR6_ENC, CVT_W_S_MMR6_DESC, ISA_MICROMIPS32R6; def CVT_D_L_MMR6 : StdMMR6Rel, CVT_D_L_MMR6_ENC, CVT_D_L_MMR6_DESC, ISA_MICROMIPS32R6; def CVT_S_W_MMR6 : StdMMR6Rel, CVT_S_W_MMR6_ENC, CVT_S_W_MMR6_DESC, ISA_MICROMIPS32R6; def CVT_S_L_MMR6 : StdMMR6Rel, CVT_S_L_MMR6_ENC, CVT_S_L_MMR6_DESC, ISA_MICROMIPS32R6; defm S_MMR6 : CMP_CC_MMR6<0b000101, "s", FGR32Opnd, II_CMP_CC_S>; defm D_MMR6 : CMP_CC_MMR6<0b010101, "d", FGR64Opnd, II_CMP_CC_D>; def FLOOR_L_S_MMR6 : StdMMR6Rel, FLOOR_L_S_MMR6_ENC, FLOOR_L_S_MMR6_DESC, ISA_MICROMIPS32R6; def FLOOR_L_D_MMR6 
: StdMMR6Rel, FLOOR_L_D_MMR6_ENC, FLOOR_L_D_MMR6_DESC, ISA_MICROMIPS32R6; def FLOOR_W_S_MMR6 : StdMMR6Rel, FLOOR_W_S_MMR6_ENC, FLOOR_W_S_MMR6_DESC, ISA_MICROMIPS32R6; def FLOOR_W_D_MMR6 : StdMMR6Rel, FLOOR_W_D_MMR6_ENC, FLOOR_W_D_MMR6_DESC, ISA_MICROMIPS32R6; def CEIL_L_S_MMR6 : StdMMR6Rel, CEIL_L_S_MMR6_ENC, CEIL_L_S_MMR6_DESC, ISA_MICROMIPS32R6; def CEIL_L_D_MMR6 : StdMMR6Rel, CEIL_L_D_MMR6_ENC, CEIL_L_D_MMR6_DESC, ISA_MICROMIPS32R6; def CEIL_W_S_MMR6 : StdMMR6Rel, CEIL_W_S_MMR6_ENC, CEIL_W_S_MMR6_DESC, ISA_MICROMIPS32R6; def CEIL_W_D_MMR6 : StdMMR6Rel, CEIL_W_D_MMR6_ENC, CEIL_W_D_MMR6_DESC, ISA_MICROMIPS32R6; def TRUNC_L_S_MMR6 : StdMMR6Rel, TRUNC_L_S_MMR6_ENC, TRUNC_L_S_MMR6_DESC, ISA_MICROMIPS32R6; def TRUNC_L_D_MMR6 : StdMMR6Rel, TRUNC_L_D_MMR6_ENC, TRUNC_L_D_MMR6_DESC, ISA_MICROMIPS32R6; def TRUNC_W_S_MMR6 : StdMMR6Rel, TRUNC_W_S_MMR6_ENC, TRUNC_W_S_MMR6_DESC, ISA_MICROMIPS32R6; def TRUNC_W_D_MMR6 : StdMMR6Rel, TRUNC_W_D_MMR6_ENC, TRUNC_W_D_MMR6_DESC, ISA_MICROMIPS32R6; def SB_MMR6 : StdMMR6Rel, SB_MMR6_DESC, SB_MMR6_ENC, ISA_MICROMIPS32R6; def SH_MMR6 : StdMMR6Rel, SH_MMR6_DESC, SH_MMR6_ENC, ISA_MICROMIPS32R6; def LW_MMR6 : StdMMR6Rel, LW_MMR6_DESC, LW_MMR6_ENC, ISA_MICROMIPS32R6; def LUI_MMR6 : R6MMR6Rel, LUI_MMR6_DESC, LUI_MMR6_ENC, ISA_MICROMIPS32R6; def ADDU16_MMR6 : StdMMR6Rel, ADDU16_MMR6_DESC, ADDU16_MMR6_ENC, ISA_MICROMIPS32R6; def AND16_MMR6 : StdMMR6Rel, AND16_MMR6_DESC, AND16_MMR6_ENC, ISA_MICROMIPS32R6; def ANDI16_MMR6 : StdMMR6Rel, ANDI16_MMR6_DESC, ANDI16_MMR6_ENC, ISA_MICROMIPS32R6; def NOT16_MMR6 : StdMMR6Rel, NOT16_MMR6_DESC, NOT16_MMR6_ENC, ISA_MICROMIPS32R6; def OR16_MMR6 : StdMMR6Rel, OR16_MMR6_DESC, OR16_MMR6_ENC, ISA_MICROMIPS32R6; def SLL16_MMR6 : StdMMR6Rel, SLL16_MMR6_DESC, SLL16_MMR6_ENC, ISA_MICROMIPS32R6; def SRL16_MMR6 : StdMMR6Rel, SRL16_MMR6_DESC, SRL16_MMR6_ENC, ISA_MICROMIPS32R6; def BREAK16_MMR6 : StdMMR6Rel, BREAK16_MMR6_DESC, BREAK16_MMR6_ENC, ISA_MICROMIPS32R6; def LI16_MMR6 : StdMMR6Rel, LI16_MMR6_DESC, LI16_MMR6_ENC, ISA_MICROMIPS32R6; def MOVE16_MMR6 : StdMMR6Rel, MOVE16_MMR6_DESC, MOVE16_MMR6_ENC, ISA_MICROMIPS32R6; def MOVEP_MMR6 : StdMMR6Rel, MOVEP_MMR6_DESC, MOVEP_MMR6_ENC, ISA_MICROMIPS32R6; def SDBBP16_MMR6 : StdMMR6Rel, SDBBP16_MMR6_DESC, SDBBP16_MMR6_ENC, ISA_MICROMIPS32R6; def SUBU16_MMR6 : StdMMR6Rel, SUBU16_MMR6_DESC, SUBU16_MMR6_ENC, ISA_MICROMIPS32R6; def XOR16_MMR6 : StdMMR6Rel, XOR16_MMR6_DESC, XOR16_MMR6_ENC, ISA_MICROMIPS32R6; def JALRC_HB_MMR6 : R6MMR6Rel, JALRC_HB_MMR6_ENC, JALRC_HB_MMR6_DESC, ISA_MICROMIPS32R6; def EXT_MMR6 : StdMMR6Rel, EXT_MMR6_ENC, EXT_MMR6_DESC, ISA_MICROMIPS32R6; def INS_MMR6 : StdMMR6Rel, INS_MMR6_ENC, INS_MMR6_DESC, ISA_MICROMIPS32R6; def JALRC_MMR6 : R6MMR6Rel, JALRC_MMR6_ENC, JALRC_MMR6_DESC, ISA_MICROMIPS32R6; def RINT_S_MMR6 : StdMMR6Rel, RINT_S_MMR6_ENC, RINT_S_MMR6_DESC, ISA_MICROMIPS32R6; def RINT_D_MMR6 : StdMMR6Rel, RINT_D_MMR6_ENC, RINT_D_MMR6_DESC, ISA_MICROMIPS32R6; def ROUND_L_S_MMR6 : StdMMR6Rel, ROUND_L_S_MMR6_ENC, ROUND_L_S_MMR6_DESC, ISA_MICROMIPS32R6; def ROUND_L_D_MMR6 : StdMMR6Rel, ROUND_L_D_MMR6_ENC, ROUND_L_D_MMR6_DESC, ISA_MICROMIPS32R6; def ROUND_W_S_MMR6 : StdMMR6Rel, ROUND_W_S_MMR6_ENC, ROUND_W_S_MMR6_DESC, ISA_MICROMIPS32R6; def ROUND_W_D_MMR6 : StdMMR6Rel, ROUND_W_D_MMR6_ENC, ROUND_W_D_MMR6_DESC, ISA_MICROMIPS32R6; def SEL_S_MMR6 : R6MMR6Rel, SEL_S_MMR6_ENC, SEL_S_MMR6_DESC, ISA_MICROMIPS32R6; def SEL_D_MMR6 : R6MMR6Rel, SEL_D_MMR6_ENC, SEL_D_MMR6_DESC, ISA_MICROMIPS32R6; def SELEQZ_S_MMR6 : R6MMR6Rel, SELEQZ_S_MMR6_ENC, SELEQZ_S_MMR6_DESC, ISA_MICROMIPS32R6; def 
SELEQZ_D_MMR6 : R6MMR6Rel, SELEQZ_D_MMR6_ENC, SELEQZ_D_MMR6_DESC, ISA_MICROMIPS32R6; def SELNEZ_S_MMR6 : R6MMR6Rel, SELNEZ_S_MMR6_ENC, SELNEZ_S_MMR6_DESC, ISA_MICROMIPS32R6; def SELNEZ_D_MMR6 : R6MMR6Rel, SELNEZ_D_MMR6_ENC, SELNEZ_D_MMR6_DESC, ISA_MICROMIPS32R6; def CLASS_S_MMR6 : StdMMR6Rel, CLASS_S_MMR6_ENC, CLASS_S_MMR6_DESC, ISA_MICROMIPS32R6; def CLASS_D_MMR6 : StdMMR6Rel, CLASS_D_MMR6_ENC, CLASS_D_MMR6_DESC, ISA_MICROMIPS32R6; def TLBINV_MMR6 : StdMMR6Rel, TLBINV_MMR6_ENC, TLBINV_MMR6_DESC, ISA_MICROMIPS32R6; def TLBINVF_MMR6 : StdMMR6Rel, TLBINVF_MMR6_ENC, TLBINVF_MMR6_DESC, ISA_MICROMIPS32R6; def DVP_MMR6 : R6MMR6Rel, DVP_MMR6_ENC, DVP_MMR6_DESC, ISA_MICROMIPS32R6; def EVP_MMR6 : R6MMR6Rel, EVP_MMR6_ENC, EVP_MMR6_DESC, ISA_MICROMIPS32R6; def BC1EQZC_MMR6 : R6MMR6Rel, BC1EQZC_MMR6_DESC, BC1EQZC_MMR6_ENC, ISA_MICROMIPS32R6; def BC1NEZC_MMR6 : R6MMR6Rel, BC1NEZC_MMR6_DESC, BC1NEZC_MMR6_ENC, ISA_MICROMIPS32R6; def BC2EQZC_MMR6 : R6MMR6Rel, MipsR6Inst, BC2EQZC_MMR6_ENC, BC2EQZC_MMR6_DESC, ISA_MICROMIPS32R6; def BC2NEZC_MMR6 : R6MMR6Rel, MipsR6Inst, BC2NEZC_MMR6_ENC, BC2NEZC_MMR6_DESC, ISA_MICROMIPS32R6; let DecoderNamespace = "MicroMipsFP64" in { def LDC1_D64_MMR6 : StdMMR6Rel, LDC1_D64_MMR6_DESC, LDC1_MMR6_ENC, ISA_MICROMIPS32R6 { let BaseOpcode = "LDC164"; } def SDC1_D64_MMR6 : StdMMR6Rel, SDC1_D64_MMR6_DESC, SDC1_MMR6_ENC, ISA_MICROMIPS32R6; } def LDC2_MMR6 : StdMMR6Rel, LDC2_MMR6_ENC, LDC2_MMR6_DESC, ISA_MICROMIPS32R6; def SDC2_MMR6 : StdMMR6Rel, SDC2_MMR6_ENC, SDC2_MMR6_DESC, ISA_MICROMIPS32R6; def LWC2_MMR6 : StdMMR6Rel, LWC2_MMR6_ENC, LWC2_MMR6_DESC, ISA_MICROMIPS32R6; def SWC2_MMR6 : StdMMR6Rel, SWC2_MMR6_ENC, SWC2_MMR6_DESC, ISA_MICROMIPS32R6; def LL_MMR6 : R6MMR6Rel, LL_MMR6_ENC, LL_MMR6_DESC, ISA_MICROMIPS32R6; def SC_MMR6 : R6MMR6Rel, SC_MMR6_ENC, SC_MMR6_DESC, ISA_MICROMIPS32R6; } def BOVC_MMR6 : R6MMR6Rel, BOVC_MMR6_ENC, BOVC_MMR6_DESC, ISA_MICROMIPS32R6, MMDecodeDisambiguatedBy<"POP35GroupBranchMMR6">; def BNVC_MMR6 : R6MMR6Rel, BNVC_MMR6_ENC, BNVC_MMR6_DESC, ISA_MICROMIPS32R6, MMDecodeDisambiguatedBy<"POP37GroupBranchMMR6">; def BGEC_MMR6 : R6MMR6Rel, BGEC_MMR6_ENC, BGEC_MMR6_DESC, ISA_MICROMIPS32R6; def BGEUC_MMR6 : R6MMR6Rel, BGEUC_MMR6_ENC, BGEUC_MMR6_DESC, ISA_MICROMIPS32R6; def BLTC_MMR6 : R6MMR6Rel, BLTC_MMR6_ENC, BLTC_MMR6_DESC, ISA_MICROMIPS32R6; def BLTUC_MMR6 : R6MMR6Rel, BLTUC_MMR6_ENC, BLTUC_MMR6_DESC, ISA_MICROMIPS32R6; def BEQC_MMR6 : R6MMR6Rel, BEQC_MMR6_ENC, BEQC_MMR6_DESC, ISA_MICROMIPS32R6, DecodeDisambiguates<"POP35GroupBranchMMR6">; def BNEC_MMR6 : R6MMR6Rel, BNEC_MMR6_ENC, BNEC_MMR6_DESC, ISA_MICROMIPS32R6, DecodeDisambiguates<"POP37GroupBranchMMR6">; def BLTZC_MMR6 : R6MMR6Rel, BLTZC_MMR6_ENC, BLTZC_MMR6_DESC, ISA_MICROMIPS32R6; def BLEZC_MMR6 : R6MMR6Rel, BLEZC_MMR6_ENC, BLEZC_MMR6_DESC, ISA_MICROMIPS32R6; def BGEZC_MMR6 : R6MMR6Rel, BGEZC_MMR6_ENC, BGEZC_MMR6_DESC, ISA_MICROMIPS32R6; def BGTZC_MMR6 : R6MMR6Rel, BGTZC_MMR6_ENC, BGTZC_MMR6_DESC, ISA_MICROMIPS32R6; def BGEZALC_MMR6 : R6MMR6Rel, BGEZALC_MMR6_ENC, BGEZALC_MMR6_DESC, ISA_MICROMIPS32R6; def BGTZALC_MMR6 : R6MMR6Rel, BGTZALC_MMR6_ENC, BGTZALC_MMR6_DESC, ISA_MICROMIPS32R6; def BLEZALC_MMR6 : R6MMR6Rel, BLEZALC_MMR6_ENC, BLEZALC_MMR6_DESC, ISA_MICROMIPS32R6; def BLTZALC_MMR6 : R6MMR6Rel, BLTZALC_MMR6_ENC, BLTZALC_MMR6_DESC, ISA_MICROMIPS32R6; //===----------------------------------------------------------------------===// // // MicroMips instruction aliases // //===----------------------------------------------------------------------===// def : MipsInstAlias<"ei", (EI_MMR6 ZERO), 1>, 
ISA_MICROMIPS32R6; def : MipsInstAlias<"di", (DI_MMR6 ZERO), 1>, ISA_MICROMIPS32R6; def : MipsInstAlias<"nop", (SLL_MMR6 ZERO, ZERO, 0), 1>, ISA_MICROMIPS32R6; def B_MMR6_Pseudo : MipsAsmPseudoInst<(outs), (ins brtarget_mm:$offset), !strconcat("b", "\t$offset")> { string DecoderNamespace = "MicroMipsR6"; } def : MipsInstAlias<"sync", (SYNC_MMR6 0), 1>, ISA_MICROMIPS32R6; def : MipsInstAlias<"sdbbp", (SDBBP_MMR6 0), 1>, ISA_MICROMIPS32R6; def : MipsInstAlias<"sigrie", (SIGRIE_MMR6 0), 1>, ISA_MICROMIPS32R6; def : MipsInstAlias<"rdhwr $rt, $rs", (RDHWR_MMR6 GPR32Opnd:$rt, HWRegsOpnd:$rs, 0), 1>, ISA_MICROMIPS32R6; def : MipsInstAlias<"mtc0 $rt, $rs", (MTC0_MMR6 COP0Opnd:$rs, GPR32Opnd:$rt, 0), 0>, ISA_MICROMIPS32R6; def : MipsInstAlias<"mthc0 $rt, $rs", (MTHC0_MMR6 COP0Opnd:$rs, GPR32Opnd:$rt, 0), 0>, ISA_MICROMIPS32R6; def : MipsInstAlias<"mfc0 $rt, $rs", (MFC0_MMR6 GPR32Opnd:$rt, COP0Opnd:$rs, 0), 0>, ISA_MICROMIPS32R6; def : MipsInstAlias<"mfhc0 $rt, $rs", (MFHC0_MMR6 GPR32Opnd:$rt, COP0Opnd:$rs, 0), 0>, ISA_MICROMIPS32R6; def : MipsInstAlias<"jalrc.hb $rs", (JALRC_HB_MMR6 RA, GPR32Opnd:$rs), 1>, ISA_MICROMIPS32R6; def : MipsInstAlias<"jal $offset", (BALC_MMR6 brtarget26_mm:$offset), 0>, ISA_MICROMIPS32R6; def : MipsInstAlias<"dvp", (DVP_MMR6 ZERO), 0>, ISA_MICROMIPS32R6; def : MipsInstAlias<"evp", (EVP_MMR6 ZERO), 0>, ISA_MICROMIPS32R6; def : MipsInstAlias<"jalrc $rs", (JALRC_MMR6 RA, GPR32Opnd:$rs), 1>, ISA_MICROMIPS32R6; def : MipsInstAlias<"and $rs, $rt, $imm", (ANDI_MMR6 GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>, ISA_MICROMIPS32R6; def : MipsInstAlias<"and $rs, $imm", (ANDI_MMR6 GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>, ISA_MICROMIPS32R6; def : MipsInstAlias<"or $rs, $rt, $imm", (ORI_MMR6 GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>, ISA_MICROMIPS32R6; def : MipsInstAlias<"or $rs, $imm", (ORI_MMR6 GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>, ISA_MICROMIPS32R6; def : MipsInstAlias<"xor $rs, $rt, $imm", (XORI_MMR6 GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>, ISA_MICROMIPS32R6; def : MipsInstAlias<"xor $rs, $imm", (XORI_MMR6 GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>, ISA_MICROMIPS32R6; def : MipsInstAlias<"not $rt, $rs", (NOR_MMR6 GPR32Opnd:$rt, GPR32Opnd:$rs, ZERO), 0>, ISA_MICROMIPS32R6; def : MipsInstAlias<"not $rt", (NOR_MMR6 GPR32Opnd:$rt, GPR32Opnd:$rt, ZERO), 0>, ISA_MICROMIPS32R6; def : MipsInstAlias<"lapc $rd, $imm", (ADDIUPC_MMR6 GPR32Opnd:$rd, simm19_lsl2:$imm)>, ISA_MICROMIPS32R6; def : MipsInstAlias<"neg $rt, $rs", (SUB_MMR6 GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>, ISA_MICROMIPS32R6; def : MipsInstAlias<"neg $rt", (SUB_MMR6 GPR32Opnd:$rt, ZERO, GPR32Opnd:$rt), 1>, ISA_MICROMIPS32R6; def : MipsInstAlias<"negu $rt, $rs", (SUBU_MMR6 GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>, ISA_MICROMIPS32R6; def : MipsInstAlias<"negu $rt", (SUBU_MMR6 GPR32Opnd:$rt, ZERO, GPR32Opnd:$rt), 1>, ISA_MICROMIPS32R6; def : MipsInstAlias<"beqz16 $rs, $offset", (BEQZC16_MMR6 GPRMM16Opnd:$rs, brtarget7_mm:$offset), 0>, ISA_MICROMIPS32R6; def : MipsInstAlias<"bnez16 $rs, $offset", (BNEZC16_MMR6 GPRMM16Opnd:$rs, brtarget7_mm:$offset), 0>, ISA_MICROMIPS32R6; def : MipsInstAlias<"b16 $offset", (BC16_MMR6 brtarget10_mm:$offset), 0>, ISA_MICROMIPS32R6; //===----------------------------------------------------------------------===// // // MicroMips arbitrary patterns that map to one or more instructions // //===----------------------------------------------------------------------===// def : MipsPat<(store GPRMM16:$src, addrimm4lsl2:$addr), (SW16_MMR6 GPRMM16:$src, addrimm4lsl2:$addr)>, 
      ISA_MICROMIPS32R6;
def : MipsPat<(subc GPR32:$lhs, GPR32:$rhs),
              (SUBU_MMR6 GPR32:$lhs, GPR32:$rhs)>, ISA_MICROMIPS32R6;
def : MipsPat<(select i32:$cond, i32:$t, i32:$f),
              (OR_MM (SELNEZ_MMR6 i32:$t, i32:$cond),
                     (SELEQZ_MMR6 i32:$f, i32:$cond))>, ISA_MICROMIPS32R6;
def : MipsPat<(select i32:$cond, i32:$t, immz),
              (SELNEZ_MMR6 i32:$t, i32:$cond)>, ISA_MICROMIPS32R6;
def : MipsPat<(select i32:$cond, immz, i32:$f),
              (SELEQZ_MMR6 i32:$f, i32:$cond)>, ISA_MICROMIPS32R6;
defm : SelectInt_Pats, ISA_MICROMIPS32R6;

defm S_MMR6 : Cmp_Pats, ISA_MICROMIPS32R6;
defm D_MMR6 : Cmp_Pats, ISA_MICROMIPS32R6;

def : MipsPat<(f32 fpimm0), (MTC1_MMR6 ZERO)>, ISA_MICROMIPS32R6;
def : MipsPat<(f32 fpimm0neg), (FNEG_S_MMR6 (MTC1_MMR6 ZERO))>,
      ISA_MICROMIPS32R6;
def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src),
              (TRUNC_W_D_MMR6 FGR64Opnd:$src)>, ISA_MICROMIPS32R6;
+def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src),
+              (TRUNC_W_S_MMR6 FGR32Opnd:$src)>, ISA_MICROMIPS32R6;
def : MipsPat<(and GPRMM16:$src, immZExtAndi16:$imm),
              (ANDI16_MMR6 GPRMM16:$src, immZExtAndi16:$imm)>,
      ISA_MICROMIPS32R6;
def : MipsPat<(and GPR32:$src, immZExt16:$imm),
              (ANDI_MMR6 GPR32:$src, immZExt16:$imm)>, ISA_MICROMIPS32R6;
def : MipsPat<(i32 immZExt16:$imm),
              (XORI_MMR6 ZERO, immZExt16:$imm)>, ISA_MICROMIPS32R6;
def : MipsPat<(not GPRMM16:$in),
              (NOT16_MMR6 GPRMM16:$in)>, ISA_MICROMIPS32R6;
def : MipsPat<(not GPR32:$in),
              (NOR_MMR6 GPR32Opnd:$in, ZERO)>, ISA_MICROMIPS32R6;

// Patterns for load with a reg+imm operand.
let AddedComplexity = 41 in {
  def : LoadRegImmPat, FGR_64, ISA_MICROMIPS32R6;
  def : StoreRegImmPat, FGR_64, ISA_MICROMIPS32R6;
}

def TAILCALL_MMR6 : TailCall, ISA_MICROMIPS32R6;
def TAILCALLREG_MMR6 : TailCallReg, ISA_MICROMIPS32R6;

def PseudoIndirectBranch_MMR6 : PseudoIndirectBranchBase, ISA_MICROMIPS32R6;

def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)),
              (TAILCALL_MMR6 tglobaladdr:$dst)>, ISA_MICROMIPS32R6;
def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)),
              (TAILCALL_MMR6 texternalsym:$dst)>, ISA_MICROMIPS32R6;

def : MipsPat<(brcond (i32 (setne GPR32:$lhs, 0)), bb:$dst),
              (BNEZC_MMR6 GPR32:$lhs, bb:$dst)>, ISA_MICROMIPS32R6;
def : MipsPat<(brcond (i32 (seteq GPR32:$lhs, 0)), bb:$dst),
              (BEQZC_MMR6 GPR32:$lhs, bb:$dst)>, ISA_MICROMIPS32R6;
def : MipsPat<(brcond (i32 (setge GPR32:$lhs, GPR32:$rhs)), bb:$dst),
              (BEQZC_MMR6 (SLT_MM GPR32:$lhs, GPR32:$rhs), bb:$dst)>,
      ISA_MICROMIPS32R6;
def : MipsPat<(brcond (i32 (setuge GPR32:$lhs, GPR32:$rhs)), bb:$dst),
              (BEQZC_MMR6 (SLTu_MM GPR32:$lhs, GPR32:$rhs), bb:$dst)>,
      ISA_MICROMIPS32R6;
def : MipsPat<(brcond (i32 (setge GPR32:$lhs, immSExt16:$rhs)), bb:$dst),
              (BEQZC_MMR6 (SLTi_MM GPR32:$lhs, immSExt16:$rhs), bb:$dst)>,
      ISA_MICROMIPS32R6;
def : MipsPat<(brcond (i32 (setuge GPR32:$lhs, immSExt16:$rhs)), bb:$dst),
              (BEQZC_MMR6 (SLTiu_MM GPR32:$lhs, immSExt16:$rhs), bb:$dst)>,
      ISA_MICROMIPS32R6;
def : MipsPat<(brcond (i32 (setgt GPR32:$lhs, immSExt16Plus1:$rhs)), bb:$dst),
              (BEQZC_MMR6 (SLTi_MM GPR32:$lhs, (Plus1 imm:$rhs)), bb:$dst)>,
      ISA_MICROMIPS32R6;
def : MipsPat<(brcond (i32 (setugt GPR32:$lhs, immSExt16Plus1:$rhs)), bb:$dst),
              (BEQZC_MMR6 (SLTiu_MM GPR32:$lhs, (Plus1 imm:$rhs)), bb:$dst)>,
      ISA_MICROMIPS32R6;
def : MipsPat<(brcond (i32 (setle GPR32:$lhs, GPR32:$rhs)), bb:$dst),
              (BEQZC_MMR6 (SLT_MM GPR32:$rhs, GPR32:$lhs), bb:$dst)>,
      ISA_MICROMIPS32R6;
def : MipsPat<(brcond (i32 (setule GPR32:$lhs, GPR32:$rhs)), bb:$dst),
              (BEQZC_MMR6 (SLTu_MM GPR32:$rhs, GPR32:$lhs), bb:$dst)>,
      ISA_MICROMIPS32R6;
def : MipsPat<(brcond GPR32:$cond, bb:$dst),
              (BNEZC_MMR6 GPR32:$cond, bb:$dst)>, ISA_MICROMIPS32R6;
Index: vendor/llvm/dist-release_80/lib/Target/Mips/MicroMipsInstrFPU.td =================================================================== --- vendor/llvm/dist-release_80/lib/Target/Mips/MicroMipsInstrFPU.td (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/Mips/MicroMipsInstrFPU.td (revision 348932) @@ -1,444 +1,449 @@ //==- MicroMipsInstrFPU.td - microMIPS FPU Instruction Info -*- tablegen -*-==// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file describes the microMIPS FPU instruction set. // //===----------------------------------------------------------------------===// multiclass ADDS_MMM { def _D32_MM : MMRel, ADDS_FT, FGR_32 { string DecoderNamespace = "MicroMips"; } // FIXME: This needs to be part of the instruction mapping tables. def _D64_MM : ADDS_FT, FGR_64 { string DecoderNamespace = "MicroMipsFP64"; } } def FADD_S_MM : MMRel, ADDS_FT<"add.s", FGR32Opnd, II_ADD_S, 1, fadd>, ADDS_FM_MM<0, 0x30>, ISA_MICROMIPS; def FDIV_S_MM : MMRel, ADDS_FT<"div.s", FGR32Opnd, II_DIV_S, 0, fdiv>, ADDS_FM_MM<0, 0xf0>, ISA_MICROMIPS; def FMUL_S_MM : MMRel, ADDS_FT<"mul.s", FGR32Opnd, II_MUL_S, 1, fmul>, ADDS_FM_MM<0, 0xb0>, ISA_MICROMIPS; def FSUB_S_MM : MMRel, ADDS_FT<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>, ADDS_FM_MM<0, 0x70>, ISA_MICROMIPS; defm FADD : ADDS_MMM<"add.d", II_ADD_D, 1, fadd>, ADDS_FM_MM<1, 0x30>, ISA_MICROMIPS; defm FDIV : ADDS_MMM<"div.d", II_DIV_D, 0, fdiv>, ADDS_FM_MM<1, 0xf0>, ISA_MICROMIPS; defm FMUL : ADDS_MMM<"mul.d", II_MUL_D, 1, fmul>, ADDS_FM_MM<1, 0xb0>, ISA_MICROMIPS; defm FSUB : ADDS_MMM<"sub.d", II_SUB_D, 0, fsub>, ADDS_FM_MM<1, 0x70>, ISA_MICROMIPS; let DecoderNamespace = "MicroMips" in { def LWXC1_MM : MMRel, LWXC1_FT<"lwxc1", FGR32Opnd, II_LWXC1, load>, LWXC1_FM_MM<0x48>, ISA_MICROMIPS32_NOT_MIPS32R6; def SWXC1_MM : MMRel, SWXC1_FT<"swxc1", FGR32Opnd, II_SWXC1, store>, SWXC1_FM_MM<0x88>, ISA_MICROMIPS32_NOT_MIPS32R6; def LUXC1_MM : MMRel, LWXC1_FT<"luxc1", FGR64Opnd, II_LUXC1>, LWXC1_FM_MM<0x148>, FGR_64, ISA_MICROMIPS32_NOT_MIPS32R6; def SUXC1_MM : MMRel, SWXC1_FT<"suxc1", FGR64Opnd, II_SUXC1>, SWXC1_FM_MM<0x188>, FGR_64, ISA_MICROMIPS32_NOT_MIPS32R6; } let isCodeGenOnly = 1 in { def FCMP_S32_MM : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM_MM<0>, ISA_MICROMIPS32_NOT_MIPS32R6 { // FIXME: This is a required to work around the fact that these instructions // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the // fcc register set is used directly. bits<3> fcc = 0; } def FCMP_D32_MM : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM_MM<1>, ISA_MICROMIPS32_NOT_MIPS32R6 { // FIXME: This is a required to work around the fact that these instructions // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the // fcc register set is used directly. 
bits<3> fcc = 0; } } let DecoderNamespace = "MicroMips" in { def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, II_BC1F, MIPS_BRANCH_F>, BC1F_FM_MM<0x1c>, ISA_MICROMIPS32_NOT_MIPS32R6; def BC1T_MM : MMRel, BC1F_FT<"bc1t", brtarget_mm, II_BC1T, MIPS_BRANCH_T>, BC1F_FM_MM<0x1d>, ISA_MICROMIPS32_NOT_MIPS32R6; def CVT_W_S_MM : MMRel, ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, II_CVT>, ROUND_W_FM_MM<0, 0x24>, ISA_MICROMIPS; } let DecoderNamespace = "MicroMips" in { def ROUND_W_S_MM : MMRel, StdMMR6Rel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>, ROUND_W_FM_MM<0, 0xec>, ISA_MICROMIPS; def CEIL_W_MM : MMRel, ABSS_FT<"ceil.w.d", FGR32Opnd, AFGR64Opnd, II_CEIL>, ROUND_W_FM_MM<1, 0x6c>, ISA_MICROMIPS, FGR_32; def FLOOR_W_MM : MMRel, ABSS_FT<"floor.w.d", FGR32Opnd, AFGR64Opnd, II_FLOOR>, ROUND_W_FM_MM<1, 0x2c>, ISA_MICROMIPS, FGR_32; def ROUND_W_MM : MMRel, StdMMR6Rel, ABSS_FT<"round.w.d", FGR32Opnd, AFGR64Opnd, II_ROUND>, ROUND_W_FM_MM<1, 0xec>, ISA_MICROMIPS, FGR_32; def TRUNC_W_MM : MMRel, ABSS_FT<"trunc.w.d", FGR32Opnd, AFGR64Opnd, II_TRUNC>, ROUND_W_FM_MM<1, 0xac>, ISA_MICROMIPS, FGR_32; def CVT_L_S_MM : MMRel, ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, II_CVT>, ROUND_W_FM_MM<0, 0x4>, ISA_MICROMIPS, FGR_64; def CVT_L_D64_MM : MMRel, ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, II_CVT>, ROUND_W_FM_MM<1, 0x4>, ISA_MICROMIPS, FGR_64; def CVT_W_D32_MM : MMRel, ABSS_FT<"cvt.w.d", FGR32Opnd, AFGR64Opnd, II_CVT>, ROUND_W_FM_MM<1, 0x24>, ISA_MICROMIPS, FGR_32; } let DecoderNamespace = "MicroMipsFP64" in { def CVT_W_D64_MM : ABSS_FT<"cvt.w.d", FGR32Opnd, FGR64Opnd, II_CVT>, ROUND_W_FM_MM<1, 0x24>, ISA_MICROMIPS, FGR_64; } multiclass ABSS_MMM { def _D32_MM : MMRel, ABSS_FT, ISA_MICROMIPS, FGR_32 { string DecoderNamespace = "MicroMips"; } // FIXME: This needs to be part of the instruction mapping tables. 
def _D64_MM : ABSS_FT, ISA_MICROMIPS, FGR_64 { string DecoderNamespace = "MicroMipsFP64"; } } defm FSQRT : ABSS_MMM<"sqrt.d", II_SQRT_D, fsqrt>, ROUND_W_FM_MM<1, 0x28>; defm FABS : ABSS_MMM<"abs.d", II_SQRT_D, fabs>, ABS_FM_MM<1, 0xd>; let DecoderNamespace = "MicroMips" in { def FABS_S_MM : MMRel, ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, II_ABS, fabs>, ABS_FM_MM<0, 0xd>, ISA_MICROMIPS; } def FMOV_S_MM : MMRel, ABSS_FT<"mov.s", FGR32Opnd, FGR32Opnd, II_MOV_S>, ABS_FM_MM<0, 0x1>, ISA_MICROMIPS { let isMoveReg = 1; } def FNEG_S_MM : MMRel, ABSS_FT<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>, ABS_FM_MM<0, 0x2d>, ISA_MICROMIPS; let DecoderNamespace = "MicroMips" in { def CVT_D32_S_MM : MMRel, ABSS_FT<"cvt.d.s", AFGR64Opnd, FGR32Opnd, II_CVT>, ABS_FM_MM<0, 0x4d>, ISA_MICROMIPS, FGR_32; def CVT_D32_W_MM : MMRel, ABSS_FT<"cvt.d.w", AFGR64Opnd, FGR32Opnd, II_CVT>, ABS_FM_MM<1, 0x4d>, ISA_MICROMIPS, FGR_32; } let DecoderNamespace = "MicroMipsFP64" in { def CVT_D64_S_MM : ABSS_FT<"cvt.d.s", FGR64Opnd, FGR32Opnd, II_CVT>, ABS_FM_MM<0, 0x4d>, ISA_MICROMIPS, FGR_64; def CVT_D64_W_MM : ABSS_FT<"cvt.d.w", FGR64Opnd, FGR32Opnd, II_CVT>, ABS_FM_MM<1, 0x4d>, ISA_MICROMIPS, FGR_64; def CVT_S_D64_MM : ABSS_FT<"cvt.s.d", FGR32Opnd, FGR64Opnd, II_CVT>, ABS_FM_MM<0, 0x6d>, ISA_MICROMIPS, FGR_64; } let DecoderNamespace = "MicroMips" in { def CVT_S_D32_MM : MMRel, ABSS_FT<"cvt.s.d", FGR32Opnd, AFGR64Opnd, II_CVT>, ABS_FM_MM<0, 0x6d>, ISA_MICROMIPS, FGR_32; def CVT_S_W_MM : MMRel, ABSS_FT<"cvt.s.w", FGR32Opnd, FGR32Opnd, II_CVT>, ABS_FM_MM<1, 0x6d>, ISA_MICROMIPS; } defm FNEG : ABSS_MMM<"neg.d", II_NEG, fneg>, ABS_FM_MM<1, 0x2d>; defm FMOV : ABSS_MMM<"mov.d", II_MOV_D>, ABS_FM_MM<1, 0x1>; let DecoderNamespace = "MicroMips" in { def MOVZ_I_S_MM : MMRel, CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32Opnd, II_MOVZ_S>, CMov_I_F_FM_MM<0x78, 0>, ISA_MICROMIPS32_NOT_MIPS32R6; def MOVN_I_S_MM : MMRel, CMov_I_F_FT<"movn.s", GPR32Opnd, FGR32Opnd, II_MOVN_S>, CMov_I_F_FM_MM<0x38, 0>, ISA_MICROMIPS32_NOT_MIPS32R6; def MOVZ_I_D32_MM : MMRel, CMov_I_F_FT<"movz.d", GPR32Opnd, AFGR64Opnd, II_MOVZ_D>, CMov_I_F_FM_MM<0x78, 1>, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; def MOVN_I_D32_MM : MMRel, CMov_I_F_FT<"movn.d", GPR32Opnd, AFGR64Opnd, II_MOVN_D>, CMov_I_F_FM_MM<0x38, 1>, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; def MOVT_S_MM : MMRel, CMov_F_F_FT<"movt.s", FGR32Opnd, II_MOVT_S, MipsCMovFP_T>, CMov_F_F_FM_MM<0x60, 0>, ISA_MICROMIPS32_NOT_MIPS32R6; def MOVF_S_MM : MMRel, CMov_F_F_FT<"movf.s", FGR32Opnd, II_MOVF_S, MipsCMovFP_F>, CMov_F_F_FM_MM<0x20, 0>, ISA_MICROMIPS32_NOT_MIPS32R6; def MOVT_D32_MM : MMRel, CMov_F_F_FT<"movt.d", AFGR64Opnd, II_MOVT_D, MipsCMovFP_T>, CMov_F_F_FM_MM<0x60, 1>, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; def MOVF_D32_MM : MMRel, CMov_F_F_FT<"movf.d", AFGR64Opnd, II_MOVF_D, MipsCMovFP_F>, CMov_F_F_FM_MM<0x20, 1>, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; def MFC1_MM : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, II_MFC1, bitconvert>, MFC1_FM_MM<0x80>, ISA_MICROMIPS; def MTC1_MM : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1, bitconvert>, MFC1_FM_MM<0xa0>, ISA_MICROMIPS; def MADD_S_MM : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S>, MADDS_FM_MM<0x1>, ISA_MICROMIPS32_NOT_MIPS32R6, MADD4; def MSUB_S_MM : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S>, MADDS_FM_MM<0x21>, ISA_MICROMIPS32_NOT_MIPS32R6, MADD4; let AdditionalPredicates = [NoNaNsFPMath, HasMadd4] in { def NMADD_S_MM : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, II_NMADD_S>, MADDS_FM_MM<0x2>, ISA_MICROMIPS32_NOT_MIPS32R6; def NMSUB_S_MM : MMRel, NMADDS_FT<"nmsub.s", 
FGR32Opnd, II_NMSUB_S>, MADDS_FM_MM<0x22>, ISA_MICROMIPS32_NOT_MIPS32R6; } def MADD_D32_MM : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D>, MADDS_FM_MM<0x9>, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32, MADD4; def MSUB_D32_MM : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D>, MADDS_FM_MM<0x29>, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32, MADD4; let AdditionalPredicates = [NoNaNsFPMath, HasMadd4] in { def NMADD_D32_MM : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D>, MADDS_FM_MM<0xa>, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; def NMSUB_D32_MM : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D>, MADDS_FM_MM<0x2a>, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; } def FLOOR_W_S_MM : MMRel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, II_FLOOR>, ROUND_W_FM_MM<0, 0x2c>, ISA_MICROMIPS; def TRUNC_W_S_MM : MMRel, StdMMR6Rel, ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>, ROUND_W_FM_MM<0, 0xac>, ISA_MICROMIPS; def CEIL_W_S_MM : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>, ROUND_W_FM_MM<0, 0x6c>, ISA_MICROMIPS; def FSQRT_S_MM : MMRel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S, fsqrt>, ROUND_W_FM_MM<0, 0x28>, ISA_MICROMIPS; def MTHC1_D32_MM : MMRel, MTC1_64_FT<"mthc1", AFGR64Opnd, GPR32Opnd, II_MTHC1>, MFC1_FM_MM<0xe0>, ISA_MICROMIPS, FGR_32; def MFHC1_D32_MM : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, AFGR64Opnd, II_MFHC1>, MFC1_FM_MM<0xc0>, ISA_MICROMIPS, FGR_32; } let DecoderNamespace = "MicroMipsFP64" in { def MTHC1_D64_MM : MTC1_64_FT<"mthc1", FGR64Opnd, GPR32Opnd, II_MTHC1>, MFC1_FM_MM<0xe0>, ISA_MICROMIPS, FGR_64; def MFHC1_D64_MM : MFC1_FT<"mfhc1", GPR32Opnd, FGR64Opnd, II_MFHC1>, MFC1_FM_MM<0xc0>, ISA_MICROMIPS, FGR_64; def MTC1_D64_MM : MTC1_FT<"mtc1", FGR64Opnd, GPR32Opnd, II_MTC1>, MFC1_FM_MM<0xa0>, ISA_MICROMIPS, FGR_64; } let DecoderNamespace = "MicroMips" in { def CFC1_MM : MMRel, MFC1_FT<"cfc1", GPR32Opnd, CCROpnd, II_CFC1>, MFC1_FM_MM<0x40>, ISA_MICROMIPS; def CTC1_MM : MMRel, MTC1_FT<"ctc1", CCROpnd, GPR32Opnd, II_CTC1>, MFC1_FM_MM<0x60>, ISA_MICROMIPS; def RECIP_S_MM : MMRel, ABSS_FT<"recip.s", FGR32Opnd, FGR32Opnd, II_RECIP_S>, ROUND_W_FM_MM<0b0, 0b01001000>, ISA_MICROMIPS; def RECIP_D32_MM : MMRel, ABSS_FT<"recip.d", AFGR64Opnd, AFGR64Opnd, II_RECIP_D>, ROUND_W_FM_MM<0b1, 0b01001000>, ISA_MICROMIPS, FGR_32 { let BaseOpcode = "RECIP_D32"; } let DecoderNamespace = "MicroMipsFP64" in def RECIP_D64_MM : MMRel, ABSS_FT<"recip.d", FGR64Opnd, FGR64Opnd, II_RECIP_D>, ROUND_W_FM_MM<0b1, 0b01001000>, ISA_MICROMIPS, FGR_64; def RSQRT_S_MM : MMRel, ABSS_FT<"rsqrt.s", FGR32Opnd, FGR32Opnd, II_RECIP_S>, ROUND_W_FM_MM<0b0, 0b00001000>; def RSQRT_D32_MM : MMRel, ABSS_FT<"rsqrt.d", AFGR64Opnd, AFGR64Opnd, II_RECIP_D>, ROUND_W_FM_MM<0b1, 0b00001000>, ISA_MICROMIPS, FGR_32 { let BaseOpcode = "RSQRT_D32"; } let DecoderNamespace = "MicroMipsFP64" in def RSQRT_D64_MM : MMRel, ABSS_FT<"rsqrt.d", FGR64Opnd, FGR64Opnd, II_RECIP_D>, ROUND_W_FM_MM<0b1, 0b00001000>, ISA_MICROMIPS, FGR_64; } let DecoderNamespace = "MicroMips", DecoderMethod = "DecodeFMemMMR2" in { def LDC1_MM : MMRel, LW_FT<"ldc1", AFGR64Opnd, mem_mm_16, II_LDC1, load>, LW_FM_MM<0x2f>, ISA_MICROMIPS, FGR_32 { let BaseOpcode = "LDC132"; } def SDC1_MM : MMRel, SW_FT<"sdc1", AFGR64Opnd, mem_mm_16, II_SDC1, store>, LW_FM_MM<0x2e>, ISA_MICROMIPS, FGR_32; def LWC1_MM : MMRel, LW_FT<"lwc1", FGR32Opnd, mem_mm_16, II_LWC1, load>, LW_FM_MM<0x27>, ISA_MICROMIPS; def SWC1_MM : MMRel, SW_FT<"swc1", FGR32Opnd, mem_mm_16, II_SWC1, store>, LW_FM_MM<0x26>, ISA_MICROMIPS; } multiclass C_COND_MM fmt, InstrItinClass itin> { def C_F_#NAME#_MM : MMRel, 
C_COND_FT<"f", TypeStr, RC, itin>, C_COND_FM_MM { let BaseOpcode = "c.f."#NAME; let isCommutable = 1; } def C_UN_#NAME#_MM : MMRel, C_COND_FT<"un", TypeStr, RC, itin>, C_COND_FM_MM { let BaseOpcode = "c.un."#NAME; let isCommutable = 1; } def C_EQ_#NAME#_MM : MMRel, C_COND_FT<"eq", TypeStr, RC, itin>, C_COND_FM_MM { let BaseOpcode = "c.eq."#NAME; let isCommutable = 1; } def C_UEQ_#NAME#_MM : MMRel, C_COND_FT<"ueq", TypeStr, RC, itin>, C_COND_FM_MM { let BaseOpcode = "c.ueq."#NAME; let isCommutable = 1; } def C_OLT_#NAME#_MM : MMRel, C_COND_FT<"olt", TypeStr, RC, itin>, C_COND_FM_MM { let BaseOpcode = "c.olt."#NAME; } def C_ULT_#NAME#_MM : MMRel, C_COND_FT<"ult", TypeStr, RC, itin>, C_COND_FM_MM { let BaseOpcode = "c.ult."#NAME; } def C_OLE_#NAME#_MM : MMRel, C_COND_FT<"ole", TypeStr, RC, itin>, C_COND_FM_MM { let BaseOpcode = "c.ole."#NAME; } def C_ULE_#NAME#_MM : MMRel, C_COND_FT<"ule", TypeStr, RC, itin>, C_COND_FM_MM { let BaseOpcode = "c.ule."#NAME; } def C_SF_#NAME#_MM : MMRel, C_COND_FT<"sf", TypeStr, RC, itin>, C_COND_FM_MM { let BaseOpcode = "c.sf."#NAME; let isCommutable = 1; } def C_NGLE_#NAME#_MM : MMRel, C_COND_FT<"ngle", TypeStr, RC, itin>, C_COND_FM_MM { let BaseOpcode = "c.ngle."#NAME; } def C_SEQ_#NAME#_MM : MMRel, C_COND_FT<"seq", TypeStr, RC, itin>, C_COND_FM_MM { let BaseOpcode = "c.seq."#NAME; let isCommutable = 1; } def C_NGL_#NAME#_MM : MMRel, C_COND_FT<"ngl", TypeStr, RC, itin>, C_COND_FM_MM { let BaseOpcode = "c.ngl."#NAME; } def C_LT_#NAME#_MM : MMRel, C_COND_FT<"lt", TypeStr, RC, itin>, C_COND_FM_MM { let BaseOpcode = "c.lt."#NAME; } def C_NGE_#NAME#_MM : MMRel, C_COND_FT<"nge", TypeStr, RC, itin>, C_COND_FM_MM { let BaseOpcode = "c.nge."#NAME; } def C_LE_#NAME#_MM : MMRel, C_COND_FT<"le", TypeStr, RC, itin>, C_COND_FM_MM { let BaseOpcode = "c.le."#NAME; } def C_NGT_#NAME#_MM : MMRel, C_COND_FT<"ngt", TypeStr, RC, itin>, C_COND_FM_MM { let BaseOpcode = "c.ngt."#NAME; } } let DecoderNamespace = "MicroMips" in { defm S : C_COND_MM<"s", FGR32Opnd, 0b00, II_C_CC_S>, ISA_MICROMIPS32_NOT_MIPS32R6; defm D32 : C_COND_MM<"d", AFGR64Opnd, 0b01, II_C_CC_D>, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; } let DecoderNamespace = "Mips64" in defm D64 : C_COND_MM<"d", FGR64Opnd, 0b01, II_C_CC_D>, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_64; defm S_MM : C_COND_ALIASES<"s", FGR32Opnd>, HARDFLOAT, ISA_MICROMIPS32_NOT_MIPS32R6; defm D32_MM : C_COND_ALIASES<"d", AFGR64Opnd>, HARDFLOAT, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; defm D64_MM : C_COND_ALIASES<"d", FGR64Opnd>, HARDFLOAT, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_64; defm : BC1_ALIASES, ISA_MICROMIPS32_NOT_MIPS32R6, HARDFLOAT; // To generate NMADD and NMSUB instructions when fneg node is present let AdditionalPredicates = [NoNaNsFPMath, HasMadd4, InMicroMips, NotMips32r6] in { defm : NMADD_NMSUB, ISA_MICROMIPS32_NOT_MIPS32R6; defm : NMADD_NMSUB, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; } //===----------------------------------------------------------------------===// // Floating Point Patterns //===----------------------------------------------------------------------===// // Patterns for loads/stores with a reg+imm operand. 
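// Illustration (sketch): a LoadRegImmPat below ties an FP load to the
// reg+imm addressing form, so a frame access such as
//   lwc1 $f0, 16($sp)
// is selected as a single instruction instead of materializing the address
// separately; the raised AddedComplexity lets these patterns win over the
// plain register-addressed forms.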
let AddedComplexity = 40 in { def : LoadRegImmPat, ISA_MICROMIPS, FGR_32; def : StoreRegImmPat, ISA_MICROMIPS, FGR_32; def : LoadRegImmPat, ISA_MICROMIPS; def : StoreRegImmPat, ISA_MICROMIPS; } def : MipsPat<(MipsMTC1_D64 GPR32Opnd:$src), (MTC1_D64_MM GPR32Opnd:$src)>, ISA_MICROMIPS, FGR_64; def : MipsPat<(f32 fpimm0), (MTC1_MM ZERO)>, ISA_MICROMIPS32_NOT_MIPS32R6; def : MipsPat<(f32 fpimm0neg), (FNEG_S_MM (MTC1_MM ZERO))>, ISA_MICROMIPS32_NOT_MIPS32R6; def : MipsPat<(f32 (fpround FGR64Opnd:$src)), (CVT_S_D64_MM FGR64Opnd:$src)>, ISA_MICROMIPS, FGR_64; def : MipsPat<(f64 (fpextend FGR32Opnd:$src)), (CVT_D64_S_MM FGR32Opnd:$src)>, ISA_MICROMIPS, FGR_64; def : MipsPat<(f32 (fpround AFGR64Opnd:$src)), (CVT_S_D32_MM AFGR64Opnd:$src)>, ISA_MICROMIPS, FGR_32; def : MipsPat<(f64 (fpextend FGR32Opnd:$src)), (CVT_D32_S_MM FGR32Opnd:$src)>, ISA_MICROMIPS, FGR_32; def : MipsPat<(MipsTruncIntFP AFGR64Opnd:$src), (TRUNC_W_MM AFGR64Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; +def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src), + (CVT_W_D64_MM FGR64Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6, + FGR_64; +def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src), + (TRUNC_W_S_MM FGR32Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6; // Selects defm : MovzPats0, ISA_MICROMIPS32_NOT_MIPS32R6; defm : MovzPats1, ISA_MICROMIPS32_NOT_MIPS32R6; defm : MovnPats, ISA_MICROMIPS32_NOT_MIPS32R6; defm : MovzPats0, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; defm : MovzPats1, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; defm : MovnPats, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; Index: vendor/llvm/dist-release_80/lib/Target/Mips/MipsAsmPrinter.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/Target/Mips/MipsAsmPrinter.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/Mips/MipsAsmPrinter.cpp (revision 348932) @@ -1,1304 +1,1305 @@ //===- MipsAsmPrinter.cpp - Mips LLVM Assembly Printer --------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file contains a printer that converts from our internal representation // of machine-dependent LLVM code to GAS-format MIPS assembly language. 
//
//===----------------------------------------------------------------------===//

#include "MipsAsmPrinter.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MCTargetDesc/MipsABIInfo.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MCTargetDesc/MipsMCNaCl.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "Mips.h"
#include "MipsMCInstLower.h"
#include "MipsMachineFunction.h"
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
#include "MipsTargetStreamer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "mips-asm-printer"

extern cl::opt<bool> EmitJalrReloc;

MipsTargetStreamer &MipsAsmPrinter::getTargetStreamer() const {
  return static_cast<MipsTargetStreamer &>(*OutStreamer->getTargetStreamer());
}

bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<MipsSubtarget>();

  MipsFI = MF.getInfo<MipsFunctionInfo>();
  if (Subtarget->inMips16Mode())
    for (std::map<
             const char *,
             const Mips16HardFloatInfo::FuncSignature *>::const_iterator
             it = MipsFI->StubsNeeded.begin();
         it != MipsFI->StubsNeeded.end(); ++it) {
      const char *Symbol = it->first;
      const Mips16HardFloatInfo::FuncSignature *Signature = it->second;
      if (StubsNeeded.find(Symbol) == StubsNeeded.end())
        StubsNeeded[Symbol] = Signature;
    }
  MCP = MF.getConstantPool();

  // In NaCl, all indirect jump targets must be aligned to bundle size.
  if (Subtarget->isTargetNaCl())
    NaClAlignIndirectJumpTargets(MF);

  AsmPrinter::runOnMachineFunction(MF);

  emitXRayTable();

  return true;
}

bool MipsAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) {
  MCOp = MCInstLowering.LowerOperand(MO);
  return MCOp.isValid();
}

#include "MipsGenMCPseudoLowering.inc"

// Lower PseudoReturn/PseudoIndirectBranch/PseudoIndirectBranch64 to JR, JR_MM,
// JALR, or JALR64 as appropriate for the target.
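// Illustration (sketch): for an indirect branch through $t9 the expansion
// below produces
//   JR $t9                 on pre-R6 targets,
//   JR_MM $t9              in microMIPS mode,
//   JRC16_MMR6 $t9         on microMIPS32r6, and
//   JALR/JALR64 $zero, $t9 on MIPS32r6/MIPS64r6, where writing the link
// register to $zero discards the return address.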
void MipsAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer, const MachineInstr *MI) { bool HasLinkReg = false; bool InMicroMipsMode = Subtarget->inMicroMipsMode(); MCInst TmpInst0; if (Subtarget->hasMips64r6()) { // MIPS64r6 should use (JALR64 ZERO_64, $rs) TmpInst0.setOpcode(Mips::JALR64); HasLinkReg = true; } else if (Subtarget->hasMips32r6()) { // MIPS32r6 should use (JALR ZERO, $rs) if (InMicroMipsMode) TmpInst0.setOpcode(Mips::JRC16_MMR6); else { TmpInst0.setOpcode(Mips::JALR); HasLinkReg = true; } } else if (Subtarget->inMicroMipsMode()) // microMIPS should use (JR_MM $rs) TmpInst0.setOpcode(Mips::JR_MM); else { // Everything else should use (JR $rs) TmpInst0.setOpcode(Mips::JR); } MCOperand MCOp; if (HasLinkReg) { unsigned ZeroReg = Subtarget->isGP64bit() ? Mips::ZERO_64 : Mips::ZERO; TmpInst0.addOperand(MCOperand::createReg(ZeroReg)); } lowerOperand(MI->getOperand(0), MCOp); TmpInst0.addOperand(MCOp); EmitToStreamer(OutStreamer, TmpInst0); } // If there is an MO_JALR operand, insert: // // .reloc tmplabel, R_{MICRO}MIPS_JALR, symbol // tmplabel: // // This is an optimization hint for the linker which may then replace // an indirect call with a direct branch. static void emitDirectiveRelocJalr(const MachineInstr &MI, MCContext &OutContext, TargetMachine &TM, MCStreamer &OutStreamer, const MipsSubtarget &Subtarget) { for (unsigned int I = MI.getDesc().getNumOperands(), E = MI.getNumOperands(); I < E; ++I) { MachineOperand MO = MI.getOperand(I); if (MO.isMCSymbol() && (MO.getTargetFlags() & MipsII::MO_JALR)) { MCSymbol *Callee = MO.getMCSymbol(); if (Callee && !Callee->getName().empty()) { MCSymbol *OffsetLabel = OutContext.createTempSymbol(); const MCExpr *OffsetExpr = MCSymbolRefExpr::create(OffsetLabel, OutContext); const MCExpr *CaleeExpr = MCSymbolRefExpr::create(Callee, OutContext); OutStreamer.EmitRelocDirective (*OffsetExpr, Subtarget.inMicroMipsMode() ? "R_MICROMIPS_JALR" : "R_MIPS_JALR", CaleeExpr, SMLoc(), *TM.getMCSubtargetInfo()); OutStreamer.EmitLabel(OffsetLabel); return; } } } } void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { MipsTargetStreamer &TS = getTargetStreamer(); unsigned Opc = MI->getOpcode(); TS.forbidModuleDirective(); if (MI->isDebugValue()) { SmallString<128> Str; raw_svector_ostream OS(Str); PrintDebugValueComment(MI, OS); return; } if (MI->isDebugLabel()) return; // If we just ended a constant pool, mark it as such. if (InConstantPool && Opc != Mips::CONSTPOOL_ENTRY) { OutStreamer->EmitDataRegion(MCDR_DataRegionEnd); InConstantPool = false; } if (Opc == Mips::CONSTPOOL_ENTRY) { // CONSTPOOL_ENTRY - This instruction represents a floating // constant pool in the function. The first operand is the ID# // for this instruction, the second is the index into the // MachineConstantPool that this is, the third is the size in // bytes of this constant pool entry. // The required alignment is specified on the basic block holding this MI. // unsigned LabelId = (unsigned)MI->getOperand(0).getImm(); unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex(); // If this is the first entry of the pool, mark it. 
if (!InConstantPool) { OutStreamer->EmitDataRegion(MCDR_DataRegion); InConstantPool = true; } OutStreamer->EmitLabel(GetCPISymbol(LabelId)); const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx]; if (MCPE.isMachineConstantPoolEntry()) EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); else EmitGlobalConstant(MF->getDataLayout(), MCPE.Val.ConstVal); return; } switch (Opc) { case Mips::PATCHABLE_FUNCTION_ENTER: LowerPATCHABLE_FUNCTION_ENTER(*MI); return; case Mips::PATCHABLE_FUNCTION_EXIT: LowerPATCHABLE_FUNCTION_EXIT(*MI); return; case Mips::PATCHABLE_TAIL_CALL: LowerPATCHABLE_TAIL_CALL(*MI); return; } if (EmitJalrReloc && (MI->isReturn() || MI->isCall() || MI->isIndirectBranch())) { emitDirectiveRelocJalr(*MI, OutContext, TM, *OutStreamer, *Subtarget); } MachineBasicBlock::const_instr_iterator I = MI->getIterator(); MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); do { // Do any auto-generated pseudo lowerings. if (emitPseudoExpansionLowering(*OutStreamer, &*I)) continue; if (I->getOpcode() == Mips::PseudoReturn || I->getOpcode() == Mips::PseudoReturn64 || I->getOpcode() == Mips::PseudoIndirectBranch || I->getOpcode() == Mips::PseudoIndirectBranch64 || I->getOpcode() == Mips::TAILCALLREG || I->getOpcode() == Mips::TAILCALLREG64) { emitPseudoIndirectBranch(*OutStreamer, &*I); continue; } // The inMips16Mode() test is not permanent. // Some instructions are marked as pseudo right now which // would make the test fail for the wrong reason but // that will be fixed soon. We need this here because we are // removing another test for this situation downstream in the // callchain. // if (I->isPseudo() && !Subtarget->inMips16Mode() && !isLongBranchPseudo(I->getOpcode())) llvm_unreachable("Pseudo opcode found in EmitInstruction()"); MCInst TmpInst0; MCInstLowering.Lower(&*I, TmpInst0); EmitToStreamer(*OutStreamer, TmpInst0); } while ((++I != E) && I->isInsideBundle()); // Delay slot check } //===----------------------------------------------------------------------===// // // Mips Asm Directives // // -- Frame directive "frame Stackpointer, Stacksize, RARegister" // Describe the stack frame. // // -- Mask directives "(f)mask bitmask, offset" // Tells the assembler which registers are saved and where. // bitmask - contain a little endian bitset indicating which registers are // saved on function prologue (e.g. with a 0x80000000 mask, the // assembler knows the register 31 (RA) is saved at prologue. // offset - the position before stack pointer subtraction indicating where // the first saved register on prologue is located. (e.g. with a // // Consider the following function prologue: // // .frame $fp,48,$ra // .mask 0xc0000000,-8 // addiu $sp, $sp, -48 // sw $ra, 40($sp) // sw $fp, 36($sp) // // With a 0xc0000000 mask, the assembler knows the register 31 (RA) and // 30 (FP) are saved at prologue. As the save order on prologue is from // left to right, RA is saved first. A -8 offset means that after the // stack pointer subtration, the first register in the mask (RA) will be // saved at address 48-8=40. // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // Mask directives //===----------------------------------------------------------------------===// // Create a bitmask with all callee saved registers for CPU or Floating Point // registers. For CPU registers consider RA, GP and FP for saving if necessary. 
void MipsAsmPrinter::printSavedRegsBitmask() { // CPU and FPU Saved Registers Bitmasks unsigned CPUBitmask = 0, FPUBitmask = 0; int CPUTopSavedRegOff, FPUTopSavedRegOff; // Set the CPU and FPU Bitmasks const MachineFrameInfo &MFI = MF->getFrameInfo(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); const std::vector &CSI = MFI.getCalleeSavedInfo(); // size of stack area to which FP callee-saved regs are saved. unsigned CPURegSize = TRI->getRegSizeInBits(Mips::GPR32RegClass) / 8; unsigned FGR32RegSize = TRI->getRegSizeInBits(Mips::FGR32RegClass) / 8; unsigned AFGR64RegSize = TRI->getRegSizeInBits(Mips::AFGR64RegClass) / 8; bool HasAFGR64Reg = false; unsigned CSFPRegsSize = 0; for (const auto &I : CSI) { unsigned Reg = I.getReg(); unsigned RegNum = TRI->getEncodingValue(Reg); // If it's a floating point register, set the FPU Bitmask. // If it's a general purpose register, set the CPU Bitmask. if (Mips::FGR32RegClass.contains(Reg)) { FPUBitmask |= (1 << RegNum); CSFPRegsSize += FGR32RegSize; } else if (Mips::AFGR64RegClass.contains(Reg)) { FPUBitmask |= (3 << RegNum); CSFPRegsSize += AFGR64RegSize; HasAFGR64Reg = true; } else if (Mips::GPR32RegClass.contains(Reg)) CPUBitmask |= (1 << RegNum); } // FP Regs are saved right below where the virtual frame pointer points to. FPUTopSavedRegOff = FPUBitmask ? (HasAFGR64Reg ? -AFGR64RegSize : -FGR32RegSize) : 0; // CPU Regs are saved below FP Regs. CPUTopSavedRegOff = CPUBitmask ? -CSFPRegsSize - CPURegSize : 0; MipsTargetStreamer &TS = getTargetStreamer(); // Print CPUBitmask TS.emitMask(CPUBitmask, CPUTopSavedRegOff); // Print FPUBitmask TS.emitFMask(FPUBitmask, FPUTopSavedRegOff); } //===----------------------------------------------------------------------===// // Frame and Set directives //===----------------------------------------------------------------------===// /// Frame Directive void MipsAsmPrinter::emitFrameDirective() { const TargetRegisterInfo &RI = *MF->getSubtarget().getRegisterInfo(); unsigned stackReg = RI.getFrameRegister(*MF); unsigned returnReg = RI.getRARegister(); unsigned stackSize = MF->getFrameInfo().getStackSize(); getTargetStreamer().emitFrame(stackReg, stackSize, returnReg); } /// Emit Set directives. const char *MipsAsmPrinter::getCurrentABIString() const { switch (static_cast(TM).getABI().GetEnumValue()) { case MipsABIInfo::ABI::O32: return "abi32"; case MipsABIInfo::ABI::N32: return "abiN32"; case MipsABIInfo::ABI::N64: return "abi64"; default: llvm_unreachable("Unknown Mips ABI"); } } void MipsAsmPrinter::EmitFunctionEntryLabel() { MipsTargetStreamer &TS = getTargetStreamer(); // NaCl sandboxing requires that indirect call instructions are masked. // This means that function entry points should be bundle-aligned. if (Subtarget->isTargetNaCl()) EmitAlignment(std::max(MF->getAlignment(), MIPS_NACL_BUNDLE_ALIGN)); if (Subtarget->inMicroMipsMode()) { TS.emitDirectiveSetMicroMips(); TS.setUsesMicroMips(); TS.updateABIInfo(*Subtarget); } else TS.emitDirectiveSetNoMicroMips(); if (Subtarget->inMips16Mode()) TS.emitDirectiveSetMips16(); else TS.emitDirectiveSetNoMips16(); TS.emitDirectiveEnt(*CurrentFnSym); OutStreamer->EmitLabel(CurrentFnSym); } /// EmitFunctionBodyStart - Targets can override this to emit stuff before /// the first basic block in the function. 
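// Illustration (sketch; the concrete values are made up): together with the
// directives emitted below, a typical non-naked, non-MIPS16 function body
// starts as
//   .frame  $fp,48,$ra
//   .mask   0xc0000000,-8
//   .fmask  0x00000000,0
//   .set    noreorder
//   .set    nomacro
//   .set    noat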
void MipsAsmPrinter::EmitFunctionBodyStart() { MipsTargetStreamer &TS = getTargetStreamer(); MCInstLowering.Initialize(&MF->getContext()); bool IsNakedFunction = MF->getFunction().hasFnAttribute(Attribute::Naked); if (!IsNakedFunction) emitFrameDirective(); if (!IsNakedFunction) printSavedRegsBitmask(); if (!Subtarget->inMips16Mode()) { TS.emitDirectiveSetNoReorder(); TS.emitDirectiveSetNoMacro(); TS.emitDirectiveSetNoAt(); } } /// EmitFunctionBodyEnd - Targets can override this to emit stuff after /// the last basic block in the function. void MipsAsmPrinter::EmitFunctionBodyEnd() { MipsTargetStreamer &TS = getTargetStreamer(); // There are instruction for this macros, but they must // always be at the function end, and we can't emit and // break with BB logic. if (!Subtarget->inMips16Mode()) { TS.emitDirectiveSetAt(); TS.emitDirectiveSetMacro(); TS.emitDirectiveSetReorder(); } TS.emitDirectiveEnd(CurrentFnSym->getName()); // Make sure to terminate any constant pools that were at the end // of the function. if (!InConstantPool) return; InConstantPool = false; OutStreamer->EmitDataRegion(MCDR_DataRegionEnd); } void MipsAsmPrinter::EmitBasicBlockEnd(const MachineBasicBlock &MBB) { AsmPrinter::EmitBasicBlockEnd(MBB); MipsTargetStreamer &TS = getTargetStreamer(); if (MBB.empty()) TS.emitDirectiveInsn(); } /// isBlockOnlyReachableByFallthough - Return true if the basic block has /// exactly one predecessor and the control transfer mechanism between /// the predecessor and this block is a fall-through. bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock* MBB) const { // The predecessor has to be immediately before this block. const MachineBasicBlock *Pred = *MBB->pred_begin(); // If the predecessor is a switch statement, assume a jump table // implementation, so it is not a fall through. if (const BasicBlock *bb = Pred->getBasicBlock()) if (isa(bb->getTerminator())) return false; // If this is a landing pad, it isn't a fall through. If it has no preds, // then nothing falls through to it. if (MBB->isEHPad() || MBB->pred_empty()) return false; // If there isn't exactly one predecessor, it can't be a fall through. MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; ++PI2; if (PI2 != MBB->pred_end()) return false; // The predecessor has to be immediately before this block. if (!Pred->isLayoutSuccessor(MBB)) return false; // If the block is completely empty, then it definitely does fall through. if (Pred->empty()) return true; // Otherwise, check the last instruction. // Check if the last terminator is an unconditional branch. MachineBasicBlock::const_iterator I = Pred->end(); while (I != Pred->begin() && !(--I)->isTerminator()) ; return !I->isBarrier(); } // Print out an operand for an inline asm expression. bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { // Does this asm operand have a single letter operand modifier? if (ExtraCode && ExtraCode[0]) { if (ExtraCode[1] != 0) return true; // Unknown modifier. 
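// Illustration (sketch): for an immediate operand with value 0x12345 the
// modifiers handled below print
//   %X<n> -> 0x12345   (full value in hex)
//   %x<n> -> 0x2345    (low 16 bits in hex)
//   %d<n> -> 74565     (decimal)
//   %m<n> -> 74564     (decimal minus one)
// while %z<n> prints "$0" for a zero immediate, and %D/%L/%M pick the
// second/low-order/high-order register of a double-word register operand.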
const MachineOperand &MO = MI->getOperand(OpNum); switch (ExtraCode[0]) { default: // See if this is a generic print operand return AsmPrinter::PrintAsmOperand(MI,OpNum,AsmVariant,ExtraCode,O); case 'X': // hex const int if ((MO.getType()) != MachineOperand::MO_Immediate) return true; O << "0x" << Twine::utohexstr(MO.getImm()); return false; case 'x': // hex const int (low 16 bits) if ((MO.getType()) != MachineOperand::MO_Immediate) return true; O << "0x" << Twine::utohexstr(MO.getImm() & 0xffff); return false; case 'd': // decimal const int if ((MO.getType()) != MachineOperand::MO_Immediate) return true; O << MO.getImm(); return false; case 'm': // decimal const int minus 1 if ((MO.getType()) != MachineOperand::MO_Immediate) return true; O << MO.getImm() - 1; return false; case 'y': // exact log2 if ((MO.getType()) != MachineOperand::MO_Immediate) return true; if (!isPowerOf2_64(MO.getImm())) return true; O << Log2_64(MO.getImm()); return false; case 'z': // $0 if zero, regular printing otherwise if (MO.getType() == MachineOperand::MO_Immediate && MO.getImm() == 0) { O << "$0"; return false; } // If not, call printOperand as normal. break; case 'D': // Second part of a double word register operand case 'L': // Low order register of a double word register operand case 'M': // High order register of a double word register operand { if (OpNum == 0) return true; const MachineOperand &FlagsOP = MI->getOperand(OpNum - 1); if (!FlagsOP.isImm()) return true; unsigned Flags = FlagsOP.getImm(); unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); // Number of registers represented by this operand. We are looking // for 2 for 32 bit mode and 1 for 64 bit mode. if (NumVals != 2) { if (Subtarget->isGP64bit() && NumVals == 1 && MO.isReg()) { unsigned Reg = MO.getReg(); O << '$' << MipsInstPrinter::getRegisterName(Reg); return false; } return true; } unsigned RegOp = OpNum; if (!Subtarget->isGP64bit()){ // Endianness reverses which register holds the high or low value // between M and L. switch(ExtraCode[0]) { case 'M': RegOp = (Subtarget->isLittle()) ? OpNum + 1 : OpNum; break; case 'L': RegOp = (Subtarget->isLittle()) ? OpNum : OpNum + 1; break; case 'D': // Always the second part RegOp = OpNum + 1; } if (RegOp >= MI->getNumOperands()) return true; const MachineOperand &MO = MI->getOperand(RegOp); if (!MO.isReg()) return true; unsigned Reg = MO.getReg(); O << '$' << MipsInstPrinter::getRegisterName(Reg); return false; } break; } case 'w': // Print MSA registers for the 'f' constraint // In LLVM, the 'w' modifier doesn't need to do anything. // We can just call printOperand as normal. break; } } printOperand(MI, OpNum, O); return false; } bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { assert(OpNum + 1 < MI->getNumOperands() && "Insufficient operands"); const MachineOperand &BaseMO = MI->getOperand(OpNum); const MachineOperand &OffsetMO = MI->getOperand(OpNum + 1); assert(BaseMO.isReg() && "Unexpected base pointer for inline asm memory operand."); assert(OffsetMO.isImm() && "Unexpected offset for inline asm memory operand."); int Offset = OffsetMO.getImm(); // Currently we are expecting either no ExtraCode or 'D','M','L'. if (ExtraCode) { switch (ExtraCode[0]) { case 'D': Offset += 4; break; case 'M': if (Subtarget->isLittle()) Offset += 4; break; case 'L': if (!Subtarget->isLittle()) Offset += 4; break; default: return true; // Unknown modifier. 
} } O << Offset << "($" << MipsInstPrinter::getRegisterName(BaseMO.getReg()) << ")"; return false; } void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(opNum); bool closeP = false; if (MO.getTargetFlags()) closeP = true; switch(MO.getTargetFlags()) { case MipsII::MO_GPREL: O << "%gp_rel("; break; case MipsII::MO_GOT_CALL: O << "%call16("; break; case MipsII::MO_GOT: O << "%got("; break; case MipsII::MO_ABS_HI: O << "%hi("; break; case MipsII::MO_ABS_LO: O << "%lo("; break; case MipsII::MO_HIGHER: O << "%higher("; break; case MipsII::MO_HIGHEST: O << "%highest(("; break; case MipsII::MO_TLSGD: O << "%tlsgd("; break; case MipsII::MO_GOTTPREL: O << "%gottprel("; break; case MipsII::MO_TPREL_HI: O << "%tprel_hi("; break; case MipsII::MO_TPREL_LO: O << "%tprel_lo("; break; case MipsII::MO_GPOFF_HI: O << "%hi(%neg(%gp_rel("; break; case MipsII::MO_GPOFF_LO: O << "%lo(%neg(%gp_rel("; break; case MipsII::MO_GOT_DISP: O << "%got_disp("; break; case MipsII::MO_GOT_PAGE: O << "%got_page("; break; case MipsII::MO_GOT_OFST: O << "%got_ofst("; break; } switch (MO.getType()) { case MachineOperand::MO_Register: O << '$' << StringRef(MipsInstPrinter::getRegisterName(MO.getReg())).lower(); break; case MachineOperand::MO_Immediate: O << MO.getImm(); break; case MachineOperand::MO_MachineBasicBlock: MO.getMBB()->getSymbol()->print(O, MAI); return; case MachineOperand::MO_GlobalAddress: getSymbol(MO.getGlobal())->print(O, MAI); break; case MachineOperand::MO_BlockAddress: { MCSymbol *BA = GetBlockAddressSymbol(MO.getBlockAddress()); O << BA->getName(); break; } case MachineOperand::MO_ConstantPoolIndex: O << getDataLayout().getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" << MO.getIndex(); if (MO.getOffset()) O << "+" << MO.getOffset(); break; default: llvm_unreachable(""); } if (closeP) O << ")"; } void MipsAsmPrinter:: printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { // Load/Store memory operands -- imm($reg) // If PIC target the target is loaded as the // pattern lw $25,%call16($28) // opNum can be invalid if instruction has reglist as operand. // MemOperand is always last operand of instruction (base + offset). switch (MI->getOpcode()) { default: break; case Mips::SWM32_MM: case Mips::LWM32_MM: opNum = MI->getNumOperands() - 2; break; } printOperand(MI, opNum+1, O); O << "("; printOperand(MI, opNum, O); O << ")"; } void MipsAsmPrinter:: printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O) { // when using stack locations for not load/store instructions // print the same way as all normal 3 operand instructions. printOperand(MI, opNum, O); O << ", "; printOperand(MI, opNum+1, O); } void MipsAsmPrinter:: printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, const char *Modifier) { const MachineOperand &MO = MI->getOperand(opNum); O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm()); } void MipsAsmPrinter:: printRegisterList(const MachineInstr *MI, int opNum, raw_ostream &O) { for (int i = opNum, e = MI->getNumOperands(); i != e; ++i) { if (i != opNum) O << ", "; printOperand(MI, i, O); } } void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { MipsTargetStreamer &TS = getTargetStreamer(); // MipsTargetStreamer has an initialization order problem when emitting an // object file directly (see MipsTargetELFStreamer for full details). Work // around it by re-initializing the PIC state here. 
TS.setPic(OutContext.getObjectFileInfo()->isPositionIndependent()); // Compute MIPS architecture attributes based on the default subtarget // that we'd have constructed. Module level directives aren't LTO // clean anyhow. // FIXME: For ifunc related functions we could iterate over and look // for a feature string that doesn't match the default one. const Triple &TT = TM.getTargetTriple(); StringRef CPU = MIPS_MC::selectMipsCPU(TT, TM.getTargetCPU()); StringRef FS = TM.getTargetFeatureString(); const MipsTargetMachine &MTM = static_cast(TM); const MipsSubtarget STI(TT, CPU, FS, MTM.isLittleEndian(), MTM, 0); bool IsABICalls = STI.isABICalls(); const MipsABIInfo &ABI = MTM.getABI(); if (IsABICalls) { TS.emitDirectiveAbiCalls(); // FIXME: This condition should be a lot more complicated that it is here. // Ideally it should test for properties of the ABI and not the ABI // itself. // For the moment, I'm only correcting enough to make MIPS-IV work. if (!isPositionIndependent() && STI.hasSym32()) TS.emitDirectiveOptionPic0(); } // Tell the assembler which ABI we are using std::string SectionName = std::string(".mdebug.") + getCurrentABIString(); OutStreamer->SwitchSection( OutContext.getELFSection(SectionName, ELF::SHT_PROGBITS, 0)); // NaN: At the moment we only support: // 1. .nan legacy (default) // 2. .nan 2008 STI.isNaN2008() ? TS.emitDirectiveNaN2008() : TS.emitDirectiveNaNLegacy(); // TODO: handle O64 ABI TS.updateABIInfo(STI); // We should always emit a '.module fp=...' but binutils 2.24 does not accept // it. We therefore emit it when it contradicts the ABI defaults (-mfpxx or // -mfp64) and omit it otherwise. - if (ABI.IsO32() && (STI.isABI_FPXX() || STI.isFP64bit())) + if ((ABI.IsO32() && (STI.isABI_FPXX() || STI.isFP64bit())) || + STI.useSoftFloat()) TS.emitDirectiveModuleFP(); // We should always emit a '.module [no]oddspreg' but binutils 2.24 does not // accept it. We therefore emit it when it contradicts the default or an // option has changed the default (i.e. FPXX) and omit it otherwise. if (ABI.IsO32() && (!STI.useOddSPReg() || STI.isABI_FPXX())) TS.emitDirectiveModuleOddSPReg(); } void MipsAsmPrinter::emitInlineAsmStart() const { MipsTargetStreamer &TS = getTargetStreamer(); // GCC's choice of assembler options for inline assembly code ('at', 'macro' // and 'reorder') is different from LLVM's choice for generated code ('noat', // 'nomacro' and 'noreorder'). // In order to maintain compatibility with inline assembly code which depends // on GCC's assembler options being used, we have to switch to those options // for the duration of the inline assembly block and then switch back. 
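// Illustration (sketch): an inline-asm block in the emitted assembly is
// therefore bracketed as
//   .set  push
//   .set  at
//   .set  macro
//   .set  reorder
//   ...inline assembly text...
//   .set  pop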
TS.emitDirectiveSetPush(); TS.emitDirectiveSetAt(); TS.emitDirectiveSetMacro(); TS.emitDirectiveSetReorder(); OutStreamer->AddBlankLine(); } void MipsAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, const MCSubtargetInfo *EndInfo) const { OutStreamer->AddBlankLine(); getTargetStreamer().emitDirectiveSetPop(); } void MipsAsmPrinter::EmitJal(const MCSubtargetInfo &STI, MCSymbol *Symbol) { MCInst I; I.setOpcode(Mips::JAL); I.addOperand( MCOperand::createExpr(MCSymbolRefExpr::create(Symbol, OutContext))); OutStreamer->EmitInstruction(I, STI); } void MipsAsmPrinter::EmitInstrReg(const MCSubtargetInfo &STI, unsigned Opcode, unsigned Reg) { MCInst I; I.setOpcode(Opcode); I.addOperand(MCOperand::createReg(Reg)); OutStreamer->EmitInstruction(I, STI); } void MipsAsmPrinter::EmitInstrRegReg(const MCSubtargetInfo &STI, unsigned Opcode, unsigned Reg1, unsigned Reg2) { MCInst I; // // Because of the current td files for Mips32, the operands for MTC1 // appear backwards from their normal assembly order. It's not a trivial // change to fix this in the td file so we adjust for it here. // if (Opcode == Mips::MTC1) { unsigned Temp = Reg1; Reg1 = Reg2; Reg2 = Temp; } I.setOpcode(Opcode); I.addOperand(MCOperand::createReg(Reg1)); I.addOperand(MCOperand::createReg(Reg2)); OutStreamer->EmitInstruction(I, STI); } void MipsAsmPrinter::EmitInstrRegRegReg(const MCSubtargetInfo &STI, unsigned Opcode, unsigned Reg1, unsigned Reg2, unsigned Reg3) { MCInst I; I.setOpcode(Opcode); I.addOperand(MCOperand::createReg(Reg1)); I.addOperand(MCOperand::createReg(Reg2)); I.addOperand(MCOperand::createReg(Reg3)); OutStreamer->EmitInstruction(I, STI); } void MipsAsmPrinter::EmitMovFPIntPair(const MCSubtargetInfo &STI, unsigned MovOpc, unsigned Reg1, unsigned Reg2, unsigned FPReg1, unsigned FPReg2, bool LE) { if (!LE) { unsigned temp = Reg1; Reg1 = Reg2; Reg2 = temp; } EmitInstrRegReg(STI, MovOpc, Reg1, FPReg1); EmitInstrRegReg(STI, MovOpc, Reg2, FPReg2); } void MipsAsmPrinter::EmitSwapFPIntParams(const MCSubtargetInfo &STI, Mips16HardFloatInfo::FPParamVariant PV, bool LE, bool ToFP) { using namespace Mips16HardFloatInfo; unsigned MovOpc = ToFP ? 
Mips::MTC1 : Mips::MFC1; switch (PV) { case FSig: EmitInstrRegReg(STI, MovOpc, Mips::A0, Mips::F12); break; case FFSig: EmitMovFPIntPair(STI, MovOpc, Mips::A0, Mips::A1, Mips::F12, Mips::F14, LE); break; case FDSig: EmitInstrRegReg(STI, MovOpc, Mips::A0, Mips::F12); EmitMovFPIntPair(STI, MovOpc, Mips::A2, Mips::A3, Mips::F14, Mips::F15, LE); break; case DSig: EmitMovFPIntPair(STI, MovOpc, Mips::A0, Mips::A1, Mips::F12, Mips::F13, LE); break; case DDSig: EmitMovFPIntPair(STI, MovOpc, Mips::A0, Mips::A1, Mips::F12, Mips::F13, LE); EmitMovFPIntPair(STI, MovOpc, Mips::A2, Mips::A3, Mips::F14, Mips::F15, LE); break; case DFSig: EmitMovFPIntPair(STI, MovOpc, Mips::A0, Mips::A1, Mips::F12, Mips::F13, LE); EmitInstrRegReg(STI, MovOpc, Mips::A2, Mips::F14); break; case NoSig: return; } } void MipsAsmPrinter::EmitSwapFPIntRetval( const MCSubtargetInfo &STI, Mips16HardFloatInfo::FPReturnVariant RV, bool LE) { using namespace Mips16HardFloatInfo; unsigned MovOpc = Mips::MFC1; switch (RV) { case FRet: EmitInstrRegReg(STI, MovOpc, Mips::V0, Mips::F0); break; case DRet: EmitMovFPIntPair(STI, MovOpc, Mips::V0, Mips::V1, Mips::F0, Mips::F1, LE); break; case CFRet: EmitMovFPIntPair(STI, MovOpc, Mips::V0, Mips::V1, Mips::F0, Mips::F1, LE); break; case CDRet: EmitMovFPIntPair(STI, MovOpc, Mips::V0, Mips::V1, Mips::F0, Mips::F1, LE); EmitMovFPIntPair(STI, MovOpc, Mips::A0, Mips::A1, Mips::F2, Mips::F3, LE); break; case NoFPRet: break; } } void MipsAsmPrinter::EmitFPCallStub( const char *Symbol, const Mips16HardFloatInfo::FuncSignature *Signature) { using namespace Mips16HardFloatInfo; MCSymbol *MSymbol = OutContext.getOrCreateSymbol(StringRef(Symbol)); bool LE = getDataLayout().isLittleEndian(); // Construct a local MCSubtargetInfo here. // This is because the MachineFunction won't exist (but have not yet been // freed) and since we're at the global level we can use the default // constructed subtarget. std::unique_ptr STI(TM.getTarget().createMCSubtargetInfo( TM.getTargetTriple().str(), TM.getTargetCPU(), TM.getTargetFeatureString())); // // .global xxxx // OutStreamer->EmitSymbolAttribute(MSymbol, MCSA_Global); const char *RetType; // // make the comment field identifying the return and parameter // types of the floating point stub // # Stub function to call rettype xxxx (params) // switch (Signature->RetSig) { case FRet: RetType = "float"; break; case DRet: RetType = "double"; break; case CFRet: RetType = "complex"; break; case CDRet: RetType = "double complex"; break; case NoFPRet: RetType = ""; break; } const char *Parms; switch (Signature->ParamSig) { case FSig: Parms = "float"; break; case FFSig: Parms = "float, float"; break; case FDSig: Parms = "float, double"; break; case DSig: Parms = "double"; break; case DDSig: Parms = "double, double"; break; case DFSig: Parms = "double, float"; break; case NoSig: Parms = ""; break; } OutStreamer->AddComment("\t# Stub function to call " + Twine(RetType) + " " + Twine(Symbol) + " (" + Twine(Parms) + ")"); // // probably not necessary but we save and restore the current section state // OutStreamer->PushSection(); // // .section mips16.call.fpxxxx,"ax",@progbits // MCSectionELF *M = OutContext.getELFSection( ".mips16.call.fp." 
+ std::string(Symbol), ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_EXECINSTR); OutStreamer->SwitchSection(M, nullptr); // // .align 2 // OutStreamer->EmitValueToAlignment(4); MipsTargetStreamer &TS = getTargetStreamer(); // // .set nomips16 // .set nomicromips // TS.emitDirectiveSetNoMips16(); TS.emitDirectiveSetNoMicroMips(); // // .ent __call_stub_fp_xxxx // .type __call_stub_fp_xxxx,@function // __call_stub_fp_xxxx: // std::string x = "__call_stub_fp_" + std::string(Symbol); MCSymbolELF *Stub = cast(OutContext.getOrCreateSymbol(StringRef(x))); TS.emitDirectiveEnt(*Stub); MCSymbol *MType = OutContext.getOrCreateSymbol("__call_stub_fp_" + Twine(Symbol)); OutStreamer->EmitSymbolAttribute(MType, MCSA_ELF_TypeFunction); OutStreamer->EmitLabel(Stub); // Only handle non-pic for now. assert(!isPositionIndependent() && "should not be here if we are compiling pic"); TS.emitDirectiveSetReorder(); // // We need to add a MipsMCExpr class to MCTargetDesc to fully implement // stubs without raw text but this current patch is for compiler generated // functions and they all return some value. // The calling sequence for non pic is different in that case and we need // to implement %lo and %hi in order to handle the case of no return value // See the corresponding method in Mips16HardFloat for details. // // mov the return address to S2. // we have no stack space to store it and we are about to make another call. // We need to make sure that the enclosing function knows to save S2 // This should have already been handled. // // Mov $18, $31 EmitInstrRegRegReg(*STI, Mips::OR, Mips::S2, Mips::RA, Mips::ZERO); EmitSwapFPIntParams(*STI, Signature->ParamSig, LE, true); // Jal xxxx // EmitJal(*STI, MSymbol); // fix return values EmitSwapFPIntRetval(*STI, Signature->RetSig, LE); // // do the return // if (Signature->RetSig == NoFPRet) // llvm_unreachable("should not be any stubs here with no return value"); // else EmitInstrReg(*STI, Mips::JR, Mips::S2); MCSymbol *Tmp = OutContext.createTempSymbol(); OutStreamer->EmitLabel(Tmp); const MCSymbolRefExpr *E = MCSymbolRefExpr::create(Stub, OutContext); const MCSymbolRefExpr *T = MCSymbolRefExpr::create(Tmp, OutContext); const MCExpr *T_min_E = MCBinaryExpr::createSub(T, E, OutContext); OutStreamer->emitELFSize(Stub, T_min_E); TS.emitDirectiveEnd(x); OutStreamer->PopSection(); } void MipsAsmPrinter::EmitEndOfAsmFile(Module &M) { // Emit needed stubs // for (std::map< const char *, const Mips16HardFloatInfo::FuncSignature *>::const_iterator it = StubsNeeded.begin(); it != StubsNeeded.end(); ++it) { const char *Symbol = it->first; const Mips16HardFloatInfo::FuncSignature *Signature = it->second; EmitFPCallStub(Symbol, Signature); } // return to the text section OutStreamer->SwitchSection(OutContext.getObjectFileInfo()->getTextSection()); } void MipsAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) { const uint8_t NoopsInSledCount = Subtarget->isGP64bit() ? 
15 : 11; // For mips32 we want to emit the following pattern: // // .Lxray_sled_N: // ALIGN // B .tmpN // 11 NOP instructions (44 bytes) // ADDIU T9, T9, 52 // .tmpN // // We need the 44 bytes (11 instructions) because at runtime, we'd // be patching over the full 48 bytes (12 instructions) with the following // pattern: // // ADDIU SP, SP, -8 // NOP // SW RA, 4(SP) // SW T9, 0(SP) // LUI T9, %hi(__xray_FunctionEntry/Exit) // ORI T9, T9, %lo(__xray_FunctionEntry/Exit) // LUI T0, %hi(function_id) // JALR T9 // ORI T0, T0, %lo(function_id) // LW T9, 0(SP) // LW RA, 4(SP) // ADDIU SP, SP, 8 // // We add 52 bytes to t9 because we want to adjust the function pointer to // the actual start of function i.e. the address just after the noop sled. // We do this because gp displacement relocation is emitted at the start of // of the function i.e after the nop sled and to correctly calculate the // global offset table address, t9 must hold the address of the instruction // containing the gp displacement relocation. // FIXME: Is this correct for the static relocation model? // // For mips64 we want to emit the following pattern: // // .Lxray_sled_N: // ALIGN // B .tmpN // 15 NOP instructions (60 bytes) // .tmpN // // We need the 60 bytes (15 instructions) because at runtime, we'd // be patching over the full 64 bytes (16 instructions) with the following // pattern: // // DADDIU SP, SP, -16 // NOP // SD RA, 8(SP) // SD T9, 0(SP) // LUI T9, %highest(__xray_FunctionEntry/Exit) // ORI T9, T9, %higher(__xray_FunctionEntry/Exit) // DSLL T9, T9, 16 // ORI T9, T9, %hi(__xray_FunctionEntry/Exit) // DSLL T9, T9, 16 // ORI T9, T9, %lo(__xray_FunctionEntry/Exit) // LUI T0, %hi(function_id) // JALR T9 // ADDIU T0, T0, %lo(function_id) // LD T9, 0(SP) // LD RA, 8(SP) // DADDIU SP, SP, 16 // OutStreamer->EmitCodeAlignment(4); auto CurSled = OutContext.createTempSymbol("xray_sled_", true); OutStreamer->EmitLabel(CurSled); auto Target = OutContext.createTempSymbol(); // Emit "B .tmpN" instruction, which jumps over the nop sled to the actual // start of function const MCExpr *TargetExpr = MCSymbolRefExpr::create( Target, MCSymbolRefExpr::VariantKind::VK_None, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(Mips::BEQ) .addReg(Mips::ZERO) .addReg(Mips::ZERO) .addExpr(TargetExpr)); for (int8_t I = 0; I < NoopsInSledCount; I++) EmitToStreamer(*OutStreamer, MCInstBuilder(Mips::SLL) .addReg(Mips::ZERO) .addReg(Mips::ZERO) .addImm(0)); OutStreamer->EmitLabel(Target); if (!Subtarget->isGP64bit()) { EmitToStreamer(*OutStreamer, MCInstBuilder(Mips::ADDiu) .addReg(Mips::T9) .addReg(Mips::T9) .addImm(0x34)); } recordSled(CurSled, MI, Kind); } void MipsAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI) { EmitSled(MI, SledKind::FUNCTION_ENTER); } void MipsAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI) { EmitSled(MI, SledKind::FUNCTION_EXIT); } void MipsAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) { EmitSled(MI, SledKind::TAIL_CALL); } void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS) { // TODO: implement } // Emit .dtprelword or .dtpreldword directive // and value for debug thread local expression. 
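// Illustration (sketch; "sym" is a made-up symbol): a 4-byte MEK_DTPREL debug
// value is emitted as
//   .dtprelword  sym+8
// an 8-byte one as
//   .dtpreldword sym+8
// and anything else falls back to the generic AsmPrinter handling.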
void MipsAsmPrinter::EmitDebugValue(const MCExpr *Value, unsigned Size) const {
  if (auto *MipsExpr = dyn_cast<MipsMCExpr>(Value)) {
    if (MipsExpr && MipsExpr->getKind() == MipsMCExpr::MEK_DTPREL) {
      switch (Size) {
      case 4:
        OutStreamer->EmitDTPRel32Value(MipsExpr->getSubExpr());
        break;
      case 8:
        OutStreamer->EmitDTPRel64Value(MipsExpr->getSubExpr());
        break;
      default:
        llvm_unreachable("Unexpected size of expression value.");
      }
      return;
    }
  }
  AsmPrinter::EmitDebugValue(Value, Size);
}

// Align all targets of indirect branches on bundle size. Used only if target
// is NaCl.
void MipsAsmPrinter::NaClAlignIndirectJumpTargets(MachineFunction &MF) {
  // Align all blocks that are jumped to through jump table.
  if (MachineJumpTableInfo *JtInfo = MF.getJumpTableInfo()) {
    const std::vector<MachineJumpTableEntry> &JT = JtInfo->getJumpTables();
    for (unsigned I = 0; I < JT.size(); ++I) {
      const std::vector<MachineBasicBlock *> &MBBs = JT[I].MBBs;
      for (unsigned J = 0; J < MBBs.size(); ++J)
        MBBs[J]->setAlignment(MIPS_NACL_BUNDLE_ALIGN);
    }
  }

  // If basic block address is taken, block can be target of indirect branch.
  for (auto &MBB : MF) {
    if (MBB.hasAddressTaken())
      MBB.setAlignment(MIPS_NACL_BUNDLE_ALIGN);
  }
}

bool MipsAsmPrinter::isLongBranchPseudo(int Opcode) const {
  return (Opcode == Mips::LONG_BRANCH_LUi
          || Opcode == Mips::LONG_BRANCH_LUi2Op
          || Opcode == Mips::LONG_BRANCH_LUi2Op_64
          || Opcode == Mips::LONG_BRANCH_ADDiu
          || Opcode == Mips::LONG_BRANCH_ADDiu2Op
          || Opcode == Mips::LONG_BRANCH_DADDiu
          || Opcode == Mips::LONG_BRANCH_DADDiu2Op);
}

// Force static initialization.
extern "C" void LLVMInitializeMipsAsmPrinter() {
  RegisterAsmPrinter<MipsAsmPrinter> X(getTheMipsTarget());
  RegisterAsmPrinter<MipsAsmPrinter> Y(getTheMipselTarget());
  RegisterAsmPrinter<MipsAsmPrinter> A(getTheMips64Target());
  RegisterAsmPrinter<MipsAsmPrinter> B(getTheMips64elTarget());
}
Index: vendor/llvm/dist-release_80/lib/Target/Mips/MipsDSPInstrInfo.td
===================================================================
--- vendor/llvm/dist-release_80/lib/Target/Mips/MipsDSPInstrInfo.td	(revision 348931)
+++ vendor/llvm/dist-release_80/lib/Target/Mips/MipsDSPInstrInfo.td	(revision 348932)
@@ -1,1471 +1,1473 @@
//===- MipsDSPInstrInfo.td - DSP ASE instructions -*- tablegen ------------*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes Mips DSP ASE instructions.
// //===----------------------------------------------------------------------===// // ImmLeaf def immZExt1 : ImmLeaf(Imm);}]>; def immZExt2 : ImmLeaf(Imm);}]>; def immZExt3 : ImmLeaf(Imm);}]>; def immZExt4 : ImmLeaf(Imm);}]>; def immZExt8 : ImmLeaf(Imm);}]>; def immZExt10 : ImmLeaf(Imm);}]>; def immSExt6 : ImmLeaf(Imm);}]>; def immSExt10 : ImmLeaf(Imm);}]>; // Mips-specific dsp nodes def SDT_MipsExtr : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisVT<2, untyped>]>; def SDT_MipsShilo : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, SDTCisSameAs<0, 2>, SDTCisVT<1, i32>]>; def SDT_MipsDPA : SDTypeProfile<1, 3, [SDTCisVT<0, untyped>, SDTCisSameAs<0, 3>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; def SDT_MipsSHIFT_DSP : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; class MipsDSPBase : SDNode; class MipsDSPSideEffectBase : SDNode; def MipsEXTP : MipsDSPSideEffectBase<"EXTP", SDT_MipsExtr>; def MipsEXTPDP : MipsDSPSideEffectBase<"EXTPDP", SDT_MipsExtr>; def MipsEXTR_S_H : MipsDSPSideEffectBase<"EXTR_S_H", SDT_MipsExtr>; def MipsEXTR_W : MipsDSPSideEffectBase<"EXTR_W", SDT_MipsExtr>; def MipsEXTR_R_W : MipsDSPSideEffectBase<"EXTR_R_W", SDT_MipsExtr>; def MipsEXTR_RS_W : MipsDSPSideEffectBase<"EXTR_RS_W", SDT_MipsExtr>; def MipsSHILO : MipsDSPBase<"SHILO", SDT_MipsShilo>; def MipsMTHLIP : MipsDSPSideEffectBase<"MTHLIP", SDT_MipsShilo>; def MipsMULSAQ_S_W_PH : MipsDSPSideEffectBase<"MULSAQ_S_W_PH", SDT_MipsDPA>; def MipsMAQ_S_W_PHL : MipsDSPSideEffectBase<"MAQ_S_W_PHL", SDT_MipsDPA>; def MipsMAQ_S_W_PHR : MipsDSPSideEffectBase<"MAQ_S_W_PHR", SDT_MipsDPA>; def MipsMAQ_SA_W_PHL : MipsDSPSideEffectBase<"MAQ_SA_W_PHL", SDT_MipsDPA>; def MipsMAQ_SA_W_PHR : MipsDSPSideEffectBase<"MAQ_SA_W_PHR", SDT_MipsDPA>; def MipsDPAU_H_QBL : MipsDSPBase<"DPAU_H_QBL", SDT_MipsDPA>; def MipsDPAU_H_QBR : MipsDSPBase<"DPAU_H_QBR", SDT_MipsDPA>; def MipsDPSU_H_QBL : MipsDSPBase<"DPSU_H_QBL", SDT_MipsDPA>; def MipsDPSU_H_QBR : MipsDSPBase<"DPSU_H_QBR", SDT_MipsDPA>; def MipsDPAQ_S_W_PH : MipsDSPSideEffectBase<"DPAQ_S_W_PH", SDT_MipsDPA>; def MipsDPSQ_S_W_PH : MipsDSPSideEffectBase<"DPSQ_S_W_PH", SDT_MipsDPA>; def MipsDPAQ_SA_L_W : MipsDSPSideEffectBase<"DPAQ_SA_L_W", SDT_MipsDPA>; def MipsDPSQ_SA_L_W : MipsDSPSideEffectBase<"DPSQ_SA_L_W", SDT_MipsDPA>; def MipsDPA_W_PH : MipsDSPBase<"DPA_W_PH", SDT_MipsDPA>; def MipsDPS_W_PH : MipsDSPBase<"DPS_W_PH", SDT_MipsDPA>; def MipsDPAQX_S_W_PH : MipsDSPSideEffectBase<"DPAQX_S_W_PH", SDT_MipsDPA>; def MipsDPAQX_SA_W_PH : MipsDSPSideEffectBase<"DPAQX_SA_W_PH", SDT_MipsDPA>; def MipsDPAX_W_PH : MipsDSPBase<"DPAX_W_PH", SDT_MipsDPA>; def MipsDPSX_W_PH : MipsDSPBase<"DPSX_W_PH", SDT_MipsDPA>; def MipsDPSQX_S_W_PH : MipsDSPSideEffectBase<"DPSQX_S_W_PH", SDT_MipsDPA>; def MipsDPSQX_SA_W_PH : MipsDSPSideEffectBase<"DPSQX_SA_W_PH", SDT_MipsDPA>; def MipsMULSA_W_PH : MipsDSPBase<"MULSA_W_PH", SDT_MipsDPA>; def MipsMULT : MipsDSPBase<"MULT", SDT_MipsDPA>; def MipsMULTU : MipsDSPBase<"MULTU", SDT_MipsDPA>; def MipsMADD_DSP : MipsDSPBase<"MADD_DSP", SDT_MipsDPA>; def MipsMADDU_DSP : MipsDSPBase<"MADDU_DSP", SDT_MipsDPA>; def MipsMSUB_DSP : MipsDSPBase<"MSUB_DSP", SDT_MipsDPA>; def MipsMSUBU_DSP : MipsDSPBase<"MSUBU_DSP", SDT_MipsDPA>; def MipsSHLL_DSP : MipsDSPBase<"SHLL_DSP", SDT_MipsSHIFT_DSP>; def MipsSHRA_DSP : MipsDSPBase<"SHRA_DSP", SDT_MipsSHIFT_DSP>; def MipsSHRL_DSP : MipsDSPBase<"SHRL_DSP", SDT_MipsSHIFT_DSP>; def MipsSETCC_DSP : MipsDSPBase<"SETCC_DSP", SDTSetCC>; def MipsSELECT_CC_DSP : MipsDSPBase<"SELECT_CC_DSP", SDTSelectCC>; // Flags. 
class Uses Regs> { list Uses = Regs; } class Defs Regs> { list Defs = Regs; } // Instruction encoding. class ADDU_QB_ENC : ADDU_QB_FMT<0b00000>; class ADDU_S_QB_ENC : ADDU_QB_FMT<0b00100>; class SUBU_QB_ENC : ADDU_QB_FMT<0b00001>; class SUBU_S_QB_ENC : ADDU_QB_FMT<0b00101>; class ADDQ_PH_ENC : ADDU_QB_FMT<0b01010>; class ADDQ_S_PH_ENC : ADDU_QB_FMT<0b01110>; class SUBQ_PH_ENC : ADDU_QB_FMT<0b01011>; class SUBQ_S_PH_ENC : ADDU_QB_FMT<0b01111>; class ADDQ_S_W_ENC : ADDU_QB_FMT<0b10110>; class SUBQ_S_W_ENC : ADDU_QB_FMT<0b10111>; class ADDSC_ENC : ADDU_QB_FMT<0b10000>; class ADDWC_ENC : ADDU_QB_FMT<0b10001>; class MODSUB_ENC : ADDU_QB_FMT<0b10010>; class RADDU_W_QB_ENC : RADDU_W_QB_FMT<0b10100>; class ABSQ_S_PH_ENC : ABSQ_S_PH_R2_FMT<0b01001>; class ABSQ_S_W_ENC : ABSQ_S_PH_R2_FMT<0b10001>; class PRECRQ_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01100>; class PRECRQ_PH_W_ENC : CMP_EQ_QB_R3_FMT<0b10100>; class PRECRQ_RS_PH_W_ENC : CMP_EQ_QB_R3_FMT<0b10101>; class PRECRQU_S_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01111>; class PRECEQ_W_PHL_ENC : ABSQ_S_PH_R2_FMT<0b01100>; class PRECEQ_W_PHR_ENC : ABSQ_S_PH_R2_FMT<0b01101>; class PRECEQU_PH_QBL_ENC : ABSQ_S_PH_R2_FMT<0b00100>; class PRECEQU_PH_QBR_ENC : ABSQ_S_PH_R2_FMT<0b00101>; class PRECEQU_PH_QBLA_ENC : ABSQ_S_PH_R2_FMT<0b00110>; class PRECEQU_PH_QBRA_ENC : ABSQ_S_PH_R2_FMT<0b00111>; class PRECEU_PH_QBL_ENC : ABSQ_S_PH_R2_FMT<0b11100>; class PRECEU_PH_QBR_ENC : ABSQ_S_PH_R2_FMT<0b11101>; class PRECEU_PH_QBLA_ENC : ABSQ_S_PH_R2_FMT<0b11110>; class PRECEU_PH_QBRA_ENC : ABSQ_S_PH_R2_FMT<0b11111>; class SHLL_QB_ENC : SHLL_QB_FMT<0b00000>; class SHLLV_QB_ENC : SHLL_QB_FMT<0b00010>; class SHRL_QB_ENC : SHLL_QB_FMT<0b00001>; class SHRLV_QB_ENC : SHLL_QB_FMT<0b00011>; class SHLL_PH_ENC : SHLL_QB_FMT<0b01000>; class SHLLV_PH_ENC : SHLL_QB_FMT<0b01010>; class SHLL_S_PH_ENC : SHLL_QB_FMT<0b01100>; class SHLLV_S_PH_ENC : SHLL_QB_FMT<0b01110>; class SHRA_PH_ENC : SHLL_QB_FMT<0b01001>; class SHRAV_PH_ENC : SHLL_QB_FMT<0b01011>; class SHRA_R_PH_ENC : SHLL_QB_FMT<0b01101>; class SHRAV_R_PH_ENC : SHLL_QB_FMT<0b01111>; class SHLL_S_W_ENC : SHLL_QB_FMT<0b10100>; class SHLLV_S_W_ENC : SHLL_QB_FMT<0b10110>; class SHRA_R_W_ENC : SHLL_QB_FMT<0b10101>; class SHRAV_R_W_ENC : SHLL_QB_FMT<0b10111>; class MULEU_S_PH_QBL_ENC : ADDU_QB_FMT<0b00110>; class MULEU_S_PH_QBR_ENC : ADDU_QB_FMT<0b00111>; class MULEQ_S_W_PHL_ENC : ADDU_QB_FMT<0b11100>; class MULEQ_S_W_PHR_ENC : ADDU_QB_FMT<0b11101>; class MULQ_RS_PH_ENC : ADDU_QB_FMT<0b11111>; class MULSAQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00110>; class MAQ_S_W_PHL_ENC : DPA_W_PH_FMT<0b10100>; class MAQ_S_W_PHR_ENC : DPA_W_PH_FMT<0b10110>; class MAQ_SA_W_PHL_ENC : DPA_W_PH_FMT<0b10000>; class MAQ_SA_W_PHR_ENC : DPA_W_PH_FMT<0b10010>; class MFHI_ENC : MFHI_FMT<0b010000>; class MFLO_ENC : MFHI_FMT<0b010010>; class MTHI_ENC : MTHI_FMT<0b010001>; class MTLO_ENC : MTHI_FMT<0b010011>; class DPAU_H_QBL_ENC : DPA_W_PH_FMT<0b00011>; class DPAU_H_QBR_ENC : DPA_W_PH_FMT<0b00111>; class DPSU_H_QBL_ENC : DPA_W_PH_FMT<0b01011>; class DPSU_H_QBR_ENC : DPA_W_PH_FMT<0b01111>; class DPAQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00100>; class DPSQ_S_W_PH_ENC : DPA_W_PH_FMT<0b00101>; class DPAQ_SA_L_W_ENC : DPA_W_PH_FMT<0b01100>; class DPSQ_SA_L_W_ENC : DPA_W_PH_FMT<0b01101>; class MULT_DSP_ENC : MULT_FMT<0b000000, 0b011000>; class MULTU_DSP_ENC : MULT_FMT<0b000000, 0b011001>; class MADD_DSP_ENC : MULT_FMT<0b011100, 0b000000>; class MADDU_DSP_ENC : MULT_FMT<0b011100, 0b000001>; class MSUB_DSP_ENC : MULT_FMT<0b011100, 0b000100>; class MSUBU_DSP_ENC : MULT_FMT<0b011100, 0b000101>; class 
CMPU_EQ_QB_ENC : CMP_EQ_QB_R2_FMT<0b00000>; class CMPU_LT_QB_ENC : CMP_EQ_QB_R2_FMT<0b00001>; class CMPU_LE_QB_ENC : CMP_EQ_QB_R2_FMT<0b00010>; class CMPGU_EQ_QB_ENC : CMP_EQ_QB_R3_FMT<0b00100>; class CMPGU_LT_QB_ENC : CMP_EQ_QB_R3_FMT<0b00101>; class CMPGU_LE_QB_ENC : CMP_EQ_QB_R3_FMT<0b00110>; class CMP_EQ_PH_ENC : CMP_EQ_QB_R2_FMT<0b01000>; class CMP_LT_PH_ENC : CMP_EQ_QB_R2_FMT<0b01001>; class CMP_LE_PH_ENC : CMP_EQ_QB_R2_FMT<0b01010>; class BITREV_ENC : ABSQ_S_PH_R2_FMT<0b11011>; class PACKRL_PH_ENC : CMP_EQ_QB_R3_FMT<0b01110>; class REPL_QB_ENC : REPL_FMT<0b00010>; class REPL_PH_ENC : REPL_FMT<0b01010>; class REPLV_QB_ENC : ABSQ_S_PH_R2_FMT<0b00011>; class REPLV_PH_ENC : ABSQ_S_PH_R2_FMT<0b01011>; class PICK_QB_ENC : CMP_EQ_QB_R3_FMT<0b00011>; class PICK_PH_ENC : CMP_EQ_QB_R3_FMT<0b01011>; class LWX_ENC : LX_FMT<0b00000>; class LHX_ENC : LX_FMT<0b00100>; class LBUX_ENC : LX_FMT<0b00110>; class BPOSGE32_ENC : BPOSGE32_FMT<0b11100>; class INSV_ENC : INSV_FMT<0b001100>; class EXTP_ENC : EXTR_W_TY1_FMT<0b00010>; class EXTPV_ENC : EXTR_W_TY1_FMT<0b00011>; class EXTPDP_ENC : EXTR_W_TY1_FMT<0b01010>; class EXTPDPV_ENC : EXTR_W_TY1_FMT<0b01011>; class EXTR_W_ENC : EXTR_W_TY1_FMT<0b00000>; class EXTRV_W_ENC : EXTR_W_TY1_FMT<0b00001>; class EXTR_R_W_ENC : EXTR_W_TY1_FMT<0b00100>; class EXTRV_R_W_ENC : EXTR_W_TY1_FMT<0b00101>; class EXTR_RS_W_ENC : EXTR_W_TY1_FMT<0b00110>; class EXTRV_RS_W_ENC : EXTR_W_TY1_FMT<0b00111>; class EXTR_S_H_ENC : EXTR_W_TY1_FMT<0b01110>; class EXTRV_S_H_ENC : EXTR_W_TY1_FMT<0b01111>; class SHILO_ENC : SHILO_R1_FMT<0b11010>; class SHILOV_ENC : SHILO_R2_FMT<0b11011>; class MTHLIP_ENC : SHILO_R2_FMT<0b11111>; class RDDSP_ENC : RDDSP_FMT<0b10010>; class WRDSP_ENC : WRDSP_FMT<0b10011>; class ADDU_PH_ENC : ADDU_QB_FMT<0b01000>; class ADDU_S_PH_ENC : ADDU_QB_FMT<0b01100>; class SUBU_PH_ENC : ADDU_QB_FMT<0b01001>; class SUBU_S_PH_ENC : ADDU_QB_FMT<0b01101>; class CMPGDU_EQ_QB_ENC : CMP_EQ_QB_R3_FMT<0b11000>; class CMPGDU_LT_QB_ENC : CMP_EQ_QB_R3_FMT<0b11001>; class CMPGDU_LE_QB_ENC : CMP_EQ_QB_R3_FMT<0b11010>; class ABSQ_S_QB_ENC : ABSQ_S_PH_R2_FMT<0b00001>; class ADDUH_QB_ENC : ADDUH_QB_FMT<0b00000>; class ADDUH_R_QB_ENC : ADDUH_QB_FMT<0b00010>; class SUBUH_QB_ENC : ADDUH_QB_FMT<0b00001>; class SUBUH_R_QB_ENC : ADDUH_QB_FMT<0b00011>; class ADDQH_PH_ENC : ADDUH_QB_FMT<0b01000>; class ADDQH_R_PH_ENC : ADDUH_QB_FMT<0b01010>; class SUBQH_PH_ENC : ADDUH_QB_FMT<0b01001>; class SUBQH_R_PH_ENC : ADDUH_QB_FMT<0b01011>; class ADDQH_W_ENC : ADDUH_QB_FMT<0b10000>; class ADDQH_R_W_ENC : ADDUH_QB_FMT<0b10010>; class SUBQH_W_ENC : ADDUH_QB_FMT<0b10001>; class SUBQH_R_W_ENC : ADDUH_QB_FMT<0b10011>; class MUL_PH_ENC : ADDUH_QB_FMT<0b01100>; class MUL_S_PH_ENC : ADDUH_QB_FMT<0b01110>; class MULQ_S_W_ENC : ADDUH_QB_FMT<0b10110>; class MULQ_RS_W_ENC : ADDUH_QB_FMT<0b10111>; class MULQ_S_PH_ENC : ADDU_QB_FMT<0b11110>; class DPA_W_PH_ENC : DPA_W_PH_FMT<0b00000>; class DPS_W_PH_ENC : DPA_W_PH_FMT<0b00001>; class DPAQX_S_W_PH_ENC : DPA_W_PH_FMT<0b11000>; class DPAQX_SA_W_PH_ENC : DPA_W_PH_FMT<0b11010>; class DPAX_W_PH_ENC : DPA_W_PH_FMT<0b01000>; class DPSX_W_PH_ENC : DPA_W_PH_FMT<0b01001>; class DPSQX_S_W_PH_ENC : DPA_W_PH_FMT<0b11001>; class DPSQX_SA_W_PH_ENC : DPA_W_PH_FMT<0b11011>; class MULSA_W_PH_ENC : DPA_W_PH_FMT<0b00010>; class PRECR_QB_PH_ENC : CMP_EQ_QB_R3_FMT<0b01101>; class PRECR_SRA_PH_W_ENC : PRECR_SRA_PH_W_FMT<0b11110>; class PRECR_SRA_R_PH_W_ENC : PRECR_SRA_PH_W_FMT<0b11111>; class SHRA_QB_ENC : SHLL_QB_FMT<0b00100>; class SHRAV_QB_ENC : SHLL_QB_FMT<0b00110>; class SHRA_R_QB_ENC : 
SHLL_QB_FMT<0b00101>; class SHRAV_R_QB_ENC : SHLL_QB_FMT<0b00111>; class SHRL_PH_ENC : SHLL_QB_FMT<0b11001>; class SHRLV_PH_ENC : SHLL_QB_FMT<0b11011>; class APPEND_ENC : APPEND_FMT<0b00000>; class BALIGN_ENC : APPEND_FMT<0b10000>; class PREPEND_ENC : APPEND_FMT<0b00001>; // Instruction desc. class ADDU_QB_DESC_BASE { dag OutOperandList = (outs ROD:$rd); dag InOperandList = (ins ROS:$rs, ROT:$rt); string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); list Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))]; InstrItinClass Itinerary = itin; string BaseOpcode = instr_asm; } class RADDU_W_QB_DESC_BASE { dag OutOperandList = (outs ROD:$rd); dag InOperandList = (ins ROS:$rs); string AsmString = !strconcat(instr_asm, "\t$rd, $rs"); list Pattern = [(set ROD:$rd, (OpNode ROS:$rs))]; InstrItinClass Itinerary = itin; string BaseOpcode = instr_asm; } class CMP_EQ_QB_R2_DESC_BASE { dag OutOperandList = (outs); dag InOperandList = (ins ROS:$rs, ROT:$rt); string AsmString = !strconcat(instr_asm, "\t$rs, $rt"); list Pattern = [(OpNode ROS:$rs, ROT:$rt)]; InstrItinClass Itinerary = itin; string BaseOpcode = instr_asm; } class CMP_EQ_QB_R3_DESC_BASE { dag OutOperandList = (outs ROD:$rd); dag InOperandList = (ins ROS:$rs, ROT:$rt); string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); list Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))]; InstrItinClass Itinerary = itin; string BaseOpcode = instr_asm; } class PRECR_SRA_PH_W_DESC_BASE { dag OutOperandList = (outs ROT:$rt); dag InOperandList = (ins ROS:$rs, uimm5:$sa, ROS:$src); string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa"); list Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, immZExt5:$sa))]; InstrItinClass Itinerary = itin; string Constraints = "$src = $rt"; string BaseOpcode = instr_asm; } class ABSQ_S_PH_R2_DESC_BASE { dag OutOperandList = (outs ROD:$rd); dag InOperandList = (ins ROT:$rt); string AsmString = !strconcat(instr_asm, "\t$rd, $rt"); list Pattern = [(set ROD:$rd, (OpNode ROT:$rt))]; InstrItinClass Itinerary = itin; string BaseOpcode = instr_asm; } class REPL_DESC_BASE { dag OutOperandList = (outs RO:$rd); dag InOperandList = (ins ImmOp:$imm); string AsmString = !strconcat(instr_asm, "\t$rd, $imm"); list Pattern = [(set RO:$rd, (OpNode immPat:$imm))]; InstrItinClass Itinerary = itin; string BaseOpcode = instr_asm; } class SHLL_QB_R3_DESC_BASE { dag OutOperandList = (outs RO:$rd); dag InOperandList = (ins RO:$rt, GPR32Opnd:$rs_sa); string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa"); list Pattern = [(set RO:$rd, (OpNode RO:$rt, GPR32Opnd:$rs_sa))]; InstrItinClass Itinerary = itin; string BaseOpcode = instr_asm; } class SHLL_QB_R2_DESC_BASE { dag OutOperandList = (outs RO:$rd); dag InOperandList = (ins RO:$rt, ImmOpnd:$rs_sa); string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa"); list Pattern = [(set RO:$rd, (OpNode RO:$rt, ImmPat:$rs_sa))]; InstrItinClass Itinerary = itin; bit hasSideEffects = 1; string BaseOpcode = instr_asm; } class LX_DESC_BASE { dag OutOperandList = (outs GPR32Opnd:$rd); dag InOperandList = (ins PtrRC:$base, PtrRC:$index); string AsmString = !strconcat(instr_asm, "\t$rd, ${index}(${base})"); list Pattern = [(set GPR32Opnd:$rd, (OpNode iPTR:$base, iPTR:$index))]; InstrItinClass Itinerary = itin; bit mayLoad = 1; string BaseOpcode = instr_asm; } class ADDUH_QB_DESC_BASE { dag OutOperandList = (outs ROD:$rd); dag InOperandList = (ins ROS:$rs, ROT:$rt); string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); list Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))]; 
InstrItinClass Itinerary = itin; string BaseOpcode = instr_asm; } class APPEND_DESC_BASE { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins GPR32Opnd:$rs, ImmOp:$sa, GPR32Opnd:$src); string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa"); list Pattern = [(set GPR32Opnd:$rt, (OpNode GPR32Opnd:$src, GPR32Opnd:$rs, Imm:$sa))]; InstrItinClass Itinerary = itin; string Constraints = "$src = $rt"; string BaseOpcode = instr_asm; } class EXTR_W_TY1_R2_DESC_BASE { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins ACC64DSPOpnd:$ac, GPR32Opnd:$shift_rs); string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs"); InstrItinClass Itinerary = itin; string BaseOpcode = instr_asm; } class EXTR_W_TY1_R1_DESC_BASE { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins ACC64DSPOpnd:$ac, uimm5:$shift_rs); string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs"); InstrItinClass Itinerary = itin; string BaseOpcode = instr_asm; } class SHILO_R1_DESC_BASE { dag OutOperandList = (outs ACC64DSPOpnd:$ac); dag InOperandList = (ins simm6:$shift, ACC64DSPOpnd:$acin); string AsmString = !strconcat(instr_asm, "\t$ac, $shift"); list Pattern = [(set ACC64DSPOpnd:$ac, (OpNode immSExt6:$shift, ACC64DSPOpnd:$acin))]; string Constraints = "$acin = $ac"; string BaseOpcode = instr_asm; } class SHILO_R2_DESC_BASE { dag OutOperandList = (outs ACC64DSPOpnd:$ac); dag InOperandList = (ins GPR32Opnd:$rs, ACC64DSPOpnd:$acin); string AsmString = !strconcat(instr_asm, "\t$ac, $rs"); list Pattern = [(set ACC64DSPOpnd:$ac, (OpNode GPR32Opnd:$rs, ACC64DSPOpnd:$acin))]; string Constraints = "$acin = $ac"; string BaseOpcode = instr_asm; } class MTHLIP_DESC_BASE { dag OutOperandList = (outs ACC64DSPOpnd:$ac); dag InOperandList = (ins GPR32Opnd:$rs, ACC64DSPOpnd:$acin); string AsmString = !strconcat(instr_asm, "\t$rs, $ac"); list Pattern = [(set ACC64DSPOpnd:$ac, (OpNode GPR32Opnd:$rs, ACC64DSPOpnd:$acin))]; string Constraints = "$acin = $ac"; string BaseOpcode = instr_asm; } class RDDSP_DESC_BASE { dag OutOperandList = (outs GPR32Opnd:$rd); dag InOperandList = (ins uimm10:$mask); string AsmString = !strconcat(instr_asm, "\t$rd, $mask"); list Pattern = [(set GPR32Opnd:$rd, (OpNode immZExt10:$mask))]; InstrItinClass Itinerary = itin; string BaseOpcode = instr_asm; bit isMoveReg = 1; } class WRDSP_DESC_BASE { dag OutOperandList = (outs); dag InOperandList = (ins GPR32Opnd:$rs, uimm10:$mask); string AsmString = !strconcat(instr_asm, "\t$rs, $mask"); list Pattern = [(OpNode GPR32Opnd:$rs, immZExt10:$mask)]; InstrItinClass Itinerary = itin; string BaseOpcode = instr_asm; bit isMoveReg = 1; } class DPA_W_PH_DESC_BASE { dag OutOperandList = (outs ACC64DSPOpnd:$ac); dag InOperandList = (ins GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin); string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); list Pattern = [(set ACC64DSPOpnd:$ac, (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin))]; string Constraints = "$acin = $ac"; string BaseOpcode = instr_asm; } class MULT_DESC_BASE { dag OutOperandList = (outs ACC64DSPOpnd:$ac); dag InOperandList = (ins GPR32Opnd:$rs, GPR32Opnd:$rt); string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); list Pattern = [(set ACC64DSPOpnd:$ac, (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt))]; InstrItinClass Itinerary = itin; bit isCommutable = 1; string BaseOpcode = instr_asm; } class MADD_DESC_BASE { dag OutOperandList = (outs ACC64DSPOpnd:$ac); dag InOperandList = (ins GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin); string AsmString = 
!strconcat(instr_asm, "\t$ac, $rs, $rt"); list Pattern = [(set ACC64DSPOpnd:$ac, (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin))]; InstrItinClass Itinerary = itin; string Constraints = "$acin = $ac"; string BaseOpcode = instr_asm; } class MFHI_DESC_BASE { dag OutOperandList = (outs GPR32Opnd:$rd); dag InOperandList = (ins RO:$ac); string AsmString = !strconcat(instr_asm, "\t$rd, $ac"); list Pattern = [(set GPR32Opnd:$rd, (OpNode RO:$ac))]; InstrItinClass Itinerary = itin; string BaseOpcode = instr_asm; bit isMoveReg = 1; } class MTHI_DESC_BASE { dag OutOperandList = (outs RO:$ac); dag InOperandList = (ins GPR32Opnd:$rs); string AsmString = !strconcat(instr_asm, "\t$rs, $ac"); InstrItinClass Itinerary = itin; string BaseOpcode = instr_asm; bit isMoveReg = 1; } class BPOSGE32_PSEUDO_DESC_BASE : MipsPseudo<(outs GPR32Opnd:$dst), (ins), [(set GPR32Opnd:$dst, (OpNode))]> { bit usesCustomInserter = 1; } class BPOSGE32_DESC_BASE { dag OutOperandList = (outs); dag InOperandList = (ins opnd:$offset); string AsmString = !strconcat(instr_asm, "\t$offset"); InstrItinClass Itinerary = itin; bit isBranch = 1; bit isTerminator = 1; bit hasDelaySlot = 1; string BaseOpcode = instr_asm; } class INSV_DESC_BASE { dag OutOperandList = (outs GPR32Opnd:$rt); dag InOperandList = (ins GPR32Opnd:$src, GPR32Opnd:$rs); string AsmString = !strconcat(instr_asm, "\t$rt, $rs"); list Pattern = [(set GPR32Opnd:$rt, (OpNode GPR32Opnd:$src, GPR32Opnd:$rs))]; InstrItinClass Itinerary = itin; string Constraints = "$src = $rt"; string BaseOpcode = instr_asm; } //===----------------------------------------------------------------------===// // MIPS DSP Rev 1 //===----------------------------------------------------------------------===// // Addition/subtraction class ADDU_QB_DESC : ADDU_QB_DESC_BASE<"addu.qb", null_frag, NoItinerary, DSPROpnd, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag20]>; class ADDU_S_QB_DESC : ADDU_QB_DESC_BASE<"addu_s.qb", int_mips_addu_s_qb, NoItinerary, DSPROpnd, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag20]>; class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", null_frag, NoItinerary, DSPROpnd, DSPROpnd>, Defs<[DSPOutFlag20]>; class SUBU_S_QB_DESC : ADDU_QB_DESC_BASE<"subu_s.qb", int_mips_subu_s_qb, NoItinerary, DSPROpnd, DSPROpnd>, Defs<[DSPOutFlag20]>; class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", null_frag, NoItinerary, DSPROpnd, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag20]>; class ADDQ_S_PH_DESC : ADDU_QB_DESC_BASE<"addq_s.ph", int_mips_addq_s_ph, NoItinerary, DSPROpnd, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag20]>; class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", null_frag, NoItinerary, DSPROpnd, DSPROpnd>, Defs<[DSPOutFlag20]>; class SUBQ_S_PH_DESC : ADDU_QB_DESC_BASE<"subq_s.ph", int_mips_subq_s_ph, NoItinerary, DSPROpnd, DSPROpnd>, Defs<[DSPOutFlag20]>; class ADDQ_S_W_DESC : ADDU_QB_DESC_BASE<"addq_s.w", int_mips_addq_s_w, NoItinerary, GPR32Opnd, GPR32Opnd>, IsCommutable, Defs<[DSPOutFlag20]>; class SUBQ_S_W_DESC : ADDU_QB_DESC_BASE<"subq_s.w", int_mips_subq_s_w, NoItinerary, GPR32Opnd, GPR32Opnd>, Defs<[DSPOutFlag20]>; class ADDSC_DESC : ADDU_QB_DESC_BASE<"addsc", null_frag, NoItinerary, GPR32Opnd, GPR32Opnd>, IsCommutable, Defs<[DSPCarry]>; class ADDWC_DESC : ADDU_QB_DESC_BASE<"addwc", null_frag, NoItinerary, GPR32Opnd, GPR32Opnd>, IsCommutable, Uses<[DSPCarry]>, Defs<[DSPOutFlag20]>; class MODSUB_DESC : ADDU_QB_DESC_BASE<"modsub", int_mips_modsub, NoItinerary, GPR32Opnd, GPR32Opnd>; class RADDU_W_QB_DESC : RADDU_W_QB_DESC_BASE<"raddu.w.qb", int_mips_raddu_w_qb, NoItinerary, 
GPR32Opnd, DSPROpnd>; // Absolute value class ABSQ_S_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.ph", int_mips_absq_s_ph, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag20]>; class ABSQ_S_W_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.w", int_mips_absq_s_w, NoItinerary, GPR32Opnd>, Defs<[DSPOutFlag20]>; // Precision reduce/expand class PRECRQ_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.qb.ph", int_mips_precrq_qb_ph, NoItinerary, DSPROpnd, DSPROpnd>; class PRECRQ_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.ph.w", int_mips_precrq_ph_w, NoItinerary, DSPROpnd, GPR32Opnd>; class PRECRQ_RS_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq_rs.ph.w", int_mips_precrq_rs_ph_w, NoItinerary, DSPROpnd, GPR32Opnd>, Defs<[DSPOutFlag22]>; class PRECRQU_S_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrqu_s.qb.ph", int_mips_precrqu_s_qb_ph, NoItinerary, DSPROpnd, DSPROpnd>, Defs<[DSPOutFlag22]>; class PRECEQ_W_PHL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phl", int_mips_preceq_w_phl, NoItinerary, GPR32Opnd, DSPROpnd>; class PRECEQ_W_PHR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phr", int_mips_preceq_w_phr, NoItinerary, GPR32Opnd, DSPROpnd>; class PRECEQU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbl", int_mips_precequ_ph_qbl, NoItinerary, DSPROpnd>; class PRECEQU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbr", int_mips_precequ_ph_qbr, NoItinerary, DSPROpnd>; class PRECEQU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbla", int_mips_precequ_ph_qbla, NoItinerary, DSPROpnd>; class PRECEQU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbra", int_mips_precequ_ph_qbra, NoItinerary, DSPROpnd>; class PRECEU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbl", int_mips_preceu_ph_qbl, NoItinerary, DSPROpnd>; class PRECEU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbr", int_mips_preceu_ph_qbr, NoItinerary, DSPROpnd>; class PRECEU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbla", int_mips_preceu_ph_qbla, NoItinerary, DSPROpnd>; class PRECEU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbra", int_mips_preceu_ph_qbra, NoItinerary, DSPROpnd>; // Shift class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", null_frag, immZExt3, NoItinerary, DSPROpnd, uimm3>, Defs<[DSPOutFlag22]>; class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag22]>; class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", null_frag, immZExt3, NoItinerary, DSPROpnd, uimm3>; class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb, NoItinerary, DSPROpnd>; class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", null_frag, immZExt4, NoItinerary, DSPROpnd, uimm4>, Defs<[DSPOutFlag22]>; class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag22]>; class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph, immZExt4, NoItinerary, DSPROpnd, uimm4>, Defs<[DSPOutFlag22]>; class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag22]>; class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", null_frag, immZExt4, NoItinerary, DSPROpnd, uimm4>; class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", int_mips_shra_ph, NoItinerary, DSPROpnd>; class SHRA_R_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.ph", int_mips_shra_r_ph, immZExt4, NoItinerary, DSPROpnd, uimm4>; class SHRAV_R_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.ph", int_mips_shra_r_ph, NoItinerary, DSPROpnd>; class SHLL_S_W_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.w", int_mips_shll_s_w, immZExt5, NoItinerary, GPR32Opnd, 
uimm5>, Defs<[DSPOutFlag22]>; class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w, NoItinerary, GPR32Opnd>, Defs<[DSPOutFlag22]>; class SHRA_R_W_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.w", int_mips_shra_r_w, immZExt5, NoItinerary, GPR32Opnd, uimm5>; class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w, NoItinerary, GPR32Opnd>; // Multiplication class MULEU_S_PH_QBL_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbl", int_mips_muleu_s_ph_qbl, NoItinerary, DSPROpnd, DSPROpnd>, Defs<[DSPOutFlag21]>; class MULEU_S_PH_QBR_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbr", int_mips_muleu_s_ph_qbr, NoItinerary, DSPROpnd, DSPROpnd>, Defs<[DSPOutFlag21]>; class MULEQ_S_W_PHL_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phl", int_mips_muleq_s_w_phl, NoItinerary, GPR32Opnd, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag21]>; class MULEQ_S_W_PHR_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phr", int_mips_muleq_s_w_phr, NoItinerary, GPR32Opnd, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag21]>; class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph, NoItinerary, DSPROpnd, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag21]>; class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph", MipsMULSAQ_S_W_PH>, Defs<[DSPOutFlag16_19]>; class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl", MipsMAQ_S_W_PHL>, Defs<[DSPOutFlag16_19]>; class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr", MipsMAQ_S_W_PHR>, Defs<[DSPOutFlag16_19]>; class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl", MipsMAQ_SA_W_PHL>, Defs<[DSPOutFlag16_19]>; class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr", MipsMAQ_SA_W_PHR>, Defs<[DSPOutFlag16_19]>; // Move from/to hi/lo. class MFHI_DESC : MFHI_DESC_BASE<"mfhi", ACC64DSPOpnd, MipsMFHI, NoItinerary>; class MFLO_DESC : MFHI_DESC_BASE<"mflo", ACC64DSPOpnd, MipsMFLO, NoItinerary>; class MTHI_DESC : MTHI_DESC_BASE<"mthi", HI32DSPOpnd, NoItinerary>; class MTLO_DESC : MTHI_DESC_BASE<"mtlo", LO32DSPOpnd, NoItinerary>; // Dot product with accumulate/subtract class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl", MipsDPAU_H_QBL>; class DPAU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbr", MipsDPAU_H_QBR>; class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl", MipsDPSU_H_QBL>; class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr", MipsDPSU_H_QBR>; class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph", MipsDPAQ_S_W_PH>, Defs<[DSPOutFlag16_19]>; class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph", MipsDPSQ_S_W_PH>, Defs<[DSPOutFlag16_19]>; class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w", MipsDPAQ_SA_L_W>, Defs<[DSPOutFlag16_19]>; class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w", MipsDPSQ_SA_L_W>, Defs<[DSPOutFlag16_19]>; class MULT_DSP_DESC : MULT_DESC_BASE<"mult", MipsMult, NoItinerary>; class MULTU_DSP_DESC : MULT_DESC_BASE<"multu", MipsMultu, NoItinerary>; class MADD_DSP_DESC : MADD_DESC_BASE<"madd", MipsMAdd, NoItinerary>; class MADDU_DSP_DESC : MADD_DESC_BASE<"maddu", MipsMAddu, NoItinerary>; class MSUB_DSP_DESC : MADD_DESC_BASE<"msub", MipsMSub, NoItinerary>; class MSUBU_DSP_DESC : MADD_DESC_BASE<"msubu", MipsMSubu, NoItinerary>; // Comparison class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb", int_mips_cmpu_eq_qb, NoItinerary, DSPROpnd>, IsCommutable, Defs<[DSPCCond]>; class CMPU_LT_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.lt.qb", int_mips_cmpu_lt_qb, NoItinerary, DSPROpnd>, Defs<[DSPCCond]>; class CMPU_LE_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.le.qb", int_mips_cmpu_le_qb, NoItinerary, DSPROpnd>, Defs<[DSPCCond]>; class 
CMPGU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.eq.qb", int_mips_cmpgu_eq_qb, NoItinerary, GPR32Opnd, DSPROpnd>, IsCommutable; class CMPGU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.lt.qb", int_mips_cmpgu_lt_qb, NoItinerary, GPR32Opnd, DSPROpnd>; class CMPGU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.le.qb", int_mips_cmpgu_le_qb, NoItinerary, GPR32Opnd, DSPROpnd>; class CMP_EQ_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.eq.ph", int_mips_cmp_eq_ph, NoItinerary, DSPROpnd>, IsCommutable, Defs<[DSPCCond]>; class CMP_LT_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.lt.ph", int_mips_cmp_lt_ph, NoItinerary, DSPROpnd>, Defs<[DSPCCond]>; class CMP_LE_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.le.ph", int_mips_cmp_le_ph, NoItinerary, DSPROpnd>, Defs<[DSPCCond]>; // Misc class BITREV_DESC : ABSQ_S_PH_R2_DESC_BASE<"bitrev", int_mips_bitrev, NoItinerary, GPR32Opnd>; class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph, NoItinerary, DSPROpnd, DSPROpnd>; class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, uimm8, immZExt8, NoItinerary, DSPROpnd>; class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, simm10, immSExt10, NoItinerary, DSPROpnd>; class REPLV_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb, NoItinerary, DSPROpnd, GPR32Opnd>; class REPLV_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.ph", int_mips_repl_ph, NoItinerary, DSPROpnd, GPR32Opnd>; class PICK_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.qb", int_mips_pick_qb, NoItinerary, DSPROpnd, DSPROpnd>, Uses<[DSPCCond]>; class PICK_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.ph", int_mips_pick_ph, NoItinerary, DSPROpnd, DSPROpnd>, Uses<[DSPCCond]>; class LWX_DESC : LX_DESC_BASE<"lwx", int_mips_lwx, NoItinerary>; class LHX_DESC : LX_DESC_BASE<"lhx", int_mips_lhx, NoItinerary>; class LBUX_DESC : LX_DESC_BASE<"lbux", int_mips_lbux, NoItinerary>; class BPOSGE32_DESC : BPOSGE32_DESC_BASE<"bposge32", brtarget, NoItinerary>; // Extr class EXTP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extp", MipsEXTP, NoItinerary>, Uses<[DSPPos]>, Defs<[DSPEFI]>; class EXTPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpv", MipsEXTP, NoItinerary>, Uses<[DSPPos]>, Defs<[DSPEFI]>; class EXTPDP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>, Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>; class EXTPDPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpdpv", MipsEXTPDP, NoItinerary>, Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>; class EXTR_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>, Defs<[DSPOutFlag23]>; class EXTRV_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv.w", MipsEXTR_W, NoItinerary>, Defs<[DSPOutFlag23]>; class EXTR_R_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_r.w", MipsEXTR_R_W, NoItinerary>, Defs<[DSPOutFlag23]>; class EXTRV_R_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_r.w", MipsEXTR_R_W, NoItinerary>, Defs<[DSPOutFlag23]>; class EXTR_RS_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_rs.w", MipsEXTR_RS_W, NoItinerary>, Defs<[DSPOutFlag23]>; class EXTRV_RS_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_rs.w", MipsEXTR_RS_W, NoItinerary>, Defs<[DSPOutFlag23]>; class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", MipsEXTR_S_H, NoItinerary>, Defs<[DSPOutFlag23]>; class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", MipsEXTR_S_H, NoItinerary>, Defs<[DSPOutFlag23]>; class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo", MipsSHILO>; class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov", MipsSHILO>; class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip", MipsMTHLIP>, Defs<[DSPPos]>; class RDDSP_DESC : RDDSP_DESC_BASE<"rddsp", int_mips_rddsp, NoItinerary>; class WRDSP_DESC : 
WRDSP_DESC_BASE<"wrdsp", int_mips_wrdsp, NoItinerary>; class INSV_DESC : INSV_DESC_BASE<"insv", int_mips_insv, NoItinerary>, Uses<[DSPPos, DSPSCount]>; //===----------------------------------------------------------------------===// // MIPS DSP Rev 2 // Addition/subtraction class ADDU_PH_DESC : ADDU_QB_DESC_BASE<"addu.ph", int_mips_addu_ph, NoItinerary, DSPROpnd, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag20]>; class ADDU_S_PH_DESC : ADDU_QB_DESC_BASE<"addu_s.ph", int_mips_addu_s_ph, NoItinerary, DSPROpnd, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag20]>; class SUBU_PH_DESC : ADDU_QB_DESC_BASE<"subu.ph", int_mips_subu_ph, NoItinerary, DSPROpnd, DSPROpnd>, Defs<[DSPOutFlag20]>; class SUBU_S_PH_DESC : ADDU_QB_DESC_BASE<"subu_s.ph", int_mips_subu_s_ph, NoItinerary, DSPROpnd, DSPROpnd>, Defs<[DSPOutFlag20]>; class ADDUH_QB_DESC : ADDUH_QB_DESC_BASE<"adduh.qb", int_mips_adduh_qb, NoItinerary, DSPROpnd>, IsCommutable; class ADDUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"adduh_r.qb", int_mips_adduh_r_qb, NoItinerary, DSPROpnd>, IsCommutable; class SUBUH_QB_DESC : ADDUH_QB_DESC_BASE<"subuh.qb", int_mips_subuh_qb, NoItinerary, DSPROpnd>; class SUBUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"subuh_r.qb", int_mips_subuh_r_qb, NoItinerary, DSPROpnd>; class ADDQH_PH_DESC : ADDUH_QB_DESC_BASE<"addqh.ph", int_mips_addqh_ph, NoItinerary, DSPROpnd>, IsCommutable; class ADDQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"addqh_r.ph", int_mips_addqh_r_ph, NoItinerary, DSPROpnd>, IsCommutable; class SUBQH_PH_DESC : ADDUH_QB_DESC_BASE<"subqh.ph", int_mips_subqh_ph, NoItinerary, DSPROpnd>; class SUBQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"subqh_r.ph", int_mips_subqh_r_ph, NoItinerary, DSPROpnd>; class ADDQH_W_DESC : ADDUH_QB_DESC_BASE<"addqh.w", int_mips_addqh_w, NoItinerary, GPR32Opnd>, IsCommutable; class ADDQH_R_W_DESC : ADDUH_QB_DESC_BASE<"addqh_r.w", int_mips_addqh_r_w, NoItinerary, GPR32Opnd>, IsCommutable; class SUBQH_W_DESC : ADDUH_QB_DESC_BASE<"subqh.w", int_mips_subqh_w, NoItinerary, GPR32Opnd>; class SUBQH_R_W_DESC : ADDUH_QB_DESC_BASE<"subqh_r.w", int_mips_subqh_r_w, NoItinerary, GPR32Opnd>; // Comparison class CMPGDU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.eq.qb", int_mips_cmpgdu_eq_qb, NoItinerary, GPR32Opnd, DSPROpnd>, IsCommutable, Defs<[DSPCCond]>; class CMPGDU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.lt.qb", int_mips_cmpgdu_lt_qb, NoItinerary, GPR32Opnd, DSPROpnd>, Defs<[DSPCCond]>; class CMPGDU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.le.qb", int_mips_cmpgdu_le_qb, NoItinerary, GPR32Opnd, DSPROpnd>, Defs<[DSPCCond]>; // Absolute class ABSQ_S_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.qb", int_mips_absq_s_qb, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag20]>; // Multiplication class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", null_frag, NoItinerary, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag21]>; class MUL_S_PH_DESC : ADDUH_QB_DESC_BASE<"mul_s.ph", int_mips_mul_s_ph, NoItinerary, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag21]>; class MULQ_S_W_DESC : ADDUH_QB_DESC_BASE<"mulq_s.w", int_mips_mulq_s_w, NoItinerary, GPR32Opnd>, IsCommutable, Defs<[DSPOutFlag21]>; class MULQ_RS_W_DESC : ADDUH_QB_DESC_BASE<"mulq_rs.w", int_mips_mulq_rs_w, NoItinerary, GPR32Opnd>, IsCommutable, Defs<[DSPOutFlag21]>; class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph, NoItinerary, DSPROpnd, DSPROpnd>, IsCommutable, Defs<[DSPOutFlag21]>; // Dot product with accumulate/subtract class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph", MipsDPA_W_PH>; class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph", MipsDPS_W_PH>; class DPAQX_S_W_PH_DESC : 
DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph", MipsDPAQX_S_W_PH>, Defs<[DSPOutFlag16_19]>; class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph", MipsDPAQX_SA_W_PH>, Defs<[DSPOutFlag16_19]>; class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph", MipsDPAX_W_PH>; class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph", MipsDPSX_W_PH>; class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph", MipsDPSQX_S_W_PH>, Defs<[DSPOutFlag16_19]>; class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph", MipsDPSQX_SA_W_PH>, Defs<[DSPOutFlag16_19]>; class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph", MipsMULSA_W_PH>; // Precision reduce/expand class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph", int_mips_precr_qb_ph, NoItinerary, DSPROpnd, DSPROpnd>; class PRECR_SRA_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra.ph.w", int_mips_precr_sra_ph_w, NoItinerary, DSPROpnd, GPR32Opnd>; class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w", int_mips_precr_sra_r_ph_w, NoItinerary, DSPROpnd, GPR32Opnd>; // Shift class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", null_frag, immZExt3, NoItinerary, DSPROpnd, uimm3>; class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb, NoItinerary, DSPROpnd>; class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb, immZExt3, NoItinerary, DSPROpnd, uimm3>; class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb, NoItinerary, DSPROpnd>; class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", null_frag, immZExt4, NoItinerary, DSPROpnd, uimm4>; class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph, NoItinerary, DSPROpnd>; // Misc class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, uimm5, immZExt5, NoItinerary>; class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, uimm2, immZExt2, NoItinerary>; class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, uimm5, immZExt5, NoItinerary>; // Pseudos. def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE, Uses<[DSPPos]>; // Instruction defs. 
// MIPS DSP Rev 1 def ADDU_QB : DspMMRel, ADDU_QB_ENC, ADDU_QB_DESC; def ADDU_S_QB : DspMMRel, ADDU_S_QB_ENC, ADDU_S_QB_DESC; def SUBU_QB : DspMMRel, SUBU_QB_ENC, SUBU_QB_DESC; def SUBU_S_QB : DspMMRel, SUBU_S_QB_ENC, SUBU_S_QB_DESC; def ADDQ_PH : DspMMRel, ADDQ_PH_ENC, ADDQ_PH_DESC; def ADDQ_S_PH : DspMMRel, ADDQ_S_PH_ENC, ADDQ_S_PH_DESC; def SUBQ_PH : DspMMRel, SUBQ_PH_ENC, SUBQ_PH_DESC; def SUBQ_S_PH : DspMMRel, SUBQ_S_PH_ENC, SUBQ_S_PH_DESC; def ADDQ_S_W : DspMMRel, ADDQ_S_W_ENC, ADDQ_S_W_DESC; def SUBQ_S_W : DspMMRel, SUBQ_S_W_ENC, SUBQ_S_W_DESC; def ADDSC : DspMMRel, ADDSC_ENC, ADDSC_DESC; def ADDWC : DspMMRel, ADDWC_ENC, ADDWC_DESC; def MODSUB : DspMMRel, MODSUB_ENC, MODSUB_DESC; def RADDU_W_QB : DspMMRel, RADDU_W_QB_ENC, RADDU_W_QB_DESC; def ABSQ_S_PH : DspMMRel, ABSQ_S_PH_ENC, ABSQ_S_PH_DESC; def ABSQ_S_W : DspMMRel, ABSQ_S_W_ENC, ABSQ_S_W_DESC; def PRECRQ_QB_PH : DspMMRel, PRECRQ_QB_PH_ENC, PRECRQ_QB_PH_DESC; def PRECRQ_PH_W : DspMMRel, PRECRQ_PH_W_ENC, PRECRQ_PH_W_DESC; def PRECRQ_RS_PH_W : DspMMRel, PRECRQ_RS_PH_W_ENC, PRECRQ_RS_PH_W_DESC; def PRECRQU_S_QB_PH : DspMMRel, PRECRQU_S_QB_PH_ENC, PRECRQU_S_QB_PH_DESC; def PRECEQ_W_PHL : DspMMRel, PRECEQ_W_PHL_ENC, PRECEQ_W_PHL_DESC; def PRECEQ_W_PHR : DspMMRel, PRECEQ_W_PHR_ENC, PRECEQ_W_PHR_DESC; def PRECEQU_PH_QBL : DspMMRel, PRECEQU_PH_QBL_ENC, PRECEQU_PH_QBL_DESC; def PRECEQU_PH_QBR : DspMMRel, PRECEQU_PH_QBR_ENC, PRECEQU_PH_QBR_DESC; def PRECEQU_PH_QBLA : DspMMRel, PRECEQU_PH_QBLA_ENC, PRECEQU_PH_QBLA_DESC; def PRECEQU_PH_QBRA : DspMMRel, PRECEQU_PH_QBRA_ENC, PRECEQU_PH_QBRA_DESC; def PRECEU_PH_QBL : DspMMRel, PRECEU_PH_QBL_ENC, PRECEU_PH_QBL_DESC; def PRECEU_PH_QBR : DspMMRel, PRECEU_PH_QBR_ENC, PRECEU_PH_QBR_DESC; def PRECEU_PH_QBLA : DspMMRel, PRECEU_PH_QBLA_ENC, PRECEU_PH_QBLA_DESC; def PRECEU_PH_QBRA : DspMMRel, PRECEU_PH_QBRA_ENC, PRECEU_PH_QBRA_DESC; def SHLL_QB : DspMMRel, SHLL_QB_ENC, SHLL_QB_DESC; def SHLLV_QB : DspMMRel, SHLLV_QB_ENC, SHLLV_QB_DESC; def SHRL_QB : DspMMRel, SHRL_QB_ENC, SHRL_QB_DESC; def SHRLV_QB : DspMMRel, SHRLV_QB_ENC, SHRLV_QB_DESC; def SHLL_PH : DspMMRel, SHLL_PH_ENC, SHLL_PH_DESC; def SHLLV_PH : DspMMRel, SHLLV_PH_ENC, SHLLV_PH_DESC; def SHLL_S_PH : DspMMRel, SHLL_S_PH_ENC, SHLL_S_PH_DESC; def SHLLV_S_PH : DspMMRel, SHLLV_S_PH_ENC, SHLLV_S_PH_DESC; def SHRA_PH : DspMMRel, SHRA_PH_ENC, SHRA_PH_DESC; def SHRAV_PH : DspMMRel, SHRAV_PH_ENC, SHRAV_PH_DESC; def SHRA_R_PH : DspMMRel, SHRA_R_PH_ENC, SHRA_R_PH_DESC; def SHRAV_R_PH : DspMMRel, SHRAV_R_PH_ENC, SHRAV_R_PH_DESC; def SHLL_S_W : DspMMRel, SHLL_S_W_ENC, SHLL_S_W_DESC; def SHLLV_S_W : DspMMRel, SHLLV_S_W_ENC, SHLLV_S_W_DESC; def SHRA_R_W : DspMMRel, SHRA_R_W_ENC, SHRA_R_W_DESC; def SHRAV_R_W : DspMMRel, SHRAV_R_W_ENC, SHRAV_R_W_DESC; def MULEU_S_PH_QBL : DspMMRel, MULEU_S_PH_QBL_ENC, MULEU_S_PH_QBL_DESC; def MULEU_S_PH_QBR : DspMMRel, MULEU_S_PH_QBR_ENC, MULEU_S_PH_QBR_DESC; def MULEQ_S_W_PHL : DspMMRel, MULEQ_S_W_PHL_ENC, MULEQ_S_W_PHL_DESC; def MULEQ_S_W_PHR : DspMMRel, MULEQ_S_W_PHR_ENC, MULEQ_S_W_PHR_DESC; def MULQ_RS_PH : DspMMRel, MULQ_RS_PH_ENC, MULQ_RS_PH_DESC; def MULSAQ_S_W_PH : DspMMRel, MULSAQ_S_W_PH_ENC, MULSAQ_S_W_PH_DESC; def MAQ_S_W_PHL : DspMMRel, MAQ_S_W_PHL_ENC, MAQ_S_W_PHL_DESC; def MAQ_S_W_PHR : DspMMRel, MAQ_S_W_PHR_ENC, MAQ_S_W_PHR_DESC; def MAQ_SA_W_PHL : DspMMRel, MAQ_SA_W_PHL_ENC, MAQ_SA_W_PHL_DESC; def MAQ_SA_W_PHR : DspMMRel, MAQ_SA_W_PHR_ENC, MAQ_SA_W_PHR_DESC; def MFHI_DSP : DspMMRel, MFHI_ENC, MFHI_DESC; def MFLO_DSP : DspMMRel, MFLO_ENC, MFLO_DESC; def MTHI_DSP : DspMMRel, MTHI_ENC, MTHI_DESC; def MTLO_DSP : 
DspMMRel, MTLO_ENC, MTLO_DESC; def DPAU_H_QBL : DspMMRel, DPAU_H_QBL_ENC, DPAU_H_QBL_DESC; def DPAU_H_QBR : DspMMRel, DPAU_H_QBR_ENC, DPAU_H_QBR_DESC; def DPSU_H_QBL : DspMMRel, DPSU_H_QBL_ENC, DPSU_H_QBL_DESC; def DPSU_H_QBR : DspMMRel, DPSU_H_QBR_ENC, DPSU_H_QBR_DESC; def DPAQ_S_W_PH : DspMMRel, DPAQ_S_W_PH_ENC, DPAQ_S_W_PH_DESC; def DPSQ_S_W_PH : DspMMRel, DPSQ_S_W_PH_ENC, DPSQ_S_W_PH_DESC; def DPAQ_SA_L_W : DspMMRel, DPAQ_SA_L_W_ENC, DPAQ_SA_L_W_DESC; def DPSQ_SA_L_W : DspMMRel, DPSQ_SA_L_W_ENC, DPSQ_SA_L_W_DESC; def MULT_DSP : DspMMRel, MULT_DSP_ENC, MULT_DSP_DESC; def MULTU_DSP : DspMMRel, MULTU_DSP_ENC, MULTU_DSP_DESC; def MADD_DSP : DspMMRel, MADD_DSP_ENC, MADD_DSP_DESC; def MADDU_DSP : DspMMRel, MADDU_DSP_ENC, MADDU_DSP_DESC; def MSUB_DSP : DspMMRel, MSUB_DSP_ENC, MSUB_DSP_DESC; def MSUBU_DSP : DspMMRel, MSUBU_DSP_ENC, MSUBU_DSP_DESC; def CMPU_EQ_QB : DspMMRel, CMPU_EQ_QB_ENC, CMPU_EQ_QB_DESC; def CMPU_LT_QB : DspMMRel, CMPU_LT_QB_ENC, CMPU_LT_QB_DESC; def CMPU_LE_QB : DspMMRel, CMPU_LE_QB_ENC, CMPU_LE_QB_DESC; def CMPGU_EQ_QB : DspMMRel, CMPGU_EQ_QB_ENC, CMPGU_EQ_QB_DESC; def CMPGU_LT_QB : DspMMRel, CMPGU_LT_QB_ENC, CMPGU_LT_QB_DESC; def CMPGU_LE_QB : DspMMRel, CMPGU_LE_QB_ENC, CMPGU_LE_QB_DESC; def CMP_EQ_PH : DspMMRel, CMP_EQ_PH_ENC, CMP_EQ_PH_DESC; def CMP_LT_PH : DspMMRel, CMP_LT_PH_ENC, CMP_LT_PH_DESC; def CMP_LE_PH : DspMMRel, CMP_LE_PH_ENC, CMP_LE_PH_DESC; def BITREV : DspMMRel, BITREV_ENC, BITREV_DESC; def PACKRL_PH : DspMMRel, PACKRL_PH_ENC, PACKRL_PH_DESC; def REPL_QB : DspMMRel, REPL_QB_ENC, REPL_QB_DESC; def REPL_PH : DspMMRel, REPL_PH_ENC, REPL_PH_DESC; def REPLV_QB : DspMMRel, REPLV_QB_ENC, REPLV_QB_DESC; def REPLV_PH : DspMMRel, REPLV_PH_ENC, REPLV_PH_DESC; def PICK_QB : DspMMRel, PICK_QB_ENC, PICK_QB_DESC; def PICK_PH : DspMMRel, PICK_PH_ENC, PICK_PH_DESC; def LWX : DspMMRel, LWX_ENC, LWX_DESC; def LHX : DspMMRel, LHX_ENC, LHX_DESC; def LBUX : DspMMRel, LBUX_ENC, LBUX_DESC; let AdditionalPredicates = [NotInMicroMips] in { def BPOSGE32 : DspMMRel, BPOSGE32_ENC, BPOSGE32_DESC; } def INSV : DspMMRel, INSV_ENC, INSV_DESC; def EXTP : DspMMRel, EXTP_ENC, EXTP_DESC; def EXTPV : DspMMRel, EXTPV_ENC, EXTPV_DESC; def EXTPDP : DspMMRel, EXTPDP_ENC, EXTPDP_DESC; def EXTPDPV : DspMMRel, EXTPDPV_ENC, EXTPDPV_DESC; def EXTR_W : DspMMRel, EXTR_W_ENC, EXTR_W_DESC; def EXTRV_W : DspMMRel, EXTRV_W_ENC, EXTRV_W_DESC; def EXTR_R_W : DspMMRel, EXTR_R_W_ENC, EXTR_R_W_DESC; def EXTRV_R_W : DspMMRel, EXTRV_R_W_ENC, EXTRV_R_W_DESC; def EXTR_RS_W : DspMMRel, EXTR_RS_W_ENC, EXTR_RS_W_DESC; def EXTRV_RS_W : DspMMRel, EXTRV_RS_W_ENC, EXTRV_RS_W_DESC; def EXTR_S_H : DspMMRel, EXTR_S_H_ENC, EXTR_S_H_DESC; def EXTRV_S_H : DspMMRel, EXTRV_S_H_ENC, EXTRV_S_H_DESC; def SHILO : DspMMRel, SHILO_ENC, SHILO_DESC; def SHILOV : DspMMRel, SHILOV_ENC, SHILOV_DESC; def MTHLIP : DspMMRel, MTHLIP_ENC, MTHLIP_DESC; def RDDSP : DspMMRel, RDDSP_ENC, RDDSP_DESC; let AdditionalPredicates = [NotInMicroMips] in { def WRDSP : WRDSP_ENC, WRDSP_DESC; } // MIPS DSP Rev 2 def ADDU_PH : DspMMRel, ADDU_PH_ENC, ADDU_PH_DESC, ISA_DSPR2; def ADDU_S_PH : DspMMRel, ADDU_S_PH_ENC, ADDU_S_PH_DESC, ISA_DSPR2; def SUBU_PH : DspMMRel, SUBU_PH_ENC, SUBU_PH_DESC, ISA_DSPR2; def SUBU_S_PH : DspMMRel, SUBU_S_PH_ENC, SUBU_S_PH_DESC, ISA_DSPR2; def CMPGDU_EQ_QB : DspMMRel, CMPGDU_EQ_QB_ENC, CMPGDU_EQ_QB_DESC, ISA_DSPR2; def CMPGDU_LT_QB : DspMMRel, CMPGDU_LT_QB_ENC, CMPGDU_LT_QB_DESC, ISA_DSPR2; def CMPGDU_LE_QB : DspMMRel, CMPGDU_LE_QB_ENC, CMPGDU_LE_QB_DESC, ISA_DSPR2; def ABSQ_S_QB : DspMMRel, ABSQ_S_QB_ENC, ABSQ_S_QB_DESC, ISA_DSPR2; 
def ADDUH_QB : DspMMRel, ADDUH_QB_ENC, ADDUH_QB_DESC, ISA_DSPR2; def ADDUH_R_QB : DspMMRel, ADDUH_R_QB_ENC, ADDUH_R_QB_DESC, ISA_DSPR2; def SUBUH_QB : DspMMRel, SUBUH_QB_ENC, SUBUH_QB_DESC, ISA_DSPR2; def SUBUH_R_QB : DspMMRel, SUBUH_R_QB_ENC, SUBUH_R_QB_DESC, ISA_DSPR2; def ADDQH_PH : DspMMRel, ADDQH_PH_ENC, ADDQH_PH_DESC, ISA_DSPR2; def ADDQH_R_PH : DspMMRel, ADDQH_R_PH_ENC, ADDQH_R_PH_DESC, ISA_DSPR2; def SUBQH_PH : DspMMRel, SUBQH_PH_ENC, SUBQH_PH_DESC, ISA_DSPR2; def SUBQH_R_PH : DspMMRel, SUBQH_R_PH_ENC, SUBQH_R_PH_DESC, ISA_DSPR2; def ADDQH_W : DspMMRel, ADDQH_W_ENC, ADDQH_W_DESC, ISA_DSPR2; def ADDQH_R_W : DspMMRel, ADDQH_R_W_ENC, ADDQH_R_W_DESC, ISA_DSPR2; def SUBQH_W : DspMMRel, SUBQH_W_ENC, SUBQH_W_DESC, ISA_DSPR2; def SUBQH_R_W : DspMMRel, SUBQH_R_W_ENC, SUBQH_R_W_DESC, ISA_DSPR2; def MUL_PH : DspMMRel, MUL_PH_ENC, MUL_PH_DESC, ISA_DSPR2; def MUL_S_PH : DspMMRel, MUL_S_PH_ENC, MUL_S_PH_DESC, ISA_DSPR2; def MULQ_S_W : DspMMRel, MULQ_S_W_ENC, MULQ_S_W_DESC, ISA_DSPR2; def MULQ_RS_W : DspMMRel, MULQ_RS_W_ENC, MULQ_RS_W_DESC, ISA_DSPR2; def MULQ_S_PH : DspMMRel, MULQ_S_PH_ENC, MULQ_S_PH_DESC, ISA_DSPR2; def DPA_W_PH : DspMMRel, DPA_W_PH_ENC, DPA_W_PH_DESC, ISA_DSPR2; def DPS_W_PH : DspMMRel, DPS_W_PH_ENC, DPS_W_PH_DESC, ISA_DSPR2; def DPAQX_S_W_PH : DspMMRel, DPAQX_S_W_PH_ENC, DPAQX_S_W_PH_DESC, ISA_DSPR2; def DPAQX_SA_W_PH : DspMMRel, DPAQX_SA_W_PH_ENC, DPAQX_SA_W_PH_DESC, ISA_DSPR2; def DPAX_W_PH : DspMMRel, DPAX_W_PH_ENC, DPAX_W_PH_DESC, ISA_DSPR2; def DPSX_W_PH : DspMMRel, DPSX_W_PH_ENC, DPSX_W_PH_DESC, ISA_DSPR2; def DPSQX_S_W_PH : DspMMRel, DPSQX_S_W_PH_ENC, DPSQX_S_W_PH_DESC, ISA_DSPR2; def DPSQX_SA_W_PH : DspMMRel, DPSQX_SA_W_PH_ENC, DPSQX_SA_W_PH_DESC, ISA_DSPR2; def MULSA_W_PH : DspMMRel, MULSA_W_PH_ENC, MULSA_W_PH_DESC, ISA_DSPR2; def PRECR_QB_PH : DspMMRel, PRECR_QB_PH_ENC, PRECR_QB_PH_DESC, ISA_DSPR2; def PRECR_SRA_PH_W : DspMMRel, PRECR_SRA_PH_W_ENC, PRECR_SRA_PH_W_DESC, ISA_DSPR2; def PRECR_SRA_R_PH_W : DspMMRel, PRECR_SRA_R_PH_W_ENC, PRECR_SRA_R_PH_W_DESC, ISA_DSPR2; def SHRA_QB : DspMMRel, SHRA_QB_ENC, SHRA_QB_DESC, ISA_DSPR2; def SHRAV_QB : DspMMRel, SHRAV_QB_ENC, SHRAV_QB_DESC, ISA_DSPR2; def SHRA_R_QB : DspMMRel, SHRA_R_QB_ENC, SHRA_R_QB_DESC, ISA_DSPR2; def SHRAV_R_QB : DspMMRel, SHRAV_R_QB_ENC, SHRAV_R_QB_DESC, ISA_DSPR2; def SHRL_PH : DspMMRel, SHRL_PH_ENC, SHRL_PH_DESC, ISA_DSPR2; def SHRLV_PH : DspMMRel, SHRLV_PH_ENC, SHRLV_PH_DESC, ISA_DSPR2; def APPEND : DspMMRel, APPEND_ENC, APPEND_DESC, ISA_DSPR2; def BALIGN : DspMMRel, BALIGN_ENC, BALIGN_DESC, ISA_DSPR2; def PREPEND : DspMMRel, PREPEND_ENC, PREPEND_DESC, ISA_DSPR2; // Pseudos. let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in { // Pseudo instructions for loading and storing accumulator registers. def LOAD_ACC64DSP : Load<"", ACC64DSPOpnd>; def STORE_ACC64DSP : Store<"", ACC64DSPOpnd>; // Pseudos for loading and storing ccond field of DSP control register. def LOAD_CCOND_DSP : Load<"load_ccond_dsp", DSPCC>; def STORE_CCOND_DSP : Store<"store_ccond_dsp", DSPCC>; } let DecoderNamespace = "MipsDSP", Arch = "dsp", ASEPredicate = [HasDSP] in { def LWDSP : Load<"lw", DSPROpnd, null_frag, II_LW>, DspMMRel, LW_FM<0x23>; def SWDSP : Store<"sw", DSPROpnd, null_frag, II_SW>, DspMMRel, LW_FM<0x2b>; } // Pseudo CMP and PICK instructions. 
class PseudoCMP : PseudoDSP<(outs DSPCC:$cmp), (ins DSPROpnd:$rs, DSPROpnd:$rt), []>, PseudoInstExpansion<(RealInst DSPROpnd:$rs, DSPROpnd:$rt)>, NeverHasSideEffects; class PseudoPICK : PseudoDSP<(outs DSPROpnd:$rd), (ins DSPCC:$cmp, DSPROpnd:$rs, DSPROpnd:$rt), []>, PseudoInstExpansion<(RealInst DSPROpnd:$rd, DSPROpnd:$rs, DSPROpnd:$rt)>, NeverHasSideEffects; def PseudoCMP_EQ_PH : PseudoCMP; def PseudoCMP_LT_PH : PseudoCMP; def PseudoCMP_LE_PH : PseudoCMP; def PseudoCMPU_EQ_QB : PseudoCMP; def PseudoCMPU_LT_QB : PseudoCMP; def PseudoCMPU_LE_QB : PseudoCMP; def PseudoPICK_PH : PseudoPICK; def PseudoPICK_QB : PseudoPICK; -def PseudoMTLOHI_DSP : PseudoMTLOHI; +let AdditionalPredicates = [HasDSP] in { + def PseudoMTLOHI_DSP : PseudoMTLOHI; +} // Patterns. class DSPPat : Pat, Requires<[pred]>; class BitconvertPat : DSPPat<(DstVT (bitconvert (SrcVT SrcRC:$src))), (COPY_TO_REGCLASS SrcRC:$src, DstRC)>; def : BitconvertPat; def : BitconvertPat; def : BitconvertPat; def : BitconvertPat; def : BitconvertPat; def : BitconvertPat; def : BitconvertPat; def : BitconvertPat; def : DSPPat<(v2i16 (load addr:$a)), (v2i16 (COPY_TO_REGCLASS (LW addr:$a), DSPR))>; def : DSPPat<(v4i8 (load addr:$a)), (v4i8 (COPY_TO_REGCLASS (LW addr:$a), DSPR))>; def : DSPPat<(store (v2i16 DSPR:$val), addr:$a), (SW (COPY_TO_REGCLASS DSPR:$val, GPR32), addr:$a)>; def : DSPPat<(store (v4i8 DSPR:$val), addr:$a), (SW (COPY_TO_REGCLASS DSPR:$val, GPR32), addr:$a)>; // Binary operations. class DSPBinPat : DSPPat<(Node ValTy:$a, ValTy:$b), (Inst ValTy:$a, ValTy:$b), Pred>; def : DSPBinPat; def : DSPBinPat; def : DSPBinPat; def : DSPBinPat; def : DSPBinPat; def : DSPBinPat; def : DSPBinPat; def : DSPBinPat; def : DSPBinPat; def : DSPBinPat; def : DSPBinPat; def : DSPBinPat; def : DSPBinPat; def : DSPBinPat; // Shift immediate patterns. class DSPShiftPat : DSPPat<(Node ValTy:$a, Imm:$shamt), (Inst ValTy:$a, Imm:$shamt), Pred>; def : DSPShiftPat; def : DSPShiftPat; def : DSPShiftPat; def : DSPShiftPat; def : DSPShiftPat; def : DSPShiftPat; def : DSPShiftPat; def : DSPShiftPat; def : DSPShiftPat; def : DSPShiftPat; def : DSPShiftPat; def : DSPShiftPat; // SETCC/SELECT_CC patterns. class DSPSetCCPat : DSPPat<(ValTy (MipsSETCC_DSP ValTy:$a, ValTy:$b, CC)), (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPR)), (ValTy ZERO)))>; class DSPSetCCPatInv : DSPPat<(ValTy (MipsSETCC_DSP ValTy:$a, ValTy:$b, CC)), (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), (ValTy ZERO), (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPR))))>; class DSPSelectCCPat : DSPPat<(ValTy (MipsSELECT_CC_DSP ValTy:$a, ValTy:$b, ValTy:$c, ValTy:$d, CC)), (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), $c, $d))>; class DSPSelectCCPatInv : DSPPat<(ValTy (MipsSELECT_CC_DSP ValTy:$a, ValTy:$b, ValTy:$c, ValTy:$d, CC)), (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), $d, $c))>; def : DSPSetCCPat; def : DSPSetCCPat; def : DSPSetCCPat; def : DSPSetCCPatInv; def : DSPSetCCPatInv; def : DSPSetCCPatInv; def : DSPSetCCPat; def : DSPSetCCPat; def : DSPSetCCPat; def : DSPSetCCPatInv; def : DSPSetCCPatInv; def : DSPSetCCPatInv; def : DSPSelectCCPat; def : DSPSelectCCPat; def : DSPSelectCCPat; def : DSPSelectCCPatInv; def : DSPSelectCCPatInv; def : DSPSelectCCPatInv; def : DSPSelectCCPat; def : DSPSelectCCPat; def : DSPSelectCCPat; def : DSPSelectCCPatInv; def : DSPSelectCCPatInv; def : DSPSelectCCPatInv; // Extr patterns. 
class EXTR_W_TY1_R2_Pat : DSPPat<(i32 (OpNode GPR32:$rs, ACC64DSP:$ac)), (Instr ACC64DSP:$ac, GPR32:$rs)>; class EXTR_W_TY1_R1_Pat : DSPPat<(i32 (OpNode immZExt5:$shift, ACC64DSP:$ac)), (Instr ACC64DSP:$ac, immZExt5:$shift)>; def : EXTR_W_TY1_R1_Pat; def : EXTR_W_TY1_R2_Pat; def : EXTR_W_TY1_R1_Pat; def : EXTR_W_TY1_R2_Pat; def : EXTR_W_TY1_R1_Pat; def : EXTR_W_TY1_R2_Pat; def : EXTR_W_TY1_R1_Pat; def : EXTR_W_TY1_R2_Pat; def : EXTR_W_TY1_R1_Pat; def : EXTR_W_TY1_R2_Pat; def : EXTR_W_TY1_R1_Pat; def : EXTR_W_TY1_R2_Pat; // Indexed load patterns. class IndexedLoadPat : DSPPat<(i32 (LoadNode (add i32:$base, i32:$index))), (Instr i32:$base, i32:$index)>; let AddedComplexity = 20 in { def : IndexedLoadPat; def : IndexedLoadPat; def : IndexedLoadPat; } // Instruction alias. let AdditionalPredicates = [NotInMicroMips] in { def : DSPInstAlias<"wrdsp $rt", (WRDSP GPR32Opnd:$rt, 0x1F), 1>; } Index: vendor/llvm/dist-release_80/lib/Target/Mips/MipsDelaySlotFiller.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/Target/Mips/MipsDelaySlotFiller.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/Mips/MipsDelaySlotFiller.cpp (revision 348932) @@ -1,933 +1,934 @@ //===- MipsDelaySlotFiller.cpp - Mips Delay Slot Filler -------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // Simple pass to fill delay slots with useful instructions. // //===----------------------------------------------------------------------===// #include "MCTargetDesc/MipsMCNaCl.h" #include "Mips.h" #include "MipsInstrInfo.h" #include "MipsRegisterInfo.h" #include "MipsSubtarget.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" #include #include #include #include #include using namespace llvm; #define DEBUG_TYPE "mips-delay-slot-filler" STATISTIC(FilledSlots, "Number of delay slots filled"); STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that" " are not NOP."); static cl::opt DisableDelaySlotFiller( "disable-mips-delay-filler", cl::init(false), cl::desc("Fill all delay slots with NOPs."), cl::Hidden); static cl::opt DisableForwardSearch( "disable-mips-df-forward-search", cl::init(true), cl::desc("Disallow MIPS delay filler to search forward."), cl::Hidden); static cl::opt DisableSuccBBSearch( "disable-mips-df-succbb-search", 
cl::init(true), cl::desc("Disallow MIPS delay filler to search successor basic blocks."), cl::Hidden); static cl::opt DisableBackwardSearch( "disable-mips-df-backward-search", cl::init(false), cl::desc("Disallow MIPS delay filler to search backward."), cl::Hidden); enum CompactBranchPolicy { CB_Never, ///< The policy 'never' may in some circumstances or for some ///< ISAs not be absolutely adhered to. CB_Optimal, ///< Optimal is the default and will produce compact branches ///< when delay slots cannot be filled. CB_Always ///< 'always' may in some circumstances may not be ///< absolutely adhered to there may not be a corresponding ///< compact form of a branch. }; static cl::opt MipsCompactBranchPolicy( "mips-compact-branches",cl::Optional, cl::init(CB_Optimal), cl::desc("MIPS Specific: Compact branch policy."), cl::values( clEnumValN(CB_Never, "never", "Do not use compact branches if possible."), clEnumValN(CB_Optimal, "optimal", "Use compact branches where appropiate (default)."), clEnumValN(CB_Always, "always", "Always use compact branches if possible.") ) ); namespace { using Iter = MachineBasicBlock::iterator; using ReverseIter = MachineBasicBlock::reverse_iterator; using BB2BrMap = SmallDenseMap; class RegDefsUses { public: RegDefsUses(const TargetRegisterInfo &TRI); void init(const MachineInstr &MI); /// This function sets all caller-saved registers in Defs. void setCallerSaved(const MachineInstr &MI); /// This function sets all unallocatable registers in Defs. void setUnallocatableRegs(const MachineFunction &MF); /// Set bits in Uses corresponding to MBB's live-out registers except for /// the registers that are live-in to SuccBB. void addLiveOut(const MachineBasicBlock &MBB, const MachineBasicBlock &SuccBB); bool update(const MachineInstr &MI, unsigned Begin, unsigned End); private: bool checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses, unsigned Reg, bool IsDef) const; /// Returns true if Reg or its alias is in RegSet. bool isRegInSet(const BitVector &RegSet, unsigned Reg) const; const TargetRegisterInfo &TRI; BitVector Defs, Uses; }; /// Base class for inspecting loads and stores. class InspectMemInstr { public: InspectMemInstr(bool ForbidMemInstr_) : ForbidMemInstr(ForbidMemInstr_) {} virtual ~InspectMemInstr() = default; /// Return true if MI cannot be moved to delay slot. bool hasHazard(const MachineInstr &MI); protected: /// Flags indicating whether loads or stores have been seen. bool OrigSeenLoad = false; bool OrigSeenStore = false; bool SeenLoad = false; bool SeenStore = false; /// Memory instructions are not allowed to move to delay slot if this flag /// is true. bool ForbidMemInstr; private: virtual bool hasHazard_(const MachineInstr &MI) = 0; }; /// This subclass rejects any memory instructions. class NoMemInstr : public InspectMemInstr { public: NoMemInstr() : InspectMemInstr(true) {} private: bool hasHazard_(const MachineInstr &MI) override { return true; } }; /// This subclass accepts loads from stacks and constant loads. class LoadFromStackOrConst : public InspectMemInstr { public: LoadFromStackOrConst() : InspectMemInstr(false) {} private: bool hasHazard_(const MachineInstr &MI) override; }; /// This subclass uses memory dependence information to determine whether a /// memory instruction can be moved to a delay slot. class MemDefsUses : public InspectMemInstr { public: MemDefsUses(const DataLayout &DL, const MachineFrameInfo *MFI); private: using ValueType = PointerUnion; bool hasHazard_(const MachineInstr &MI) override; /// Update Defs and Uses. 
Return true if there exist dependences that /// disqualify the delay slot candidate between V and values in Uses and /// Defs. bool updateDefsUses(ValueType V, bool MayStore); /// Get the list of underlying objects of MI's memory operand. bool getUnderlyingObjects(const MachineInstr &MI, SmallVectorImpl &Objects) const; const MachineFrameInfo *MFI; SmallPtrSet Uses, Defs; const DataLayout &DL; /// Flags indicating whether loads or stores with no underlying objects have /// been seen. bool SeenNoObjLoad = false; bool SeenNoObjStore = false; }; class MipsDelaySlotFiller : public MachineFunctionPass { public: MipsDelaySlotFiller() : MachineFunctionPass(ID) { initializeMipsDelaySlotFillerPass(*PassRegistry::getPassRegistry()); } StringRef getPassName() const override { return "Mips Delay Slot Filler"; } bool runOnMachineFunction(MachineFunction &F) override { TM = &F.getTarget(); bool Changed = false; for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) Changed |= runOnMachineBasicBlock(*FI); // This pass invalidates liveness information when it reorders // instructions to fill delay slot. Without this, -verify-machineinstrs // will fail. if (Changed) F.getRegInfo().invalidateLiveness(); return Changed; } MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } static char ID; private: bool runOnMachineBasicBlock(MachineBasicBlock &MBB); Iter replaceWithCompactBranch(MachineBasicBlock &MBB, Iter Branch, const DebugLoc &DL); /// This function checks if it is valid to move Candidate to the delay slot /// and returns true if it isn't. It also updates memory and register /// dependence information. bool delayHasHazard(const MachineInstr &Candidate, RegDefsUses &RegDU, InspectMemInstr &IM) const; /// This function searches range [Begin, End) for an instruction that can be /// moved to the delay slot. Returns true on success. template bool searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End, RegDefsUses &RegDU, InspectMemInstr &IM, Iter Slot, IterTy &Filler) const; /// This function searches in the backward direction for an instruction that /// can be moved to the delay slot. Returns true on success. bool searchBackward(MachineBasicBlock &MBB, MachineInstr &Slot) const; /// This function searches MBB in the forward direction for an instruction /// that can be moved to the delay slot. Returns true on success. bool searchForward(MachineBasicBlock &MBB, Iter Slot) const; /// This function searches one of MBB's successor blocks for an instruction /// that can be moved to the delay slot and inserts clones of the /// instruction into the successor's predecessor blocks. bool searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const; /// Pick a successor block of MBB. Return NULL if MBB doesn't have a /// successor block that is not a landing pad. MachineBasicBlock *selectSuccBB(MachineBasicBlock &B) const; /// This function analyzes MBB and returns an instruction with an unoccupied /// slot that branches to Dst. std::pair getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const; /// Examine Pred and see if it is possible to insert an instruction into /// one of its branches delay slot or its end. 
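// A simplified, standalone sketch (plain C++, not the LLVM sources) of the
// search helpers declared above: the filler walks backward from the slot,
// forward after calls, or into a successor block, and rejects any candidate
// that trips a register or memory hazard.  PseudoInstr and the hazard
// predicate are made-up stand-ins for MachineInstr and the RegDefsUses /
// InspectMemInstr checks.
#include <functional>
#include <optional>
#include <vector>

struct PseudoInstr {
  bool IsTerminator = false;   // ends the search, in the spirit of terminateSearch()
  bool HasSideEffects = false; // likewise disqualifies further scanning
};

// Scan the instructions that precede the slot, closest first, and return the
// index of the first candidate that is safe to move into the delay slot.
std::optional<size_t>
searchBackwardSketch(const std::vector<PseudoInstr> &Block, size_t SlotIndex,
                     const std::function<bool(const PseudoInstr &)> &HasHazard) {
  for (size_t I = SlotIndex; I-- > 0;) {
    const PseudoInstr &Candidate = Block[I];
    if (Candidate.IsTerminator || Candidate.HasSideEffects)
      break;                   // nothing earlier may be moved either
    if (!HasHazard(Candidate))
      return I;                // found a usable filler
  }
  return std::nullopt;         // caller falls back to emitting a NOP
}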
bool examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ, RegDefsUses &RegDU, bool &HasMultipleSuccs, BB2BrMap &BrMap) const; bool terminateSearch(const MachineInstr &Candidate) const; const TargetMachine *TM = nullptr; }; } // end anonymous namespace char MipsDelaySlotFiller::ID = 0; static bool hasUnoccupiedSlot(const MachineInstr *MI) { return MI->hasDelaySlot() && !MI->isBundledWithSucc(); } INITIALIZE_PASS(MipsDelaySlotFiller, DEBUG_TYPE, "Fill delay slot for MIPS", false, false) /// This function inserts clones of Filler into predecessor blocks. static void insertDelayFiller(Iter Filler, const BB2BrMap &BrMap) { MachineFunction *MF = Filler->getParent()->getParent(); for (BB2BrMap::const_iterator I = BrMap.begin(); I != BrMap.end(); ++I) { if (I->second) { MIBundleBuilder(I->second).append(MF->CloneMachineInstr(&*Filler)); ++UsefulSlots; } else { I->first->insert(I->first->end(), MF->CloneMachineInstr(&*Filler)); } } } /// This function adds registers Filler defines to MBB's live-in register list. static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) { for (unsigned I = 0, E = Filler->getNumOperands(); I != E; ++I) { const MachineOperand &MO = Filler->getOperand(I); unsigned R; if (!MO.isReg() || !MO.isDef() || !(R = MO.getReg())) continue; #ifndef NDEBUG const MachineFunction &MF = *MBB.getParent(); assert(MF.getSubtarget().getRegisterInfo()->getAllocatableSet(MF).test(R) && "Shouldn't move an instruction with unallocatable registers across " "basic block boundaries."); #endif if (!MBB.isLiveIn(R)) MBB.addLiveIn(R); } } RegDefsUses::RegDefsUses(const TargetRegisterInfo &TRI) : TRI(TRI), Defs(TRI.getNumRegs(), false), Uses(TRI.getNumRegs(), false) {} void RegDefsUses::init(const MachineInstr &MI) { // Add all register operands which are explicit and non-variadic. update(MI, 0, MI.getDesc().getNumOperands()); // If MI is a call, add RA to Defs to prevent users of RA from going into // delay slot. if (MI.isCall()) Defs.set(Mips::RA); // Add all implicit register operands of branch instructions except // register AT. if (MI.isBranch()) { update(MI, MI.getDesc().getNumOperands(), MI.getNumOperands()); Defs.reset(Mips::AT); } } void RegDefsUses::setCallerSaved(const MachineInstr &MI) { assert(MI.isCall()); // Add RA/RA_64 to Defs to prevent users of RA/RA_64 from going into // the delay slot. The reason is that RA/RA_64 must not be changed // in the delay slot so that the callee can return to the caller. if (MI.definesRegister(Mips::RA) || MI.definesRegister(Mips::RA_64)) { Defs.set(Mips::RA); Defs.set(Mips::RA_64); } // If MI is a call, add all caller-saved registers to Defs. 
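// For illustration only (standard C++, no LLVM types): RegDefsUses keeps two
// register bit vectors and reports a hazard when a candidate defines a
// register that was already defined or used between it and the slot, or uses
// a register that was already defined.  The register count and class name
// below are invented for the sketch; registers are assumed < NumRegsSketch.
#include <bitset>

constexpr size_t NumRegsSketch = 64;

class RegDefsUsesSketch {
  std::bitset<NumRegsSketch> Defs, Uses;

public:
  // Record one register operand and return true if it exposes a dependence
  // hazard against the operands recorded so far.
  bool update(unsigned Reg, bool IsDef) {
    bool Hazard;
    if (IsDef) {
      // A new def conflicts with any earlier def or use of the register.
      Hazard = Defs.test(Reg) || Uses.test(Reg);
      Defs.set(Reg);
    } else {
      // A new use only conflicts with an earlier def.
      Hazard = Defs.test(Reg);
      Uses.set(Reg);
    }
    return Hazard;
  }
};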
BitVector CallerSavedRegs(TRI.getNumRegs(), true); CallerSavedRegs.reset(Mips::ZERO); CallerSavedRegs.reset(Mips::ZERO_64); for (const MCPhysReg *R = TRI.getCalleeSavedRegs(MI.getParent()->getParent()); *R; ++R) for (MCRegAliasIterator AI(*R, &TRI, true); AI.isValid(); ++AI) CallerSavedRegs.reset(*AI); Defs |= CallerSavedRegs; } void RegDefsUses::setUnallocatableRegs(const MachineFunction &MF) { BitVector AllocSet = TRI.getAllocatableSet(MF); for (unsigned R : AllocSet.set_bits()) for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI) AllocSet.set(*AI); AllocSet.set(Mips::ZERO); AllocSet.set(Mips::ZERO_64); Defs |= AllocSet.flip(); } void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB, const MachineBasicBlock &SuccBB) { for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) if (*SI != &SuccBB) for (const auto &LI : (*SI)->liveins()) Uses.set(LI.PhysReg); } bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) { BitVector NewDefs(TRI.getNumRegs()), NewUses(TRI.getNumRegs()); bool HasHazard = false; for (unsigned I = Begin; I != End; ++I) { const MachineOperand &MO = MI.getOperand(I); if (MO.isReg() && MO.getReg()) HasHazard |= checkRegDefsUses(NewDefs, NewUses, MO.getReg(), MO.isDef()); } Defs |= NewDefs; Uses |= NewUses; return HasHazard; } bool RegDefsUses::checkRegDefsUses(BitVector &NewDefs, BitVector &NewUses, unsigned Reg, bool IsDef) const { if (IsDef) { NewDefs.set(Reg); // check whether Reg has already been defined or used. return (isRegInSet(Defs, Reg) || isRegInSet(Uses, Reg)); } NewUses.set(Reg); // check whether Reg has already been defined. return isRegInSet(Defs, Reg); } bool RegDefsUses::isRegInSet(const BitVector &RegSet, unsigned Reg) const { // Check Reg and all aliased Registers. for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) if (RegSet.test(*AI)) return true; return false; } bool InspectMemInstr::hasHazard(const MachineInstr &MI) { if (!MI.mayStore() && !MI.mayLoad()) return false; if (ForbidMemInstr) return true; OrigSeenLoad = SeenLoad; OrigSeenStore = SeenStore; SeenLoad |= MI.mayLoad(); SeenStore |= MI.mayStore(); // If MI is an ordered or volatile memory reference, disallow moving // subsequent loads and stores to delay slot. if (MI.hasOrderedMemoryRef() && (OrigSeenLoad || OrigSeenStore)) { ForbidMemInstr = true; return true; } return hasHazard_(MI); } bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) { if (MI.mayStore()) return true; if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getPseudoValue()) return true; if (const PseudoSourceValue *PSV = (*MI.memoperands_begin())->getPseudoValue()) { if (isa(PSV)) return false; return !PSV->isConstant(nullptr) && !PSV->isStack(); } return true; } MemDefsUses::MemDefsUses(const DataLayout &DL, const MachineFrameInfo *MFI_) : InspectMemInstr(false), MFI(MFI_), DL(DL) {} bool MemDefsUses::hasHazard_(const MachineInstr &MI) { bool HasHazard = false; SmallVector Objs; // Check underlying object list. if (getUnderlyingObjects(MI, Objs)) { for (SmallVectorImpl::const_iterator I = Objs.begin(); I != Objs.end(); ++I) HasHazard |= updateDefsUses(*I, MI.mayStore()); return HasHazard; } // No underlying objects found. 
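// A minimal model (plain C++, hypothetical names) of the object-based memory
// hazard check in MemDefsUses::updateDefsUses below: moving a store is unsafe
// if its underlying object was already stored to or loaded from, and moving a
// load is unsafe if the object was already stored to.  The real pass also
// tracks accesses with no identifiable object and treats them conservatively;
// that part is omitted here.
#include <set>

class MemDefsUsesSketch {
  std::set<const void *> Defs, Uses; // objects stored to / loaded from so far

public:
  // Record an access to an identified object and return true if hoisting it
  // past the accesses recorded so far would be unsafe.
  bool update(const void *Obj, bool IsStore) {
    if (IsStore) {
      bool AlreadyStored = !Defs.insert(Obj).second;
      return AlreadyStored || Uses.count(Obj) != 0;
    }
    Uses.insert(Obj);
    return Defs.count(Obj) != 0;
  }
};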
HasHazard = MI.mayStore() && (OrigSeenLoad || OrigSeenStore); HasHazard |= MI.mayLoad() || OrigSeenStore; SeenNoObjLoad |= MI.mayLoad(); SeenNoObjStore |= MI.mayStore(); return HasHazard; } bool MemDefsUses::updateDefsUses(ValueType V, bool MayStore) { if (MayStore) return !Defs.insert(V).second || Uses.count(V) || SeenNoObjStore || SeenNoObjLoad; Uses.insert(V); return Defs.count(V) || SeenNoObjStore; } bool MemDefsUses:: getUnderlyingObjects(const MachineInstr &MI, SmallVectorImpl &Objects) const { if (!MI.hasOneMemOperand() || (!(*MI.memoperands_begin())->getValue() && !(*MI.memoperands_begin())->getPseudoValue())) return false; if (const PseudoSourceValue *PSV = (*MI.memoperands_begin())->getPseudoValue()) { if (!PSV->isAliased(MFI)) return false; Objects.push_back(PSV); return true; } const Value *V = (*MI.memoperands_begin())->getValue(); SmallVector Objs; GetUnderlyingObjects(const_cast(V), Objs, DL); for (SmallVectorImpl::iterator I = Objs.begin(), E = Objs.end(); I != E; ++I) { if (!isIdentifiedObject(V)) return false; Objects.push_back(*I); } return true; } // Replace Branch with the compact branch instruction. Iter MipsDelaySlotFiller::replaceWithCompactBranch(MachineBasicBlock &MBB, Iter Branch, const DebugLoc &DL) { const MipsSubtarget &STI = MBB.getParent()->getSubtarget(); const MipsInstrInfo *TII = STI.getInstrInfo(); unsigned NewOpcode = TII->getEquivalentCompactForm(Branch); Branch = TII->genInstrWithNewOpc(NewOpcode, Branch); std::next(Branch)->eraseFromParent(); return Branch; } // For given opcode returns opcode of corresponding instruction with short // delay slot. // For the pseudo TAILCALL*_MM instructions return the short delay slot // form. Unfortunately, TAILCALL<->b16 is denied as b16 has a limited range // that is too short to make use of for tail calls. static int getEquivalentCallShort(int Opcode) { switch (Opcode) { case Mips::BGEZAL: return Mips::BGEZALS_MM; case Mips::BLTZAL: return Mips::BLTZALS_MM; case Mips::JAL: case Mips::JAL_MM: return Mips::JALS_MM; case Mips::JALR: return Mips::JALRS_MM; case Mips::JALR16_MM: return Mips::JALRS16_MM; case Mips::TAILCALL_MM: llvm_unreachable("Attempting to shorten the TAILCALL_MM pseudo!"); case Mips::TAILCALLREG: return Mips::JR16_MM; default: llvm_unreachable("Unexpected call instruction for microMIPS."); } } /// runOnMachineBasicBlock - Fill in delay slots for the given basic block. /// We assume there is only one delay slot per delayed instruction. bool MipsDelaySlotFiller::runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; const MipsSubtarget &STI = MBB.getParent()->getSubtarget(); bool InMicroMipsMode = STI.inMicroMipsMode(); const MipsInstrInfo *TII = STI.getInstrInfo(); for (Iter I = MBB.begin(); I != MBB.end(); ++I) { if (!hasUnoccupiedSlot(&*I)) continue; // Delay slot filling is disabled at -O0, or in microMIPS32R6. if (!DisableDelaySlotFiller && (TM->getOptLevel() != CodeGenOpt::None) && !(InMicroMipsMode && STI.hasMips32r6())) { bool Filled = false; if (MipsCompactBranchPolicy.getValue() != CB_Always || !TII->getEquivalentCompactForm(I)) { if (searchBackward(MBB, *I)) { Filled = true; } else if (I->isTerminator()) { if (searchSuccBBs(MBB, I)) { Filled = true; } } else if (searchForward(MBB, I)) { Filled = true; } } if (Filled) { // Get instruction with delay slot. 
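// Sketch for illustration (the enum and its members are invented, not the
// real Mips opcode table): getEquivalentCallShort above is a straight
// opcode-to-opcode mapping that swaps a call for the microMIPS form with a
// short delay slot once a 16-bit filler instruction has been found.
#include <stdexcept>

enum class CallOpcSketch {
  BGEZAL, BLTZAL, JAL, JALR, JALR16,      // full delay-slot forms
  BGEZALS, BLTZALS, JALS, JALRS, JALRS16  // short delay-slot forms
};

CallOpcSketch getEquivalentCallShortSketch(CallOpcSketch Opcode) {
  switch (Opcode) {
  case CallOpcSketch::BGEZAL: return CallOpcSketch::BGEZALS;
  case CallOpcSketch::BLTZAL: return CallOpcSketch::BLTZALS;
  case CallOpcSketch::JAL:    return CallOpcSketch::JALS;
  case CallOpcSketch::JALR:   return CallOpcSketch::JALRS;
  case CallOpcSketch::JALR16: return CallOpcSketch::JALRS16;
  default:
    throw std::logic_error("unexpected call opcode for microMIPS");
  }
}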
MachineBasicBlock::instr_iterator DSI = I.getInstrIterator(); if (InMicroMipsMode && TII->getInstSizeInBytes(*std::next(DSI)) == 2 && DSI->isCall()) { // If instruction in delay slot is 16b change opcode to // corresponding instruction with short delay slot. // TODO: Implement an instruction mapping table of 16bit opcodes to // 32bit opcodes so that an instruction can be expanded. This would // save 16 bits as a TAILCALL_MM pseudo requires a fullsized nop. // TODO: Permit b16 when branching backwards to the same function // if it is in range. DSI->setDesc(TII->get(getEquivalentCallShort(DSI->getOpcode()))); } ++FilledSlots; Changed = true; continue; } } // For microMIPS if instruction is BEQ or BNE with one ZERO register, then // instead of adding NOP replace this instruction with the corresponding // compact branch instruction, i.e. BEQZC or BNEZC. Additionally // PseudoReturn and PseudoIndirectBranch are expanded to JR_MM, so they can // be replaced with JRC16_MM. // For MIPSR6 attempt to produce the corresponding compact (no delay slot) // form of the CTI. For indirect jumps this will not require inserting a // NOP and for branches will hopefully avoid requiring a NOP. if ((InMicroMipsMode || (STI.hasMips32r6() && MipsCompactBranchPolicy != CB_Never)) && TII->getEquivalentCompactForm(I)) { I = replaceWithCompactBranch(MBB, I, I->getDebugLoc()); Changed = true; continue; } // Bundle the NOP to the instruction with the delay slot. BuildMI(MBB, std::next(I), I->getDebugLoc(), TII->get(Mips::NOP)); MIBundleBuilder(MBB, I, std::next(I, 2)); ++FilledSlots; Changed = true; } return Changed; } template bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End, RegDefsUses &RegDU, InspectMemInstr &IM, Iter Slot, IterTy &Filler) const { for (IterTy I = Begin; I != End;) { IterTy CurrI = I; ++I; // skip debug value if (CurrI->isDebugInstr()) continue; if (terminateSearch(*CurrI)) break; assert((!CurrI->isCall() && !CurrI->isReturn() && !CurrI->isBranch()) && "Cannot put calls, returns or branches in delay slot."); if (CurrI->isKill()) { CurrI->eraseFromParent(); continue; } if (delayHasHazard(*CurrI, RegDU, IM)) continue; const MipsSubtarget &STI = MBB.getParent()->getSubtarget(); if (STI.isTargetNaCl()) { // In NaCl, instructions that must be masked are forbidden in delay slots. // We only check for loads, stores and SP changes. Calls, returns and // branches are not checked because non-NaCl targets never put them in // delay slots. unsigned AddrIdx; if ((isBasePlusOffsetMemoryAccess(CurrI->getOpcode(), &AddrIdx) && baseRegNeedsLoadStoreMask(CurrI->getOperand(AddrIdx).getReg())) || CurrI->modifiesRegister(Mips::SP, STI.getRegisterInfo())) continue; } bool InMicroMipsMode = STI.inMicroMipsMode(); const MipsInstrInfo *TII = STI.getInstrInfo(); unsigned Opcode = (*Slot).getOpcode(); // This is complicated by the tail call optimization. For non-PIC code // there is only a 32bit sized unconditional branch which can be assumed // to be able to reach the target. b16 only has a range of +/- 1 KB. // It's entirely possible that the target function is reachable with b16 // but we don't have enough information to make that decision. 
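// The comment above points out that the short microMIPS branch reaches only
// about +/- 1 KB, so the pass cannot assume a tail-call target is in range.
// Deciding that kind of question is just a signed-range check on the
// displacement; a generic helper for illustration (the 10-bit / 2-byte
// example is chosen to match the "+/- 1 KB" figure from the comment, not
// taken from the encoding tables):
#include <cstdint>

// True if Displacement fits in a signed immediate field of Bits bits.
constexpr bool fitsInSignedBits(int64_t Displacement, unsigned Bits) {
  const int64_t Lo = -(int64_t(1) << (Bits - 1));
  const int64_t Hi = (int64_t(1) << (Bits - 1)) - 1;
  return Displacement >= Lo && Displacement <= Hi;
}

// A field holding a signed 10-bit count of 2-byte units covers byte
// displacements in [-1024, +1022], i.e. roughly +/- 1 KB.
static_assert(fitsInSignedBits(-1024 / 2, 10), "lower bound in range");
static_assert(fitsInSignedBits(1022 / 2, 10), "upper bound in range");
static_assert(!fitsInSignedBits(1024 / 2, 10), "one step past the range");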
if (InMicroMipsMode && TII->getInstSizeInBytes(*CurrI) == 2 && (Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch || + Opcode == Mips::PseudoIndirectBranch_MM || Opcode == Mips::PseudoReturn || Opcode == Mips::TAILCALL)) continue; // Instructions LWP/SWP and MOVEP should not be in a delay slot as that // results in unpredictable behaviour if (InMicroMipsMode && (Opcode == Mips::LWP_MM || Opcode == Mips::SWP_MM || Opcode == Mips::MOVEP_MM)) continue; Filler = CurrI; return true; } return false; } bool MipsDelaySlotFiller::searchBackward(MachineBasicBlock &MBB, MachineInstr &Slot) const { if (DisableBackwardSearch) return false; auto *Fn = MBB.getParent(); RegDefsUses RegDU(*Fn->getSubtarget().getRegisterInfo()); MemDefsUses MemDU(Fn->getDataLayout(), &Fn->getFrameInfo()); ReverseIter Filler; RegDU.init(Slot); MachineBasicBlock::iterator SlotI = Slot; if (!searchRange(MBB, ++SlotI.getReverse(), MBB.rend(), RegDU, MemDU, Slot, Filler)) return false; MBB.splice(std::next(SlotI), &MBB, Filler.getReverse()); MIBundleBuilder(MBB, SlotI, std::next(SlotI, 2)); ++UsefulSlots; return true; } bool MipsDelaySlotFiller::searchForward(MachineBasicBlock &MBB, Iter Slot) const { // Can handle only calls. if (DisableForwardSearch || !Slot->isCall()) return false; RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo()); NoMemInstr NM; Iter Filler; RegDU.setCallerSaved(*Slot); if (!searchRange(MBB, std::next(Slot), MBB.end(), RegDU, NM, Slot, Filler)) return false; MBB.splice(std::next(Slot), &MBB, Filler); MIBundleBuilder(MBB, Slot, std::next(Slot, 2)); ++UsefulSlots; return true; } bool MipsDelaySlotFiller::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const { if (DisableSuccBBSearch) return false; MachineBasicBlock *SuccBB = selectSuccBB(MBB); if (!SuccBB) return false; RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo()); bool HasMultipleSuccs = false; BB2BrMap BrMap; std::unique_ptr IM; Iter Filler; auto *Fn = MBB.getParent(); // Iterate over SuccBB's predecessor list. for (MachineBasicBlock::pred_iterator PI = SuccBB->pred_begin(), PE = SuccBB->pred_end(); PI != PE; ++PI) if (!examinePred(**PI, *SuccBB, RegDU, HasMultipleSuccs, BrMap)) return false; // Do not allow moving instructions which have unallocatable register operands // across basic block boundaries. RegDU.setUnallocatableRegs(*Fn); // Only allow moving loads from stack or constants if any of the SuccBB's // predecessors have multiple successors. if (HasMultipleSuccs) { IM.reset(new LoadFromStackOrConst()); } else { const MachineFrameInfo &MFI = Fn->getFrameInfo(); IM.reset(new MemDefsUses(Fn->getDataLayout(), &MFI)); } if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Slot, Filler)) return false; insertDelayFiller(Filler, BrMap); addLiveInRegs(Filler, *SuccBB); Filler->eraseFromParent(); return true; } MachineBasicBlock * MipsDelaySlotFiller::selectSuccBB(MachineBasicBlock &B) const { if (B.succ_empty()) return nullptr; // Select the successor with the larget edge weight. auto &Prob = getAnalysis(); MachineBasicBlock *S = *std::max_element( B.succ_begin(), B.succ_end(), [&](const MachineBasicBlock *Dst0, const MachineBasicBlock *Dst1) { return Prob.getEdgeProbability(&B, Dst0) < Prob.getEdgeProbability(&B, Dst1); }); return S->isEHPad() ? 
nullptr : S; } std::pair MipsDelaySlotFiller::getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const { const MipsInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo(); MachineBasicBlock *TrueBB = nullptr, *FalseBB = nullptr; SmallVector BranchInstrs; SmallVector Cond; MipsInstrInfo::BranchType R = TII->analyzeBranch(MBB, TrueBB, FalseBB, Cond, false, BranchInstrs); if ((R == MipsInstrInfo::BT_None) || (R == MipsInstrInfo::BT_NoBranch)) return std::make_pair(R, nullptr); if (R != MipsInstrInfo::BT_CondUncond) { if (!hasUnoccupiedSlot(BranchInstrs[0])) return std::make_pair(MipsInstrInfo::BT_None, nullptr); assert(((R != MipsInstrInfo::BT_Uncond) || (TrueBB == &Dst))); return std::make_pair(R, BranchInstrs[0]); } assert((TrueBB == &Dst) || (FalseBB == &Dst)); // Examine the conditional branch. See if its slot is occupied. if (hasUnoccupiedSlot(BranchInstrs[0])) return std::make_pair(MipsInstrInfo::BT_Cond, BranchInstrs[0]); // If that fails, try the unconditional branch. if (hasUnoccupiedSlot(BranchInstrs[1]) && (FalseBB == &Dst)) return std::make_pair(MipsInstrInfo::BT_Uncond, BranchInstrs[1]); return std::make_pair(MipsInstrInfo::BT_None, nullptr); } bool MipsDelaySlotFiller::examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ, RegDefsUses &RegDU, bool &HasMultipleSuccs, BB2BrMap &BrMap) const { std::pair P = getBranch(Pred, Succ); // Return if either getBranch wasn't able to analyze the branches or there // were no branches with unoccupied slots. if (P.first == MipsInstrInfo::BT_None) return false; if ((P.first != MipsInstrInfo::BT_Uncond) && (P.first != MipsInstrInfo::BT_NoBranch)) { HasMultipleSuccs = true; RegDU.addLiveOut(Pred, Succ); } BrMap[&Pred] = P.second; return true; } bool MipsDelaySlotFiller::delayHasHazard(const MachineInstr &Candidate, RegDefsUses &RegDU, InspectMemInstr &IM) const { assert(!Candidate.isKill() && "KILL instructions should have been eliminated at this point."); bool HasHazard = Candidate.isImplicitDef(); HasHazard |= IM.hasHazard(Candidate); HasHazard |= RegDU.update(Candidate, 0, Candidate.getNumOperands()); return HasHazard; } bool MipsDelaySlotFiller::terminateSearch(const MachineInstr &Candidate) const { return (Candidate.isTerminator() || Candidate.isCall() || Candidate.isPosition() || Candidate.isInlineAsm() || Candidate.hasUnmodeledSideEffects()); } /// createMipsDelaySlotFillerPass - Returns a pass that fills in delay /// slots in Mips MachineFunctions FunctionPass *llvm::createMipsDelaySlotFillerPass() { return new MipsDelaySlotFiller(); } Index: vendor/llvm/dist-release_80/lib/Target/Mips/MipsFastISel.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/Target/Mips/MipsFastISel.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/Mips/MipsFastISel.cpp (revision 348932) @@ -1,2141 +1,2154 @@ //===- MipsFastISel.cpp - Mips FastISel implementation --------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// /// /// \file /// This file defines the MIPS-specific support for the FastISel class. /// Some of the target-specific code is generated by tablegen in the file /// MipsGenFastISel.inc, which is #included here. 
/// //===----------------------------------------------------------------------===// #include "MCTargetDesc/MipsABIInfo.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MipsCCState.h" #include "MipsISelLowering.h" #include "MipsInstrInfo.h" #include "MipsMachineFunction.h" #include "MipsSubtarget.h" #include "MipsTargetMachine.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #define DEBUG_TYPE "mips-fastisel" using namespace llvm; extern cl::opt EmitJalrReloc; namespace { class MipsFastISel final : public FastISel { // All possible address modes. class Address { public: using BaseKind = enum { RegBase, FrameIndexBase }; private: BaseKind Kind = RegBase; union { unsigned Reg; int FI; } Base; int64_t Offset = 0; const GlobalValue *GV = nullptr; public: // Innocuous defaults for our address. Address() { Base.Reg = 0; } void setKind(BaseKind K) { Kind = K; } BaseKind getKind() const { return Kind; } bool isRegBase() const { return Kind == RegBase; } bool isFIBase() const { return Kind == FrameIndexBase; } void setReg(unsigned Reg) { assert(isRegBase() && "Invalid base register access!"); Base.Reg = Reg; } unsigned getReg() const { assert(isRegBase() && "Invalid base register access!"); return Base.Reg; } void setFI(unsigned FI) { assert(isFIBase() && "Invalid base frame index access!"); Base.FI = FI; } unsigned getFI() const { assert(isFIBase() && "Invalid base frame index access!"); return Base.FI; } void setOffset(int64_t Offset_) { Offset = Offset_; } int64_t getOffset() const { return Offset; } void setGlobalValue(const GlobalValue *G) { GV = G; } const GlobalValue *getGlobalValue() { return GV; } }; /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can /// make the right decision when generating code for different targets. 
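// The Address helper above is a small tagged union: either a base register or
// a frame index, plus an offset (the global-value field is left out here).
// The same idea in self-contained C++ with std::variant; all names below are
// invented for the sketch.
#include <cstdint>
#include <variant>

struct RegBaseSketch    { unsigned Reg = 0; };
struct FrameIndexSketch { int FI = 0; };

struct AddressSketch {
  std::variant<RegBaseSketch, FrameIndexSketch> Base;
  int64_t Offset = 0;

  bool isRegBase() const { return std::holds_alternative<RegBaseSketch>(Base); }
  bool isFIBase()  const { return std::holds_alternative<FrameIndexSketch>(Base); }
  unsigned getReg() const { return std::get<RegBaseSketch>(Base).Reg; }
  int getFI()       const { return std::get<FrameIndexSketch>(Base).FI; }
};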
const TargetMachine &TM; const MipsSubtarget *Subtarget; const TargetInstrInfo &TII; const TargetLowering &TLI; MipsFunctionInfo *MFI; // Convenience variables to avoid some queries. LLVMContext *Context; bool fastLowerArguments() override; bool fastLowerCall(CallLoweringInfo &CLI) override; bool fastLowerIntrinsicCall(const IntrinsicInst *II) override; bool UnsupportedFPMode; // To allow fast-isel to proceed and just not handle // floating point but not reject doing fast-isel in other // situations private: // Selection routines. bool selectLogicalOp(const Instruction *I); bool selectLoad(const Instruction *I); bool selectStore(const Instruction *I); bool selectBranch(const Instruction *I); bool selectSelect(const Instruction *I); bool selectCmp(const Instruction *I); bool selectFPExt(const Instruction *I); bool selectFPTrunc(const Instruction *I); bool selectFPToInt(const Instruction *I, bool IsSigned); bool selectRet(const Instruction *I); bool selectTrunc(const Instruction *I); bool selectIntExt(const Instruction *I); bool selectShift(const Instruction *I); bool selectDivRem(const Instruction *I, unsigned ISDOpcode); // Utility helper routines. bool isTypeLegal(Type *Ty, MVT &VT); bool isTypeSupported(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); bool computeAddress(const Value *Obj, Address &Addr); bool computeCallAddress(const Value *V, Address &Addr); void simplifyAddress(Address &Addr); // Emit helper routines. bool emitCmp(unsigned DestReg, const CmpInst *CI); bool emitLoad(MVT VT, unsigned &ResultReg, Address &Addr, unsigned Alignment = 0); bool emitStore(MVT VT, unsigned SrcReg, Address Addr, MachineMemOperand *MMO = nullptr); bool emitStore(MVT VT, unsigned SrcReg, Address &Addr, unsigned Alignment = 0); unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); bool emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg, bool IsZExt); bool emitIntZExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg); bool emitIntSExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg); bool emitIntSExt32r1(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg); bool emitIntSExt32r2(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg); unsigned getRegEnsuringSimpleIntegerWidening(const Value *, bool IsUnsigned); unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS, const Value *RHS); unsigned materializeFP(const ConstantFP *CFP, MVT VT); unsigned materializeGV(const GlobalValue *GV, MVT VT); unsigned materializeInt(const Constant *C, MVT VT); unsigned materialize32BitInt(int64_t Imm, const TargetRegisterClass *RC); unsigned materializeExternalCallSym(MCSymbol *Syn); MachineInstrBuilder emitInst(unsigned Opc) { return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)); } MachineInstrBuilder emitInst(unsigned Opc, unsigned DstReg) { return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DstReg); } MachineInstrBuilder emitInstStore(unsigned Opc, unsigned SrcReg, unsigned MemReg, int64_t MemOffset) { return emitInst(Opc).addReg(SrcReg).addReg(MemReg).addImm(MemOffset); } MachineInstrBuilder emitInstLoad(unsigned Opc, unsigned DstReg, unsigned MemReg, int64_t MemOffset) { return emitInst(Opc, DstReg).addReg(MemReg).addImm(MemOffset); } unsigned fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); // for some reason, this default is not generated by tablegen // so we explicitly generate it here. 
unsigned fastEmitInst_riir(uint64_t inst, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t imm1, uint64_t imm2, unsigned Op3, bool Op3IsKill) { return 0; } // Call handling routines. private: CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const; bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl &ArgVTs, unsigned &NumBytes); bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes); const MipsABIInfo &getABI() const { return static_cast(TM).getABI(); } public: // Backend specific FastISel code. explicit MipsFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) : FastISel(funcInfo, libInfo), TM(funcInfo.MF->getTarget()), Subtarget(&funcInfo.MF->getSubtarget()), TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()) { MFI = funcInfo.MF->getInfo(); Context = &funcInfo.Fn->getContext(); UnsupportedFPMode = Subtarget->isFP64bit() || Subtarget->useSoftFloat(); } unsigned fastMaterializeAlloca(const AllocaInst *AI) override; unsigned fastMaterializeConstant(const Constant *C) override; bool fastSelectInstruction(const Instruction *I) override; #include "MipsGenFastISel.inc" }; } // end anonymous namespace static bool CC_Mips(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) LLVM_ATTRIBUTE_UNUSED; static bool CC_MipsO32_FP32(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { llvm_unreachable("should not be called"); } static bool CC_MipsO32_FP64(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { llvm_unreachable("should not be called"); } #include "MipsGenCallingConv.inc" CCAssignFn *MipsFastISel::CCAssignFnForCall(CallingConv::ID CC) const { return CC_MipsO32; } unsigned MipsFastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS, const Value *RHS) { // Canonicalize immediates to the RHS first. 
if (isa(LHS) && !isa(RHS)) std::swap(LHS, RHS); unsigned Opc; switch (ISDOpc) { case ISD::AND: Opc = Mips::AND; break; case ISD::OR: Opc = Mips::OR; break; case ISD::XOR: Opc = Mips::XOR; break; default: llvm_unreachable("unexpected opcode"); } unsigned LHSReg = getRegForValue(LHS); if (!LHSReg) return 0; unsigned RHSReg; if (const auto *C = dyn_cast(RHS)) RHSReg = materializeInt(C, MVT::i32); else RHSReg = getRegForValue(RHS); if (!RHSReg) return 0; unsigned ResultReg = createResultReg(&Mips::GPR32RegClass); if (!ResultReg) return 0; emitInst(Opc, ResultReg).addReg(LHSReg).addReg(RHSReg); return ResultReg; } unsigned MipsFastISel::fastMaterializeAlloca(const AllocaInst *AI) { assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i32 && "Alloca should always return a pointer."); DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) { unsigned ResultReg = createResultReg(&Mips::GPR32RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::LEA_ADDiu), ResultReg) .addFrameIndex(SI->second) .addImm(0); return ResultReg; } return 0; } unsigned MipsFastISel::materializeInt(const Constant *C, MVT VT) { if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) return 0; const TargetRegisterClass *RC = &Mips::GPR32RegClass; const ConstantInt *CI = cast(C); return materialize32BitInt(CI->getZExtValue(), RC); } unsigned MipsFastISel::materialize32BitInt(int64_t Imm, const TargetRegisterClass *RC) { unsigned ResultReg = createResultReg(RC); if (isInt<16>(Imm)) { unsigned Opc = Mips::ADDiu; emitInst(Opc, ResultReg).addReg(Mips::ZERO).addImm(Imm); return ResultReg; } else if (isUInt<16>(Imm)) { emitInst(Mips::ORi, ResultReg).addReg(Mips::ZERO).addImm(Imm); return ResultReg; } unsigned Lo = Imm & 0xFFFF; unsigned Hi = (Imm >> 16) & 0xFFFF; if (Lo) { // Both Lo and Hi have nonzero bits. unsigned TmpReg = createResultReg(RC); emitInst(Mips::LUi, TmpReg).addImm(Hi); emitInst(Mips::ORi, ResultReg).addReg(TmpReg).addImm(Lo); } else { emitInst(Mips::LUi, ResultReg).addImm(Hi); } return ResultReg; } unsigned MipsFastISel::materializeFP(const ConstantFP *CFP, MVT VT) { if (UnsupportedFPMode) return 0; int64_t Imm = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); if (VT == MVT::f32) { const TargetRegisterClass *RC = &Mips::FGR32RegClass; unsigned DestReg = createResultReg(RC); unsigned TempReg = materialize32BitInt(Imm, &Mips::GPR32RegClass); emitInst(Mips::MTC1, DestReg).addReg(TempReg); return DestReg; } else if (VT == MVT::f64) { const TargetRegisterClass *RC = &Mips::AFGR64RegClass; unsigned DestReg = createResultReg(RC); unsigned TempReg1 = materialize32BitInt(Imm >> 32, &Mips::GPR32RegClass); unsigned TempReg2 = materialize32BitInt(Imm & 0xFFFFFFFF, &Mips::GPR32RegClass); emitInst(Mips::BuildPairF64, DestReg).addReg(TempReg2).addReg(TempReg1); return DestReg; } return 0; } unsigned MipsFastISel::materializeGV(const GlobalValue *GV, MVT VT) { // For now 32-bit only. if (VT != MVT::i32) return 0; const TargetRegisterClass *RC = &Mips::GPR32RegClass; unsigned DestReg = createResultReg(RC); const GlobalVariable *GVar = dyn_cast(GV); bool IsThreadLocal = GVar && GVar->isThreadLocal(); // TLS not supported at this time. 
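// The arithmetic behind materialize32BitInt above, shown as a standalone
// sketch: values that fit in a signed 16-bit immediate use a single ADDiu
// from $zero, values that fit in an unsigned 16-bit immediate use ORi from
// $zero, and everything else is split into 16-bit halves loaded with LUi and
// (when the low half is nonzero) merged with ORi.  The register names in the
// printed text are placeholders.
#include <cstdint>
#include <cstdio>

void describeMaterialization(int64_t Imm) {
  if (Imm >= INT16_MIN && Imm <= INT16_MAX) {
    std::printf("addiu $r, $zero, %lld\n", (long long)Imm);
  } else if (Imm >= 0 && Imm <= UINT16_MAX) {
    std::printf("ori $r, $zero, %llu\n", (unsigned long long)Imm);
  } else {
    unsigned Lo = (unsigned)((uint64_t)Imm & 0xFFFF);         // low 16 bits
    unsigned Hi = (unsigned)(((uint64_t)Imm >> 16) & 0xFFFF); // high 16 bits
    std::printf("lui $t, 0x%x\n", Hi);
    if (Lo) // the ORi is only needed when the low half is nonzero
      std::printf("ori $r, $t, 0x%x\n", Lo);
  }
}

// Example: describeMaterialization(0x12345) prints "lui $t, 0x1" followed by
// "ori $r, $t, 0x2345".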
if (IsThreadLocal) return 0; emitInst(Mips::LW, DestReg) .addReg(MFI->getGlobalBaseReg()) .addGlobalAddress(GV, 0, MipsII::MO_GOT); if ((GV->hasInternalLinkage() || (GV->hasLocalLinkage() && !isa(GV)))) { unsigned TempReg = createResultReg(RC); emitInst(Mips::ADDiu, TempReg) .addReg(DestReg) .addGlobalAddress(GV, 0, MipsII::MO_ABS_LO); DestReg = TempReg; } return DestReg; } unsigned MipsFastISel::materializeExternalCallSym(MCSymbol *Sym) { const TargetRegisterClass *RC = &Mips::GPR32RegClass; unsigned DestReg = createResultReg(RC); emitInst(Mips::LW, DestReg) .addReg(MFI->getGlobalBaseReg()) .addSym(Sym, MipsII::MO_GOT); return DestReg; } // Materialize a constant into a register, and return the register // number (or zero if we failed to handle it). unsigned MipsFastISel::fastMaterializeConstant(const Constant *C) { EVT CEVT = TLI.getValueType(DL, C->getType(), true); // Only handle simple types. if (!CEVT.isSimple()) return 0; MVT VT = CEVT.getSimpleVT(); if (const ConstantFP *CFP = dyn_cast(C)) return (UnsupportedFPMode) ? 0 : materializeFP(CFP, VT); else if (const GlobalValue *GV = dyn_cast(C)) return materializeGV(GV, VT); else if (isa(C)) return materializeInt(C, VT); return 0; } bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) { const User *U = nullptr; unsigned Opcode = Instruction::UserOp1; if (const Instruction *I = dyn_cast(Obj)) { // Don't walk into other basic blocks unless the object is an alloca from // another block, otherwise it may not have a virtual register assigned. if (FuncInfo.StaticAllocaMap.count(static_cast(Obj)) || FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { Opcode = I->getOpcode(); U = I; } } else if (const ConstantExpr *C = dyn_cast(Obj)) { Opcode = C->getOpcode(); U = C; } switch (Opcode) { default: break; case Instruction::BitCast: // Look through bitcasts. return computeAddress(U->getOperand(0), Addr); case Instruction::GetElementPtr: { Address SavedAddr = Addr; int64_t TmpOffset = Addr.getOffset(); // Iterate through the GEP folding the constants into offsets where // we can. gep_type_iterator GTI = gep_type_begin(U); for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i, ++GTI) { const Value *Op = *i; if (StructType *STy = GTI.getStructTypeOrNull()) { const StructLayout *SL = DL.getStructLayout(STy); unsigned Idx = cast(Op)->getZExtValue(); TmpOffset += SL->getElementOffset(Idx); } else { uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); while (true) { if (const ConstantInt *CI = dyn_cast(Op)) { // Constant-offset addressing. TmpOffset += CI->getSExtValue() * S; break; } if (canFoldAddIntoGEP(U, Op)) { // A compatible add with a constant operand. Fold the constant. ConstantInt *CI = cast(cast(Op)->getOperand(1)); TmpOffset += CI->getSExtValue() * S; // Iterate on the other operand. Op = cast(Op)->getOperand(0); continue; } // Unsupported goto unsupported_gep; } } } // Try to grab the base operand now. Addr.setOffset(TmpOffset); if (computeAddress(U->getOperand(0), Addr)) return true; // We failed, restore everything and try the other options. 
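// A standalone sketch of the constant folding done in the GetElementPtr case
// of computeAddress above: each index either contributes a known number of
// bytes (a struct field offset, or index times element size) to a running
// offset, or forces the walk to give up and restore the saved address.  The
// types and names here are invented for the illustration.
#include <cstdint>
#include <optional>
#include <vector>

struct GepStepSketch {
  bool FoldableToConstant;  // false models an index the walk cannot fold
  int64_t ByteContribution; // field offset (struct) or index * alloc size
};

std::optional<int64_t>
foldConstantOffsets(int64_t StartingOffset,
                    const std::vector<GepStepSketch> &Steps) {
  int64_t Offset = StartingOffset;
  for (const GepStepSketch &S : Steps) {
    if (!S.FoldableToConstant)
      return std::nullopt; // corresponds to the unsupported_gep path
    Offset += S.ByteContribution;
  }
  return Offset;
}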
Addr = SavedAddr; unsupported_gep: break; } case Instruction::Alloca: { const AllocaInst *AI = cast(Obj); DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) { Addr.setKind(Address::FrameIndexBase); Addr.setFI(SI->second); return true; } break; } } Addr.setReg(getRegForValue(Obj)); return Addr.getReg() != 0; } bool MipsFastISel::computeCallAddress(const Value *V, Address &Addr) { const User *U = nullptr; unsigned Opcode = Instruction::UserOp1; if (const auto *I = dyn_cast(V)) { // Check if the value is defined in the same basic block. This information // is crucial to know whether or not folding an operand is valid. if (I->getParent() == FuncInfo.MBB->getBasicBlock()) { Opcode = I->getOpcode(); U = I; } } else if (const auto *C = dyn_cast(V)) { Opcode = C->getOpcode(); U = C; } switch (Opcode) { default: break; case Instruction::BitCast: // Look past bitcasts if its operand is in the same BB. return computeCallAddress(U->getOperand(0), Addr); break; case Instruction::IntToPtr: // Look past no-op inttoptrs if its operand is in the same BB. if (TLI.getValueType(DL, U->getOperand(0)->getType()) == TLI.getPointerTy(DL)) return computeCallAddress(U->getOperand(0), Addr); break; case Instruction::PtrToInt: // Look past no-op ptrtoints if its operand is in the same BB. if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return computeCallAddress(U->getOperand(0), Addr); break; } if (const GlobalValue *GV = dyn_cast(V)) { Addr.setGlobalValue(GV); return true; } // If all else fails, try to materialize the value in a register. if (!Addr.getGlobalValue()) { Addr.setReg(getRegForValue(V)); return Addr.getReg() != 0; } return false; } bool MipsFastISel::isTypeLegal(Type *Ty, MVT &VT) { EVT evt = TLI.getValueType(DL, Ty, true); // Only handle simple types. if (evt == MVT::Other || !evt.isSimple()) return false; VT = evt.getSimpleVT(); // Handle all legal types, i.e. a register that will directly hold this // value. return TLI.isTypeLegal(VT); } bool MipsFastISel::isTypeSupported(Type *Ty, MVT &VT) { if (Ty->isVectorTy()) return false; if (isTypeLegal(Ty, VT)) return true; // If this is a type than can be sign or zero-extended to a basic operation // go ahead and accept it now. if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) return true; return false; } bool MipsFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) { if (isTypeLegal(Ty, VT)) return true; // We will extend this in a later patch: // If this is a type than can be sign or zero-extended to a basic operation // go ahead and accept it now. if (VT == MVT::i8 || VT == MVT::i16) return true; return false; } // Because of how EmitCmp is called with fast-isel, you can // end up with redundant "andi" instructions after the sequences emitted below. // We should try and solve this issue in the future. 
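// emitCmp below builds every integer comparison out of the MIPS
// "set on less than" family plus a one-instruction fix-up, e.g. x == y is
// sltiu(x ^ y, 1) and unsigned x >= y is sltu(x, y) ^ 1.  The identities can
// be checked in plain C++ (these helpers only simulate the instructions; they
// are not the emitted code):
#include <cstdint>

constexpr uint32_t sltu(uint32_t A, uint32_t B) { return A < B ? 1u : 0u; }
constexpr uint32_t sltiu(uint32_t A, uint32_t Imm) { return A < Imm ? 1u : 0u; }

constexpr uint32_t cmpEq(uint32_t X, uint32_t Y)  { return sltiu(X ^ Y, 1); } // ICMP_EQ
constexpr uint32_t cmpNe(uint32_t X, uint32_t Y)  { return sltu(0, X ^ Y); }  // ICMP_NE
constexpr uint32_t cmpUge(uint32_t X, uint32_t Y) { return sltu(X, Y) ^ 1u; } // ICMP_UGE

static_assert(cmpEq(5, 5) == 1 && cmpEq(5, 6) == 0, "eq identity");
static_assert(cmpNe(1, 2) == 1 && cmpNe(2, 2) == 0, "ne identity");
static_assert(cmpUge(7, 3) == 1 && cmpUge(3, 7) == 0 && cmpUge(3, 3) == 1,
              "uge identity");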
// bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) { const Value *Left = CI->getOperand(0), *Right = CI->getOperand(1); bool IsUnsigned = CI->isUnsigned(); unsigned LeftReg = getRegEnsuringSimpleIntegerWidening(Left, IsUnsigned); if (LeftReg == 0) return false; unsigned RightReg = getRegEnsuringSimpleIntegerWidening(Right, IsUnsigned); if (RightReg == 0) return false; CmpInst::Predicate P = CI->getPredicate(); switch (P) { default: return false; case CmpInst::ICMP_EQ: { unsigned TempReg = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::XOR, TempReg).addReg(LeftReg).addReg(RightReg); emitInst(Mips::SLTiu, ResultReg).addReg(TempReg).addImm(1); break; } case CmpInst::ICMP_NE: { unsigned TempReg = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::XOR, TempReg).addReg(LeftReg).addReg(RightReg); emitInst(Mips::SLTu, ResultReg).addReg(Mips::ZERO).addReg(TempReg); break; } case CmpInst::ICMP_UGT: emitInst(Mips::SLTu, ResultReg).addReg(RightReg).addReg(LeftReg); break; case CmpInst::ICMP_ULT: emitInst(Mips::SLTu, ResultReg).addReg(LeftReg).addReg(RightReg); break; case CmpInst::ICMP_UGE: { unsigned TempReg = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::SLTu, TempReg).addReg(LeftReg).addReg(RightReg); emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1); break; } case CmpInst::ICMP_ULE: { unsigned TempReg = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::SLTu, TempReg).addReg(RightReg).addReg(LeftReg); emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1); break; } case CmpInst::ICMP_SGT: emitInst(Mips::SLT, ResultReg).addReg(RightReg).addReg(LeftReg); break; case CmpInst::ICMP_SLT: emitInst(Mips::SLT, ResultReg).addReg(LeftReg).addReg(RightReg); break; case CmpInst::ICMP_SGE: { unsigned TempReg = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::SLT, TempReg).addReg(LeftReg).addReg(RightReg); emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1); break; } case CmpInst::ICMP_SLE: { unsigned TempReg = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::SLT, TempReg).addReg(RightReg).addReg(LeftReg); emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1); break; } case CmpInst::FCMP_OEQ: case CmpInst::FCMP_UNE: case CmpInst::FCMP_OLT: case CmpInst::FCMP_OLE: case CmpInst::FCMP_OGT: case CmpInst::FCMP_OGE: { if (UnsupportedFPMode) return false; bool IsFloat = Left->getType()->isFloatTy(); bool IsDouble = Left->getType()->isDoubleTy(); if (!IsFloat && !IsDouble) return false; unsigned Opc, CondMovOpc; switch (P) { case CmpInst::FCMP_OEQ: Opc = IsFloat ? Mips::C_EQ_S : Mips::C_EQ_D32; CondMovOpc = Mips::MOVT_I; break; case CmpInst::FCMP_UNE: Opc = IsFloat ? Mips::C_EQ_S : Mips::C_EQ_D32; CondMovOpc = Mips::MOVF_I; break; case CmpInst::FCMP_OLT: Opc = IsFloat ? Mips::C_OLT_S : Mips::C_OLT_D32; CondMovOpc = Mips::MOVT_I; break; case CmpInst::FCMP_OLE: Opc = IsFloat ? Mips::C_OLE_S : Mips::C_OLE_D32; CondMovOpc = Mips::MOVT_I; break; case CmpInst::FCMP_OGT: Opc = IsFloat ? Mips::C_ULE_S : Mips::C_ULE_D32; CondMovOpc = Mips::MOVF_I; break; case CmpInst::FCMP_OGE: Opc = IsFloat ? 
Mips::C_ULT_S : Mips::C_ULT_D32; CondMovOpc = Mips::MOVF_I; break; default: llvm_unreachable("Only switching of a subset of CCs."); } unsigned RegWithZero = createResultReg(&Mips::GPR32RegClass); unsigned RegWithOne = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::ADDiu, RegWithZero).addReg(Mips::ZERO).addImm(0); emitInst(Mips::ADDiu, RegWithOne).addReg(Mips::ZERO).addImm(1); emitInst(Opc).addReg(Mips::FCC0, RegState::Define).addReg(LeftReg) .addReg(RightReg); emitInst(CondMovOpc, ResultReg) .addReg(RegWithOne) .addReg(Mips::FCC0) .addReg(RegWithZero); break; } } return true; } bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr, unsigned Alignment) { // // more cases will be handled here in following patches. // unsigned Opc; switch (VT.SimpleTy) { case MVT::i32: ResultReg = createResultReg(&Mips::GPR32RegClass); Opc = Mips::LW; break; case MVT::i16: ResultReg = createResultReg(&Mips::GPR32RegClass); Opc = Mips::LHu; break; case MVT::i8: ResultReg = createResultReg(&Mips::GPR32RegClass); Opc = Mips::LBu; break; case MVT::f32: if (UnsupportedFPMode) return false; ResultReg = createResultReg(&Mips::FGR32RegClass); Opc = Mips::LWC1; break; case MVT::f64: if (UnsupportedFPMode) return false; ResultReg = createResultReg(&Mips::AFGR64RegClass); Opc = Mips::LDC1; break; default: return false; } if (Addr.isRegBase()) { simplifyAddress(Addr); emitInstLoad(Opc, ResultReg, Addr.getReg(), Addr.getOffset()); return true; } if (Addr.isFIBase()) { unsigned FI = Addr.getFI(); unsigned Align = 4; int64_t Offset = Addr.getOffset(); MachineFrameInfo &MFI = MF->getFrameInfo(); MachineMemOperand *MMO = MF->getMachineMemOperand( MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addFrameIndex(FI) .addImm(Offset) .addMemOperand(MMO); return true; } return false; } bool MipsFastISel::emitStore(MVT VT, unsigned SrcReg, Address &Addr, unsigned Alignment) { // // more cases will be handled here in following patches. 
// unsigned Opc; switch (VT.SimpleTy) { case MVT::i8: Opc = Mips::SB; break; case MVT::i16: Opc = Mips::SH; break; case MVT::i32: Opc = Mips::SW; break; case MVT::f32: if (UnsupportedFPMode) return false; Opc = Mips::SWC1; break; case MVT::f64: if (UnsupportedFPMode) return false; Opc = Mips::SDC1; break; default: return false; } if (Addr.isRegBase()) { simplifyAddress(Addr); emitInstStore(Opc, SrcReg, Addr.getReg(), Addr.getOffset()); return true; } if (Addr.isFIBase()) { unsigned FI = Addr.getFI(); unsigned Align = 4; int64_t Offset = Addr.getOffset(); MachineFrameInfo &MFI = MF->getFrameInfo(); MachineMemOperand *MMO = MF->getMachineMemOperand( MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addReg(SrcReg) .addFrameIndex(FI) .addImm(Offset) .addMemOperand(MMO); return true; } return false; } bool MipsFastISel::selectLogicalOp(const Instruction *I) { MVT VT; if (!isTypeSupported(I->getType(), VT)) return false; unsigned ResultReg; switch (I->getOpcode()) { default: llvm_unreachable("Unexpected instruction."); case Instruction::And: ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); break; case Instruction::Or: ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); break; case Instruction::Xor: ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); break; } if (!ResultReg) return false; updateValueMap(I, ResultReg); return true; } bool MipsFastISel::selectLoad(const Instruction *I) { // Atomic loads need special handling. if (cast(I)->isAtomic()) return false; // Verify we have a legal type before going any further. MVT VT; if (!isLoadTypeLegal(I->getType(), VT)) return false; // See if we can handle this address. Address Addr; if (!computeAddress(I->getOperand(0), Addr)) return false; unsigned ResultReg; if (!emitLoad(VT, ResultReg, Addr, cast(I)->getAlignment())) return false; updateValueMap(I, ResultReg); return true; } bool MipsFastISel::selectStore(const Instruction *I) { Value *Op0 = I->getOperand(0); unsigned SrcReg = 0; // Atomic stores need special handling. if (cast(I)->isAtomic()) return false; // Verify we have a legal type before going any further. MVT VT; if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT)) return false; // Get the value to be stored into a register. SrcReg = getRegForValue(Op0); if (SrcReg == 0) return false; // See if we can handle this address. Address Addr; if (!computeAddress(I->getOperand(1), Addr)) return false; if (!emitStore(VT, SrcReg, Addr, cast(I)->getAlignment())) return false; return true; } // This can cause a redundant sltiu to be generated. // FIXME: try and eliminate this in a future patch. bool MipsFastISel::selectBranch(const Instruction *I) { const BranchInst *BI = cast(I); MachineBasicBlock *BrBB = FuncInfo.MBB; // // TBB is the basic block for the case where the comparison is true. // FBB is the basic block for the case where the comparison is false. // if (cond) goto TBB // goto FBB // TBB: // MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; - // For now, just try the simplest case where it's fed by a compare. + + // Fold the common case of a conditional branch with a comparison + // in the same block. 
+ unsigned ZExtCondReg = 0; if (const CmpInst *CI = dyn_cast(BI->getCondition())) { - MVT CIMVT = - TLI.getValueType(DL, CI->getOperand(0)->getType(), true).getSimpleVT(); - if (CIMVT == MVT::i1) + if (CI->hasOneUse() && CI->getParent() == I->getParent()) { + ZExtCondReg = createResultReg(&Mips::GPR32RegClass); + if (!emitCmp(ZExtCondReg, CI)) + return false; + } + } + + // For the general case, we need to mask with 1. + if (ZExtCondReg == 0) { + unsigned CondReg = getRegForValue(BI->getCondition()); + if (CondReg == 0) return false; - unsigned CondReg = getRegForValue(CI); - BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ)) - .addReg(CondReg) - .addMBB(TBB); - finishCondBranch(BI->getParent(), TBB, FBB); - return true; + ZExtCondReg = emitIntExt(MVT::i1, CondReg, MVT::i32, true); + if (ZExtCondReg == 0) + return false; } - return false; + + BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ)) + .addReg(ZExtCondReg) + .addMBB(TBB); + finishCondBranch(BI->getParent(), TBB, FBB); + return true; } bool MipsFastISel::selectCmp(const Instruction *I) { const CmpInst *CI = cast(I); unsigned ResultReg = createResultReg(&Mips::GPR32RegClass); if (!emitCmp(ResultReg, CI)) return false; updateValueMap(I, ResultReg); return true; } // Attempt to fast-select a floating-point extend instruction. bool MipsFastISel::selectFPExt(const Instruction *I) { if (UnsupportedFPMode) return false; Value *Src = I->getOperand(0); EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::f32 || DestVT != MVT::f64) return false; unsigned SrcReg = getRegForValue(Src); // this must be a 32bit floating point register class // maybe we should handle this differently if (!SrcReg) return false; unsigned DestReg = createResultReg(&Mips::AFGR64RegClass); emitInst(Mips::CVT_D32_S, DestReg).addReg(SrcReg); updateValueMap(I, DestReg); return true; } bool MipsFastISel::selectSelect(const Instruction *I) { assert(isa(I) && "Expected a select instruction."); LLVM_DEBUG(dbgs() << "selectSelect\n"); MVT VT; if (!isTypeSupported(I->getType(), VT) || UnsupportedFPMode) { LLVM_DEBUG( dbgs() << ".. .. gave up (!isTypeSupported || UnsupportedFPMode)\n"); return false; } unsigned CondMovOpc; const TargetRegisterClass *RC; if (VT.isInteger() && !VT.isVector() && VT.getSizeInBits() <= 32) { CondMovOpc = Mips::MOVN_I_I; RC = &Mips::GPR32RegClass; } else if (VT == MVT::f32) { CondMovOpc = Mips::MOVN_I_S; RC = &Mips::FGR32RegClass; } else if (VT == MVT::f64) { CondMovOpc = Mips::MOVN_I_D32; RC = &Mips::AFGR64RegClass; } else return false; const SelectInst *SI = cast(I); const Value *Cond = SI->getCondition(); unsigned Src1Reg = getRegForValue(SI->getTrueValue()); unsigned Src2Reg = getRegForValue(SI->getFalseValue()); unsigned CondReg = getRegForValue(Cond); if (!Src1Reg || !Src2Reg || !CondReg) return false; unsigned ZExtCondReg = createResultReg(&Mips::GPR32RegClass); if (!ZExtCondReg) return false; if (!emitIntExt(MVT::i1, CondReg, MVT::i32, ZExtCondReg, true)) return false; unsigned ResultReg = createResultReg(RC); unsigned TempReg = createResultReg(RC); if (!ResultReg || !TempReg) return false; emitInst(TargetOpcode::COPY, TempReg).addReg(Src2Reg); emitInst(CondMovOpc, ResultReg) .addReg(Src1Reg).addReg(ZExtCondReg).addReg(TempReg); updateValueMap(I, ResultReg); return true; } // Attempt to fast-select a floating-point truncate instruction. 
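// The rewritten selectBranch above has two paths: when the branch condition
// is a compare with a single use defined in the same block, the compare is
// emitted directly into a fresh register; otherwise the already-computed i1
// condition is zero-extended to 32 bits.  Either way the branch then tests
// "register > 0".  A sketch of just that decision, with invented stand-in
// types (not LLVM classes):
enum class BranchLoweringSketch { FoldedCompare, ZeroExtendedBit, CannotHandle };

struct CondSketch {
  bool IsCompare = false;          // condition comes from a compare
  bool SingleUseSameBlock = false; // ...with one use, defined in this block
};

BranchLoweringSketch classifyBranch(const CondSketch &Cond, bool CompareEmitted,
                                    bool HaveConditionRegister) {
  if (Cond.IsCompare && Cond.SingleUseSameBlock)
    return CompareEmitted ? BranchLoweringSketch::FoldedCompare
                          : BranchLoweringSketch::CannotHandle;
  // General case: materialize the i1 value and mask/zero-extend it before
  // branching on it.
  return HaveConditionRegister ? BranchLoweringSketch::ZeroExtendedBit
                               : BranchLoweringSketch::CannotHandle;
}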
bool MipsFastISel::selectFPTrunc(const Instruction *I) { if (UnsupportedFPMode) return false; Value *Src = I->getOperand(0); EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::f64 || DestVT != MVT::f32) return false; unsigned SrcReg = getRegForValue(Src); if (!SrcReg) return false; unsigned DestReg = createResultReg(&Mips::FGR32RegClass); if (!DestReg) return false; emitInst(Mips::CVT_S_D32, DestReg).addReg(SrcReg); updateValueMap(I, DestReg); return true; } // Attempt to fast-select a floating-point-to-integer conversion. bool MipsFastISel::selectFPToInt(const Instruction *I, bool IsSigned) { if (UnsupportedFPMode) return false; MVT DstVT, SrcVT; if (!IsSigned) return false; // We don't handle this case yet. There is no native // instruction for this but it can be synthesized. Type *DstTy = I->getType(); if (!isTypeLegal(DstTy, DstVT)) return false; if (DstVT != MVT::i32) return false; Value *Src = I->getOperand(0); Type *SrcTy = Src->getType(); if (!isTypeLegal(SrcTy, SrcVT)) return false; if (SrcVT != MVT::f32 && SrcVT != MVT::f64) return false; unsigned SrcReg = getRegForValue(Src); if (SrcReg == 0) return false; // Determine the opcode for the conversion, which takes place // entirely within FPRs. unsigned DestReg = createResultReg(&Mips::GPR32RegClass); unsigned TempReg = createResultReg(&Mips::FGR32RegClass); unsigned Opc = (SrcVT == MVT::f32) ? Mips::TRUNC_W_S : Mips::TRUNC_W_D32; // Generate the convert. emitInst(Opc, TempReg).addReg(SrcReg); emitInst(Mips::MFC1, DestReg).addReg(TempReg); updateValueMap(I, DestReg); return true; } bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl &OutVTs, unsigned &NumBytes) { CallingConv::ID CC = CLI.CallConv; SmallVector ArgLocs; CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); // Get a count of how many bytes are to be pushed on the stack. NumBytes = CCInfo.getNextStackOffset(); // This is the minimum argument area used for A0-A3. if (NumBytes < 16) NumBytes = 16; emitInst(Mips::ADJCALLSTACKDOWN).addImm(16).addImm(0); // Process the args. MVT firstMVT; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; const Value *ArgVal = CLI.OutVals[VA.getValNo()]; MVT ArgVT = OutVTs[VA.getValNo()]; if (i == 0) { firstMVT = ArgVT; if (ArgVT == MVT::f32) { VA.convertToReg(Mips::F12); } else if (ArgVT == MVT::f64) { VA.convertToReg(Mips::D6); } } else if (i == 1) { if ((firstMVT == MVT::f32) || (firstMVT == MVT::f64)) { if (ArgVT == MVT::f32) { VA.convertToReg(Mips::F14); } else if (ArgVT == MVT::f64) { VA.convertToReg(Mips::D7); } } } if (((ArgVT == MVT::i32) || (ArgVT == MVT::f32) || (ArgVT == MVT::i16) || (ArgVT == MVT::i8)) && VA.isMemLoc()) { switch (VA.getLocMemOffset()) { case 0: VA.convertToReg(Mips::A0); break; case 4: VA.convertToReg(Mips::A1); break; case 8: VA.convertToReg(Mips::A2); break; case 12: VA.convertToReg(Mips::A3); break; default: break; } } unsigned ArgReg = getRegForValue(ArgVal); if (!ArgReg) return false; // Handle arg promotion: SExt, ZExt, AExt. 
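// processCallArgs above reserves at least 16 bytes of outgoing argument space
// and retrofits the first four word-sized stack slots onto $a0-$a3 by their
// byte offset (0, 4, 8, 12).  The offset-to-register mapping and the minimum
// reservation on their own, as a standalone sketch:
#include <cstdint>
#include <optional>
#include <string>

std::optional<std::string> argRegForStackOffset(uint64_t OffsetBytes) {
  switch (OffsetBytes) {
  case 0:  return "$a0";
  case 4:  return "$a1";
  case 8:  return "$a2";
  case 12: return "$a3";
  default: return std::nullopt; // stays in memory past the register area
  }
}

// The reserved outgoing-argument area is never smaller than the four
// register-sized slots.
inline uint64_t outgoingArgBytes(uint64_t NextStackOffset) {
  return NextStackOffset < 16 ? 16 : NextStackOffset;
}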
switch (VA.getLocInfo()) { case CCValAssign::Full: break; case CCValAssign::AExt: case CCValAssign::SExt: { MVT DestVT = VA.getLocVT(); MVT SrcVT = ArgVT; ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); if (!ArgReg) return false; break; } case CCValAssign::ZExt: { MVT DestVT = VA.getLocVT(); MVT SrcVT = ArgVT; ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); if (!ArgReg) return false; break; } default: llvm_unreachable("Unknown arg promotion!"); } // Now copy/store arg to correct locations. if (VA.isRegLoc() && !VA.needsCustom()) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); CLI.OutRegs.push_back(VA.getLocReg()); } else if (VA.needsCustom()) { llvm_unreachable("Mips does not use custom args."); return false; } else { // // FIXME: This path will currently return false. It was copied // from the AArch64 port and should be essentially fine for Mips too. // The work to finish up this path will be done in a follow-on patch. // assert(VA.isMemLoc() && "Assuming store on stack."); // Don't emit stores for undef values. if (isa(ArgVal)) continue; // Need to store on the stack. // FIXME: This alignment is incorrect but this path is disabled // for now (will return false). We need to determine the right alignment // based on the normal alignment for the underlying machine type. // unsigned ArgSize = alignTo(ArgVT.getSizeInBits(), 4); unsigned BEAlign = 0; if (ArgSize < 8 && !Subtarget->isLittle()) BEAlign = 8 - ArgSize; Address Addr; Addr.setKind(Address::RegBase); Addr.setReg(Mips::SP); Addr.setOffset(VA.getLocMemOffset() + BEAlign); unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); (void)(MMO); // if (!emitStore(ArgVT, ArgReg, Addr, MMO)) return false; // can't store on the stack yet. } } return true; } bool MipsFastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes) { CallingConv::ID CC = CLI.CallConv; emitInst(Mips::ADJCALLSTACKUP).addImm(16).addImm(0); if (RetVT != MVT::isVoid) { SmallVector RVLocs; MipsCCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(CLI.Ins, RetCC_Mips, CLI.RetTy, CLI.Symbol ? CLI.Symbol->getName().data() : nullptr); // Only handle a single return value. if (RVLocs.size() != 1) return false; // Copy all of the result registers out of their specified physreg. MVT CopyVT = RVLocs[0].getValVT(); // Special handling for extended integers. if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16) CopyVT = MVT::i32; unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); if (!ResultReg) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(RVLocs[0].getLocReg()); CLI.InRegs.push_back(RVLocs[0].getLocReg()); CLI.ResultReg = ResultReg; CLI.NumResultRegs = 1; } return true; } bool MipsFastISel::fastLowerArguments() { LLVM_DEBUG(dbgs() << "fastLowerArguments\n"); if (!FuncInfo.CanLowerReturn) { LLVM_DEBUG(dbgs() << ".. gave up (!CanLowerReturn)\n"); return false; } const Function *F = FuncInfo.Fn; if (F->isVarArg()) { LLVM_DEBUG(dbgs() << ".. gave up (varargs)\n"); return false; } CallingConv::ID CC = F->getCallingConv(); if (CC != CallingConv::C) { LLVM_DEBUG(dbgs() << ".. 
gave up (calling convention is not C)\n"); return false; } std::array GPR32ArgRegs = {{Mips::A0, Mips::A1, Mips::A2, Mips::A3}}; std::array FGR32ArgRegs = {{Mips::F12, Mips::F14}}; std::array AFGR64ArgRegs = {{Mips::D6, Mips::D7}}; auto NextGPR32 = GPR32ArgRegs.begin(); auto NextFGR32 = FGR32ArgRegs.begin(); auto NextAFGR64 = AFGR64ArgRegs.begin(); struct AllocatedReg { const TargetRegisterClass *RC; unsigned Reg; AllocatedReg(const TargetRegisterClass *RC, unsigned Reg) : RC(RC), Reg(Reg) {} }; // Only handle simple cases. i.e. All arguments are directly mapped to // registers of the appropriate type. SmallVector Allocation; for (const auto &FormalArg : F->args()) { if (FormalArg.hasAttribute(Attribute::InReg) || FormalArg.hasAttribute(Attribute::StructRet) || FormalArg.hasAttribute(Attribute::ByVal)) { LLVM_DEBUG(dbgs() << ".. gave up (inreg, structret, byval)\n"); return false; } Type *ArgTy = FormalArg.getType(); if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) { LLVM_DEBUG(dbgs() << ".. gave up (struct, array, or vector)\n"); return false; } EVT ArgVT = TLI.getValueType(DL, ArgTy); LLVM_DEBUG(dbgs() << ".. " << FormalArg.getArgNo() << ": " << ArgVT.getEVTString() << "\n"); if (!ArgVT.isSimple()) { LLVM_DEBUG(dbgs() << ".. .. gave up (not a simple type)\n"); return false; } switch (ArgVT.getSimpleVT().SimpleTy) { case MVT::i1: case MVT::i8: case MVT::i16: if (!FormalArg.hasAttribute(Attribute::SExt) && !FormalArg.hasAttribute(Attribute::ZExt)) { // It must be any extend, this shouldn't happen for clang-generated IR // so just fall back on SelectionDAG. LLVM_DEBUG(dbgs() << ".. .. gave up (i8/i16 arg is not extended)\n"); return false; } if (NextGPR32 == GPR32ArgRegs.end()) { LLVM_DEBUG(dbgs() << ".. .. gave up (ran out of GPR32 arguments)\n"); return false; } LLVM_DEBUG(dbgs() << ".. .. GPR32(" << *NextGPR32 << ")\n"); Allocation.emplace_back(&Mips::GPR32RegClass, *NextGPR32++); // Allocating any GPR32 prohibits further use of floating point arguments. NextFGR32 = FGR32ArgRegs.end(); NextAFGR64 = AFGR64ArgRegs.end(); break; case MVT::i32: if (FormalArg.hasAttribute(Attribute::ZExt)) { // The O32 ABI does not permit a zero-extended i32. LLVM_DEBUG(dbgs() << ".. .. gave up (i32 arg is zero extended)\n"); return false; } if (NextGPR32 == GPR32ArgRegs.end()) { LLVM_DEBUG(dbgs() << ".. .. gave up (ran out of GPR32 arguments)\n"); return false; } LLVM_DEBUG(dbgs() << ".. .. GPR32(" << *NextGPR32 << ")\n"); Allocation.emplace_back(&Mips::GPR32RegClass, *NextGPR32++); // Allocating any GPR32 prohibits further use of floating point arguments. NextFGR32 = FGR32ArgRegs.end(); NextAFGR64 = AFGR64ArgRegs.end(); break; case MVT::f32: if (UnsupportedFPMode) { LLVM_DEBUG(dbgs() << ".. .. gave up (UnsupportedFPMode)\n"); return false; } if (NextFGR32 == FGR32ArgRegs.end()) { LLVM_DEBUG(dbgs() << ".. .. gave up (ran out of FGR32 arguments)\n"); return false; } LLVM_DEBUG(dbgs() << ".. .. FGR32(" << *NextFGR32 << ")\n"); Allocation.emplace_back(&Mips::FGR32RegClass, *NextFGR32++); // Allocating an FGR32 also allocates the super-register AFGR64, and // ABI rules require us to skip the corresponding GPR32. if (NextGPR32 != GPR32ArgRegs.end()) NextGPR32++; if (NextAFGR64 != AFGR64ArgRegs.end()) NextAFGR64++; break; case MVT::f64: if (UnsupportedFPMode) { LLVM_DEBUG(dbgs() << ".. .. gave up (UnsupportedFPMode)\n"); return false; } if (NextAFGR64 == AFGR64ArgRegs.end()) { LLVM_DEBUG(dbgs() << ".. .. gave up (ran out of AFGR64 arguments)\n"); return false; } LLVM_DEBUG(dbgs() << ".. .. 
AFGR64(" << *NextAFGR64 << ")\n"); Allocation.emplace_back(&Mips::AFGR64RegClass, *NextAFGR64++); // Allocating an FGR32 also allocates the super-register AFGR64, and // ABI rules require us to skip the corresponding GPR32 pair. if (NextGPR32 != GPR32ArgRegs.end()) NextGPR32++; if (NextGPR32 != GPR32ArgRegs.end()) NextGPR32++; if (NextFGR32 != FGR32ArgRegs.end()) NextFGR32++; break; default: LLVM_DEBUG(dbgs() << ".. .. gave up (unknown type)\n"); return false; } } for (const auto &FormalArg : F->args()) { unsigned ArgNo = FormalArg.getArgNo(); unsigned SrcReg = Allocation[ArgNo].Reg; unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, Allocation[ArgNo].RC); // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. // Without this, EmitLiveInCopies may eliminate the livein if its only // use is a bitcast (which isn't turned into an instruction). unsigned ResultReg = createResultReg(Allocation[ArgNo].RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) .addReg(DstReg, getKillRegState(true)); updateValueMap(&FormalArg, ResultReg); } // Calculate the size of the incoming arguments area. // We currently reject all the cases where this would be non-zero. unsigned IncomingArgSizeInBytes = 0; // Account for the reserved argument area on ABI's that have one (O32). // It seems strange to do this on the caller side but it's necessary in // SelectionDAG's implementation. IncomingArgSizeInBytes = std::min(getABI().GetCalleeAllocdArgSizeInBytes(CC), IncomingArgSizeInBytes); MF->getInfo()->setFormalArgInfo(IncomingArgSizeInBytes, false); return true; } bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) { CallingConv::ID CC = CLI.CallConv; bool IsTailCall = CLI.IsTailCall; bool IsVarArg = CLI.IsVarArg; const Value *Callee = CLI.Callee; MCSymbol *Symbol = CLI.Symbol; // Do not handle FastCC. if (CC == CallingConv::Fast) return false; // Allow SelectionDAG isel to handle tail calls. if (IsTailCall) return false; // Let SDISel handle vararg functions. if (IsVarArg) return false; // FIXME: Only handle *simple* calls for now. MVT RetVT; if (CLI.RetTy->isVoidTy()) RetVT = MVT::isVoid; else if (!isTypeSupported(CLI.RetTy, RetVT)) return false; for (auto Flag : CLI.OutFlags) if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal()) return false; // Set up the argument vectors. SmallVector OutVTs; OutVTs.reserve(CLI.OutVals.size()); for (auto *Val : CLI.OutVals) { MVT VT; if (!isTypeLegal(Val->getType(), VT) && !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) return false; // We don't handle vector parameters yet. if (VT.isVector() || VT.getSizeInBits() > 64) return false; OutVTs.push_back(VT); } Address Addr; if (!computeCallAddress(Callee, Addr)) return false; // Handle the arguments now that we've gotten them. unsigned NumBytes; if (!processCallArgs(CLI, OutVTs, NumBytes)) return false; if (!Addr.getGlobalValue()) return false; // Issue the call. unsigned DestAddress; if (Symbol) DestAddress = materializeExternalCallSym(Symbol); else DestAddress = materializeGV(Addr.getGlobalValue(), MVT::i32); emitInst(TargetOpcode::COPY, Mips::T9).addReg(DestAddress); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::JALR), Mips::RA).addReg(Mips::T9); // Add implicit physical register uses to the call. for (auto Reg : CLI.OutRegs) MIB.addReg(Reg, RegState::Implicit); // Add a register mask with the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). 
MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); CLI.Call = MIB; if (EmitJalrReloc && !Subtarget->inMips16Mode()) { // Attach callee address to the instruction, let asm printer emit // .reloc R_MIPS_JALR. if (Symbol) MIB.addSym(Symbol, MipsII::MO_JALR); else MIB.addSym(FuncInfo.MF->getContext().getOrCreateSymbol( Addr.getGlobalValue()->getName()), MipsII::MO_JALR); } // Finish off the call including any return values. return finishCall(CLI, RetVT, NumBytes); } bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { switch (II->getIntrinsicID()) { default: return false; case Intrinsic::bswap: { Type *RetTy = II->getCalledFunction()->getReturnType(); MVT VT; if (!isTypeSupported(RetTy, VT)) return false; unsigned SrcReg = getRegForValue(II->getOperand(0)); if (SrcReg == 0) return false; unsigned DestReg = createResultReg(&Mips::GPR32RegClass); if (DestReg == 0) return false; if (VT == MVT::i16) { if (Subtarget->hasMips32r2()) { emitInst(Mips::WSBH, DestReg).addReg(SrcReg); updateValueMap(II, DestReg); return true; } else { unsigned TempReg[3]; for (int i = 0; i < 3; i++) { TempReg[i] = createResultReg(&Mips::GPR32RegClass); if (TempReg[i] == 0) return false; } emitInst(Mips::SLL, TempReg[0]).addReg(SrcReg).addImm(8); emitInst(Mips::SRL, TempReg[1]).addReg(SrcReg).addImm(8); emitInst(Mips::OR, TempReg[2]).addReg(TempReg[0]).addReg(TempReg[1]); emitInst(Mips::ANDi, DestReg).addReg(TempReg[2]).addImm(0xFFFF); updateValueMap(II, DestReg); return true; } } else if (VT == MVT::i32) { if (Subtarget->hasMips32r2()) { unsigned TempReg = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::WSBH, TempReg).addReg(SrcReg); emitInst(Mips::ROTR, DestReg).addReg(TempReg).addImm(16); updateValueMap(II, DestReg); return true; } else { unsigned TempReg[8]; for (int i = 0; i < 8; i++) { TempReg[i] = createResultReg(&Mips::GPR32RegClass); if (TempReg[i] == 0) return false; } emitInst(Mips::SRL, TempReg[0]).addReg(SrcReg).addImm(8); emitInst(Mips::SRL, TempReg[1]).addReg(SrcReg).addImm(24); emitInst(Mips::ANDi, TempReg[2]).addReg(TempReg[0]).addImm(0xFF00); emitInst(Mips::OR, TempReg[3]).addReg(TempReg[1]).addReg(TempReg[2]); emitInst(Mips::ANDi, TempReg[4]).addReg(SrcReg).addImm(0xFF00); emitInst(Mips::SLL, TempReg[5]).addReg(TempReg[4]).addImm(8); emitInst(Mips::SLL, TempReg[6]).addReg(SrcReg).addImm(24); emitInst(Mips::OR, TempReg[7]).addReg(TempReg[3]).addReg(TempReg[5]); emitInst(Mips::OR, DestReg).addReg(TempReg[6]).addReg(TempReg[7]); updateValueMap(II, DestReg); return true; } } return false; } case Intrinsic::memcpy: case Intrinsic::memmove: { const auto *MTI = cast(II); // Don't handle volatile. if (MTI->isVolatile()) return false; if (!MTI->getLength()->getType()->isIntegerTy(32)) return false; const char *IntrMemName = isa(II) ? "memcpy" : "memmove"; return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1); } case Intrinsic::memset: { const MemSetInst *MSI = cast(II); // Don't handle volatile. if (MSI->isVolatile()) return false; if (!MSI->getLength()->getType()->isIntegerTy(32)) return false; return lowerCallTo(II, "memset", II->getNumArgOperands() - 1); } } return false; } bool MipsFastISel::selectRet(const Instruction *I) { const Function &F = *I->getParent()->getParent(); const ReturnInst *Ret = cast(I); LLVM_DEBUG(dbgs() << "selectRet\n"); if (!FuncInfo.CanLowerReturn) return false; // Build a list of return value registers. SmallVector RetRegs; if (Ret->getNumOperands() > 0) { CallingConv::ID CC = F.getCallingConv(); // Do not handle FastCC. 
if (CC == CallingConv::Fast) return false; SmallVector Outs; GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. SmallVector ValLocs; MipsCCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); CCAssignFn *RetCC = RetCC_Mips; CCInfo.AnalyzeReturn(Outs, RetCC); // Only handle a single return value for now. if (ValLocs.size() != 1) return false; CCValAssign &VA = ValLocs[0]; const Value *RV = Ret->getOperand(0); // Don't bother handling odd stuff for now. if ((VA.getLocInfo() != CCValAssign::Full) && (VA.getLocInfo() != CCValAssign::BCvt)) return false; // Only handle register returns for now. if (!VA.isRegLoc()) return false; unsigned Reg = getRegForValue(RV); if (Reg == 0) return false; unsigned SrcReg = Reg + VA.getValNo(); unsigned DestReg = VA.getLocReg(); // Avoid a cross-class copy. This is very unlikely. if (!MRI.getRegClass(SrcReg)->contains(DestReg)) return false; EVT RVEVT = TLI.getValueType(DL, RV->getType()); if (!RVEVT.isSimple()) return false; if (RVEVT.isVector()) return false; MVT RVVT = RVEVT.getSimpleVT(); if (RVVT == MVT::f128) return false; // Do not handle FGR64 returns for now. if (RVVT == MVT::f64 && UnsupportedFPMode) { LLVM_DEBUG(dbgs() << ".. .. gave up (UnsupportedFPMode\n"); return false; } MVT DestVT = VA.getValVT(); // Special handling for extended integers. if (RVVT != DestVT) { if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) return false; if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) { bool IsZExt = Outs[0].Flags.isZExt(); SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt); if (SrcReg == 0) return false; } } // Make the copy. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); // Add register to return instruction. RetRegs.push_back(VA.getLocReg()); } MachineInstrBuilder MIB = emitInst(Mips::RetRA); for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) MIB.addReg(RetRegs[i], RegState::Implicit); return true; } bool MipsFastISel::selectTrunc(const Instruction *I) { // The high bits for a type smaller than the register size are assumed to be // undefined. Value *Op = I->getOperand(0); EVT SrcVT, DestVT; SrcVT = TLI.getValueType(DL, Op->getType(), true); DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8) return false; if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1) return false; unsigned SrcReg = getRegForValue(Op); if (!SrcReg) return false; // Because the high bits are undefined, a truncate doesn't generate // any code. 
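  // For example, an i32 -> i8 trunc simply reuses the source register; a
  // consumer that needs the exact 8-bit value is expected to emit its own
  // zero- or sign-extension (see emitIntExt below).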
updateValueMap(I, SrcReg); return true; } bool MipsFastISel::selectIntExt(const Instruction *I) { Type *DestTy = I->getType(); Value *Src = I->getOperand(0); Type *SrcTy = Src->getType(); bool isZExt = isa(I); unsigned SrcReg = getRegForValue(Src); if (!SrcReg) return false; EVT SrcEVT, DestEVT; SrcEVT = TLI.getValueType(DL, SrcTy, true); DestEVT = TLI.getValueType(DL, DestTy, true); if (!SrcEVT.isSimple()) return false; if (!DestEVT.isSimple()) return false; MVT SrcVT = SrcEVT.getSimpleVT(); MVT DestVT = DestEVT.getSimpleVT(); unsigned ResultReg = createResultReg(&Mips::GPR32RegClass); if (!emitIntExt(SrcVT, SrcReg, DestVT, ResultReg, isZExt)) return false; updateValueMap(I, ResultReg); return true; } bool MipsFastISel::emitIntSExt32r1(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg) { unsigned ShiftAmt; switch (SrcVT.SimpleTy) { default: return false; case MVT::i8: ShiftAmt = 24; break; case MVT::i16: ShiftAmt = 16; break; } unsigned TempReg = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::SLL, TempReg).addReg(SrcReg).addImm(ShiftAmt); emitInst(Mips::SRA, DestReg).addReg(TempReg).addImm(ShiftAmt); return true; } bool MipsFastISel::emitIntSExt32r2(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg) { switch (SrcVT.SimpleTy) { default: return false; case MVT::i8: emitInst(Mips::SEB, DestReg).addReg(SrcReg); break; case MVT::i16: emitInst(Mips::SEH, DestReg).addReg(SrcReg); break; } return true; } bool MipsFastISel::emitIntSExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg) { if ((DestVT != MVT::i32) && (DestVT != MVT::i16)) return false; if (Subtarget->hasMips32r2()) return emitIntSExt32r2(SrcVT, SrcReg, DestVT, DestReg); return emitIntSExt32r1(SrcVT, SrcReg, DestVT, DestReg); } bool MipsFastISel::emitIntZExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg) { int64_t Imm; switch (SrcVT.SimpleTy) { default: return false; case MVT::i1: Imm = 1; break; case MVT::i8: Imm = 0xff; break; case MVT::i16: Imm = 0xffff; break; } emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(Imm); return true; } bool MipsFastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg, bool IsZExt) { // FastISel does not have plumbing to deal with extensions where the SrcVT or // DestVT are odd things, so test to make sure that they are both types we can // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise // bail out to SelectionDAG. if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && (DestVT != MVT::i32)) || ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && (SrcVT != MVT::i16))) return false; if (IsZExt) return emitIntZExt(SrcVT, SrcReg, DestVT, DestReg); return emitIntSExt(SrcVT, SrcReg, DestVT, DestReg); } unsigned MipsFastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt) { unsigned DestReg = createResultReg(&Mips::GPR32RegClass); bool Success = emitIntExt(SrcVT, SrcReg, DestVT, DestReg, isZExt); return Success ? 
DestReg : 0; } bool MipsFastISel::selectDivRem(const Instruction *I, unsigned ISDOpcode) { EVT DestEVT = TLI.getValueType(DL, I->getType(), true); if (!DestEVT.isSimple()) return false; MVT DestVT = DestEVT.getSimpleVT(); if (DestVT != MVT::i32) return false; unsigned DivOpc; switch (ISDOpcode) { default: return false; case ISD::SDIV: case ISD::SREM: DivOpc = Mips::SDIV; break; case ISD::UDIV: case ISD::UREM: DivOpc = Mips::UDIV; break; } unsigned Src0Reg = getRegForValue(I->getOperand(0)); unsigned Src1Reg = getRegForValue(I->getOperand(1)); if (!Src0Reg || !Src1Reg) return false; emitInst(DivOpc).addReg(Src0Reg).addReg(Src1Reg); emitInst(Mips::TEQ).addReg(Src1Reg).addReg(Mips::ZERO).addImm(7); unsigned ResultReg = createResultReg(&Mips::GPR32RegClass); if (!ResultReg) return false; unsigned MFOpc = (ISDOpcode == ISD::SREM || ISDOpcode == ISD::UREM) ? Mips::MFHI : Mips::MFLO; emitInst(MFOpc, ResultReg); updateValueMap(I, ResultReg); return true; } bool MipsFastISel::selectShift(const Instruction *I) { MVT RetVT; if (!isTypeSupported(I->getType(), RetVT)) return false; unsigned ResultReg = createResultReg(&Mips::GPR32RegClass); if (!ResultReg) return false; unsigned Opcode = I->getOpcode(); const Value *Op0 = I->getOperand(0); unsigned Op0Reg = getRegForValue(Op0); if (!Op0Reg) return false; // If AShr or LShr, then we need to make sure the operand0 is sign extended. if (Opcode == Instruction::AShr || Opcode == Instruction::LShr) { unsigned TempReg = createResultReg(&Mips::GPR32RegClass); if (!TempReg) return false; MVT Op0MVT = TLI.getValueType(DL, Op0->getType(), true).getSimpleVT(); bool IsZExt = Opcode == Instruction::LShr; if (!emitIntExt(Op0MVT, Op0Reg, MVT::i32, TempReg, IsZExt)) return false; Op0Reg = TempReg; } if (const auto *C = dyn_cast(I->getOperand(1))) { uint64_t ShiftVal = C->getZExtValue(); switch (Opcode) { default: llvm_unreachable("Unexpected instruction."); case Instruction::Shl: Opcode = Mips::SLL; break; case Instruction::AShr: Opcode = Mips::SRA; break; case Instruction::LShr: Opcode = Mips::SRL; break; } emitInst(Opcode, ResultReg).addReg(Op0Reg).addImm(ShiftVal); updateValueMap(I, ResultReg); return true; } unsigned Op1Reg = getRegForValue(I->getOperand(1)); if (!Op1Reg) return false; switch (Opcode) { default: llvm_unreachable("Unexpected instruction."); case Instruction::Shl: Opcode = Mips::SLLV; break; case Instruction::AShr: Opcode = Mips::SRAV; break; case Instruction::LShr: Opcode = Mips::SRLV; break; } emitInst(Opcode, ResultReg).addReg(Op0Reg).addReg(Op1Reg); updateValueMap(I, ResultReg); return true; } bool MipsFastISel::fastSelectInstruction(const Instruction *I) { switch (I->getOpcode()) { default: break; case Instruction::Load: return selectLoad(I); case Instruction::Store: return selectStore(I); case Instruction::SDiv: if (!selectBinaryOp(I, ISD::SDIV)) return selectDivRem(I, ISD::SDIV); return true; case Instruction::UDiv: if (!selectBinaryOp(I, ISD::UDIV)) return selectDivRem(I, ISD::UDIV); return true; case Instruction::SRem: if (!selectBinaryOp(I, ISD::SREM)) return selectDivRem(I, ISD::SREM); return true; case Instruction::URem: if (!selectBinaryOp(I, ISD::UREM)) return selectDivRem(I, ISD::UREM); return true; case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: return selectShift(I); case Instruction::And: case Instruction::Or: case Instruction::Xor: return selectLogicalOp(I); case Instruction::Br: return selectBranch(I); case Instruction::Ret: return selectRet(I); case Instruction::Trunc: return selectTrunc(I); case 
Instruction::ZExt: case Instruction::SExt: return selectIntExt(I); case Instruction::FPTrunc: return selectFPTrunc(I); case Instruction::FPExt: return selectFPExt(I); case Instruction::FPToSI: return selectFPToInt(I, /*isSigned*/ true); case Instruction::FPToUI: return selectFPToInt(I, /*isSigned*/ false); case Instruction::ICmp: case Instruction::FCmp: return selectCmp(I); case Instruction::Select: return selectSelect(I); } return false; } unsigned MipsFastISel::getRegEnsuringSimpleIntegerWidening(const Value *V, bool IsUnsigned) { unsigned VReg = getRegForValue(V); if (VReg == 0) return 0; MVT VMVT = TLI.getValueType(DL, V->getType(), true).getSimpleVT(); if (VMVT == MVT::i1) return 0; if ((VMVT == MVT::i8) || (VMVT == MVT::i16)) { unsigned TempReg = createResultReg(&Mips::GPR32RegClass); if (!emitIntExt(VMVT, VReg, MVT::i32, TempReg, IsUnsigned)) return 0; VReg = TempReg; } return VReg; } void MipsFastISel::simplifyAddress(Address &Addr) { if (!isInt<16>(Addr.getOffset())) { unsigned TempReg = materialize32BitInt(Addr.getOffset(), &Mips::GPR32RegClass); unsigned DestReg = createResultReg(&Mips::GPR32RegClass); emitInst(Mips::ADDu, DestReg).addReg(TempReg).addReg(Addr.getReg()); Addr.setReg(DestReg); Addr.setOffset(0); } } unsigned MipsFastISel::fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill) { // We treat the MUL instruction in a special way because it clobbers // the HI0 & LO0 registers. The TableGen definition of this instruction can // mark these registers only as implicitly defined. As a result, the // register allocator runs out of registers when this instruction is // followed by another instruction that defines the same registers too. // We can fix this by explicitly marking those registers as dead. if (MachineInstOpcode == Mips::MUL) { unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, getKillRegState(Op0IsKill)) .addReg(Op1, getKillRegState(Op1IsKill)) .addReg(Mips::HI0, RegState::ImplicitDefine | RegState::Dead) .addReg(Mips::LO0, RegState::ImplicitDefine | RegState::Dead); return ResultReg; } return FastISel::fastEmitInst_rr(MachineInstOpcode, RC, Op0, Op0IsKill, Op1, Op1IsKill); } namespace llvm { FastISel *Mips::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) { return new MipsFastISel(funcInfo, libInfo); } } // end namespace llvm Index: vendor/llvm/dist-release_80/lib/Target/Mips/MipsSEInstrInfo.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/Target/Mips/MipsSEInstrInfo.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/Mips/MipsSEInstrInfo.cpp (revision 348932) @@ -1,904 +1,907 @@ //===-- MipsSEInstrInfo.cpp - Mips32/64 Instruction Information -----------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file contains the Mips32/64 implementation of the TargetInstrInfo class. 
// //===----------------------------------------------------------------------===// #include "MipsSEInstrInfo.h" #include "InstPrinter/MipsInstPrinter.h" #include "MipsAnalyzeImmediate.h" #include "MipsMachineFunction.h" #include "MipsTargetMachine.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; static unsigned getUnconditionalBranch(const MipsSubtarget &STI) { if (STI.inMicroMipsMode()) return STI.isPositionIndependent() ? Mips::B_MM : Mips::J_MM; return STI.isPositionIndependent() ? Mips::B : Mips::J; } MipsSEInstrInfo::MipsSEInstrInfo(const MipsSubtarget &STI) : MipsInstrInfo(STI, getUnconditionalBranch(STI)), RI() {} const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const { return RI; } /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than loading from the stack slot. unsigned MipsSEInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const { unsigned Opc = MI.getOpcode(); if ((Opc == Mips::LW) || (Opc == Mips::LD) || (Opc == Mips::LWC1) || (Opc == Mips::LDC1) || (Opc == Mips::LDC164)) { if ((MI.getOperand(1).isFI()) && // is a stack slot (MI.getOperand(2).isImm()) && // the imm is zero (isZeroImm(MI.getOperand(2)))) { FrameIndex = MI.getOperand(1).getIndex(); return MI.getOperand(0).getReg(); } } return 0; } /// isStoreToStackSlot - If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than storing to the stack slot. unsigned MipsSEInstrInfo::isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const { unsigned Opc = MI.getOpcode(); if ((Opc == Mips::SW) || (Opc == Mips::SD) || (Opc == Mips::SWC1) || (Opc == Mips::SDC1) || (Opc == Mips::SDC164)) { if ((MI.getOperand(1).isFI()) && // is a stack slot (MI.getOperand(2).isImm()) && // the imm is zero (isZeroImm(MI.getOperand(2)))) { FrameIndex = MI.getOperand(1).getIndex(); return MI.getOperand(0).getReg(); } } return 0; } void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { unsigned Opc = 0, ZeroReg = 0; bool isMicroMips = Subtarget.inMicroMipsMode(); if (Mips::GPR32RegClass.contains(DestReg)) { // Copy to CPU Reg. if (Mips::GPR32RegClass.contains(SrcReg)) { if (isMicroMips) Opc = Mips::MOVE16_MM; else Opc = Mips::OR, ZeroReg = Mips::ZERO; } else if (Mips::CCRRegClass.contains(SrcReg)) Opc = Mips::CFC1; else if (Mips::FGR32RegClass.contains(SrcReg)) Opc = Mips::MFC1; else if (Mips::HI32RegClass.contains(SrcReg)) { Opc = isMicroMips ? Mips::MFHI16_MM : Mips::MFHI; SrcReg = 0; } else if (Mips::LO32RegClass.contains(SrcReg)) { Opc = isMicroMips ? 
Mips::MFLO16_MM : Mips::MFLO; SrcReg = 0; } else if (Mips::HI32DSPRegClass.contains(SrcReg)) Opc = Mips::MFHI_DSP; else if (Mips::LO32DSPRegClass.contains(SrcReg)) Opc = Mips::MFLO_DSP; else if (Mips::DSPCCRegClass.contains(SrcReg)) { BuildMI(MBB, I, DL, get(Mips::RDDSP), DestReg).addImm(1 << 4) .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); return; } else if (Mips::MSACtrlRegClass.contains(SrcReg)) Opc = Mips::CFCMSA; } else if (Mips::GPR32RegClass.contains(SrcReg)) { // Copy from CPU Reg. if (Mips::CCRRegClass.contains(DestReg)) Opc = Mips::CTC1; else if (Mips::FGR32RegClass.contains(DestReg)) Opc = Mips::MTC1; else if (Mips::HI32RegClass.contains(DestReg)) Opc = Mips::MTHI, DestReg = 0; else if (Mips::LO32RegClass.contains(DestReg)) Opc = Mips::MTLO, DestReg = 0; else if (Mips::HI32DSPRegClass.contains(DestReg)) Opc = Mips::MTHI_DSP; else if (Mips::LO32DSPRegClass.contains(DestReg)) Opc = Mips::MTLO_DSP; else if (Mips::DSPCCRegClass.contains(DestReg)) { BuildMI(MBB, I, DL, get(Mips::WRDSP)) .addReg(SrcReg, getKillRegState(KillSrc)).addImm(1 << 4) .addReg(DestReg, RegState::ImplicitDefine); return; } else if (Mips::MSACtrlRegClass.contains(DestReg)) { BuildMI(MBB, I, DL, get(Mips::CTCMSA)) .addReg(DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; } } else if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) Opc = Mips::FMOV_S; else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) Opc = Mips::FMOV_D32; else if (Mips::FGR64RegClass.contains(DestReg, SrcReg)) Opc = Mips::FMOV_D64; else if (Mips::GPR64RegClass.contains(DestReg)) { // Copy to CPU64 Reg. if (Mips::GPR64RegClass.contains(SrcReg)) Opc = Mips::OR64, ZeroReg = Mips::ZERO_64; else if (Mips::HI64RegClass.contains(SrcReg)) Opc = Mips::MFHI64, SrcReg = 0; else if (Mips::LO64RegClass.contains(SrcReg)) Opc = Mips::MFLO64, SrcReg = 0; else if (Mips::FGR64RegClass.contains(SrcReg)) Opc = Mips::DMFC1; } else if (Mips::GPR64RegClass.contains(SrcReg)) { // Copy from CPU64 Reg. if (Mips::HI64RegClass.contains(DestReg)) Opc = Mips::MTHI64, DestReg = 0; else if (Mips::LO64RegClass.contains(DestReg)) Opc = Mips::MTLO64, DestReg = 0; else if (Mips::FGR64RegClass.contains(DestReg)) Opc = Mips::DMTC1; } else if (Mips::MSA128BRegClass.contains(DestReg)) { // Copy to MSA reg if (Mips::MSA128BRegClass.contains(SrcReg)) Opc = Mips::MOVE_V; } assert(Opc && "Cannot copy registers"); MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc)); if (DestReg) MIB.addReg(DestReg, RegState::Define); if (SrcReg) MIB.addReg(SrcReg, getKillRegState(KillSrc)); if (ZeroReg) MIB.addReg(ZeroReg); } static bool isORCopyInst(const MachineInstr &MI) { switch (MI.getOpcode()) { default: break; case Mips::OR_MM: case Mips::OR: if (MI.getOperand(2).getReg() == Mips::ZERO) return true; break; case Mips::OR64: if (MI.getOperand(2).getReg() == Mips::ZERO_64) return true; break; } return false; } /// If @MI is WRDSP/RRDSP instruction return true with @isWrite set to true /// if it is WRDSP instruction. static bool isReadOrWriteToDSPReg(const MachineInstr &MI, bool &isWrite) { switch (MI.getOpcode()) { default: return false; case Mips::WRDSP: case Mips::WRDSP_MM: isWrite = true; break; case Mips::RDDSP: case Mips::RDDSP_MM: isWrite = false; break; } return true; } /// We check for the common case of 'or', as it's MIPS' preferred instruction /// for GPRs but we have to check the operands to ensure that is the case. /// Other move instructions for MIPS are directly identifiable. 
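/// For example, "or $4, $5, $zero" is recognised as a copy of $5 into $4,
/// matching the OR-with-$zero form that copyPhysReg emits for GPR moves.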
bool MipsSEInstrInfo::isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Src, const MachineOperand *&Dest) const { bool isDSPControlWrite = false; // Condition is made to match the creation of WRDSP/RDDSP copy instruction // from copyPhysReg function. if (isReadOrWriteToDSPReg(MI, isDSPControlWrite)) { if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != (1<<4)) return false; else if (isDSPControlWrite) { Src = &MI.getOperand(0); Dest = &MI.getOperand(2); } else { Dest = &MI.getOperand(0); Src = &MI.getOperand(2); } return true; } else if (MI.isMoveReg() || isORCopyInst(MI)) { Dest = &MI.getOperand(0); Src = &MI.getOperand(1); return true; } return false; } void MipsSEInstrInfo:: storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, int64_t Offset) const { DebugLoc DL; MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); unsigned Opc = 0; if (Mips::GPR32RegClass.hasSubClassEq(RC)) Opc = Mips::SW; else if (Mips::GPR64RegClass.hasSubClassEq(RC)) Opc = Mips::SD; else if (Mips::ACC64RegClass.hasSubClassEq(RC)) Opc = Mips::STORE_ACC64; else if (Mips::ACC64DSPRegClass.hasSubClassEq(RC)) Opc = Mips::STORE_ACC64DSP; else if (Mips::ACC128RegClass.hasSubClassEq(RC)) Opc = Mips::STORE_ACC128; else if (Mips::DSPCCRegClass.hasSubClassEq(RC)) Opc = Mips::STORE_CCOND_DSP; else if (Mips::FGR32RegClass.hasSubClassEq(RC)) Opc = Mips::SWC1; else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) Opc = Mips::SDC1; else if (Mips::FGR64RegClass.hasSubClassEq(RC)) Opc = Mips::SDC164; else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8)) Opc = Mips::ST_B; else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16) || TRI->isTypeLegalForClass(*RC, MVT::v8f16)) Opc = Mips::ST_H; else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) || TRI->isTypeLegalForClass(*RC, MVT::v4f32)) Opc = Mips::ST_W; else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) || TRI->isTypeLegalForClass(*RC, MVT::v2f64)) Opc = Mips::ST_D; else if (Mips::LO32RegClass.hasSubClassEq(RC)) Opc = Mips::SW; else if (Mips::LO64RegClass.hasSubClassEq(RC)) Opc = Mips::SD; else if (Mips::HI32RegClass.hasSubClassEq(RC)) Opc = Mips::SW; else if (Mips::HI64RegClass.hasSubClassEq(RC)) Opc = Mips::SD; else if (Mips::DSPRRegClass.hasSubClassEq(RC)) Opc = Mips::SWDSP; // Hi, Lo are normally caller save but they are callee save // for interrupt handling. 
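  // In an "interrupt" function the spill below therefore goes through $k0
  // first, roughly:  mfhi $k0 ; sw $k0, <frame slot>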
const Function &Func = MBB.getParent()->getFunction(); if (Func.hasFnAttribute("interrupt")) { if (Mips::HI32RegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(Mips::MFHI), Mips::K0); SrcReg = Mips::K0; } else if (Mips::HI64RegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(Mips::MFHI64), Mips::K0_64); SrcReg = Mips::K0_64; } else if (Mips::LO32RegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(Mips::MFLO), Mips::K0); SrcReg = Mips::K0; } else if (Mips::LO64RegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(Mips::MFLO64), Mips::K0_64); SrcReg = Mips::K0_64; } } assert(Opc && "Register class not handled!"); BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(Offset).addMemOperand(MMO); } void MipsSEInstrInfo:: loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, int64_t Offset) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); unsigned Opc = 0; const Function &Func = MBB.getParent()->getFunction(); bool ReqIndirectLoad = Func.hasFnAttribute("interrupt") && (DestReg == Mips::LO0 || DestReg == Mips::LO0_64 || DestReg == Mips::HI0 || DestReg == Mips::HI0_64); if (Mips::GPR32RegClass.hasSubClassEq(RC)) Opc = Mips::LW; else if (Mips::GPR64RegClass.hasSubClassEq(RC)) Opc = Mips::LD; else if (Mips::ACC64RegClass.hasSubClassEq(RC)) Opc = Mips::LOAD_ACC64; else if (Mips::ACC64DSPRegClass.hasSubClassEq(RC)) Opc = Mips::LOAD_ACC64DSP; else if (Mips::ACC128RegClass.hasSubClassEq(RC)) Opc = Mips::LOAD_ACC128; else if (Mips::DSPCCRegClass.hasSubClassEq(RC)) Opc = Mips::LOAD_CCOND_DSP; else if (Mips::FGR32RegClass.hasSubClassEq(RC)) Opc = Mips::LWC1; else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) Opc = Mips::LDC1; else if (Mips::FGR64RegClass.hasSubClassEq(RC)) Opc = Mips::LDC164; else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8)) Opc = Mips::LD_B; else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16) || TRI->isTypeLegalForClass(*RC, MVT::v8f16)) Opc = Mips::LD_H; else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) || TRI->isTypeLegalForClass(*RC, MVT::v4f32)) Opc = Mips::LD_W; else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) || TRI->isTypeLegalForClass(*RC, MVT::v2f64)) Opc = Mips::LD_D; else if (Mips::HI32RegClass.hasSubClassEq(RC)) Opc = Mips::LW; else if (Mips::HI64RegClass.hasSubClassEq(RC)) Opc = Mips::LD; else if (Mips::LO32RegClass.hasSubClassEq(RC)) Opc = Mips::LW; else if (Mips::LO64RegClass.hasSubClassEq(RC)) Opc = Mips::LD; else if (Mips::DSPRRegClass.hasSubClassEq(RC)) Opc = Mips::LWDSP; assert(Opc && "Register class not handled!"); if (!ReqIndirectLoad) BuildMI(MBB, I, DL, get(Opc), DestReg) .addFrameIndex(FI) .addImm(Offset) .addMemOperand(MMO); else { // Load HI/LO through K0. Notably the DestReg is encoded into the // instruction itself. 
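    // The reload below is therefore a short two-instruction sequence,
    // roughly:
    //   lw   $k0, <frame slot>
    //   mtlo $k0        (or mthi / mtlo64 / mthi64, depending on DestReg)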
unsigned Reg = Mips::K0; unsigned LdOp = Mips::MTLO; if (DestReg == Mips::HI0) LdOp = Mips::MTHI; if (Subtarget.getABI().ArePtrs64bit()) { Reg = Mips::K0_64; if (DestReg == Mips::HI0_64) LdOp = Mips::MTHI64; else LdOp = Mips::MTLO64; } BuildMI(MBB, I, DL, get(Opc), Reg) .addFrameIndex(FI) .addImm(Offset) .addMemOperand(MMO); BuildMI(MBB, I, DL, get(LdOp)).addReg(Reg); } } bool MipsSEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MachineBasicBlock &MBB = *MI.getParent(); bool isMicroMips = Subtarget.inMicroMipsMode(); unsigned Opc; switch (MI.getDesc().getOpcode()) { default: return false; case Mips::RetRA: expandRetRA(MBB, MI); break; case Mips::ERet: expandERet(MBB, MI); break; case Mips::PseudoMFHI: expandPseudoMFHiLo(MBB, MI, Mips::MFHI); break; case Mips::PseudoMFHI_MM: expandPseudoMFHiLo(MBB, MI, Mips::MFHI16_MM); break; case Mips::PseudoMFLO: expandPseudoMFHiLo(MBB, MI, Mips::MFLO); break; case Mips::PseudoMFLO_MM: expandPseudoMFHiLo(MBB, MI, Mips::MFLO16_MM); break; case Mips::PseudoMFHI64: expandPseudoMFHiLo(MBB, MI, Mips::MFHI64); break; case Mips::PseudoMFLO64: expandPseudoMFHiLo(MBB, MI, Mips::MFLO64); break; case Mips::PseudoMTLOHI: expandPseudoMTLoHi(MBB, MI, Mips::MTLO, Mips::MTHI, false); break; case Mips::PseudoMTLOHI64: expandPseudoMTLoHi(MBB, MI, Mips::MTLO64, Mips::MTHI64, false); break; case Mips::PseudoMTLOHI_DSP: expandPseudoMTLoHi(MBB, MI, Mips::MTLO_DSP, Mips::MTHI_DSP, true); break; + case Mips::PseudoMTLOHI_MM: + expandPseudoMTLoHi(MBB, MI, Mips::MTLO_MM, Mips::MTHI_MM, false); + break; case Mips::PseudoCVT_S_W: expandCvtFPInt(MBB, MI, Mips::CVT_S_W, Mips::MTC1, false); break; case Mips::PseudoCVT_D32_W: Opc = isMicroMips ? Mips::CVT_D32_W_MM : Mips::CVT_D32_W; expandCvtFPInt(MBB, MI, Opc, Mips::MTC1, false); break; case Mips::PseudoCVT_S_L: expandCvtFPInt(MBB, MI, Mips::CVT_S_L, Mips::DMTC1, true); break; case Mips::PseudoCVT_D64_W: Opc = isMicroMips ? Mips::CVT_D64_W_MM : Mips::CVT_D64_W; expandCvtFPInt(MBB, MI, Opc, Mips::MTC1, true); break; case Mips::PseudoCVT_D64_L: expandCvtFPInt(MBB, MI, Mips::CVT_D64_L, Mips::DMTC1, true); break; case Mips::BuildPairF64: expandBuildPairF64(MBB, MI, isMicroMips, false); break; case Mips::BuildPairF64_64: expandBuildPairF64(MBB, MI, isMicroMips, true); break; case Mips::ExtractElementF64: expandExtractElementF64(MBB, MI, isMicroMips, false); break; case Mips::ExtractElementF64_64: expandExtractElementF64(MBB, MI, isMicroMips, true); break; case Mips::MIPSeh_return32: case Mips::MIPSeh_return64: expandEhReturn(MBB, MI); break; } MBB.erase(MI); return true; } /// getOppositeBranchOpc - Return the inverse of the specified /// opcode, e.g. turning BEQ to BNE. 
unsigned MipsSEInstrInfo::getOppositeBranchOpc(unsigned Opc) const { switch (Opc) { default: llvm_unreachable("Illegal opcode!"); case Mips::BEQ: return Mips::BNE; case Mips::BEQ_MM: return Mips::BNE_MM; case Mips::BNE: return Mips::BEQ; case Mips::BNE_MM: return Mips::BEQ_MM; case Mips::BGTZ: return Mips::BLEZ; case Mips::BGEZ: return Mips::BLTZ; case Mips::BLTZ: return Mips::BGEZ; case Mips::BLEZ: return Mips::BGTZ; case Mips::BGTZ_MM: return Mips::BLEZ_MM; case Mips::BGEZ_MM: return Mips::BLTZ_MM; case Mips::BLTZ_MM: return Mips::BGEZ_MM; case Mips::BLEZ_MM: return Mips::BGTZ_MM; case Mips::BEQ64: return Mips::BNE64; case Mips::BNE64: return Mips::BEQ64; case Mips::BGTZ64: return Mips::BLEZ64; case Mips::BGEZ64: return Mips::BLTZ64; case Mips::BLTZ64: return Mips::BGEZ64; case Mips::BLEZ64: return Mips::BGTZ64; case Mips::BC1T: return Mips::BC1F; case Mips::BC1F: return Mips::BC1T; case Mips::BC1T_MM: return Mips::BC1F_MM; case Mips::BC1F_MM: return Mips::BC1T_MM; case Mips::BEQZ16_MM: return Mips::BNEZ16_MM; case Mips::BNEZ16_MM: return Mips::BEQZ16_MM; case Mips::BEQZC_MM: return Mips::BNEZC_MM; case Mips::BNEZC_MM: return Mips::BEQZC_MM; case Mips::BEQZC: return Mips::BNEZC; case Mips::BNEZC: return Mips::BEQZC; case Mips::BLEZC: return Mips::BGTZC; case Mips::BGEZC: return Mips::BLTZC; case Mips::BGEC: return Mips::BLTC; case Mips::BGTZC: return Mips::BLEZC; case Mips::BLTZC: return Mips::BGEZC; case Mips::BLTC: return Mips::BGEC; case Mips::BGEUC: return Mips::BLTUC; case Mips::BLTUC: return Mips::BGEUC; case Mips::BEQC: return Mips::BNEC; case Mips::BNEC: return Mips::BEQC; case Mips::BC1EQZ: return Mips::BC1NEZ; case Mips::BC1NEZ: return Mips::BC1EQZ; case Mips::BEQZC_MMR6: return Mips::BNEZC_MMR6; case Mips::BNEZC_MMR6: return Mips::BEQZC_MMR6; case Mips::BLEZC_MMR6: return Mips::BGTZC_MMR6; case Mips::BGEZC_MMR6: return Mips::BLTZC_MMR6; case Mips::BGEC_MMR6: return Mips::BLTC_MMR6; case Mips::BGTZC_MMR6: return Mips::BLEZC_MMR6; case Mips::BLTZC_MMR6: return Mips::BGEZC_MMR6; case Mips::BLTC_MMR6: return Mips::BGEC_MMR6; case Mips::BGEUC_MMR6: return Mips::BLTUC_MMR6; case Mips::BLTUC_MMR6: return Mips::BGEUC_MMR6; case Mips::BEQC_MMR6: return Mips::BNEC_MMR6; case Mips::BNEC_MMR6: return Mips::BEQC_MMR6; case Mips::BC1EQZC_MMR6: return Mips::BC1NEZC_MMR6; case Mips::BC1NEZC_MMR6: return Mips::BC1EQZC_MMR6; case Mips::BEQZC64: return Mips::BNEZC64; case Mips::BNEZC64: return Mips::BEQZC64; case Mips::BEQC64: return Mips::BNEC64; case Mips::BNEC64: return Mips::BEQC64; case Mips::BGEC64: return Mips::BLTC64; case Mips::BGEUC64: return Mips::BLTUC64; case Mips::BLTC64: return Mips::BGEC64; case Mips::BLTUC64: return Mips::BGEUC64; case Mips::BGTZC64: return Mips::BLEZC64; case Mips::BGEZC64: return Mips::BLTZC64; case Mips::BLTZC64: return Mips::BGEZC64; case Mips::BLEZC64: return Mips::BGTZC64; case Mips::BBIT0: return Mips::BBIT1; case Mips::BBIT1: return Mips::BBIT0; case Mips::BBIT032: return Mips::BBIT132; case Mips::BBIT132: return Mips::BBIT032; case Mips::BZ_B: return Mips::BNZ_B; case Mips::BZ_H: return Mips::BNZ_H; case Mips::BZ_W: return Mips::BNZ_W; case Mips::BZ_D: return Mips::BNZ_D; case Mips::BZ_V: return Mips::BNZ_V; case Mips::BNZ_B: return Mips::BZ_B; case Mips::BNZ_H: return Mips::BZ_H; case Mips::BNZ_W: return Mips::BZ_W; case Mips::BNZ_D: return Mips::BZ_D; case Mips::BNZ_V: return Mips::BZ_V; } } /// Adjust SP by Amount bytes. 
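/// Amounts that fit in a signed 16-bit immediate use a single addiu/daddiu;
/// larger amounts are first materialized with loadImmediate and then added to
/// or subtracted from the stack pointer.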
void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I) const {
  MipsABIInfo ABI = Subtarget.getABI();
  DebugLoc DL;
  unsigned ADDiu = ABI.GetPtrAddiuOp();

  if (Amount == 0)
    return;

  if (isInt<16>(Amount)) {
    // addi sp, sp, amount
    BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount);
  } else {
    // For numbers which are not 16bit integers we synthesize Amount inline
    // then add or subtract it from sp.
    unsigned Opc = ABI.GetPtrAdduOp();
    if (Amount < 0) {
      Opc = ABI.GetPtrSubuOp();
      Amount = -Amount;
    }
    unsigned Reg = loadImmediate(Amount, MBB, I, DL, nullptr);
    BuildMI(MBB, I, DL, get(Opc), SP).addReg(SP).addReg(Reg, RegState::Kill);
  }
}

/// This function generates the sequence of instructions needed to get the
/// result of adding register REG and immediate IMM.
unsigned MipsSEInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator II,
                                        const DebugLoc &DL,
                                        unsigned *NewImm) const {
  MipsAnalyzeImmediate AnalyzeImm;
  const MipsSubtarget &STI = Subtarget;
  MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
  unsigned Size = STI.isABI_N64() ? 64 : 32;
  unsigned LUi = STI.isABI_N64() ? Mips::LUi64 : Mips::LUi;
  unsigned ZEROReg = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
  const TargetRegisterClass *RC = STI.isABI_N64() ?
    &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  bool LastInstrIsADDiu = NewImm;

  const MipsAnalyzeImmediate::InstSeq &Seq =
    AnalyzeImm.Analyze(Imm, Size, LastInstrIsADDiu);
  MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();

  assert(Seq.size() && (!LastInstrIsADDiu || (Seq.size() > 1)));

  // The first instruction can be a LUi, which is different from other
  // instructions (ADDiu, ORI and SLL) in that it does not have a register
  // operand.
  unsigned Reg = RegInfo.createVirtualRegister(RC);

  if (Inst->Opc == LUi)
    BuildMI(MBB, II, DL, get(LUi), Reg).addImm(SignExtend64<16>(Inst->ImmOpnd));
  else
    BuildMI(MBB, II, DL, get(Inst->Opc), Reg).addReg(ZEROReg)
        .addImm(SignExtend64<16>(Inst->ImmOpnd));

  // Build the remaining instructions in Seq.
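  // For a typical 32-bit constant this is the classic two-instruction
  // materialization; e.g. 0x12345678 becomes roughly:
  //   lui $reg, 0x1234
  //   ori $reg, $reg, 0x5678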
for (++Inst; Inst != Seq.end() - LastInstrIsADDiu; ++Inst) BuildMI(MBB, II, DL, get(Inst->Opc), Reg).addReg(Reg, RegState::Kill) .addImm(SignExtend64<16>(Inst->ImmOpnd)); if (LastInstrIsADDiu) *NewImm = Inst->ImmOpnd; return Reg; } unsigned MipsSEInstrInfo::getAnalyzableBrOpc(unsigned Opc) const { return (Opc == Mips::BEQ || Opc == Mips::BEQ_MM || Opc == Mips::BNE || Opc == Mips::BNE_MM || Opc == Mips::BGTZ || Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 || Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B || Opc == Mips::J || Opc == Mips::J_MM || Opc == Mips::B_MM || Opc == Mips::BEQZC_MM || Opc == Mips::BNEZC_MM || Opc == Mips::BEQC || Opc == Mips::BNEC || Opc == Mips::BLTC || Opc == Mips::BGEC || Opc == Mips::BLTUC || Opc == Mips::BGEUC || Opc == Mips::BGTZC || Opc == Mips::BLEZC || Opc == Mips::BGEZC || Opc == Mips::BLTZC || Opc == Mips::BEQZC || Opc == Mips::BNEZC || Opc == Mips::BEQZC64 || Opc == Mips::BNEZC64 || Opc == Mips::BEQC64 || Opc == Mips::BNEC64 || Opc == Mips::BGEC64 || Opc == Mips::BGEUC64 || Opc == Mips::BLTC64 || Opc == Mips::BLTUC64 || Opc == Mips::BGTZC64 || Opc == Mips::BGEZC64 || Opc == Mips::BLTZC64 || Opc == Mips::BLEZC64 || Opc == Mips::BC || Opc == Mips::BBIT0 || Opc == Mips::BBIT1 || Opc == Mips::BBIT032 || Opc == Mips::BBIT132 || Opc == Mips::BC_MMR6 || Opc == Mips::BEQC_MMR6 || Opc == Mips::BNEC_MMR6 || Opc == Mips::BLTC_MMR6 || Opc == Mips::BGEC_MMR6 || Opc == Mips::BLTUC_MMR6 || Opc == Mips::BGEUC_MMR6 || Opc == Mips::BGTZC_MMR6 || Opc == Mips::BLEZC_MMR6 || Opc == Mips::BGEZC_MMR6 || Opc == Mips::BLTZC_MMR6 || Opc == Mips::BEQZC_MMR6 || Opc == Mips::BNEZC_MMR6) ? Opc : 0; } void MipsSEInstrInfo::expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { MachineInstrBuilder MIB; if (Subtarget.isGP64bit()) MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn64)) .addReg(Mips::RA_64, RegState::Undef); else MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn)) .addReg(Mips::RA, RegState::Undef); // Retain any imp-use flags. 
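  // e.g. the implicit uses of the physical return registers ($v0, $f0, ...)
  // that selectRet attached to RetRA are carried over to PseudoReturn here so
  // the return value is kept live across the return.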
for (auto & MO : I->operands()) { if (MO.isImplicit()) MIB.add(MO); } } void MipsSEInstrInfo::expandERet(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { BuildMI(MBB, I, I->getDebugLoc(), get(Mips::ERET)); } std::pair MipsSEInstrInfo::compareOpndSize(unsigned Opc, const MachineFunction &MF) const { const MCInstrDesc &Desc = get(Opc); assert(Desc.NumOperands == 2 && "Unary instruction expected."); const MipsRegisterInfo *RI = &getRegisterInfo(); unsigned DstRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 0, RI, MF)); unsigned SrcRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 1, RI, MF)); return std::make_pair(DstRegSize > SrcRegSize, DstRegSize < SrcRegSize); } void MipsSEInstrInfo::expandPseudoMFHiLo(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned NewOpc) const { BuildMI(MBB, I, I->getDebugLoc(), get(NewOpc), I->getOperand(0).getReg()); } void MipsSEInstrInfo::expandPseudoMTLoHi(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned LoOpc, unsigned HiOpc, bool HasExplicitDef) const { // Expand // lo_hi pseudomtlohi $gpr0, $gpr1 // to these two instructions: // mtlo $gpr0 // mthi $gpr1 DebugLoc DL = I->getDebugLoc(); const MachineOperand &SrcLo = I->getOperand(1), &SrcHi = I->getOperand(2); MachineInstrBuilder LoInst = BuildMI(MBB, I, DL, get(LoOpc)); MachineInstrBuilder HiInst = BuildMI(MBB, I, DL, get(HiOpc)); // Add lo/hi registers if the mtlo/hi instructions created have explicit // def registers. if (HasExplicitDef) { unsigned DstReg = I->getOperand(0).getReg(); unsigned DstLo = getRegisterInfo().getSubReg(DstReg, Mips::sub_lo); unsigned DstHi = getRegisterInfo().getSubReg(DstReg, Mips::sub_hi); LoInst.addReg(DstLo, RegState::Define); HiInst.addReg(DstHi, RegState::Define); } LoInst.addReg(SrcLo.getReg(), getKillRegState(SrcLo.isKill())); HiInst.addReg(SrcHi.getReg(), getKillRegState(SrcHi.isKill())); } void MipsSEInstrInfo::expandCvtFPInt(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned CvtOpc, unsigned MovOpc, bool IsI64) const { const MCInstrDesc &CvtDesc = get(CvtOpc), &MovDesc = get(MovOpc); const MachineOperand &Dst = I->getOperand(0), &Src = I->getOperand(1); unsigned DstReg = Dst.getReg(), SrcReg = Src.getReg(), TmpReg = DstReg; unsigned KillSrc = getKillRegState(Src.isKill()); DebugLoc DL = I->getDebugLoc(); bool DstIsLarger, SrcIsLarger; std::tie(DstIsLarger, SrcIsLarger) = compareOpndSize(CvtOpc, *MBB.getParent()); if (DstIsLarger) TmpReg = getRegisterInfo().getSubReg(DstReg, Mips::sub_lo); if (SrcIsLarger) DstReg = getRegisterInfo().getSubReg(DstReg, Mips::sub_lo); BuildMI(MBB, I, DL, MovDesc, TmpReg).addReg(SrcReg, KillSrc); BuildMI(MBB, I, DL, CvtDesc, DstReg).addReg(TmpReg, RegState::Kill); } void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool isMicroMips, bool FP64) const { unsigned DstReg = I->getOperand(0).getReg(); unsigned SrcReg = I->getOperand(1).getReg(); unsigned N = I->getOperand(2).getImm(); DebugLoc dl = I->getDebugLoc(); assert(N < 2 && "Invalid immediate"); unsigned SubIdx = N ? Mips::sub_hi : Mips::sub_lo; unsigned SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx); // FPXX on MIPS-II or MIPS32r1 should have been handled with a spill/reload // in MipsSEFrameLowering.cpp. assert(!(Subtarget.isABI_FPXX() && !Subtarget.hasMips32r2())); // FP64A (FP64 with nooddspreg) should have been handled with a spill/reload // in MipsSEFrameLowering.cpp. 
assert(!(Subtarget.isFP64bit() && !Subtarget.useOddSPReg())); if (SubIdx == Mips::sub_hi && Subtarget.hasMTHC1()) { // FIXME: Strictly speaking MFHC1 only reads the top 32-bits however, we // claim to read the whole 64-bits as part of a white lie used to // temporarily work around a widespread bug in the -mfp64 support. // The problem is that none of the 32-bit fpu ops mention the fact // that they clobber the upper 32-bits of the 64-bit FPR. Fixing that // requires a major overhaul of the FPU implementation which can't // be done right now due to time constraints. // MFHC1 is one of two instructions that are affected since they are // the only instructions that don't read the lower 32-bits. // We therefore pretend that it reads the bottom 32-bits to // artificially create a dependency and prevent the scheduler // changing the behaviour of the code. BuildMI(MBB, I, dl, get(isMicroMips ? (FP64 ? Mips::MFHC1_D64_MM : Mips::MFHC1_D32_MM) : (FP64 ? Mips::MFHC1_D64 : Mips::MFHC1_D32)), DstReg) .addReg(SrcReg); } else BuildMI(MBB, I, dl, get(Mips::MFC1), DstReg).addReg(SubReg); } void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool isMicroMips, bool FP64) const { unsigned DstReg = I->getOperand(0).getReg(); unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1); DebugLoc dl = I->getDebugLoc(); const TargetRegisterInfo &TRI = getRegisterInfo(); // When mthc1 is available, use: // mtc1 Lo, $fp // mthc1 Hi, $fp // // Otherwise, for O32 FPXX ABI: // spill + reload via ldc1 // This case is handled by the frame lowering code. // // Otherwise, for FP32: // mtc1 Lo, $fp // mtc1 Hi, $fp + 1 // // The case where dmtc1 is available doesn't need to be handled here // because it never creates a BuildPairF64 node. // FPXX on MIPS-II or MIPS32r1 should have been handled with a spill/reload // in MipsSEFrameLowering.cpp. assert(!(Subtarget.isABI_FPXX() && !Subtarget.hasMips32r2())); // FP64A (FP64 with nooddspreg) should have been handled with a spill/reload // in MipsSEFrameLowering.cpp. assert(!(Subtarget.isFP64bit() && !Subtarget.useOddSPReg())); BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_lo)) .addReg(LoReg); if (Subtarget.hasMTHC1()) { // FIXME: The .addReg(DstReg) is a white lie used to temporarily work // around a widespread bug in the -mfp64 support. // The problem is that none of the 32-bit fpu ops mention the fact // that they clobber the upper 32-bits of the 64-bit FPR. Fixing that // requires a major overhaul of the FPU implementation which can't // be done right now due to time constraints. // MTHC1 is one of two instructions that are affected since they are // the only instructions that don't read the lower 32-bits. // We therefore pretend that it reads the bottom 32-bits to // artificially create a dependency and prevent the scheduler // changing the behaviour of the code. BuildMI(MBB, I, dl, get(isMicroMips ? (FP64 ? Mips::MTHC1_D64_MM : Mips::MTHC1_D32_MM) : (FP64 ? Mips::MTHC1_D64 : Mips::MTHC1_D32)), DstReg) .addReg(DstReg) .addReg(HiReg); } else if (Subtarget.isABI_FPXX()) llvm_unreachable("BuildPairF64 not expanded in frame lowering code!"); else BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_hi)) .addReg(HiReg); } void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { // This pseudo instruction is generated as part of the lowering of // ISD::EH_RETURN. 
We convert it to a stack increment by OffsetReg, and // indirect jump to TargetReg MipsABIInfo ABI = Subtarget.getABI(); unsigned ADDU = ABI.GetPtrAdduOp(); unsigned SP = Subtarget.isGP64bit() ? Mips::SP_64 : Mips::SP; unsigned RA = Subtarget.isGP64bit() ? Mips::RA_64 : Mips::RA; unsigned T9 = Subtarget.isGP64bit() ? Mips::T9_64 : Mips::T9; unsigned ZERO = Subtarget.isGP64bit() ? Mips::ZERO_64 : Mips::ZERO; unsigned OffsetReg = I->getOperand(0).getReg(); unsigned TargetReg = I->getOperand(1).getReg(); // addu $ra, $v0, $zero // addu $sp, $sp, $v1 // jr $ra (via RetRA) const TargetMachine &TM = MBB.getParent()->getTarget(); if (TM.isPositionIndependent()) BuildMI(MBB, I, I->getDebugLoc(), get(ADDU), T9) .addReg(TargetReg) .addReg(ZERO); BuildMI(MBB, I, I->getDebugLoc(), get(ADDU), RA) .addReg(TargetReg) .addReg(ZERO); BuildMI(MBB, I, I->getDebugLoc(), get(ADDU), SP).addReg(SP).addReg(OffsetReg); expandRetRA(MBB, I); } const MipsInstrInfo *llvm::createMipsSEInstrInfo(const MipsSubtarget &STI) { return new MipsSEInstrInfo(STI); } Index: vendor/llvm/dist-release_80/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp (revision 348932) @@ -1,357 +1,365 @@ //===------ PPCDisassembler.cpp - Disassembler for PowerPC ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "MCTargetDesc/PPCMCTargetDesc.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Endian.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; DEFINE_PPC_REGCLASSES; #define DEBUG_TYPE "ppc-disassembler" typedef MCDisassembler::DecodeStatus DecodeStatus; namespace { class PPCDisassembler : public MCDisassembler { bool IsLittleEndian; public: PPCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, bool IsLittleEndian) : MCDisassembler(STI, Ctx), IsLittleEndian(IsLittleEndian) {} DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &VStream, raw_ostream &CStream) const override; }; } // end anonymous namespace static MCDisassembler *createPPCDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx) { return new PPCDisassembler(STI, Ctx, /*IsLittleEndian=*/false); } static MCDisassembler *createPPCLEDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx) { return new PPCDisassembler(STI, Ctx, /*IsLittleEndian=*/true); } extern "C" void LLVMInitializePowerPCDisassembler() { // Register the disassembler for each target. 
TargetRegistry::RegisterMCDisassembler(getThePPC32Target(), createPPCDisassembler); TargetRegistry::RegisterMCDisassembler(getThePPC64Target(), createPPCDisassembler); TargetRegistry::RegisterMCDisassembler(getThePPC64LETarget(), createPPCLEDisassembler); } +static DecodeStatus DecodePCRel24BranchTarget(MCInst &Inst, unsigned Imm, + uint64_t Addr, + const void *Decoder) { + int32_t Offset = SignExtend32<24>(Imm); + Inst.addOperand(MCOperand::createImm(Offset)); + return MCDisassembler::Success; +} + // FIXME: These can be generated by TableGen from the existing register // encoding values! template static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, const MCPhysReg (&Regs)[N]) { assert(RegNo < N && "Invalid register number"); Inst.addOperand(MCOperand::createReg(Regs[RegNo])); return MCDisassembler::Success; } static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { return decodeRegisterClass(Inst, RegNo, CRRegs); } static DecodeStatus DecodeCRRC0RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { return decodeRegisterClass(Inst, RegNo, CRRegs); } static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { return decodeRegisterClass(Inst, RegNo, CRBITRegs); } static DecodeStatus DecodeF4RCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { return decodeRegisterClass(Inst, RegNo, FRegs); } static DecodeStatus DecodeF8RCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { return decodeRegisterClass(Inst, RegNo, FRegs); } static DecodeStatus DecodeVFRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { return decodeRegisterClass(Inst, RegNo, VFRegs); } static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { return decodeRegisterClass(Inst, RegNo, VRegs); } static DecodeStatus DecodeVSRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { return decodeRegisterClass(Inst, RegNo, VSRegs); } static DecodeStatus DecodeVSFRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { return decodeRegisterClass(Inst, RegNo, VSFRegs); } static DecodeStatus DecodeVSSRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { return decodeRegisterClass(Inst, RegNo, VSSRegs); } static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { return decodeRegisterClass(Inst, RegNo, RRegs); } static DecodeStatus DecodeGPRC_NOR0RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { return decodeRegisterClass(Inst, RegNo, RRegsNoR0); } static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { return decodeRegisterClass(Inst, RegNo, XRegs); } static DecodeStatus DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { return decodeRegisterClass(Inst, RegNo, XRegsNoX0); } #define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass #define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { return decodeRegisterClass(Inst, RegNo, QFRegs); } static DecodeStatus DecodeSPE4RCRegisterClass(MCInst &Inst, uint64_t RegNo, 
uint64_t Address, const void *Decoder) { return decodeRegisterClass(Inst, RegNo, RRegs); } static DecodeStatus DecodeSPERCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { return decodeRegisterClass(Inst, RegNo, SPERegs); } #define DecodeQSRCRegisterClass DecodeQFRCRegisterClass #define DecodeQBRCRegisterClass DecodeQFRCRegisterClass template static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm, int64_t Address, const void *Decoder) { assert(isUInt(Imm) && "Invalid immediate"); Inst.addOperand(MCOperand::createImm(Imm)); return MCDisassembler::Success; } template static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm, int64_t Address, const void *Decoder) { assert(isUInt(Imm) && "Invalid immediate"); Inst.addOperand(MCOperand::createImm(SignExtend64(Imm))); return MCDisassembler::Success; } static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm, int64_t Address, const void *Decoder) { // Decode the memri field (imm, reg), which has the low 16-bits as the // displacement and the next 5 bits as the register #. uint64_t Base = Imm >> 16; uint64_t Disp = Imm & 0xFFFF; assert(Base < 32 && "Invalid base register"); switch (Inst.getOpcode()) { default: break; case PPC::LBZU: case PPC::LHAU: case PPC::LHZU: case PPC::LWZU: case PPC::LFSU: case PPC::LFDU: // Add the tied output operand. Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base])); break; case PPC::STBU: case PPC::STHU: case PPC::STWU: case PPC::STFSU: case PPC::STFDU: Inst.insert(Inst.begin(), MCOperand::createReg(RRegsNoR0[Base])); break; } Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp))); Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base])); return MCDisassembler::Success; } static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm, int64_t Address, const void *Decoder) { // Decode the memrix field (imm, reg), which has the low 14-bits as the // displacement and the next 5 bits as the register #. uint64_t Base = Imm >> 14; uint64_t Disp = Imm & 0x3FFF; assert(Base < 32 && "Invalid base register"); if (Inst.getOpcode() == PPC::LDU) // Add the tied output operand. Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base])); else if (Inst.getOpcode() == PPC::STDU) Inst.insert(Inst.begin(), MCOperand::createReg(RRegsNoR0[Base])); Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp << 2))); Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base])); return MCDisassembler::Success; } static DecodeStatus decodeMemRIX16Operands(MCInst &Inst, uint64_t Imm, int64_t Address, const void *Decoder) { // Decode the memrix16 field (imm, reg), which has the low 12-bits as the // displacement with 16-byte aligned, and the next 5 bits as the register #. uint64_t Base = Imm >> 12; uint64_t Disp = Imm & 0xFFF; assert(Base < 32 && "Invalid base register"); Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp << 4))); Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base])); return MCDisassembler::Success; } static DecodeStatus decodeSPE8Operands(MCInst &Inst, uint64_t Imm, int64_t Address, const void *Decoder) { // Decode the spe8disp field (imm, reg), which has the low 5-bits as the // displacement with 8-byte aligned, and the next 5 bits as the register #. 
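The memri/memrix comments above describe a packed (base, displacement) encoding: the low bits hold the displacement, the next five bits the base register number, and the displacement is scaled and then sign-extended from 16 bits. A minimal standalone sketch of that split, with illustrative names, not from the patch:

// Illustrative sketch, not from the LLVM sources: splitting a packed
// memri/memrix-style field into a base register number and a sign-extended,
// scaled displacement, mirroring the decodeMemRI* comments above.
#include <cassert>
#include <cstdint>

struct BaseDisp {
  unsigned Base; // 5-bit base register number
  int64_t Disp;  // sign-extended, scaled displacement
};

// DispBits is the width of the displacement field; ScaleLog2 is the implied
// alignment (0 for memri, 2 for memrix, 4 for memrix16).
static BaseDisp splitMemOperand(uint64_t Imm, unsigned DispBits,
                                unsigned ScaleLog2) {
  uint64_t Base = Imm >> DispBits;
  uint64_t Disp = Imm & ((uint64_t(1) << DispBits) - 1);
  assert(Base < 32 && "Invalid base register");
  // Scale first, then sign-extend from 16 bits, as SignExtend64<16>(Disp << N)
  // does in the decoders above.
  uint64_t Scaled = (Disp << ScaleLog2) & 0xFFFF;
  int64_t Ext = (Scaled & 0x8000) ? int64_t(Scaled) - 0x10000 : int64_t(Scaled);
  return {unsigned(Base), Ext};
}

int main() {
  // memri: base r3, 16-bit displacement 0xFFFC (-4), no scaling.
  BaseDisp D = splitMemOperand((3u << 16) | 0xFFFC, 16, 0);
  assert(D.Base == 3 && D.Disp == -4);
  // memrix: base r5, 14-bit displacement 0x3FFF, scaled by 4 (-4 bytes).
  D = splitMemOperand((5u << 14) | 0x3FFF, 14, 2);
  assert(D.Base == 5 && D.Disp == -4);
  return 0;
}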
uint64_t Base = Imm >> 5; uint64_t Disp = Imm & 0x1F; assert(Base < 32 && "Invalid base register"); Inst.addOperand(MCOperand::createImm(Disp << 3)); Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base])); return MCDisassembler::Success; } static DecodeStatus decodeSPE4Operands(MCInst &Inst, uint64_t Imm, int64_t Address, const void *Decoder) { // Decode the spe4disp field (imm, reg), which has the low 5-bits as the // displacement with 4-byte aligned, and the next 5 bits as the register #. uint64_t Base = Imm >> 5; uint64_t Disp = Imm & 0x1F; assert(Base < 32 && "Invalid base register"); Inst.addOperand(MCOperand::createImm(Disp << 2)); Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base])); return MCDisassembler::Success; } static DecodeStatus decodeSPE2Operands(MCInst &Inst, uint64_t Imm, int64_t Address, const void *Decoder) { // Decode the spe2disp field (imm, reg), which has the low 5-bits as the // displacement with 2-byte aligned, and the next 5 bits as the register #. uint64_t Base = Imm >> 5; uint64_t Disp = Imm & 0x1F; assert(Base < 32 && "Invalid base register"); Inst.addOperand(MCOperand::createImm(Disp << 1)); Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base])); return MCDisassembler::Success; } static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm, int64_t Address, const void *Decoder) { // The cr bit encoding is 0x80 >> cr_reg_num. unsigned Zeros = countTrailingZeros(Imm); assert(Zeros < 8 && "Invalid CR bit value"); Inst.addOperand(MCOperand::createReg(CRRegs[7 - Zeros])); return MCDisassembler::Success; } #include "PPCGenDisassemblerTables.inc" DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size, ArrayRef Bytes, uint64_t Address, raw_ostream &OS, raw_ostream &CS) const { // Get the four bytes of the instruction. Size = 4; if (Bytes.size() < 4) { Size = 0; return MCDisassembler::Fail; } // Read the instruction in the proper endianness. uint32_t Inst = IsLittleEndian ? support::endian::read32le(Bytes.data()) : support::endian::read32be(Bytes.data()); if (STI.getFeatureBits()[PPC::FeatureQPX]) { DecodeStatus result = decodeInstruction(DecoderTableQPX32, MI, Inst, Address, this, STI); if (result != MCDisassembler::Fail) return result; } else if (STI.getFeatureBits()[PPC::FeatureSPE]) { DecodeStatus result = decodeInstruction(DecoderTableSPE32, MI, Inst, Address, this, STI); if (result != MCDisassembler::Fail) return result; } return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI); } Index: vendor/llvm/dist-release_80/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp (revision 348932) @@ -1,532 +1,535 @@ //===-- PPCInstPrinter.cpp - Convert PPC MCInst to assembly syntax --------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This class prints an PPC MCInst to a .s file. 
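decodeCRBitMOperand above recovers a condition-register field from its one-hot "0x80 >> cr_reg_num" encoding by counting trailing zeros. A small standalone sketch of the same arithmetic, with an illustrative helper name, not taken from the patch:

// Illustrative sketch, not from the LLVM sources.
#include <cassert>
#include <cstdint>

static unsigned crFieldFromMask(uint8_t Mask) {
  assert(Mask != 0 && (Mask & (Mask - 1)) == 0 && "expected a one-hot mask");
  unsigned Zeros = 0; // equivalent of countTrailingZeros(Mask)
  while ((Mask & 1) == 0) {
    Mask >>= 1;
    ++Zeros;
  }
  assert(Zeros < 8 && "Invalid CR bit value");
  return 7 - Zeros; // index into CRRegs[] in the decoder above
}

int main() {
  assert(crFieldFromMask(0x80) == 0); // cr0
  assert(crFieldFromMask(0x01) == 7); // cr7
  return 0;
}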
// //===----------------------------------------------------------------------===// #include "PPCInstPrinter.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPCInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "asm-printer" // FIXME: Once the integrated assembler supports full register names, tie this // to the verbose-asm setting. static cl::opt FullRegNames("ppc-asm-full-reg-names", cl::Hidden, cl::init(false), cl::desc("Use full register names when printing assembly")); // Useful for testing purposes. Prints vs{31-63} as v{0-31} respectively. static cl::opt ShowVSRNumsAsVR("ppc-vsr-nums-as-vr", cl::Hidden, cl::init(false), cl::desc("Prints full register names with vs{31-63} as v{0-31}")); // Prints full register names with percent symbol. static cl::opt FullRegNamesWithPercent("ppc-reg-with-percent-prefix", cl::Hidden, cl::init(false), cl::desc("Prints full register names with percent")); #define PRINT_ALIAS_INSTR #include "PPCGenAsmWriter.inc" void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { const char *RegName = getRegisterName(RegNo); if (RegName[0] == 'q' /* QPX */) { // The system toolchain on the BG/Q does not understand QPX register names // in .cfi_* directives, so print the name of the floating-point // subregister instead. std::string RN(RegName); RN[0] = 'f'; OS << RN; return; } OS << RegName; } void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, const MCSubtargetInfo &STI) { // Check for slwi/srwi mnemonics. if (MI->getOpcode() == PPC::RLWINM) { unsigned char SH = MI->getOperand(2).getImm(); unsigned char MB = MI->getOperand(3).getImm(); unsigned char ME = MI->getOperand(4).getImm(); bool useSubstituteMnemonic = false; if (SH <= 31 && MB == 0 && ME == (31-SH)) { O << "\tslwi "; useSubstituteMnemonic = true; } if (SH <= 31 && MB == (32-SH) && ME == 31) { O << "\tsrwi "; useSubstituteMnemonic = true; SH = 32-SH; } if (useSubstituteMnemonic) { printOperand(MI, 0, O); O << ", "; printOperand(MI, 1, O); O << ", " << (unsigned int)SH; printAnnotation(O, Annot); return; } } if ((MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) && MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { O << "\tmr "; printOperand(MI, 0, O); O << ", "; printOperand(MI, 1, O); printAnnotation(O, Annot); return; } if (MI->getOpcode() == PPC::RLDICR || MI->getOpcode() == PPC::RLDICR_32) { unsigned char SH = MI->getOperand(2).getImm(); unsigned char ME = MI->getOperand(3).getImm(); // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH if (63-SH == ME) { O << "\tsldi "; printOperand(MI, 0, O); O << ", "; printOperand(MI, 1, O); O << ", " << (unsigned int)SH; printAnnotation(O, Annot); return; } } // dcbt[st] is printed manually here because: // 1. The assembly syntax is different between embedded and server targets // 2. We must print the short mnemonics for TH == 0 because the // embedded/server syntax default will not be stable across assemblers // The syntax for dcbt is: // dcbt ra, rb, th [server] // dcbt th, ra, rb [embedded] // where th can be omitted when it is 0. dcbtst is the same. 
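The comment above spells out the dcbt/dcbtst printing rules: the short mnemonic is used when TH is 0 or 16, and the TH operand is printed first under the embedded (Book E) syntax but last under the server syntax. A standalone sketch of those rules as plain string formatting, with illustrative names; the real printer, immediately below, goes through printOperand and emits a leading tab:

// Illustrative sketch, not from the LLVM sources.
#include <cassert>
#include <string>

static std::string formatDCBT(bool IsStore, unsigned TH, bool IsBookE,
                              const std::string &RA, const std::string &RB) {
  std::string S = "dcbt";
  if (IsStore)
    S += "st";
  if (TH == 16) // the transient form gets the short "t" mnemonic
    S += "t";
  S += " ";
  if (IsBookE && TH != 0 && TH != 16) // embedded syntax: th comes first
    S += std::to_string(TH) + ", ";
  S += RA + ", " + RB;
  if (!IsBookE && TH != 0 && TH != 16) // server syntax: th comes last
    S += ", " + std::to_string(TH);
  return S;
}

int main() {
  assert(formatDCBT(false, 0, false, "3", "4") == "dcbt 3, 4");
  assert(formatDCBT(false, 8, false, "3", "4") == "dcbt 3, 4, 8"); // server
  assert(formatDCBT(false, 8, true, "3", "4") == "dcbt 8, 3, 4");  // embedded
  assert(formatDCBT(true, 16, true, "3", "4") == "dcbtstt 3, 4");
  return 0;
}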
if (MI->getOpcode() == PPC::DCBT || MI->getOpcode() == PPC::DCBTST) { unsigned char TH = MI->getOperand(0).getImm(); O << "\tdcbt"; if (MI->getOpcode() == PPC::DCBTST) O << "st"; if (TH == 16) O << "t"; O << " "; bool IsBookE = STI.getFeatureBits()[PPC::FeatureBookE]; if (IsBookE && TH != 0 && TH != 16) O << (unsigned int) TH << ", "; printOperand(MI, 1, O); O << ", "; printOperand(MI, 2, O); if (!IsBookE && TH != 0 && TH != 16) O << ", " << (unsigned int) TH; printAnnotation(O, Annot); return; } if (MI->getOpcode() == PPC::DCBF) { unsigned char L = MI->getOperand(0).getImm(); if (!L || L == 1 || L == 3) { O << "\tdcbf"; if (L == 1 || L == 3) O << "l"; if (L == 3) O << "p"; O << " "; printOperand(MI, 1, O); O << ", "; printOperand(MI, 2, O); printAnnotation(O, Annot); return; } } if (!printAliasInstr(MI, O)) printInstruction(MI, O); printAnnotation(O, Annot); } void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, const char *Modifier) { unsigned Code = MI->getOperand(OpNo).getImm(); if (StringRef(Modifier) == "cc") { switch ((PPC::Predicate)Code) { case PPC::PRED_LT_MINUS: case PPC::PRED_LT_PLUS: case PPC::PRED_LT: O << "lt"; return; case PPC::PRED_LE_MINUS: case PPC::PRED_LE_PLUS: case PPC::PRED_LE: O << "le"; return; case PPC::PRED_EQ_MINUS: case PPC::PRED_EQ_PLUS: case PPC::PRED_EQ: O << "eq"; return; case PPC::PRED_GE_MINUS: case PPC::PRED_GE_PLUS: case PPC::PRED_GE: O << "ge"; return; case PPC::PRED_GT_MINUS: case PPC::PRED_GT_PLUS: case PPC::PRED_GT: O << "gt"; return; case PPC::PRED_NE_MINUS: case PPC::PRED_NE_PLUS: case PPC::PRED_NE: O << "ne"; return; case PPC::PRED_UN_MINUS: case PPC::PRED_UN_PLUS: case PPC::PRED_UN: O << "un"; return; case PPC::PRED_NU_MINUS: case PPC::PRED_NU_PLUS: case PPC::PRED_NU: O << "nu"; return; case PPC::PRED_BIT_SET: case PPC::PRED_BIT_UNSET: llvm_unreachable("Invalid use of bit predicate code"); } llvm_unreachable("Invalid predicate code"); } if (StringRef(Modifier) == "pm") { switch ((PPC::Predicate)Code) { case PPC::PRED_LT: case PPC::PRED_LE: case PPC::PRED_EQ: case PPC::PRED_GE: case PPC::PRED_GT: case PPC::PRED_NE: case PPC::PRED_UN: case PPC::PRED_NU: return; case PPC::PRED_LT_MINUS: case PPC::PRED_LE_MINUS: case PPC::PRED_EQ_MINUS: case PPC::PRED_GE_MINUS: case PPC::PRED_GT_MINUS: case PPC::PRED_NE_MINUS: case PPC::PRED_UN_MINUS: case PPC::PRED_NU_MINUS: O << "-"; return; case PPC::PRED_LT_PLUS: case PPC::PRED_LE_PLUS: case PPC::PRED_EQ_PLUS: case PPC::PRED_GE_PLUS: case PPC::PRED_GT_PLUS: case PPC::PRED_NE_PLUS: case PPC::PRED_UN_PLUS: case PPC::PRED_NU_PLUS: O << "+"; return; case PPC::PRED_BIT_SET: case PPC::PRED_BIT_UNSET: llvm_unreachable("Invalid use of bit predicate code"); } llvm_unreachable("Invalid predicate code"); } assert(StringRef(Modifier) == "reg" && "Need to specify 'cc', 'pm' or 'reg' as predicate op modifier!"); printOperand(MI, OpNo+1, O); } void PPCInstPrinter::printATBitsAsHint(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned Code = MI->getOperand(OpNo).getImm(); if (Code == 2) O << "-"; else if (Code == 3) O << "+"; } void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 1 && "Invalid u1imm argument!"); O << (unsigned int)Value; } void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 3 && "Invalid u2imm argument!"); O << (unsigned int)Value; } void 
PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 8 && "Invalid u3imm argument!"); O << (unsigned int)Value; } void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 15 && "Invalid u4imm argument!"); O << (unsigned int)Value; } void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { int Value = MI->getOperand(OpNo).getImm(); Value = SignExtend32<5>(Value); O << (int)Value; } void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 31 && "Invalid u5imm argument!"); O << (unsigned int)Value; } void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 63 && "Invalid u6imm argument!"); O << (unsigned int)Value; } void PPCInstPrinter::printU7ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned int Value = MI->getOperand(OpNo).getImm(); assert(Value <= 127 && "Invalid u7imm argument!"); O << (unsigned int)Value; } // Operands of BUILD_VECTOR are signed and we use this to print operands // of XXSPLTIB which are unsigned. So we simply truncate to 8 bits and // print as unsigned. void PPCInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned char Value = MI->getOperand(OpNo).getImm(); O << (unsigned int)Value; } void PPCInstPrinter::printU10ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned short Value = MI->getOperand(OpNo).getImm(); assert(Value <= 1023 && "Invalid u10imm argument!"); O << (unsigned short)Value; } void PPCInstPrinter::printU12ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned short Value = MI->getOperand(OpNo).getImm(); assert(Value <= 4095 && "Invalid u12imm argument!"); O << (unsigned short)Value; } void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { if (MI->getOperand(OpNo).isImm()) O << (short)MI->getOperand(OpNo).getImm(); else printOperand(MI, OpNo, O); } void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { if (MI->getOperand(OpNo).isImm()) O << (unsigned short)MI->getOperand(OpNo).getImm(); else printOperand(MI, OpNo, O); } void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { if (!MI->getOperand(OpNo).isImm()) return printOperand(MI, OpNo, O); // Branches can take an immediate operand. This is used by the branch // selection pass to print .+8, an eight byte displacement from the PC. 
- O << ".+"; - printAbsBranchOperand(MI, OpNo, O); + O << "."; + int32_t Imm = SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2); + if (Imm >= 0) + O << "+"; + O << Imm; } void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { if (!MI->getOperand(OpNo).isImm()) return printOperand(MI, OpNo, O); O << SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2); } void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned CCReg = MI->getOperand(OpNo).getReg(); unsigned RegNo; switch (CCReg) { default: llvm_unreachable("Unknown CR register"); case PPC::CR0: RegNo = 0; break; case PPC::CR1: RegNo = 1; break; case PPC::CR2: RegNo = 2; break; case PPC::CR3: RegNo = 3; break; case PPC::CR4: RegNo = 4; break; case PPC::CR5: RegNo = 5; break; case PPC::CR6: RegNo = 6; break; case PPC::CR7: RegNo = 7; break; } O << (0x80 >> RegNo); } void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printS16ImmOperand(MI, OpNo, O); O << '('; if (MI->getOperand(OpNo+1).getReg() == PPC::R0) O << "0"; else printOperand(MI, OpNo+1, O); O << ')'; } void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O) { // When used as the base register, r0 reads constant zero rather than // the value contained in the register. For this reason, the darwin // assembler requires that we print r0 as 0 (no r) when used as the base. if (MI->getOperand(OpNo).getReg() == PPC::R0) O << "0"; else printOperand(MI, OpNo, O); O << ", "; printOperand(MI, OpNo+1, O); } void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo, raw_ostream &O) { // On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must // come at the _end_ of the expression. const MCOperand &Op = MI->getOperand(OpNo); const MCSymbolRefExpr &refExp = cast(*Op.getExpr()); O << refExp.getSymbol().getName(); O << '('; printOperand(MI, OpNo+1, O); O << ')'; if (refExp.getKind() != MCSymbolRefExpr::VK_None) O << '@' << MCSymbolRefExpr::getVariantKindName(refExp.getKind()); } /// showRegistersWithPercentPrefix - Check if this register name should be /// printed with a percentage symbol as prefix. bool PPCInstPrinter::showRegistersWithPercentPrefix(const char *RegName) const { if (!FullRegNamesWithPercent || TT.isOSDarwin() || TT.getOS() == Triple::AIX) return false; switch (RegName[0]) { default: return false; case 'r': case 'f': case 'q': case 'v': case 'c': return true; } } /// getVerboseConditionalRegName - This method expands the condition register /// when requested explicitly or targetting Darwin. const char *PPCInstPrinter::getVerboseConditionRegName(unsigned RegNum, unsigned RegEncoding) const { if (!TT.isOSDarwin() && !FullRegNames) return nullptr; if (RegNum < PPC::CR0EQ || RegNum > PPC::CR7UN) return nullptr; const char *CRBits[] = { "lt", "gt", "eq", "un", "4*cr1+lt", "4*cr1+gt", "4*cr1+eq", "4*cr1+un", "4*cr2+lt", "4*cr2+gt", "4*cr2+eq", "4*cr2+un", "4*cr3+lt", "4*cr3+gt", "4*cr3+eq", "4*cr3+un", "4*cr4+lt", "4*cr4+gt", "4*cr4+eq", "4*cr4+un", "4*cr5+lt", "4*cr5+gt", "4*cr5+eq", "4*cr5+un", "4*cr6+lt", "4*cr6+gt", "4*cr6+eq", "4*cr6+un", "4*cr7+lt", "4*cr7+gt", "4*cr7+eq", "4*cr7+un" }; return CRBits[RegEncoding]; } // showRegistersWithPrefix - This method determines whether registers // should be number-only or include the prefix. 
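The printBranchOperand change above prints the branch target as a signed byte offset from the current location instead of always routing through printAbsBranchOperand with a fixed ".+" prefix, so backward branches now read ".-N". A minimal standalone sketch of that formatting, with illustrative names, not from the patch:

// Illustrative sketch, not from the LLVM sources.
#include <cassert>
#include <cstdint>
#include <string>

static std::string formatRelativeBranch(uint32_t RawImm) {
  // Equivalent of SignExtend32<32>((unsigned)Imm << 2): the raw operand is a
  // word offset, so shift by 2 in 32-bit arithmetic to get the byte offset.
  int32_t Offset = static_cast<int32_t>(RawImm << 2);
  std::string S = ".";
  if (Offset >= 0)
    S += "+";
  return S + std::to_string(Offset);
}

int main() {
  assert(formatRelativeBranch(2) == ".+8"); // forward two words
  // A backward branch: DecodePCRel24BranchTarget has already sign-extended the
  // 24-bit field, so the operand arrives as a full-width -1 here.
  assert(formatRelativeBranch(0xFFFFFFFFu) == ".-4");
  return 0;
}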
bool PPCInstPrinter::showRegistersWithPrefix() const { if (TT.getOS() == Triple::AIX) return false; return TT.isOSDarwin() || FullRegNamesWithPercent || FullRegNames; } void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { unsigned Reg = Op.getReg(); if (!ShowVSRNumsAsVR) Reg = PPCInstrInfo::getRegNumForOperand(MII.get(MI->getOpcode()), Reg, OpNo); const char *RegName; RegName = getVerboseConditionRegName(Reg, MRI.getEncodingValue(Reg)); if (RegName == nullptr) RegName = getRegisterName(Reg); if (showRegistersWithPercentPrefix(RegName)) O << "%"; if (!showRegistersWithPrefix()) RegName = PPCRegisterInfo::stripRegisterPrefix(RegName); O << RegName; return; } if (Op.isImm()) { O << Op.getImm(); return; } assert(Op.isExpr() && "unknown operand kind in printOperand"); Op.getExpr()->print(O, &MAI); } Index: vendor/llvm/dist-release_80/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp (revision 348932) @@ -1,276 +1,294 @@ //===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -----------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file provides PowerPC specific target descriptions. // //===----------------------------------------------------------------------===// #include "MCTargetDesc/PPCMCTargetDesc.h" #include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCMCAsmInfo.h" #include "PPCTargetStreamer.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; #define GET_INSTRINFO_MC_DESC #include "PPCGenInstrInfo.inc" #define GET_SUBTARGETINFO_MC_DESC #include "PPCGenSubtargetInfo.inc" #define GET_REGINFO_MC_DESC #include "PPCGenRegisterInfo.inc" // Pin the vtable to this file. PPCTargetStreamer::PPCTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} PPCTargetStreamer::~PPCTargetStreamer() = default; static MCInstrInfo *createPPCMCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitPPCMCInstrInfo(X); return X; } static MCRegisterInfo *createPPCMCRegisterInfo(const Triple &TT) { bool isPPC64 = (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le); unsigned Flavour = isPPC64 ? 0 : 1; unsigned RA = isPPC64 ? 
PPC::LR8 : PPC::LR; MCRegisterInfo *X = new MCRegisterInfo(); InitPPCMCRegisterInfo(X, RA, Flavour, Flavour); return X; } static MCSubtargetInfo *createPPCMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { return createPPCMCSubtargetInfoImpl(TT, CPU, FS); } static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, const Triple &TheTriple) { bool isPPC64 = (TheTriple.getArch() == Triple::ppc64 || TheTriple.getArch() == Triple::ppc64le); MCAsmInfo *MAI; if (TheTriple.isOSDarwin()) MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple); else MAI = new PPCELFMCAsmInfo(isPPC64, TheTriple); // Initial state of the frame pointer is R1. unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1; MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, MRI.getDwarfRegNum(Reg, true), 0); MAI->addInitialFrameState(Inst); return MAI; } namespace { class PPCTargetAsmStreamer : public PPCTargetStreamer { formatted_raw_ostream &OS; public: PPCTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS) : PPCTargetStreamer(S), OS(OS) {} void emitTCEntry(const MCSymbol &S) override { OS << "\t.tc "; OS << S.getName(); OS << "[TC],"; OS << S.getName(); OS << '\n'; } void emitMachine(StringRef CPU) override { OS << "\t.machine " << CPU << '\n'; } void emitAbiVersion(int AbiVersion) override { OS << "\t.abiversion " << AbiVersion << '\n'; } void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override { const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo(); OS << "\t.localentry\t"; S->print(OS, MAI); OS << ", "; LocalOffset->print(OS, MAI); OS << '\n'; } }; class PPCTargetELFStreamer : public PPCTargetStreamer { public: PPCTargetELFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {} MCELFStreamer &getStreamer() { return static_cast(Streamer); } void emitTCEntry(const MCSymbol &S) override { // Creates a R_PPC64_TOC relocation Streamer.EmitValueToAlignment(8); Streamer.EmitSymbolValue(&S, 8); } void emitMachine(StringRef CPU) override { // FIXME: Is there anything to do in here or does this directive only // limit the parser? } void emitAbiVersion(int AbiVersion) override { MCAssembler &MCA = getStreamer().getAssembler(); unsigned Flags = MCA.getELFHeaderEFlags(); Flags &= ~ELF::EF_PPC64_ABI; Flags |= (AbiVersion & ELF::EF_PPC64_ABI); MCA.setELFHeaderEFlags(Flags); } void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override { MCAssembler &MCA = getStreamer().getAssembler(); int64_t Res; if (!LocalOffset->evaluateAsAbsolute(Res, MCA)) report_fatal_error(".localentry expression must be absolute."); unsigned Encoded = ELF::encodePPC64LocalEntryOffset(Res); if (Res != ELF::decodePPC64LocalEntryOffset(Encoded)) report_fatal_error(".localentry expression cannot be encoded."); unsigned Other = S->getOther(); Other &= ~ELF::STO_PPC64_LOCAL_MASK; Other |= Encoded; S->setOther(Other); // For GAS compatibility, unless we already saw a .abiversion directive, // set e_flags to indicate ELFv2 ABI. unsigned Flags = MCA.getELFHeaderEFlags(); if ((Flags & ELF::EF_PPC64_ABI) == 0) MCA.setELFHeaderEFlags(Flags | 2); } void emitAssignment(MCSymbol *S, const MCExpr *Value) override { auto *Symbol = cast(S); + // When encoding an assignment to set symbol A to symbol B, also copy // the st_other bits encoding the local entry point offset. 
- if (Value->getKind() != MCExpr::SymbolRef) - return; - const auto &RhsSym = cast( - static_cast(Value)->getSymbol()); - unsigned Other = Symbol->getOther(); + if (copyLocalEntry(Symbol, Value)) + UpdateOther.insert(Symbol); + else + UpdateOther.erase(Symbol); + } + + void finish() override { + for (auto *Sym : UpdateOther) + copyLocalEntry(Sym, Sym->getVariableValue()); + } + +private: + SmallPtrSet UpdateOther; + + bool copyLocalEntry(MCSymbolELF *D, const MCExpr *S) { + auto *Ref = dyn_cast(S); + if (!Ref) + return false; + const auto &RhsSym = cast(Ref->getSymbol()); + unsigned Other = D->getOther(); Other &= ~ELF::STO_PPC64_LOCAL_MASK; Other |= RhsSym.getOther() & ELF::STO_PPC64_LOCAL_MASK; - Symbol->setOther(Other); + D->setOther(Other); + return true; } }; class PPCTargetMachOStreamer : public PPCTargetStreamer { public: PPCTargetMachOStreamer(MCStreamer &S) : PPCTargetStreamer(S) {} void emitTCEntry(const MCSymbol &S) override { llvm_unreachable("Unknown pseudo-op: .tc"); } void emitMachine(StringRef CPU) override { // FIXME: We should update the CPUType, CPUSubType in the Object file if // the new values are different from the defaults. } void emitAbiVersion(int AbiVersion) override { llvm_unreachable("Unknown pseudo-op: .abiversion"); } void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override { llvm_unreachable("Unknown pseudo-op: .localentry"); } }; } // end anonymous namespace static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS, MCInstPrinter *InstPrint, bool isVerboseAsm) { return new PPCTargetAsmStreamer(S, OS); } static MCTargetStreamer * createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { const Triple &TT = STI.getTargetTriple(); if (TT.isOSBinFormatELF()) return new PPCTargetELFStreamer(S); return new PPCTargetMachOStreamer(S); } static MCInstPrinter *createPPCMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) { return new PPCInstPrinter(MAI, MII, MRI, T); } extern "C" void LLVMInitializePowerPCTargetMC() { for (Target *T : {&getThePPC32Target(), &getThePPC64Target(), &getThePPC64LETarget()}) { // Register the MC asm info. RegisterMCAsmInfoFn C(*T, createPPCMCAsmInfo); // Register the MC instruction info. TargetRegistry::RegisterMCInstrInfo(*T, createPPCMCInstrInfo); // Register the MC register info. TargetRegistry::RegisterMCRegInfo(*T, createPPCMCRegisterInfo); // Register the MC subtarget info. TargetRegistry::RegisterMCSubtargetInfo(*T, createPPCMCSubtargetInfo); // Register the MC Code Emitter TargetRegistry::RegisterMCCodeEmitter(*T, createPPCMCCodeEmitter); // Register the asm backend. TargetRegistry::RegisterMCAsmBackend(*T, createPPCAsmBackend); // Register the object target streamer. TargetRegistry::RegisterObjectTargetStreamer(*T, createObjectTargetStreamer); // Register the asm target streamer. TargetRegistry::RegisterAsmTargetStreamer(*T, createAsmTargetStreamer); // Register the MCInstPrinter. 
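The emitAssignment/copyLocalEntry change above copies only the local-entry-offset bits of st_other from the right-hand symbol, leaves the destination's remaining bits alone, and repeats the copy at finish() time so late changes to the right-hand symbol are picked up. A standalone sketch of just the bit surgery; the mask value is an assumption of this sketch standing in for ELF::STO_PPC64_LOCAL_MASK, not a quote of the header:

// Illustrative sketch, not from the LLVM sources.
#include <cassert>
#include <cstdint>

static const uint8_t LocalEntryMask = 0xE0; // assumed high three bits of st_other

// Copy the local-entry-offset bits of Src's st_other into Dst's st_other,
// leaving Dst's remaining bits (e.g. visibility) untouched.
static uint8_t copyLocalEntryBits(uint8_t DstOther, uint8_t SrcOther) {
  DstOther &= static_cast<uint8_t>(~LocalEntryMask);
  DstOther |= SrcOther & LocalEntryMask;
  return DstOther;
}

int main() {
  assert(copyLocalEntryBits(0x03, 0x60) == 0x63); // field copied, low bits kept
  assert(copyLocalEntryBits(0x63, 0x00) == 0x03); // field cleared again
  return 0;
}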
TargetRegistry::RegisterMCInstPrinter(*T, createPPCMCInstPrinter); } } Index: vendor/llvm/dist-release_80/lib/Target/PowerPC/PPCISelDAGToDAG.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/Target/PowerPC/PPCISelDAGToDAG.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/PowerPC/PPCISelDAGToDAG.cpp (revision 348932) @@ -1,6511 +1,6511 @@ //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines a pattern matching instruction selector for PowerPC, // converting from a legalized dag to a PPC dag. // //===----------------------------------------------------------------------===// #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" #include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Module.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include #include #include #include #include using namespace llvm; #define DEBUG_TYPE "ppc-codegen" STATISTIC(NumSextSetcc, "Number of (sext(setcc)) nodes expanded into GPR sequence."); STATISTIC(NumZextSetcc, "Number of (zext(setcc)) nodes expanded into GPR sequence."); STATISTIC(SignExtensionsAdded, "Number of sign extensions for compare inputs added."); STATISTIC(ZeroExtensionsAdded, "Number of zero extensions for compare inputs added."); STATISTIC(NumLogicOpsOnComparison, "Number of logical ops on i1 values calculated in GPR."); STATISTIC(OmittedForNonExtendUses, "Number of compares not eliminated as they have non-extending uses."); STATISTIC(NumP9Setb, "Number of compares lowered to setb."); // FIXME: Remove this once the bug has been fixed! 
cl::opt ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden); static cl::opt UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true), cl::desc("use aggressive ppc isel for bit permutations"), cl::Hidden); static cl::opt BPermRewriterNoMasking( "ppc-bit-perm-rewriter-stress-rotates", cl::desc("stress rotate selection in aggressive ppc isel for " "bit permutations"), cl::Hidden); static cl::opt EnableBranchHint( "ppc-use-branch-hint", cl::init(true), cl::desc("Enable static hinting of branches on ppc"), cl::Hidden); static cl::opt EnableTLSOpt( "ppc-tls-opt", cl::init(true), cl::desc("Enable tls optimization peephole"), cl::Hidden); enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64, ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32, ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 }; static cl::opt CmpInGPR( "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All), cl::desc("Specify the types of comparisons to emit GPR-only code for."), cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."), clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."), clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."), clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."), clEnumValN(ICGPR_NonExtIn, "nonextin", "Only comparisons where inputs don't need [sz]ext."), clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."), clEnumValN(ICGPR_ZextI32, "zexti32", "Only i32 comparisons with zext result."), clEnumValN(ICGPR_ZextI64, "zexti64", "Only i64 comparisons with zext result."), clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."), clEnumValN(ICGPR_SextI32, "sexti32", "Only i32 comparisons with sext result."), clEnumValN(ICGPR_SextI64, "sexti64", "Only i64 comparisons with sext result."))); namespace { //===--------------------------------------------------------------------===// /// PPCDAGToDAGISel - PPC specific code to select PPC machine /// instructions for SelectionDAG operations. /// class PPCDAGToDAGISel : public SelectionDAGISel { const PPCTargetMachine &TM; const PPCSubtarget *PPCSubTarget; const PPCTargetLowering *PPCLowering; unsigned GlobalBaseReg; public: explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel) : SelectionDAGISel(tm, OptLevel), TM(tm) {} bool runOnMachineFunction(MachineFunction &MF) override { // Make sure we re-emit a set of the global base reg if necessary GlobalBaseReg = 0; PPCSubTarget = &MF.getSubtarget(); PPCLowering = PPCSubTarget->getTargetLowering(); SelectionDAGISel::runOnMachineFunction(MF); if (!PPCSubTarget->isSVR4ABI()) InsertVRSaveCode(MF); return true; } void PreprocessISelDAG() override; void PostprocessISelDAG() override; /// getI16Imm - Return a target constant with the specified value, of type /// i16. inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) { return CurDAG->getTargetConstant(Imm, dl, MVT::i16); } /// getI32Imm - Return a target constant with the specified value, of type /// i32. inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { return CurDAG->getTargetConstant(Imm, dl, MVT::i32); } /// getI64Imm - Return a target constant with the specified value, of type /// i64. inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) { return CurDAG->getTargetConstant(Imm, dl, MVT::i64); } /// getSmallIPtrImm - Return a target constant of pointer type. 
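In the LLVM sources the options above carry an explicit element type: cl::opt<bool> for ANDIGlueBug, UseBitPermRewriter, EnableBranchHint and EnableTLSOpt, and cl::opt<ICmpInGPRType> for CmpInGPR. A minimal sketch of that declaration pattern with an invented enum and flag name (not a real PPC option), not part of the patch:

// Illustrative sketch, not from the LLVM sources; the flag name and enum are
// made up for demonstration only.
#include "llvm/Support/CommandLine.h"

using namespace llvm;

namespace {
enum DemoMode { DM_Off, DM_Fast, DM_Precise };
} // end anonymous namespace

static cl::opt<DemoMode> DemoModeOpt(
    "ppc-demo-mode", cl::Hidden, cl::init(DM_Off),
    cl::desc("Illustrative enum option; not a real PPC flag."),
    cl::values(clEnumValN(DM_Off, "off", "Disable the demo transform."),
               clEnumValN(DM_Fast, "fast", "Favor compile time."),
               clEnumValN(DM_Precise, "precise", "Favor precision.")));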
inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) { return CurDAG->getTargetConstant( Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout())); } /// isRotateAndMask - Returns true if Mask and Shift can be folded into a /// rotate and mask opcode and mask operation. static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, unsigned &SH, unsigned &MB, unsigned &ME); /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC /// base register. Return the virtual register that holds this value. SDNode *getGlobalBaseReg(); void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0); // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. void Select(SDNode *N) override; bool tryBitfieldInsert(SDNode *N); bool tryBitPermutation(SDNode *N); bool tryIntCompareInGPR(SDNode *N); // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into // an X-Form load instruction with the offset being a relocation coming from // the PPCISD::ADD_TLS. bool tryTLSXFormLoad(LoadSDNode *N); // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into // an X-Form store instruction with the offset being a relocation coming from // the PPCISD::ADD_TLS. bool tryTLSXFormStore(StoreSDNode *N); /// SelectCC - Select a comparison of the specified values with the /// specified condition code, returning the CR# of the expression. SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &dl); /// SelectAddrImm - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement [r+imm]. bool SelectAddrImm(SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0); } /// SelectAddrImmOffs - Return true if the operand is valid for a preinc /// immediate field. Note that the operand at this point is already the /// result of a prior SelectAddressRegImm call. bool SelectAddrImmOffs(SDValue N, SDValue &Out) const { if (N.getOpcode() == ISD::TargetConstant || N.getOpcode() == ISD::TargetGlobalAddress) { Out = N; return true; } return false; } /// SelectAddrIdx - Given the specified addressed, check to see if it can be /// represented as an indexed [r+r] operation. Returns false if it can /// be represented by [r+imm], which are preferred. bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) { return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG); } /// SelectAddrIdxOnly - Given the specified addressed, force it to be /// represented as an indexed [r+r] operation. bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) { return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG); } /// SelectAddrImmX4 - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement that is a multiple of 4. /// Suitable for use by STD and friends. bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4); } bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16); } // Select an address into a single register. bool SelectAddr(SDValue N, SDValue &Base) { Base = N; return true; } /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. It is always correct to compute the value into /// a register. 
The case of adding a (possibly relocatable) constant to a /// register can be improved, but it is wrong to substitute Reg+Reg for /// Reg in an asm, because the load or store opcode would have to change. bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) override { switch(ConstraintID) { default: errs() << "ConstraintID: " << ConstraintID << "\n"; llvm_unreachable("Unexpected asm memory constraint"); case InlineAsm::Constraint_es: case InlineAsm::Constraint_i: case InlineAsm::Constraint_m: case InlineAsm::Constraint_o: case InlineAsm::Constraint_Q: case InlineAsm::Constraint_Z: case InlineAsm::Constraint_Zy: // We need to make sure that this one operand does not end up in r0 // (because we might end up lowering this as 0(%op)). const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo(); const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1); SDLoc dl(Op); SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32); SDValue NewOp = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, Op.getValueType(), Op, RC), 0); OutOps.push_back(NewOp); return false; } return true; } void InsertVRSaveCode(MachineFunction &MF); StringRef getPassName() const override { return "PowerPC DAG->DAG Pattern Instruction Selection"; } // Include the pieces autogenerated from the target description. #include "PPCGenDAGISel.inc" private: bool trySETCC(SDNode *N); void PeepholePPC64(); void PeepholePPC64ZExt(); void PeepholeCROps(); SDValue combineToCMPB(SDNode *N); void foldBoolExts(SDValue &Res, SDNode *&N); bool AllUsersSelectZero(SDNode *N); void SwapAllSelectUsers(SDNode *N); bool isOffsetMultipleOf(SDNode *N, unsigned Val) const; void transferMemOperands(SDNode *N, SDNode *Result); }; } // end anonymous namespace /// InsertVRSaveCode - Once the entire function has been instruction selected, /// all virtual registers are created and all machine instructions are built, /// check to see if we need to save/restore VRSAVE. If so, do it. void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { // Check to see if this function uses vector registers, which means we have to // save and restore the VRSAVE register and update it with the regs we use. // // In this case, there will be virtual registers of vector type created // by the scheduler. Detect them now. bool HasVectorVReg = false; for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) { HasVectorVReg = true; break; } } if (!HasVectorVReg) return; // nothing to do. // If we have a vector register, we want to emit code into the entry and exit // blocks to save and restore the VRSAVE register. We do this here (instead // of marking all vector instructions as clobbering VRSAVE) for two reasons: // // 1. This (trivially) reduces the load on the register allocator, by not // having to represent the live range of the VRSAVE register. // 2. This (more significantly) allows us to create a temporary virtual // register to hold the saved VRSAVE value, allowing this temporary to be // register allocated, instead of forcing it to be spilled to the stack. // Create two vregs - one to hold the VRSAVE register that is live-in to the // function and one for the value after having bits or'd into it. 
unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo(); MachineBasicBlock &EntryBB = *Fn.begin(); DebugLoc dl; // Emit the following code into the entry block: // InVRSAVE = MFVRSAVE // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE // MTVRSAVE UpdatedVRSAVE MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE); BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE), UpdatedVRSAVE).addReg(InVRSAVE); BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE); // Find all return blocks, outputting a restore in each epilog. for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { if (BB->isReturnBlock()) { IP = BB->end(); --IP; // Skip over all terminator instructions, which are part of the return // sequence. MachineBasicBlock::iterator I2 = IP; while (I2 != BB->begin() && (--I2)->isTerminator()) IP = I2; // Emit: MTVRSAVE InVRSave BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE); } } } /// getGlobalBaseReg - Output the instructions required to put the /// base address to use for accessing globals into a register. /// SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { if (!GlobalBaseReg) { const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo(); // Insert the set of GlobalBaseReg into the first MBB of the function MachineBasicBlock &FirstMBB = MF->front(); MachineBasicBlock::iterator MBBI = FirstMBB.begin(); const Module *M = MF->getFunction().getParent(); DebugLoc dl; if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) { if (PPCSubTarget->isTargetELF()) { GlobalBaseReg = PPC::R30; if (M->getPICLevel() == PICLevel::SmallPIC) { BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); MF->getInfo()->setUsesPICBase(true); } else { BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::UpdateGBR), GlobalBaseReg) .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg); MF->getInfo()->setUsesPICBase(true); } } else { GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); } } else { // We must ensure that this sequence is dominated by the prologue. // FIXME: This is a bit of a big hammer since we don't get the benefits // of shrink-wrapping whenever we emit this instruction. Considering // this is used in any function where we emit a jump table, this may be // a significant limitation. We should consider inserting this in the // block where it is used and then commoning this sequence up if it // appears in multiple places. // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of // MovePCtoLR8. MF->getInfo()->setShrinkWrapDisabled(true); GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg); } } return CurDAG->getRegister(GlobalBaseReg, PPCLowering->getPointerTy(CurDAG->getDataLayout())) .getNode(); } /// isInt32Immediate - This method tests to see if the node is a 32-bit constant /// operand. 
If so Imm will receive the 32-bit value. static bool isInt32Immediate(SDNode *N, unsigned &Imm) { if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { Imm = cast(N)->getZExtValue(); return true; } return false; } /// isInt64Immediate - This method tests to see if the node is a 64-bit constant /// operand. If so Imm will receive the 64-bit value. static bool isInt64Immediate(SDNode *N, uint64_t &Imm) { if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) { Imm = cast(N)->getZExtValue(); return true; } return false; } // isInt32Immediate - This method tests to see if a constant operand. // If so Imm will receive the 32 bit value. static bool isInt32Immediate(SDValue N, unsigned &Imm) { return isInt32Immediate(N.getNode(), Imm); } /// isInt64Immediate - This method tests to see if the value is a 64-bit /// constant operand. If so Imm will receive the 64-bit value. static bool isInt64Immediate(SDValue N, uint64_t &Imm) { return isInt64Immediate(N.getNode(), Imm); } static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo, const SDValue &DestMBB) { assert(isa(DestMBB)); if (!FuncInfo->BPI) return PPC::BR_NO_HINT; const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); const Instruction *BBTerm = BB->getTerminator(); if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT; const BasicBlock *TBB = BBTerm->getSuccessor(0); const BasicBlock *FBB = BBTerm->getSuccessor(1); auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB); auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB); // We only want to handle cases which are easy to predict at static time, e.g. // C++ throw statement, that is very likely not taken, or calling never // returned function, e.g. stdlib exit(). So we set Threshold to filter // unwanted cases. // // Below is LLVM branch weight table, we only want to handle case 1, 2 // // Case Taken:Nontaken Example // 1. Unreachable 1048575:1 C++ throw, stdlib exit(), // 2. Invoke-terminating 1:1048575 // 3. Coldblock 4:64 __builtin_expect // 4. Loop Branch 124:4 For loop // 5. PH/ZH/FPH 20:12 const uint32_t Threshold = 10000; if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb)) return PPC::BR_NO_HINT; LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() << "::" << BB->getName() << "'\n" << " -> " << TBB->getName() << ": " << TProb << "\n" << " -> " << FBB->getName() << ": " << FProb << "\n"); const BasicBlockSDNode *BBDN = cast(DestMBB); // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities, // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock if (BBDN->getBasicBlock()->getBasicBlock() != TBB) std::swap(TProb, FProb); return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT; } // isOpcWithIntImmediate - This method tests to see if the node is a specific // opcode and that it has a immediate integer right operand. // If so Imm will receive the 32 bit value. static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { return N->getOpcode() == Opc && isInt32Immediate(N->getOperand(1).getNode(), Imm); } void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) { SDLoc dl(SN); int FI = cast(N)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); unsigned Opc = N->getValueType(0) == MVT::i32 ? 
PPC::ADDI : PPC::ADDI8; if (SN->hasOneUse()) CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI, getSmallIPtrImm(Offset, dl)); else ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI, getSmallIPtrImm(Offset, dl))); } bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, unsigned &SH, unsigned &MB, unsigned &ME) { // Don't even go down this path for i64, since different logic will be // necessary for rldicl/rldicr/rldimi. if (N->getValueType(0) != MVT::i32) return false; unsigned Shift = 32; unsigned Indeterminant = ~0; // bit mask marking indeterminant results unsigned Opcode = N->getOpcode(); if (N->getNumOperands() != 2 || !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31)) return false; if (Opcode == ISD::SHL) { // apply shift left to mask if it comes first if (isShiftMask) Mask = Mask << Shift; // determine which bits are made indeterminant by shift Indeterminant = ~(0xFFFFFFFFu << Shift); } else if (Opcode == ISD::SRL) { // apply shift right to mask if it comes first if (isShiftMask) Mask = Mask >> Shift; // determine which bits are made indeterminant by shift Indeterminant = ~(0xFFFFFFFFu >> Shift); // adjust for the left rotate Shift = 32 - Shift; } else if (Opcode == ISD::ROTL) { Indeterminant = 0; } else { return false; } // if the mask doesn't intersect any Indeterminant bits if (Mask && !(Mask & Indeterminant)) { SH = Shift & 31; // make sure the mask is still a mask (wrap arounds may not be) return isRunOfOnes(Mask, MB, ME); } return false; } bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) { SDValue Base = ST->getBasePtr(); if (Base.getOpcode() != PPCISD::ADD_TLS) return false; SDValue Offset = ST->getOffset(); if (!Offset.isUndef()) return false; SDLoc dl(ST); EVT MemVT = ST->getMemoryVT(); EVT RegVT = ST->getValue().getValueType(); unsigned Opcode; switch (MemVT.getSimpleVT().SimpleTy) { default: return false; case MVT::i8: { Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS; break; } case MVT::i16: { Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS; break; } case MVT::i32: { Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS; break; } case MVT::i64: { Opcode = PPC::STDXTLS; break; } } SDValue Chain = ST->getChain(); SDVTList VTs = ST->getVTList(); SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1), Chain}; SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); transferMemOperands(ST, MN); ReplaceNode(ST, MN); return true; } bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) { SDValue Base = LD->getBasePtr(); if (Base.getOpcode() != PPCISD::ADD_TLS) return false; SDValue Offset = LD->getOffset(); if (!Offset.isUndef()) return false; SDLoc dl(LD); EVT MemVT = LD->getMemoryVT(); EVT RegVT = LD->getValueType(0); unsigned Opcode; switch (MemVT.getSimpleVT().SimpleTy) { default: return false; case MVT::i8: { Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS; break; } case MVT::i16: { Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS; break; } case MVT::i32: { Opcode = (RegVT == MVT::i32) ? 
PPC::LWZXTLS_32 : PPC::LWZXTLS; break; } case MVT::i64: { Opcode = PPC::LDXTLS; break; } } SDValue Chain = LD->getChain(); SDVTList VTs = LD->getVTList(); SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain}; SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); transferMemOperands(LD, MN); ReplaceNode(LD, MN); return true; } /// Turn an or of two masked values into the rotate left word immediate then /// mask insert (rlwimi) instruction. bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); SDLoc dl(N); KnownBits LKnown = CurDAG->computeKnownBits(Op0); KnownBits RKnown = CurDAG->computeKnownBits(Op1); unsigned TargetMask = LKnown.Zero.getZExtValue(); unsigned InsertMask = RKnown.Zero.getZExtValue(); if ((TargetMask | InsertMask) == 0xFFFFFFFF) { unsigned Op0Opc = Op0.getOpcode(); unsigned Op1Opc = Op1.getOpcode(); unsigned Value, SH = 0; TargetMask = ~TargetMask; InsertMask = ~InsertMask; // If the LHS has a foldable shift and the RHS does not, then swap it to the // RHS so that we can fold the shift into the insert. if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) { if (Op0.getOperand(0).getOpcode() == ISD::SHL || Op0.getOperand(0).getOpcode() == ISD::SRL) { if (Op1.getOperand(0).getOpcode() != ISD::SHL && Op1.getOperand(0).getOpcode() != ISD::SRL) { std::swap(Op0, Op1); std::swap(Op0Opc, Op1Opc); std::swap(TargetMask, InsertMask); } } } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) { if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL && Op1.getOperand(0).getOpcode() != ISD::SRL) { std::swap(Op0, Op1); std::swap(Op0Opc, Op1Opc); std::swap(TargetMask, InsertMask); } } unsigned MB, ME; if (isRunOfOnes(InsertMask, MB, ME)) { if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) && isInt32Immediate(Op1.getOperand(1), Value)) { Op1 = Op1.getOperand(0); SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value; } if (Op1Opc == ISD::AND) { // The AND mask might not be a constant, and we need to make sure that // if we're going to fold the masking with the insert, all bits not // know to be zero in the mask are known to be one. KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1)); bool CanFoldMask = InsertMask == MKnown.One.getZExtValue(); unsigned SHOpc = Op1.getOperand(0).getOpcode(); if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask && isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) { // Note that Value must be in range here (less than 32) because // otherwise there would not be any bits set in InsertMask. Op1 = Op1.getOperand(0).getOperand(0); SH = (SHOpc == ISD::SHL) ? Value : 32 - Value; } } SH &= 31; SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); return true; } } return false; } // Predict the number of instructions that would be generated by calling // selectI64Imm(N). static unsigned selectI64ImmInstrCountDirect(int64_t Imm) { // Assume no remaining bits. unsigned Remainder = 0; // Assume no shift required. unsigned Shift = 0; // If it can't be represented as a 32 bit value. if (!isInt<32>(Imm)) { Shift = countTrailingZeros(Imm); int64_t ImmSh = static_cast(Imm) >> Shift; // If the shifted value fits 32 bits. if (isInt<32>(ImmSh)) { // Go with the shifted value. Imm = ImmSh; } else { // Still stuck with a 64 bit value. Remainder = Imm; Shift = 32; Imm >>= 32; } } // Intermediate operand. unsigned Result = 0; // Handle first 32 bits. 
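tryBitfieldInsert above turns an OR of two masked values into rlwimi (rotate left word immediate then mask insert). A standalone sketch of what that instruction computes, using the PowerPC convention that MB/ME number bits from the most significant end; the helper names are illustrative and this is not from the patch:

// Illustrative sketch, not from the LLVM sources.
#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t V, unsigned S) {
  S &= 31;
  return S ? (V << S) | (V >> (32 - S)) : V;
}

// Mask with ones in big-endian bit positions MB..ME (wrapping when MB > ME).
static uint32_t maskMBME(unsigned MB, unsigned ME) {
  uint32_t Begin = 0xFFFFFFFFu >> MB;
  uint32_t End = 0xFFFFFFFFu << (31 - ME);
  return MB <= ME ? (Begin & End) : (Begin | End);
}

// rlwimi RA, RS, SH, MB, ME: rotate RS left by SH and insert it into RA under
// the mask; bits of RA outside the mask are preserved.
static uint32_t rlwimi(uint32_t RA, uint32_t RS, unsigned SH, unsigned MB,
                       unsigned ME) {
  uint32_t M = maskMBME(MB, ME);
  return (rotl32(RS, SH) & M) | (RA & ~M);
}

int main() {
  // Insert the low byte of RS into byte 1 of RA: an OR of two masked values.
  assert(rlwimi(0xAABBCCDD, 0x00000012, 8, 16, 23) == 0xAABB12DD);
  return 0;
}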
unsigned Lo = Imm & 0xFFFF; // Simple value. if (isInt<16>(Imm)) { // Just the Lo bits. ++Result; } else if (Lo) { // Handle the Hi bits and Lo bits. Result += 2; } else { // Just the Hi bits. ++Result; } // If no shift, we're done. if (!Shift) return Result; // If Hi word == Lo word, // we can use rldimi to insert the Lo word into Hi word. if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) { ++Result; return Result; } // Shift for next step if the upper 32-bits were not zero. if (Imm) ++Result; // Add in the last bits as required. if ((Remainder >> 16) & 0xFFFF) ++Result; if (Remainder & 0xFFFF) ++Result; return Result; } static uint64_t Rot64(uint64_t Imm, unsigned R) { return (Imm << R) | (Imm >> (64 - R)); } static unsigned selectI64ImmInstrCount(int64_t Imm) { unsigned Count = selectI64ImmInstrCountDirect(Imm); // If the instruction count is 1 or 2, we do not need further analysis // since rotate + load constant requires at least 2 instructions. if (Count <= 2) return Count; for (unsigned r = 1; r < 63; ++r) { uint64_t RImm = Rot64(Imm, r); unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1; Count = std::min(Count, RCount); // See comments in selectI64Imm for an explanation of the logic below. unsigned LS = findLastSet(RImm); if (LS != r-1) continue; uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1)); uint64_t RImmWithOnes = RImm | OnesMask; RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1; Count = std::min(Count, RCount); } return Count; } // Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount // (above) needs to be kept in sync with this function. static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm) { // Assume no remaining bits. unsigned Remainder = 0; // Assume no shift required. unsigned Shift = 0; // If it can't be represented as a 32 bit value. if (!isInt<32>(Imm)) { Shift = countTrailingZeros(Imm); int64_t ImmSh = static_cast(Imm) >> Shift; // If the shifted value fits 32 bits. if (isInt<32>(ImmSh)) { // Go with the shifted value. Imm = ImmSh; } else { // Still stuck with a 64 bit value. Remainder = Imm; Shift = 32; Imm >>= 32; } } // Intermediate operand. SDNode *Result; // Handle first 32 bits. unsigned Lo = Imm & 0xFFFF; unsigned Hi = (Imm >> 16) & 0xFFFF; auto getI32Imm = [CurDAG, dl](unsigned Imm) { return CurDAG->getTargetConstant(Imm, dl, MVT::i32); }; // Simple value. if (isInt<16>(Imm)) { uint64_t SextImm = SignExtend64(Lo, 16); SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); // Just the Lo bits. Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); } else if (Lo) { // Handle the Hi bits. unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8; Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi)); // And Lo bits. Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), getI32Imm(Lo)); } else { // Just the Hi bits. Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi)); } // If no shift, we're done. if (!Shift) return Result; // If Hi word == Lo word, // we can use rldimi to insert the Lo word into Hi word. if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) { SDValue Ops[] = { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)}; return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); } // Shift for next step if the upper 32-bits were not zero. if (Imm) { Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, SDValue(Result, 0), getI32Imm(Shift), getI32Imm(63 - Shift)); } // Add in the last bits as required. 
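The surrounding selectI64ImmDirect builds constants from lis/ori pairs: lis materializes the high halfword shifted left by 16 and sign-extended, and ori then ORs in the low halfword; for values wider than 32 bits the same pair is applied again after the rldicr shift. A standalone sketch of that arithmetic, with illustrative names, not from the patch:

// Illustrative sketch, not from the LLVM sources.
#include <cassert>
#include <cstdint>

// Model of "lis Reg, Hi" on a 64-bit register: load Hi << 16, sign-extended.
static int64_t lis(uint16_t Hi) {
  return static_cast<int32_t>(static_cast<uint32_t>(Hi) << 16);
}

// Model of "ori Reg, Reg, Lo": OR in a zero-extended 16-bit immediate.
static int64_t ori(int64_t Reg, uint16_t Lo) { return Reg | Lo; }

int main() {
  // 0x12345678 = lis 0x1234 ; ori 0x5678
  assert(ori(lis(0x1234), 0x5678) == 0x12345678);
  // A negative 32-bit constant: the sign extension from lis does the work.
  assert(ori(lis(0xFFFF), 0x8001) == -32767);
  return 0;
}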
if ((Hi = (Remainder >> 16) & 0xFFFF)) { Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0), getI32Imm(Hi)); } if ((Lo = Remainder & 0xFFFF)) { Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), getI32Imm(Lo)); } return Result; } static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm) { unsigned Count = selectI64ImmInstrCountDirect(Imm); // If the instruction count is 1 or 2, we do not need further analysis // since rotate + load constant requires at least 2 instructions. if (Count <= 2) return selectI64ImmDirect(CurDAG, dl, Imm); unsigned RMin = 0; int64_t MatImm; unsigned MaskEnd; for (unsigned r = 1; r < 63; ++r) { uint64_t RImm = Rot64(Imm, r); unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1; if (RCount < Count) { Count = RCount; RMin = r; MatImm = RImm; MaskEnd = 63; } // If the immediate to generate has many trailing zeros, it might be // worthwhile to generate a rotated value with too many leading ones // (because that's free with li/lis's sign-extension semantics), and then // mask them off after rotation. unsigned LS = findLastSet(RImm); // We're adding (63-LS) higher-order ones, and we expect to mask them off // after performing the inverse rotation by (64-r). So we need that: // 63-LS == 64-r => LS == r-1 if (LS != r-1) continue; uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1)); uint64_t RImmWithOnes = RImm | OnesMask; RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1; if (RCount < Count) { Count = RCount; RMin = r; MatImm = RImmWithOnes; MaskEnd = LS; } } if (!RMin) return selectI64ImmDirect(CurDAG, dl, Imm); auto getI32Imm = [CurDAG, dl](unsigned Imm) { return CurDAG->getTargetConstant(Imm, dl, MVT::i32); }; SDValue Val = SDValue(selectI64ImmDirect(CurDAG, dl, MatImm), 0); return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val, getI32Imm(64 - RMin), getI32Imm(MaskEnd)); } static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) { unsigned MaxTruncation = 0; // Cannot use range-based for loop here as we need the actual use (i.e. we // need the operand number corresponding to the use). A range-based for // will unbox the use and provide an SDNode*. for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end(); Use != UseEnd; ++Use) { unsigned Opc = Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode(); switch (Opc) { default: return 0; case ISD::TRUNCATE: if (Use->isMachineOpcode()) return 0; MaxTruncation = std::max(MaxTruncation, Use->getValueType(0).getSizeInBits()); continue; case ISD::STORE: { if (Use->isMachineOpcode()) return 0; StoreSDNode *STN = cast(*Use); unsigned MemVTSize = STN->getMemoryVT().getSizeInBits(); if (MemVTSize == 64 || Use.getOperandNo() != 0) return 0; MaxTruncation = std::max(MaxTruncation, MemVTSize); continue; } case PPC::STW8: case PPC::STWX8: case PPC::STWU8: case PPC::STWUX8: if (Use.getOperandNo() != 0) return 0; MaxTruncation = std::max(MaxTruncation, 32u); continue; case PPC::STH8: case PPC::STHX8: case PPC::STHU8: case PPC::STHUX8: if (Use.getOperandNo() != 0) return 0; MaxTruncation = std::max(MaxTruncation, 16u); continue; case PPC::STB8: case PPC::STBX8: case PPC::STBU8: case PPC::STBUX8: if (Use.getOperandNo() != 0) return 0; MaxTruncation = std::max(MaxTruncation, 8u); continue; } } return MaxTruncation; } // Select a 64-bit constant. static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) { SDLoc dl(N); // Get 64 bit value. 
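// A standalone sketch of why the "rotate with extra leading ones" trick in
// selectI64Imm above is sound: the final RLDICR both undoes the rotation and
// clears the 63-MaskEnd leading ones that LI/LIS sign extension produced for
// free.  ModelRotl64, ModelRLDICR and ModelCheckRotatedMaterialization are
// made-up helpers, not definitions from this file.  Assumes <cstdint>.
#include <cstdint>
static inline uint64_t ModelRotl64(uint64_t V, unsigned R) {
  R &= 63;
  return R ? (V << R) | (V >> (64 - R)) : V;
}
// rldicr rA, rS, SH, ME: rotate left by SH and keep PPC bits 0..ME (the
// ME+1 most-significant bits), clearing the rest.
static inline uint64_t ModelRLDICR(uint64_t V, unsigned SH, unsigned ME) {
  uint64_t Mask = ~UINT64_C(0) << (63 - ME);
  return ModelRotl64(V, SH) & Mask;
}
// For MatImm = Rot64(Imm, RMin), possibly with the top 63-MaskEnd bits
// forced to one, rotating back by 64-RMin and masking through bit MaskEnd
// reproduces the original immediate.
static inline bool ModelCheckRotatedMaterialization(uint64_t Imm, unsigned RMin,
                                                    uint64_t MatImm,
                                                    unsigned MaskEnd) {
  return ModelRLDICR(MatImm, 64 - RMin, MaskEnd) == Imm;
}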
int64_t Imm = cast(N)->getZExtValue(); if (unsigned MinSize = allUsesTruncate(CurDAG, N)) { uint64_t SextImm = SignExtend64(Imm, MinSize); SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); if (isInt<16>(SextImm)) return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); } return selectI64Imm(CurDAG, dl, Imm); } namespace { class BitPermutationSelector { struct ValueBit { SDValue V; // The bit number in the value, using a convention where bit 0 is the // lowest-order bit. unsigned Idx; // ConstZero means a bit we need to mask off. // Variable is a bit comes from an input variable. // VariableKnownToBeZero is also a bit comes from an input variable, // but it is known to be already zero. So we do not need to mask them. enum Kind { ConstZero, Variable, VariableKnownToBeZero } K; ValueBit(SDValue V, unsigned I, Kind K = Variable) : V(V), Idx(I), K(K) {} ValueBit(Kind K = Variable) : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {} bool isZero() const { return K == ConstZero || K == VariableKnownToBeZero; } bool hasValue() const { return K == Variable || K == VariableKnownToBeZero; } SDValue getValue() const { assert(hasValue() && "Cannot get the value of a constant bit"); return V; } unsigned getValueBitIndex() const { assert(hasValue() && "Cannot get the value bit index of a constant bit"); return Idx; } }; // A bit group has the same underlying value and the same rotate factor. struct BitGroup { SDValue V; unsigned RLAmt; unsigned StartIdx, EndIdx; // This rotation amount assumes that the lower 32 bits of the quantity are // replicated in the high 32 bits by the rotation operator (which is done // by rlwinm and friends in 64-bit mode). bool Repl32; // Did converting to Repl32 == true change the rotation factor? If it did, // it decreased it by 32. bool Repl32CR; // Was this group coalesced after setting Repl32 to true? bool Repl32Coalesced; BitGroup(SDValue V, unsigned R, unsigned S, unsigned E) : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false), Repl32Coalesced(false) { LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R << " [" << S << ", " << E << "]\n"); } }; // Information on each (Value, RLAmt) pair (like the number of groups // associated with each) used to choose the lowering method. struct ValueRotInfo { SDValue V; unsigned RLAmt = std::numeric_limits::max(); unsigned NumGroups = 0; unsigned FirstGroupStartIdx = std::numeric_limits::max(); bool Repl32 = false; ValueRotInfo() = default; // For sorting (in reverse order) by NumGroups, and then by // FirstGroupStartIdx. bool operator < (const ValueRotInfo &Other) const { // We need to sort so that the non-Repl32 come first because, when we're // doing masking, the Repl32 bit groups might be subsumed into the 64-bit // masking operation. if (Repl32 < Other.Repl32) return true; else if (Repl32 > Other.Repl32) return false; else if (NumGroups > Other.NumGroups) return true; else if (NumGroups < Other.NumGroups) return false; else if (RLAmt == 0 && Other.RLAmt != 0) return true; else if (RLAmt != 0 && Other.RLAmt == 0) return false; else if (FirstGroupStartIdx < Other.FirstGroupStartIdx) return true; return false; } }; using ValueBitsMemoizedValue = std::pair>; using ValueBitsMemoizer = DenseMap>; ValueBitsMemoizer Memoizer; // Return a pair of bool and a SmallVector pointer to a memoization entry. 
// The bool is true if something interesting was deduced, otherwise if we're // providing only a generic representation of V (or something else likewise // uninteresting for instruction selection) through the SmallVector. std::pair *> getValueBits(SDValue V, unsigned NumBits) { auto &ValueEntry = Memoizer[V]; if (ValueEntry) return std::make_pair(ValueEntry->first, &ValueEntry->second); ValueEntry.reset(new ValueBitsMemoizedValue()); bool &Interesting = ValueEntry->first; SmallVector &Bits = ValueEntry->second; Bits.resize(NumBits); switch (V.getOpcode()) { default: break; case ISD::ROTL: if (isa(V.getOperand(1))) { unsigned RotAmt = V.getConstantOperandVal(1); const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; for (unsigned i = 0; i < NumBits; ++i) Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt]; return std::make_pair(Interesting = true, &Bits); } break; case ISD::SHL: if (isa(V.getOperand(1))) { unsigned ShiftAmt = V.getConstantOperandVal(1); const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; for (unsigned i = ShiftAmt; i < NumBits; ++i) Bits[i] = LHSBits[i - ShiftAmt]; for (unsigned i = 0; i < ShiftAmt; ++i) Bits[i] = ValueBit(ValueBit::ConstZero); return std::make_pair(Interesting = true, &Bits); } break; case ISD::SRL: if (isa(V.getOperand(1))) { unsigned ShiftAmt = V.getConstantOperandVal(1); const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; for (unsigned i = 0; i < NumBits - ShiftAmt; ++i) Bits[i] = LHSBits[i + ShiftAmt]; for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i) Bits[i] = ValueBit(ValueBit::ConstZero); return std::make_pair(Interesting = true, &Bits); } break; case ISD::AND: if (isa(V.getOperand(1))) { uint64_t Mask = V.getConstantOperandVal(1); const SmallVector *LHSBits; // Mark this as interesting, only if the LHS was also interesting. This // prevents the overall procedure from matching a single immediate 'and' // (which is non-optimal because such an and might be folded with other // things if we don't select it here). std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits); for (unsigned i = 0; i < NumBits; ++i) if (((Mask >> i) & 1) == 1) Bits[i] = (*LHSBits)[i]; else { // AND instruction masks this bit. If the input is already zero, // we have nothing to do here. Otherwise, make the bit ConstZero. if ((*LHSBits)[i].isZero()) Bits[i] = (*LHSBits)[i]; else Bits[i] = ValueBit(ValueBit::ConstZero); } return std::make_pair(Interesting, &Bits); } break; case ISD::OR: { const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second; bool AllDisjoint = true; SDValue LastVal = SDValue(); unsigned LastIdx = 0; for (unsigned i = 0; i < NumBits; ++i) { if (LHSBits[i].isZero() && RHSBits[i].isZero()) { // If both inputs are known to be zero and one is ConstZero and // another is VariableKnownToBeZero, we can select whichever // we like. To minimize the number of bit groups, we select // VariableKnownToBeZero if this bit is the next bit of the same // input variable from the previous bit. Otherwise, we select // ConstZero. 
if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal && LHSBits[i].getValueBitIndex() == LastIdx + 1) Bits[i] = LHSBits[i]; else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal && RHSBits[i].getValueBitIndex() == LastIdx + 1) Bits[i] = RHSBits[i]; else Bits[i] = ValueBit(ValueBit::ConstZero); } else if (LHSBits[i].isZero()) Bits[i] = RHSBits[i]; else if (RHSBits[i].isZero()) Bits[i] = LHSBits[i]; else { AllDisjoint = false; break; } // We remember the value and bit index of this bit. if (Bits[i].hasValue()) { LastVal = Bits[i].getValue(); LastIdx = Bits[i].getValueBitIndex(); } else { if (LastVal) LastVal = SDValue(); LastIdx = 0; } } if (!AllDisjoint) break; return std::make_pair(Interesting = true, &Bits); } case ISD::ZERO_EXTEND: { // We support only the case with zero extension from i32 to i64 so far. if (V.getValueType() != MVT::i64 || V.getOperand(0).getValueType() != MVT::i32) break; const SmallVector *LHSBits; const unsigned NumOperandBits = 32; std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumOperandBits); for (unsigned i = 0; i < NumOperandBits; ++i) Bits[i] = (*LHSBits)[i]; for (unsigned i = NumOperandBits; i < NumBits; ++i) Bits[i] = ValueBit(ValueBit::ConstZero); return std::make_pair(Interesting, &Bits); } case ISD::TRUNCATE: { EVT FromType = V.getOperand(0).getValueType(); EVT ToType = V.getValueType(); // We support only the case with truncate from i64 to i32. if (FromType != MVT::i64 || ToType != MVT::i32) break; const unsigned NumAllBits = FromType.getSizeInBits(); SmallVector *InBits; std::tie(Interesting, InBits) = getValueBits(V.getOperand(0), NumAllBits); const unsigned NumValidBits = ToType.getSizeInBits(); // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value. // So, we cannot include this truncate. bool UseUpper32bit = false; for (unsigned i = 0; i < NumValidBits; ++i) if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) { UseUpper32bit = true; break; } if (UseUpper32bit) break; for (unsigned i = 0; i < NumValidBits; ++i) Bits[i] = (*InBits)[i]; return std::make_pair(Interesting, &Bits); } case ISD::AssertZext: { // For AssertZext, we look through the operand and // mark the bits known to be zero. const SmallVector *LHSBits; std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits); EVT FromType = cast(V.getOperand(1))->getVT(); const unsigned NumValidBits = FromType.getSizeInBits(); for (unsigned i = 0; i < NumValidBits; ++i) Bits[i] = (*LHSBits)[i]; // These bits are known to be zero. for (unsigned i = NumValidBits; i < NumBits; ++i) Bits[i] = ValueBit((*LHSBits)[i].getValue(), (*LHSBits)[i].getValueBitIndex(), ValueBit::VariableKnownToBeZero); return std::make_pair(Interesting, &Bits); } case ISD::LOAD: LoadSDNode *LD = cast(V); if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) { EVT VT = LD->getMemoryVT(); const unsigned NumValidBits = VT.getSizeInBits(); for (unsigned i = 0; i < NumValidBits; ++i) Bits[i] = ValueBit(V, i); // These bits are known to be zero. for (unsigned i = NumValidBits; i < NumBits; ++i) Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero); // Zero-extending load itself cannot be optimized. So, it is not // interesting by itself though it gives useful information. 
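// A standalone sketch of the per-bit bookkeeping getValueBits performs
// above: every result bit is either a known zero or "bit SrcIdx of some
// input value".  For a left shift by S, result bit DstIdx carries input bit
// DstIdx - S when DstIdx >= S and a constant zero below that, which is
// exactly how the SHL case above fills in Bits[].  ModelShlBitSource is a
// made-up helper returning true (and the source index) for variable bits.
static inline bool ModelShlBitSource(unsigned DstIdx, unsigned S,
                                     unsigned &SrcIdx) {
  if (DstIdx < S)
    return false;        // ConstZero: a shifted-in zero bit
  SrcIdx = DstIdx - S;   // Variable: comes from the shift's input
  return true;
}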
return std::make_pair(Interesting = false, &Bits); } break; } for (unsigned i = 0; i < NumBits; ++i) Bits[i] = ValueBit(V, i); return std::make_pair(Interesting = false, &Bits); } // For each value (except the constant ones), compute the left-rotate amount // to get it from its original to final position. void computeRotationAmounts() { NeedMask = false; RLAmt.resize(Bits.size()); for (unsigned i = 0; i < Bits.size(); ++i) if (Bits[i].hasValue()) { unsigned VBI = Bits[i].getValueBitIndex(); if (i >= VBI) RLAmt[i] = i - VBI; else RLAmt[i] = Bits.size() - (VBI - i); } else if (Bits[i].isZero()) { NeedMask = true; RLAmt[i] = UINT32_MAX; } else { llvm_unreachable("Unknown value bit type"); } } // Collect groups of consecutive bits with the same underlying value and // rotation factor. If we're doing late masking, we ignore zeros, otherwise // they break up groups. void collectBitGroups(bool LateMask) { BitGroups.clear(); unsigned LastRLAmt = RLAmt[0]; SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue(); unsigned LastGroupStartIdx = 0; bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue(); for (unsigned i = 1; i < Bits.size(); ++i) { unsigned ThisRLAmt = RLAmt[i]; SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue(); if (LateMask && !ThisValue) { ThisValue = LastValue; ThisRLAmt = LastRLAmt; // If we're doing late masking, then the first bit group always starts // at zero (even if the first bits were zero). if (BitGroups.empty()) LastGroupStartIdx = 0; } // If this bit is known to be zero and the current group is a bit group // of zeros, we do not need to terminate the current bit group even the // Value or RLAmt does not match here. Instead, we terminate this group // when the first non-zero bit appears later. if (IsGroupOfZeros && Bits[i].isZero()) continue; // If this bit has the same underlying value and the same rotate factor as // the last one, then they're part of the same group. if (ThisRLAmt == LastRLAmt && ThisValue == LastValue) // We cannot continue the current group if this bits is not known to // be zero in a bit group of zeros. if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero())) continue; if (LastValue.getNode()) BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, i-1)); LastRLAmt = ThisRLAmt; LastValue = ThisValue; LastGroupStartIdx = i; IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue(); } if (LastValue.getNode()) BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, Bits.size()-1)); if (BitGroups.empty()) return; // We might be able to combine the first and last groups. if (BitGroups.size() > 1) { // If the first and last groups are the same, then remove the first group // in favor of the last group, making the ending index of the last group // equal to the ending index of the to-be-removed first group. if (BitGroups[0].StartIdx == 0 && BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 && BitGroups[0].V == BitGroups[BitGroups.size()-1].V && BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) { LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n"); BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx; BitGroups.erase(BitGroups.begin()); } } } // Take all (SDValue, RLAmt) pairs and sort them by the number of groups // associated with each. If the number of groups are same, we prefer a group // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate // instruction. If there is a degeneracy, pick the one that occurs // first (in the final value). 
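// A standalone sketch of the rotation amounts computed above: the
// left-rotate needed to move a value bit into its final position is just
// (destination index - source index) modulo the width.  For example, moving
// source bit 30 of a 32-bit value to destination bit 2 needs a left-rotate
// by (2 - 30) mod 32 = 4.  ModelRotAmt is a made-up helper.
static inline unsigned ModelRotAmt(unsigned DstIdx, unsigned SrcIdx,
                                   unsigned NumBits) {
  return DstIdx >= SrcIdx ? DstIdx - SrcIdx : NumBits - (SrcIdx - DstIdx);
}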
void collectValueRotInfo() { ValueRots.clear(); for (auto &BG : BitGroups) { unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0); ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)]; VRI.V = BG.V; VRI.RLAmt = BG.RLAmt; VRI.Repl32 = BG.Repl32; VRI.NumGroups += 1; VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx); } // Now that we've collected the various ValueRotInfo instances, we need to // sort them. ValueRotsVec.clear(); for (auto &I : ValueRots) { ValueRotsVec.push_back(I.second); } llvm::sort(ValueRotsVec); } // In 64-bit mode, rlwinm and friends have a rotation operator that // replicates the low-order 32 bits into the high-order 32-bits. The mask // indices of these instructions can only be in the lower 32 bits, so they // can only represent some 64-bit bit groups. However, when they can be used, // the 32-bit replication can be used to represent, as a single bit group, // otherwise separate bit groups. We'll convert to replicated-32-bit bit // groups when possible. Returns true if any of the bit groups were // converted. void assignRepl32BitGroups() { // If we have bits like this: // // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24 // Groups: | RLAmt = 8 | RLAmt = 40 | // // But, making use of a 32-bit operation that replicates the low-order 32 // bits into the high-order 32 bits, this can be one bit group with a RLAmt // of 8. auto IsAllLow32 = [this](BitGroup & BG) { if (BG.StartIdx <= BG.EndIdx) { for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) { if (!Bits[i].hasValue()) continue; if (Bits[i].getValueBitIndex() >= 32) return false; } } else { for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) { if (!Bits[i].hasValue()) continue; if (Bits[i].getValueBitIndex() >= 32) return false; } for (unsigned i = 0; i <= BG.EndIdx; ++i) { if (!Bits[i].hasValue()) continue; if (Bits[i].getValueBitIndex() >= 32) return false; } } return true; }; for (auto &BG : BitGroups) { // If this bit group has RLAmt of 0 and will not be merged with // another bit group, we don't benefit from Repl32. We don't mark // such group to give more freedom for later instruction selection. if (BG.RLAmt == 0) { auto PotentiallyMerged = [this](BitGroup & BG) { for (auto &BG2 : BitGroups) if (&BG != &BG2 && BG.V == BG2.V && (BG2.RLAmt == 0 || BG2.RLAmt == 32)) return true; return false; }; if (!PotentiallyMerged(BG)) continue; } if (BG.StartIdx < 32 && BG.EndIdx < 32) { if (IsAllLow32(BG)) { if (BG.RLAmt >= 32) { BG.RLAmt -= 32; BG.Repl32CR = true; } BG.Repl32 = true; LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for " << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " [" << BG.StartIdx << ", " << BG.EndIdx << "]\n"); } } } // Now walk through the bit groups, consolidating where possible. for (auto I = BitGroups.begin(); I != BitGroups.end();) { // We might want to remove this bit group by merging it with the previous // group (which might be the ending group). auto IP = (I == BitGroups.begin()) ? 
std::prev(BitGroups.end()) : std::prev(I); if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt && I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) { LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " << I->V.getNode() << " RLAmt = " << I->RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with group with range [" << IP->StartIdx << ", " << IP->EndIdx << "]\n"); IP->EndIdx = I->EndIdx; IP->Repl32CR = IP->Repl32CR || I->Repl32CR; IP->Repl32Coalesced = true; I = BitGroups.erase(I); continue; } else { // There is a special case worth handling: If there is a single group // covering the entire upper 32 bits, and it can be merged with both // the next and previous groups (which might be the same group), then // do so. If it is the same group (so there will be only one group in // total), then we need to reverse the order of the range so that it // covers the entire 64 bits. if (I->StartIdx == 32 && I->EndIdx == 63) { assert(std::next(I) == BitGroups.end() && "bit group ends at index 63 but there is another?"); auto IN = BitGroups.begin(); if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V && (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt && IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP && IsAllLow32(*I)) { LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode() << " RLAmt = " << I->RLAmt << " [" << I->StartIdx << ", " << I->EndIdx << "] with 32-bit replicated groups with ranges [" << IP->StartIdx << ", " << IP->EndIdx << "] and [" << IN->StartIdx << ", " << IN->EndIdx << "]\n"); if (IP == IN) { // There is only one other group; change it to cover the whole // range (backward, so that it can still be Repl32 but cover the // whole 64-bit range). IP->StartIdx = 31; IP->EndIdx = 30; IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32; IP->Repl32Coalesced = true; I = BitGroups.erase(I); } else { // There are two separate groups, one before this group and one // after us (at the beginning). We're going to remove this group, // but also the group at the very beginning. IP->EndIdx = IN->EndIdx; IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32; IP->Repl32Coalesced = true; I = BitGroups.erase(I); BitGroups.erase(BitGroups.begin()); } // This must be the last group in the vector (and we might have // just invalidated the iterator above), so break here. break; } } } ++I; } } SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { return CurDAG->getTargetConstant(Imm, dl, MVT::i32); } uint64_t getZerosMask() { uint64_t Mask = 0; for (unsigned i = 0; i < Bits.size(); ++i) { if (Bits[i].hasValue()) continue; Mask |= (UINT64_C(1) << i); } return ~Mask; } // This method extends an input value to 64 bit if input is 32-bit integer. // While selecting instructions in BitPermutationSelector in 64-bit mode, // an input value can be a 32-bit integer if a ZERO_EXTEND node is included. // In such case, we extend it to 64 bit to be consistent with other values. 
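// A standalone sketch of the 32-bit-replicated rotation that the Repl32
// groups above rely on: in 64-bit mode, rlwinm and friends first replicate
// the low 32 bits of the source into the high 32 bits and then rotate the
// 64-bit result, so one 32-bit rotate can stand in for what would otherwise
// be two separate 64-bit bit groups.  ModelRepl32Rot is a made-up helper.
// Assumes <cstdint>.
#include <cstdint>
static inline uint64_t ModelRepl32Rot(uint64_t V, unsigned R) {
  uint64_t Lo = V & 0xFFFFFFFFu;
  uint64_t Doubled = (Lo << 32) | Lo;   // low word replicated into high word
  R &= 63;
  return R ? (Doubled << R) | (Doubled >> (64 - R)) : Doubled;
}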
SDValue ExtendToInt64(SDValue V, const SDLoc &dl) { if (V.getValueSizeInBits() == 64) return V; assert(V.getValueSizeInBits() == 32); SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0); SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64, ImDef, V, SubRegIdx), 0); return ExtVal; } SDValue TruncateToInt32(SDValue V, const SDLoc &dl) { if (V.getValueSizeInBits() == 32) return V; assert(V.getValueSizeInBits() == 64); SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32, V, SubRegIdx), 0); return SubVal; } // Depending on the number of groups for a particular value, it might be // better to rotate, mask explicitly (using andi/andis), and then or the // result. Select this part of the result first. void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) { if (BPermRewriterNoMasking) return; for (ValueRotInfo &VRI : ValueRotsVec) { unsigned Mask = 0; for (unsigned i = 0; i < Bits.size(); ++i) { if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V) continue; if (RLAmt[i] != VRI.RLAmt) continue; Mask |= (1u << i); } // Compute the masks for andi/andis that would be necessary. unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16; assert((ANDIMask != 0 || ANDISMask != 0) && "No set bits in mask for value bit groups"); bool NeedsRotate = VRI.RLAmt != 0; // We're trying to minimize the number of instructions. If we have one // group, using one of andi/andis can break even. If we have three // groups, we can use both andi and andis and break even (to use both // andi and andis we also need to or the results together). We need four // groups if we also need to rotate. To use andi/andis we need to do more // than break even because rotate-and-mask instructions tend to be easier // to schedule. // FIXME: We've biased here against using andi/andis, which is right for // POWER cores, but not optimal everywhere. For example, on the A2, // andi/andis have single-cycle latency whereas the rotate-and-mask // instructions take two cycles, and it would be better to bias toward // andi/andis in break-even cases. 
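// A standalone sketch of the masking alternative weighed below: andi. and
// andis. each take only a 16-bit immediate, so a 32-bit mask is split into
// its low and high halfwords and the two masked values are OR'ed back
// together.  That is why the count below charges one instruction per
// nonzero halfword plus one OR when both are nonzero.  ModelAndiAndis is a
// made-up helper.  Assumes <cstdint>.
#include <cstdint>
static inline uint32_t ModelAndiAndis(uint32_t V, uint32_t Mask) {
  uint32_t ANDIMask = Mask & 0xFFFFu;    // immediate for andi.
  uint32_t ANDISMask = Mask >> 16;       // immediate for andis.
  uint32_t Lo = V & ANDIMask;            // andi. result
  uint32_t Hi = V & (ANDISMask << 16);   // andis. result
  return Lo | Hi;                        // equals V & Mask
}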
unsigned NumAndInsts = (unsigned) NeedsRotate + (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) + (unsigned) (ANDIMask != 0 && ANDISMask != 0) + (unsigned) (bool) Res; LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() << " RL: " << VRI.RLAmt << ":" << "\n\t\t\tisel using masking: " << NumAndInsts << " using rotates: " << VRI.NumGroups << "\n"); if (NumAndInsts >= VRI.NumGroups) continue; LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n"); if (InstCnt) *InstCnt += NumAndInsts; SDValue VRot; if (VRI.RLAmt) { SDValue Ops[] = { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl), getI32Imm(31, dl) }; VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); } else { VRot = TruncateToInt32(VRI.V, dl); } SDValue ANDIVal, ANDISVal; if (ANDIMask != 0) ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32, VRot, getI32Imm(ANDIMask, dl)), 0); if (ANDISMask != 0) ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32, VRot, getI32Imm(ANDISMask, dl)), 0); SDValue TotalVal; if (!ANDIVal) TotalVal = ANDISVal; else if (!ANDISVal) TotalVal = ANDIVal; else TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, ANDIVal, ANDISVal), 0); if (!Res) Res = TotalVal; else Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, Res, TotalVal), 0); // Now, remove all groups with this underlying value and rotation // factor. eraseMatchingBitGroups([VRI](const BitGroup &BG) { return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt; }); } } // Instruction selection for the 32-bit case. SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) { SDLoc dl(N); SDValue Res; if (InstCnt) *InstCnt = 0; // Take care of cases that should use andi/andis first. SelectAndParts32(dl, Res, InstCnt); // If we've not yet selected a 'starting' instruction, and we have no zeros // to fill in, select the (Value, RLAmt) with the highest priority (largest // number of groups), and start with this rotated value. if ((!NeedMask || LateMask) && !Res) { ValueRotInfo &VRI = ValueRotsVec[0]; if (VRI.RLAmt) { if (InstCnt) *InstCnt += 1; SDValue Ops[] = { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl), getI32Imm(31, dl) }; Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); } else { Res = TruncateToInt32(VRI.V, dl); } // Now, remove all groups with this underlying value and rotation factor. eraseMatchingBitGroups([VRI](const BitGroup &BG) { return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt; }); } if (InstCnt) *InstCnt += BitGroups.size(); // Insert the other groups (one at a time). 
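// A standalone sketch of the operand conversion done by the loop below: a
// bit group occupying low-order-numbered bits [StartIdx, EndIdx] becomes the
// MB/ME operands of rlwinm/rlwimi, which count bits from the most
// significant end, so MB = NumBits - EndIdx - 1 and ME = NumBits - StartIdx
// - 1 (NumBits is 32 here).  ModelMBME/ModelGroupToMBME are made-up names.
struct ModelMBME { unsigned MB, ME; };
static inline ModelMBME ModelGroupToMBME(unsigned StartIdx, unsigned EndIdx,
                                         unsigned NumBits) {
  return {NumBits - EndIdx - 1, NumBits - StartIdx - 1};
}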
for (auto &BG : BitGroups) { if (!Res) { SDValue Ops[] = { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl), getI32Imm(Bits.size() - BG.EndIdx - 1, dl), getI32Imm(Bits.size() - BG.StartIdx - 1, dl) }; Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); } else { SDValue Ops[] = { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl), getI32Imm(Bits.size() - BG.EndIdx - 1, dl), getI32Imm(Bits.size() - BG.StartIdx - 1, dl) }; Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0); } } if (LateMask) { unsigned Mask = (unsigned) getZerosMask(); unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16; assert((ANDIMask != 0 || ANDISMask != 0) && "No set bits in zeros mask?"); if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) + (unsigned) (ANDIMask != 0 && ANDISMask != 0); SDValue ANDIVal, ANDISVal; if (ANDIMask != 0) ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32, Res, getI32Imm(ANDIMask, dl)), 0); if (ANDISMask != 0) ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32, Res, getI32Imm(ANDISMask, dl)), 0); if (!ANDIVal) Res = ANDISVal; else if (!ANDISVal) Res = ANDIVal; else Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, ANDIVal, ANDISVal), 0); } return Res.getNode(); } unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32, unsigned MaskStart, unsigned MaskEnd, bool IsIns) { // In the notation used by the instructions, 'start' and 'end' are reversed // because bits are counted from high to low order. unsigned InstMaskStart = 64 - MaskEnd - 1, InstMaskEnd = 64 - MaskStart - 1; if (Repl32) return 1; if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) || InstMaskEnd == 63 - RLAmt) return 1; return 2; } // For 64-bit values, not all combinations of rotates and masks are // available. Produce one if it is available. SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt, bool Repl32, unsigned MaskStart, unsigned MaskEnd, unsigned *InstCnt = nullptr) { // In the notation used by the instructions, 'start' and 'end' are reversed // because bits are counted from high to low order. unsigned InstMaskStart = 64 - MaskEnd - 1, InstMaskEnd = 64 - MaskStart - 1; if (InstCnt) *InstCnt += 1; if (Repl32) { // This rotation amount assumes that the lower 32 bits of the quantity // are replicated in the high 32 bits by the rotation operator (which is // done by rlwinm and friends). assert(InstMaskStart >= 32 && "Mask cannot start out of range"); assert(InstMaskEnd >= 32 && "Mask cannot end out of range"); SDValue Ops[] = { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64, Ops), 0); } if (InstMaskEnd == 63) { SDValue Ops[] = { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0); } if (InstMaskStart == 0) { SDValue Ops[] = { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), getI32Imm(InstMaskEnd, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0); } if (InstMaskEnd == 63 - RLAmt) { SDValue Ops[] = { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0); } // We cannot do this with a single instruction, so we'll use two. 
The // problem is that we're not free to choose both a rotation amount and mask // start and end independently. We can choose an arbitrary mask start and // end, but then the rotation amount is fixed. Rotation, however, can be // inverted, and so by applying an "inverse" rotation first, we can get the // desired result. if (InstCnt) *InstCnt += 1; // The rotation mask for the second instruction must be MaskStart. unsigned RLAmt2 = MaskStart; // The first instruction must rotate V so that the overall rotation amount // is RLAmt. unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64; if (RLAmt1) V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63); return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd); } // For 64-bit values, not all combinations of rotates and masks are // available. Produce a rotate-mask-and-insert if one is available. SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl, unsigned RLAmt, bool Repl32, unsigned MaskStart, unsigned MaskEnd, unsigned *InstCnt = nullptr) { // In the notation used by the instructions, 'start' and 'end' are reversed // because bits are counted from high to low order. unsigned InstMaskStart = 64 - MaskEnd - 1, InstMaskEnd = 64 - MaskStart - 1; if (InstCnt) *InstCnt += 1; if (Repl32) { // This rotation amount assumes that the lower 32 bits of the quantity // are replicated in the high 32 bits by the rotation operator (which is // done by rlwinm and friends). assert(InstMaskStart >= 32 && "Mask cannot start out of range"); assert(InstMaskEnd >= 32 && "Mask cannot end out of range"); SDValue Ops[] = { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops), 0); } if (InstMaskEnd == 63 - RLAmt) { SDValue Ops[] = { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0); } // We cannot do this with a single instruction, so we'll use two. The // problem is that we're not free to choose both a rotation amount and mask // start and end independently. We can choose an arbitrary mask start and // end, but then the rotation amount is fixed. Rotation, however, can be // inverted, and so by applying an "inverse" rotation first, we can get the // desired result. if (InstCnt) *InstCnt += 1; // The rotation mask for the second instruction must be MaskStart. unsigned RLAmt2 = MaskStart; // The first instruction must rotate V so that the overall rotation amount // is RLAmt. unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64; if (RLAmt1) V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63); return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd); } void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) { if (BPermRewriterNoMasking) return; // The idea here is the same as in the 32-bit version, but with additional // complications from the fact that Repl32 might be true. Because we // aggressively convert bit groups to Repl32 form (which, for small // rotation factors, involves no other change), and then coalesce, it might // be the case that a single 64-bit masking operation could handle both // some Repl32 groups and some non-Repl32 groups. If converting to Repl32 // form allowed coalescing, then we must use a 32-bit rotaton in order to // completely capture the new combined bit group. 
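// A standalone sketch of the two-instruction fallback used above: when no
// single rldicl/rldicr/rldic encoding fits, the rotation is split in two.
// Rotations compose additively modulo 64, so rotating by RLAmt1 =
// (64 + RLAmt - RLAmt2) % 64 and then by RLAmt2 matches one rotate by RLAmt,
// which leaves RLAmt2 free to satisfy the second instruction's mask
// constraint.  ModelSplitRot is a made-up helper.  Assumes <cstdint>.
#include <cstdint>
static inline bool ModelSplitRot(uint64_t V, unsigned RLAmt, unsigned RLAmt2) {
  auto Rot = [](uint64_t X, unsigned R) {
    R &= 63;
    return R ? (X << R) | (X >> (64 - R)) : X;
  };
  unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
  return Rot(Rot(V, RLAmt1), RLAmt2) == Rot(V, RLAmt);
}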
for (ValueRotInfo &VRI : ValueRotsVec) { uint64_t Mask = 0; // We need to add to the mask all bits from the associated bit groups. // If Repl32 is false, we need to add bits from bit groups that have // Repl32 true, but are trivially convertable to Repl32 false. Such a // group is trivially convertable if it overlaps only with the lower 32 // bits, and the group has not been coalesced. auto MatchingBG = [VRI](const BitGroup &BG) { if (VRI.V != BG.V) return false; unsigned EffRLAmt = BG.RLAmt; if (!VRI.Repl32 && BG.Repl32) { if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx && !BG.Repl32Coalesced) { if (BG.Repl32CR) EffRLAmt += 32; } else { return false; } } else if (VRI.Repl32 != BG.Repl32) { return false; } return VRI.RLAmt == EffRLAmt; }; for (auto &BG : BitGroups) { if (!MatchingBG(BG)) continue; if (BG.StartIdx <= BG.EndIdx) { for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) Mask |= (UINT64_C(1) << i); } else { for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) Mask |= (UINT64_C(1) << i); for (unsigned i = 0; i <= BG.EndIdx; ++i) Mask |= (UINT64_C(1) << i); } } // We can use the 32-bit andi/andis technique if the mask does not // require any higher-order bits. This can save an instruction compared // to always using the general 64-bit technique. bool Use32BitInsts = isUInt<32>(Mask); // Compute the masks for andi/andis that would be necessary. unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = (Mask >> 16) & UINT16_MAX; bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)); unsigned NumAndInsts = (unsigned) NeedsRotate + (unsigned) (bool) Res; if (Use32BitInsts) NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) + (unsigned) (ANDIMask != 0 && ANDISMask != 0); else NumAndInsts += selectI64ImmInstrCount(Mask) + /* and */ 1; unsigned NumRLInsts = 0; bool FirstBG = true; bool MoreBG = false; for (auto &BG : BitGroups) { if (!MatchingBG(BG)) { MoreBG = true; continue; } NumRLInsts += SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx, !FirstBG); FirstBG = false; } LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") << "\n\t\t\tisel using masking: " << NumAndInsts << " using rotates: " << NumRLInsts << "\n"); // When we'd use andi/andis, we bias toward using the rotates (andi only // has a record form, and is cracked on POWER cores). However, when using // general 64-bit constant formation, bias toward the constant form, // because that exposes more opportunities for CSE. if (NumAndInsts > NumRLInsts) continue; // When merging multiple bit groups, instruction or is used. // But when rotate is used, rldimi can inert the rotated value into any // register, so instruction or can be avoided. if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts) continue; LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n"); if (InstCnt) *InstCnt += NumAndInsts; SDValue VRot; // We actually need to generate a rotation if we have a non-zero rotation // factor or, in the Repl32 case, if we care about any of the // higher-order replicated bits. In the latter case, we generate a mask // backward so that it actually includes the entire 64 bits. if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask))) VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32, VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 
30 : 63); else VRot = VRI.V; SDValue TotalVal; if (Use32BitInsts) { assert((ANDIMask != 0 || ANDISMask != 0) && "No set bits in mask when using 32-bit ands for 64-bit value"); SDValue ANDIVal, ANDISVal; if (ANDIMask != 0) ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64, ExtendToInt64(VRot, dl), getI32Imm(ANDIMask, dl)), 0); if (ANDISMask != 0) ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64, ExtendToInt64(VRot, dl), getI32Imm(ANDISMask, dl)), 0); if (!ANDIVal) TotalVal = ANDISVal; else if (!ANDISVal) TotalVal = ANDIVal; else TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, ExtendToInt64(ANDIVal, dl), ANDISVal), 0); } else { TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0); TotalVal = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, ExtendToInt64(VRot, dl), TotalVal), 0); } if (!Res) Res = TotalVal; else Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, ExtendToInt64(Res, dl), TotalVal), 0); // Now, remove all groups with this underlying value and rotation // factor. eraseMatchingBitGroups(MatchingBG); } } // Instruction selection for the 64-bit case. SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) { SDLoc dl(N); SDValue Res; if (InstCnt) *InstCnt = 0; // Take care of cases that should use andi/andis first. SelectAndParts64(dl, Res, InstCnt); // If we've not yet selected a 'starting' instruction, and we have no zeros // to fill in, select the (Value, RLAmt) with the highest priority (largest // number of groups), and start with this rotated value. if ((!NeedMask || LateMask) && !Res) { // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32 // groups will come first, and so the VRI representing the largest number // of groups might not be first (it might be the first Repl32 groups). unsigned MaxGroupsIdx = 0; if (!ValueRotsVec[0].Repl32) { for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i) if (ValueRotsVec[i].Repl32) { if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups) MaxGroupsIdx = i; break; } } ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx]; bool NeedsRotate = false; if (VRI.RLAmt) { NeedsRotate = true; } else if (VRI.Repl32) { for (auto &BG : BitGroups) { if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt || BG.Repl32 != VRI.Repl32) continue; // We don't need a rotate if the bit group is confined to the lower // 32 bits. if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx) continue; NeedsRotate = true; break; } } if (NeedsRotate) Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32, VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63, InstCnt); else Res = VRI.V; // Now, remove all groups with this underlying value and rotation factor. if (Res) eraseMatchingBitGroups([VRI](const BitGroup &BG) { return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt && BG.Repl32 == VRI.Repl32; }); } // Because 64-bit rotates are more flexible than inserts, we might have a // preference regarding which one we do first (to save one instruction). if (!Res) for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) { if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx, false) < SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx, true)) { if (I != BitGroups.begin()) { BitGroup BG = *I; BitGroups.erase(I); BitGroups.insert(BitGroups.begin(), BG); } break; } } // Insert the other groups (one at a time). 
for (auto &BG : BitGroups) { if (!Res) Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx, InstCnt); else Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx, InstCnt); } if (LateMask) { uint64_t Mask = getZerosMask(); // We can use the 32-bit andi/andis technique if the mask does not // require any higher-order bits. This can save an instruction compared // to always using the general 64-bit technique. bool Use32BitInsts = isUInt<32>(Mask); // Compute the masks for andi/andis that would be necessary. unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = (Mask >> 16) & UINT16_MAX; if (Use32BitInsts) { assert((ANDIMask != 0 || ANDISMask != 0) && "No set bits in mask when using 32-bit ands for 64-bit value"); if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) + (unsigned) (ANDIMask != 0 && ANDISMask != 0); SDValue ANDIVal, ANDISVal; if (ANDIMask != 0) ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64, ExtendToInt64(Res, dl), getI32Imm(ANDIMask, dl)), 0); if (ANDISMask != 0) ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64, ExtendToInt64(Res, dl), getI32Imm(ANDISMask, dl)), 0); if (!ANDIVal) Res = ANDISVal; else if (!ANDISVal) Res = ANDIVal; else Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, ExtendToInt64(ANDIVal, dl), ANDISVal), 0); } else { if (InstCnt) *InstCnt += selectI64ImmInstrCount(Mask) + /* and */ 1; SDValue MaskVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0); Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, ExtendToInt64(Res, dl), MaskVal), 0); } } return Res.getNode(); } SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) { // Fill in BitGroups. collectBitGroups(LateMask); if (BitGroups.empty()) return nullptr; // For 64-bit values, figure out when we can use 32-bit instructions. if (Bits.size() == 64) assignRepl32BitGroups(); // Fill in ValueRotsVec. collectValueRotInfo(); if (Bits.size() == 32) { return Select32(N, LateMask, InstCnt); } else { assert(Bits.size() == 64 && "Not 64 bits here?"); return Select64(N, LateMask, InstCnt); } return nullptr; } void eraseMatchingBitGroups(function_ref F) { BitGroups.erase(remove_if(BitGroups, F), BitGroups.end()); } SmallVector Bits; bool NeedMask; SmallVector RLAmt; SmallVector BitGroups; DenseMap, ValueRotInfo> ValueRots; SmallVector ValueRotsVec; SelectionDAG *CurDAG; public: BitPermutationSelector(SelectionDAG *DAG) : CurDAG(DAG) {} // Here we try to match complex bit permutations into a set of // rotate-and-shift/shift/and/or instructions, using a set of heuristics // known to produce optimial code for common cases (like i32 byte swapping). SDNode *Select(SDNode *N) { Memoizer.clear(); auto Result = getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits()); if (!Result.first) return nullptr; Bits = std::move(*Result.second); LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction" " selection for: "); LLVM_DEBUG(N->dump(CurDAG)); // Fill it RLAmt and set NeedMask. computeRotationAmounts(); if (!NeedMask) return Select(N, false); // We currently have two techniques for handling results with zeros: early // masking (the default) and late masking. Late masking is sometimes more // efficient, but because the structure of the bit groups is different, it // is hard to tell without generating both and comparing the results. 
With // late masking, we ignore zeros in the resulting value when inserting each // set of bit groups, and then mask in the zeros at the end. With early // masking, we only insert the non-zero parts of the result at every step. unsigned InstCnt = 0, InstCntLateMask = 0; LLVM_DEBUG(dbgs() << "\tEarly masking:\n"); SDNode *RN = Select(N, false, &InstCnt); LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n"); LLVM_DEBUG(dbgs() << "\tLate masking:\n"); SDNode *RNLM = Select(N, true, &InstCntLateMask); LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask << " instructions\n"); if (InstCnt <= InstCntLateMask) { LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n"); return RN; } LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n"); return RNLM; } }; class IntegerCompareEliminator { SelectionDAG *CurDAG; PPCDAGToDAGISel *S; // Conversion type for interpreting results of a 32-bit instruction as // a 64-bit value or vice versa. enum ExtOrTruncConversion { Ext, Trunc }; // Modifiers to guide how an ISD::SETCC node's result is to be computed // in a GPR. // ZExtOrig - use the original condition code, zero-extend value // ZExtInvert - invert the condition code, zero-extend value // SExtOrig - use the original condition code, sign-extend value // SExtInvert - invert the condition code, sign-extend value enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert }; // Comparisons against zero to emit GPR code sequences for. Each of these // sequences may need to be emitted for two or more equivalent patterns. // For example (a >= 0) == (a > -1). The direction of the comparison () // matters as well as the extension type: sext (-1/0), zext (1/0). // GEZExt - (zext (LHS >= 0)) // GESExt - (sext (LHS >= 0)) // LEZExt - (zext (LHS <= 0)) // LESExt - (sext (LHS <= 0)) enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt }; SDNode *tryEXTEND(SDNode *N); SDNode *tryLogicOpOfCompares(SDNode *N); SDValue computeLogicOpInGPR(SDValue LogicOp); SDValue signExtendInputIfNeeded(SDValue Input); SDValue zeroExtendInputIfNeeded(SDValue Input); SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv); SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl, ZeroCompare CmpTy); SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, int64_t RHSValue, SDLoc dl); SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, int64_t RHSValue, SDLoc dl); SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, int64_t RHSValue, SDLoc dl); SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, int64_t RHSValue, SDLoc dl); SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts); public: IntegerCompareEliminator(SelectionDAG *DAG, PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) { assert(CurDAG->getTargetLoweringInfo() .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && "Only expecting to use this on 64 bit targets."); } SDNode *Select(SDNode *N) { if (CmpInGPR == ICGPR_None) return nullptr; switch (N->getOpcode()) { default: break; case ISD::ZERO_EXTEND: if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 || CmpInGPR == ICGPR_SextI64) return nullptr; LLVM_FALLTHROUGH; case ISD::SIGN_EXTEND: if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_ZextI64) return nullptr; return tryEXTEND(N); case ISD::AND: case ISD::OR: case ISD::XOR: return tryLogicOpOfCompares(N); } return nullptr; } }; static bool isLogicOp(unsigned Opc) { return Opc == ISD::AND || Opc == ISD::OR || Opc == 
ISD::XOR; } // The obvious case for wanting to keep the value in a GPR. Namely, the // result of the comparison is actually needed in a GPR. SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) { assert((N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!"); SDValue WideRes; // If we are zero-extending the result of a logical operation on i1 // values, we can keep the values in GPRs. if (isLogicOp(N->getOperand(0).getOpcode()) && N->getOperand(0).getValueType() == MVT::i1 && N->getOpcode() == ISD::ZERO_EXTEND) WideRes = computeLogicOpInGPR(N->getOperand(0)); else if (N->getOperand(0).getOpcode() != ISD::SETCC) return nullptr; else WideRes = getSETCCInGPR(N->getOperand(0), N->getOpcode() == ISD::SIGN_EXTEND ? SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig); if (!WideRes) return nullptr; SDLoc dl(N); bool Input32Bit = WideRes.getValueType() == MVT::i32; bool Output32Bit = N->getValueType(0) == MVT::i32; NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0; NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1; SDValue ConvOp = WideRes; if (Input32Bit != Output32Bit) ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext : ExtOrTruncConversion::Trunc); return ConvOp.getNode(); } // Attempt to perform logical operations on the results of comparisons while // keeping the values in GPRs. Without doing so, these would end up being // lowered to CR-logical operations which suffer from significant latency and // low ILP. SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) { if (N->getValueType(0) != MVT::i1) return nullptr; assert(isLogicOp(N->getOpcode()) && "Expected a logic operation on setcc results."); SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0)); if (!LoweredLogical) return nullptr; SDLoc dl(N); bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8; unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt; SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); SDValue LHS = LoweredLogical.getOperand(0); SDValue RHS = LoweredLogical.getOperand(1); SDValue WideOp; SDValue OpToConvToRecForm; // Look through any 32-bit to 64-bit implicit extend nodes to find the // opcode that is input to the XORI. if (IsBitwiseNegate && LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG) OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1); else if (IsBitwiseNegate) // If the input to the XORI isn't an extension, that's what we're after. OpToConvToRecForm = LoweredLogical.getOperand(0); else // If this is not an XORI, it is a reg-reg logical op and we can convert // it to record-form. OpToConvToRecForm = LoweredLogical; // Get the record-form version of the node we're looking to use to get the // CR result from. uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode(); int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc); // Convert the right node to record-form. This is either the logical we're // looking at or it is the input node to the negation (if we're looking at // a bitwise negation). if (NewOpc != -1 && IsBitwiseNegate) { // The input to the XORI has a record-form. Use it. assert(LoweredLogical.getConstantOperandVal(1) == 1 && "Expected a PPC::XORI8 only for bitwise negation."); // Emit the record-form instruction. 
std::vector Ops; for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++) Ops.push_back(OpToConvToRecForm.getOperand(i)); WideOp = SDValue(CurDAG->getMachineNode(NewOpc, dl, OpToConvToRecForm.getValueType(), MVT::Glue, Ops), 0); } else { assert((NewOpc != -1 || !IsBitwiseNegate) && "No record form available for AND8/OR8/XOR8?"); WideOp = SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl, MVT::i64, MVT::Glue, LHS, RHS), 0); } // Select this node to a single bit from CR0 set by the record-form node // just created. For bitwise negation, use the EQ bit which is the equivalent // of negating the result (i.e. it is a bit set when the result of the // operation is zero). SDValue SRIdxVal = CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32); SDValue CRBit = SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1, CR0Reg, SRIdxVal, WideOp.getValue(1)), 0); return CRBit.getNode(); } // Lower a logical operation on i1 values into a GPR sequence if possible. // The result can be kept in a GPR if requested. // Three types of inputs can be handled: // - SETCC // - TRUNCATE // - Logical operation (AND/OR/XOR) // There is also a special case that is handled (namely a complement operation // achieved with xor %a, -1). SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) { assert(isLogicOp(LogicOp.getOpcode()) && "Can only handle logic operations here."); assert(LogicOp.getValueType() == MVT::i1 && "Can only handle logic operations on i1 values here."); SDLoc dl(LogicOp); SDValue LHS, RHS; // Special case: xor %a, -1 bool IsBitwiseNegation = isBitwiseNot(LogicOp); // Produces a GPR sequence for each operand of the binary logic operation. // For SETCC, it produces the respective comparison, for TRUNCATE it truncates // the value in a GPR and for logic operations, it will recursively produce // a GPR sequence for the operation. auto getLogicOperand = [&] (SDValue Operand) -> SDValue { unsigned OperandOpcode = Operand.getOpcode(); if (OperandOpcode == ISD::SETCC) return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig); else if (OperandOpcode == ISD::TRUNCATE) { SDValue InputOp = Operand.getOperand(0); EVT InVT = InputOp.getValueType(); return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 : PPC::RLDICL, dl, InVT, InputOp, S->getI64Imm(0, dl), S->getI64Imm(63, dl)), 0); } else if (isLogicOp(OperandOpcode)) return computeLogicOpInGPR(Operand); return SDValue(); }; LHS = getLogicOperand(LogicOp.getOperand(0)); RHS = getLogicOperand(LogicOp.getOperand(1)); // If a GPR sequence can't be produced for the LHS we can't proceed. // Not producing a GPR sequence for the RHS is only a problem if this isn't // a bitwise negation operation. if (!LHS || (!RHS && !IsBitwiseNegation)) return SDValue(); NumLogicOpsOnComparison++; // We will use the inputs as 64-bit values. if (LHS.getValueType() == MVT::i32) LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext); if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32) RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext); unsigned NewOpc; switch (LogicOp.getOpcode()) { default: llvm_unreachable("Unknown logic operation."); case ISD::AND: NewOpc = PPC::AND8; break; case ISD::OR: NewOpc = PPC::OR8; break; case ISD::XOR: NewOpc = PPC::XOR8; break; } if (IsBitwiseNegation) { RHS = S->getI64Imm(1, dl); NewOpc = PPC::XORI8; } return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0); } /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it. 
/// Otherwise just reinterpret it as a 64-bit value. /// Useful when emitting comparison code for 32-bit values without using /// the compare instruction (which only considers the lower 32-bits). SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) { assert(Input.getValueType() == MVT::i32 && "Can only sign-extend 32-bit values here."); unsigned Opc = Input.getOpcode(); // The value was sign extended and then truncated to 32-bits. No need to // sign extend it again. if (Opc == ISD::TRUNCATE && (Input.getOperand(0).getOpcode() == ISD::AssertSext || Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND)) return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); LoadSDNode *InputLoad = dyn_cast(Input); // The input is a sign-extending load. All ppc sign-extending loads // sign-extend to the full 64-bits. if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD) return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); ConstantSDNode *InputConst = dyn_cast(Input); // We don't sign-extend constants. if (InputConst) return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); SDLoc dl(Input); SignExtensionsAdded++; return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl, MVT::i64, Input), 0); } /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it. /// Otherwise just reinterpret it as a 64-bit value. /// Useful when emitting comparison code for 32-bit values without using /// the compare instruction (which only considers the lower 32-bits). SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) { assert(Input.getValueType() == MVT::i32 && "Can only zero-extend 32-bit values here."); unsigned Opc = Input.getOpcode(); // The only condition under which we can omit the actual extend instruction: // - The value is a positive constant // - The value comes from a load that isn't a sign-extending load // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext. bool IsTruncateOfZExt = Opc == ISD::TRUNCATE && (Input.getOperand(0).getOpcode() == ISD::AssertZext || Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND); if (IsTruncateOfZExt) return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); ConstantSDNode *InputConst = dyn_cast(Input); if (InputConst && InputConst->getSExtValue() >= 0) return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); LoadSDNode *InputLoad = dyn_cast(Input); // The input is a load that doesn't sign-extend (it will be zero-extended). if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD) return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); // None of the above, need to zero-extend. SDLoc dl(Input); ZeroExtensionsAdded++; return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input, S->getI64Imm(0, dl), S->getI64Imm(32, dl)), 0); } // Handle a 32-bit value in a 64-bit register and vice-versa. These are of // course not actual zero/sign extensions that will generate machine code, // they're just a way to reinterpret a 32 bit value in a register as a // 64 bit value and vice-versa. 
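// A standalone sketch of the rldicl (rotate left doubleword immediate then
// clear left) patterns these sequences lean on: with SH = 0 and MB = 32 it
// clears the upper word, i.e. zero-extends a 32-bit value (the RLDICL_32_64
// above); with SH = 1 and MB = 63 it leaves just the old sign bit in bit 0,
// which is how several of the comparison sequences below turn "negative"
// into a 0/1 value.  ModelRLDICL is a made-up helper.  Assumes <cstdint>.
#include <cstdint>
static inline uint64_t ModelRLDICL(uint64_t V, unsigned SH, unsigned MB) {
  SH &= 63;
  uint64_t Rot = SH ? (V << SH) | (V >> (64 - SH)) : V;
  // Keep PPC bits MB..63, i.e. the low 64-MB bits; clear the rest.
  uint64_t Mask = (MB == 0) ? ~UINT64_C(0) : (~UINT64_C(0) >> MB);
  return Rot & Mask;
}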
SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv) { SDLoc dl(NatWidthRes); // For reinterpreting 32-bit values as 64 bit values, we generate // INSERT_SUBREG IMPLICIT_DEF:i64, , TargetConstant:i32<1> if (Conv == ExtOrTruncConversion::Ext) { SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0); SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64, ImDef, NatWidthRes, SubRegIdx), 0); } assert(Conv == ExtOrTruncConversion::Trunc && "Unknown convertion between 32 and 64 bit values."); // For reinterpreting 64-bit values as 32-bit values, we just need to // EXTRACT_SUBREG (i.e. extract the low word). SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32, NatWidthRes, SubRegIdx), 0); } // Produce a GPR sequence for compound comparisons (<=, >=) against zero. // Handle both zero-extensions and sign-extensions. SDValue IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl, ZeroCompare CmpTy) { EVT InVT = LHS.getValueType(); bool Is32Bit = InVT == MVT::i32; SDValue ToExtend; // Produce the value that needs to be either zero or sign extended. switch (CmpTy) { case ZeroCompare::GEZExt: case ZeroCompare::GESExt: ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8, dl, InVT, LHS, LHS), 0); break; case ZeroCompare::LEZExt: case ZeroCompare::LESExt: { if (Is32Bit) { // Upper 32 bits cannot be undefined for this sequence. LHS = signExtendInputIfNeeded(LHS); SDValue Neg = SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); ToExtend = SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Neg, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); } else { SDValue Addi = SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, S->getI64Imm(~0ULL, dl)), 0); ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, Addi, LHS), 0); } break; } } // For 64-bit sequences, the extensions are the same for the GE/LE cases. if (!Is32Bit && (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt)) return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, ToExtend, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); if (!Is32Bit && (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt)) return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend, S->getI64Imm(63, dl)), 0); assert(Is32Bit && "Should have handled the 32-bit sequences above."); // For 32-bit sequences, the extensions differ between GE/LE cases. switch (CmpTy) { case ZeroCompare::GEZExt: { SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl), S->getI32Imm(31, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); } case ZeroCompare::GESExt: return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend, S->getI32Imm(31, dl)), 0); case ZeroCompare::LEZExt: return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend, S->getI32Imm(1, dl)), 0); case ZeroCompare::LESExt: return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend, S->getI32Imm(-1, dl)), 0); } // The above case covers all the enumerators so it can't have a default clause // to avoid compiler warnings. llvm_unreachable("Unknown zero-comparison type."); } /// Produces a zero-extended result of comparing two 32-bit values according to /// the passed condition code. 
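/// For example, the SETEQ lowering below relies on the identity
///   zext(a == b)  ==  cntlzw(a ^ b) >> 5
/// since cntlzw returns 32 (0b100000) exactly when its input is zero and a
/// value in the range [0, 31] otherwise, so bit 5 of the count is precisely
/// the zero-extended equality result.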
SDValue IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, int64_t RHSValue, SDLoc dl) { if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 || CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext) return SDValue(); bool IsRHSZero = RHSValue == 0; bool IsRHSOne = RHSValue == 1; bool IsRHSNegOne = RHSValue == -1LL; switch (CC) { default: return SDValue(); case ISD::SETEQ: { // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5) // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5) SDValue Xor = IsRHSZero ? LHS : SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); SDValue Clz = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); } case ISD::SETNE: { // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1) // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1) SDValue Xor = IsRHSZero ? LHS : SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); SDValue Clz = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) }; SDValue Shift = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, S->getI32Imm(1, dl)), 0); } case ISD::SETGE: { // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1) // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31) if(IsRHSZero) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a) // by swapping inputs and falling through. std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast(RHS); IsRHSZero = RHSConst && RHSConst->isNullValue(); LLVM_FALLTHROUGH; } case ISD::SETLE: { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1) // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1) if(IsRHSZero) { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); } // The upper 32-bits of the register can't be undefined for this sequence. LHS = signExtendInputIfNeeded(LHS); RHS = signExtendInputIfNeeded(RHS); SDValue Sub = SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); SDValue Shift = SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, Shift, S->getI32Imm(1, dl)), 0); } case ISD::SETGT: { // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63) // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31) // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63) // Handle SETLT -1 (which is equivalent to SETGE 0). if (IsRHSNegOne) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); if (IsRHSZero) { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); // The upper 32-bits of the register can't be undefined for this sequence. LHS = signExtendInputIfNeeded(LHS); RHS = signExtendInputIfNeeded(RHS); SDValue Neg = SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0); } // Not a special case (i.e. 
RHS == 0 or RHS == -1). Handle (%a > %b) as // (%b < %a) by swapping inputs and falling through. std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast(RHS); IsRHSZero = RHSConst && RHSConst->isNullValue(); IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; LLVM_FALLTHROUGH; } case ISD::SETLT: { // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63) // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1) // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31) // Handle SETLT 1 (which is equivalent to SETLE 0). if (IsRHSOne) { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); } if (IsRHSZero) { SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl), S->getI32Imm(31, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); } if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); // The upper 32-bits of the register can't be undefined for this sequence. LHS = signExtendInputIfNeeded(LHS); RHS = signExtendInputIfNeeded(RHS); SDValue SUBFNode = SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SUBFNode, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); } case ISD::SETUGE: // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1) // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1) std::swap(LHS, RHS); LLVM_FALLTHROUGH; case ISD::SETULE: { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); // The upper 32-bits of the register can't be undefined for this sequence. LHS = zeroExtendInputIfNeeded(LHS); RHS = zeroExtendInputIfNeeded(RHS); SDValue Subtract = SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); SDValue SrdiNode = SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode, S->getI32Imm(1, dl)), 0); } case ISD::SETUGT: // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63) // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63) std::swap(LHS, RHS); LLVM_FALLTHROUGH; case ISD::SETULT: { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); // The upper 32-bits of the register can't be undefined for this sequence. LHS = zeroExtendInputIfNeeded(LHS); RHS = zeroExtendInputIfNeeded(RHS); SDValue Subtract = SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); } } } /// Produces a sign-extended result of comparing two 32-bit values according to /// the passed condition code. SDValue IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, int64_t RHSValue, SDLoc dl) { if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 || CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext) return SDValue(); bool IsRHSZero = RHSValue == 0; bool IsRHSOne = RHSValue == 1; bool IsRHSNegOne = RHSValue == -1LL; switch (CC) { default: return SDValue(); case ISD::SETEQ: { // (sext (setcc %a, %b, seteq)) -> // (ashr (shl (ctlz (xor %a, %b)), 58), 63) // (sext (setcc %a, 0, seteq)) -> // (ashr (shl (ctlz %a), 58), 63) SDValue CountInput = IsRHSZero ? 
LHS : SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); SDValue Cntlzw = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0); SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) }; SDValue Slwi = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0); return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0); } case ISD::SETNE: { // Bitwise xor the operands, count leading zeros, shift right by 5 bits and // flip the bit, finally take 2's complement. // (sext (setcc %a, %b, setne)) -> // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1)) // Same as above, but the first xor is not needed. // (sext (setcc %a, 0, setne)) -> // (neg (xor (lshr (ctlz %a), 5), 1)) SDValue Xor = IsRHSZero ? LHS : SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); SDValue Clz = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) }; SDValue Shift = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); SDValue Xori = SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, S->getI32Imm(1, dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0); } case ISD::SETGE: { // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1) // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31) if (IsRHSZero) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a) // by swapping inputs and falling through. std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast(RHS); IsRHSZero = RHSConst && RHSConst->isNullValue(); LLVM_FALLTHROUGH; } case ISD::SETLE: { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1) // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1) if (IsRHSZero) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); // The upper 32-bits of the register can't be undefined for this sequence. LHS = signExtendInputIfNeeded(LHS); RHS = signExtendInputIfNeeded(RHS); SDValue SUBFNode = SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue, LHS, RHS), 0); SDValue Srdi = SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SUBFNode, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi, S->getI32Imm(-1, dl)), 0); } case ISD::SETGT: { // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63) // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31) // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63) if (IsRHSNegOne) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); if (IsRHSZero) { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); // The upper 32-bits of the register can't be undefined for this sequence. LHS = signExtendInputIfNeeded(LHS); RHS = signExtendInputIfNeeded(RHS); SDValue Neg = SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg, S->getI64Imm(63, dl)), 0); } // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as // (%b < %a) by swapping inputs and falling through. 
std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast(RHS); IsRHSZero = RHSConst && RHSConst->isNullValue(); IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; LLVM_FALLTHROUGH; } case ISD::SETLT: { // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63) // (sext (setcc %a, 1, setgt)) -> (add (lshr (- %a), 63), -1) // (sext (setcc %a, 0, setgt)) -> (ashr %a, 31) if (IsRHSOne) { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); } if (IsRHSZero) return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS, S->getI32Imm(31, dl)), 0); if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); // The upper 32-bits of the register can't be undefined for this sequence. LHS = signExtendInputIfNeeded(LHS); RHS = signExtendInputIfNeeded(RHS); SDValue SUBFNode = SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, SUBFNode, S->getI64Imm(63, dl)), 0); } case ISD::SETUGE: // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1) // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1) std::swap(LHS, RHS); LLVM_FALLTHROUGH; case ISD::SETULE: { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); // The upper 32-bits of the register can't be undefined for this sequence. LHS = zeroExtendInputIfNeeded(LHS); RHS = zeroExtendInputIfNeeded(RHS); SDValue Subtract = SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); SDValue Shift = SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract, S->getI32Imm(1, dl), S->getI32Imm(63,dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift, S->getI32Imm(-1, dl)), 0); } case ISD::SETUGT: // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63) // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63) std::swap(LHS, RHS); LLVM_FALLTHROUGH; case ISD::SETULT: { if (CmpInGPR == ICGPR_NonExtIn) return SDValue(); // The upper 32-bits of the register can't be undefined for this sequence. LHS = zeroExtendInputIfNeeded(LHS); RHS = zeroExtendInputIfNeeded(RHS); SDValue Subtract = SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Subtract, S->getI64Imm(63, dl)), 0); } } } /// Produces a zero-extended result of comparing two 64-bit values according to /// the passed condition code. SDValue IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, int64_t RHSValue, SDLoc dl) { if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 || CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext) return SDValue(); bool IsRHSZero = RHSValue == 0; bool IsRHSOne = RHSValue == 1; bool IsRHSNegOne = RHSValue == -1LL; switch (CC) { default: return SDValue(); case ISD::SETEQ: { // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6) // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6) SDValue Xor = IsRHSZero ? 
LHS : SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); SDValue Clz = SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0); return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz, S->getI64Imm(58, dl), S->getI64Imm(63, dl)), 0); } case ISD::SETNE: { // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA) // {addcz.reg, addcz.CA} = (addcarry %a, -1) // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA) SDValue Xor = IsRHSZero ? LHS : SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); SDValue AC = SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, Xor, S->getI32Imm(~0U, dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC, Xor, AC.getValue(1)), 0); } case ISD::SETGE: { // {subc.reg, subc.CA} = (subcarry %a, %b) // (zext (setcc %a, %b, setge)) -> // (adde (lshr %b, 63), (ashr %a, 63), subc.CA) // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63) if (IsRHSZero) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast(RHS); IsRHSZero = RHSConst && RHSConst->isNullValue(); LLVM_FALLTHROUGH; } case ISD::SETLE: { // {subc.reg, subc.CA} = (subcarry %b, %a) // (zext (setcc %a, %b, setge)) -> // (adde (lshr %a, 63), (ashr %b, 63), subc.CA) // (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63) if (IsRHSZero) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); SDValue ShiftL = SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); SDValue ShiftR = SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS, S->getI64Imm(63, dl)), 0); SDValue SubtractCarry = SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, LHS, RHS), 1); return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, ShiftR, ShiftL, SubtractCarry), 0); } case ISD::SETGT: { // {subc.reg, subc.CA} = (subcarry %b, %a) // (zext (setcc %a, %b, setgt)) -> // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1) // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63) if (IsRHSNegOne) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); if (IsRHSZero) { SDValue Addi = SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, S->getI64Imm(~0ULL, dl)), 0); SDValue Nor = SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0); return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); } std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast(RHS); IsRHSZero = RHSConst && RHSConst->isNullValue(); IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; LLVM_FALLTHROUGH; } case ISD::SETLT: { // {subc.reg, subc.CA} = (subcarry %a, %b) // (zext (setcc %a, %b, setlt)) -> // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1) // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63) if (IsRHSOne) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); if (IsRHSZero) return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); SDValue SRADINode = SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS, S->getI64Imm(63, dl)), 0); SDValue SRDINode = SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, RHS, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); SDValue SUBFC8Carry = 
SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, RHS, LHS), 1); SDValue ADDE8Node = SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, SRDINode, SRADINode, SUBFC8Carry), 0); return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ADDE8Node, S->getI64Imm(1, dl)), 0); } case ISD::SETUGE: // {subc.reg, subc.CA} = (subcarry %a, %b) // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1) std::swap(LHS, RHS); LLVM_FALLTHROUGH; case ISD::SETULE: { // {subc.reg, subc.CA} = (subcarry %b, %a) // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1) SDValue SUBFC8Carry = SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, LHS, RHS), 1); SDValue SUBFE8Node = SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS, LHS, SUBFC8Carry), 0); return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, SUBFE8Node, S->getI64Imm(1, dl)), 0); } case ISD::SETUGT: // {subc.reg, subc.CA} = (subcarry %b, %a) // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA) std::swap(LHS, RHS); LLVM_FALLTHROUGH; case ISD::SETULT: { // {subc.reg, subc.CA} = (subcarry %a, %b) // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA) SDValue SubtractCarry = SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, RHS, LHS), 1); SDValue ExtSub = SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, LHS, LHS, SubtractCarry), 0); return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, ExtSub), 0); } } } /// Produces a sign-extended result of comparing two 64-bit values according to /// the passed condition code. SDValue IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, int64_t RHSValue, SDLoc dl) { if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 || CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext) return SDValue(); bool IsRHSZero = RHSValue == 0; bool IsRHSOne = RHSValue == 1; bool IsRHSNegOne = RHSValue == -1LL; switch (CC) { default: return SDValue(); case ISD::SETEQ: { // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA) // {addcz.reg, addcz.CA} = (addcarry %a, -1) // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA) SDValue AddInput = IsRHSZero ? LHS : SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); SDValue Addic = SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, AddInput, S->getI32Imm(~0U, dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic, Addic, Addic.getValue(1)), 0); } case ISD::SETNE: { // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b)) // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA) // {subfcz.reg, subfcz.CA} = (subcarry 0, %a) // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA) SDValue Xor = IsRHSZero ? 
LHS : SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); SDValue SC = SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue, Xor, S->getI32Imm(0, dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC, SC, SC.getValue(1)), 0); } case ISD::SETGE: { // {subc.reg, subc.CA} = (subcarry %a, %b) // (zext (setcc %a, %b, setge)) -> // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA)) // (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63)) if (IsRHSZero) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast(RHS); IsRHSZero = RHSConst && RHSConst->isNullValue(); LLVM_FALLTHROUGH; } case ISD::SETLE: { // {subc.reg, subc.CA} = (subcarry %b, %a) // (zext (setcc %a, %b, setge)) -> // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA)) // (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63) if (IsRHSZero) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); SDValue ShiftR = SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS, S->getI64Imm(63, dl)), 0); SDValue ShiftL = SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); SDValue SubtractCarry = SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, LHS, RHS), 1); SDValue Adde = SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, ShiftR, ShiftL, SubtractCarry), 0); return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0); } case ISD::SETGT: { // {subc.reg, subc.CA} = (subcarry %b, %a) // (zext (setcc %a, %b, setgt)) -> // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1) // (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63) if (IsRHSNegOne) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); if (IsRHSZero) { SDValue Add = SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, S->getI64Imm(-1, dl)), 0); SDValue Nor = SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0); return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor, S->getI64Imm(63, dl)), 0); } std::swap(LHS, RHS); ConstantSDNode *RHSConst = dyn_cast(RHS); IsRHSZero = RHSConst && RHSConst->isNullValue(); IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; LLVM_FALLTHROUGH; } case ISD::SETLT: { // {subc.reg, subc.CA} = (subcarry %a, %b) // (zext (setcc %a, %b, setlt)) -> // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1) // (zext (setcc %a, 0, setlt)) -> (ashr %a, 63) if (IsRHSOne) return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); if (IsRHSZero) { return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS, S->getI64Imm(63, dl)), 0); } SDValue SRADINode = SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS, S->getI64Imm(63, dl)), 0); SDValue SRDINode = SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, RHS, S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 0); SDValue SUBFC8Carry = SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, RHS, LHS), 1); SDValue ADDE8Node = SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, SRDINode, SRADINode, SUBFC8Carry), 0); SDValue XORI8Node = SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ADDE8Node, S->getI64Imm(1, dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, XORI8Node), 0); } case ISD::SETUGE: // {subc.reg, subc.CA} = (subcarry %a, %b) // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA) std::swap(LHS, RHS); 
LLVM_FALLTHROUGH; case ISD::SETULE: { // {subc.reg, subc.CA} = (subcarry %b, %a) // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA) SDValue SubtractCarry = SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, LHS, RHS), 1); SDValue ExtSub = SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS, LHS, SubtractCarry), 0); return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, ExtSub, ExtSub), 0); } case ISD::SETUGT: // {subc.reg, subc.CA} = (subcarry %b, %a) // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA) std::swap(LHS, RHS); LLVM_FALLTHROUGH; case ISD::SETULT: { // {subc.reg, subc.CA} = (subcarry %a, %b) // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA) SDValue SubCarry = SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, RHS, LHS), 1); return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, LHS, LHS, SubCarry), 0); } } } /// Do all uses of this SDValue need the result in a GPR? /// This is meant to be used on values that have type i1 since /// it is somewhat meaningless to ask if values of other types /// should be kept in GPR's. static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) { assert(Compare.getOpcode() == ISD::SETCC && "An ISD::SETCC node required here."); // For values that have a single use, the caller should obviously already have // checked if that use is an extending use. We check the other uses here. if (Compare.hasOneUse()) return true; // We want the value in a GPR if it is being extended, used for a select, or // used in logical operations. for (auto CompareUse : Compare.getNode()->uses()) if (CompareUse->getOpcode() != ISD::SIGN_EXTEND && CompareUse->getOpcode() != ISD::ZERO_EXTEND && CompareUse->getOpcode() != ISD::SELECT && !isLogicOp(CompareUse->getOpcode())) { OmittedForNonExtendUses++; return false; } return true; } /// Returns an equivalent of a SETCC node but with the result the same width as /// the inputs. This can also be used for SELECT_CC if either the true or false /// value is a power of two while the other is zero. SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts) { assert((Compare.getOpcode() == ISD::SETCC || Compare.getOpcode() == ISD::SELECT_CC) && "An ISD::SETCC node required here."); // Don't convert this comparison to a GPR sequence because there are uses // of the i1 result (i.e. uses that require the result in the CR). if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG)) return SDValue(); SDValue LHS = Compare.getOperand(0); SDValue RHS = Compare.getOperand(1); // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC. int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2; ISD::CondCode CC = cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get(); EVT InputVT = LHS.getValueType(); if (InputVT != MVT::i32 && InputVT != MVT::i64) return SDValue(); if (ConvOpts == SetccInGPROpts::ZExtInvert || ConvOpts == SetccInGPROpts::SExtInvert) CC = ISD::getSetCCInverse(CC, true); bool Inputs32Bit = InputVT == MVT::i32; SDLoc dl(Compare); ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); int64_t RHSValue = RHSConst ?
RHSConst->getSExtValue() : INT64_MAX; bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig || ConvOpts == SetccInGPROpts::SExtInvert; if (IsSext && Inputs32Bit) return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl); else if (Inputs32Bit) return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl); else if (IsSext) return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl); return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl); } } // end anonymous namespace bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) { if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) return false; // This optimization will emit code that assumes 64-bit registers // so we don't want to run it in 32-bit mode. Also don't run it // on functions that are not to be optimized. if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) return false; switch (N->getOpcode()) { default: break; case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: case ISD::AND: case ISD::OR: case ISD::XOR: { IntegerCompareEliminator ICmpElim(CurDAG, this); if (SDNode *New = ICmpElim.Select(N)) { ReplaceNode(N, New); return true; } } } return false; } bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) return false; if (!UseBitPermRewriter) return false; switch (N->getOpcode()) { default: break; case ISD::ROTL: case ISD::SHL: case ISD::SRL: case ISD::AND: case ISD::OR: { BitPermutationSelector BPS(CurDAG); if (SDNode *New = BPS.Select(N)) { ReplaceNode(N, New); return true; } return false; } } return false; } /// SelectCC - Select a comparison of the specified values with the specified /// condition code, returning the CR# of the expression. SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &dl) { // Always select the LHS. unsigned Opc; if (LHS.getValueType() == MVT::i32) { unsigned Imm; if (CC == ISD::SETEQ || CC == ISD::SETNE) { if (isInt32Immediate(RHS, Imm)) { // SETEQ/SETNE comparison with 16-bit immediate, fold it. if (isUInt<16>(Imm)) return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, getI32Imm(Imm & 0xFFFF, dl)), 0); // If this is a 16-bit signed immediate, fold it. if (isInt<16>((int)Imm)) return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, getI32Imm(Imm & 0xFFFF, dl)), 0); // For non-equality comparisons, the default code would materialize the // constant, then compare against it, like this: // lis r2, 4660 // ori r2, r2, 22136 // cmpw cr0, r3, r2 // Since we are just comparing for equality, we can emit this instead: // xoris r0,r3,0x1234 // cmplwi cr0,r0,0x5678 // beq cr0,L6 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS, getI32Imm(Imm >> 16, dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor, getI32Imm(Imm & 0xFFFF, dl)), 0); } Opc = PPC::CMPLW; } else if (ISD::isUnsignedIntSetCC(CC)) { if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm)) return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, getI32Imm(Imm & 0xFFFF, dl)), 0); Opc = PPC::CMPLW; } else { int16_t SImm; if (isIntS16Immediate(RHS, SImm)) return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, getI32Imm((int)SImm & 0xFFFF, dl)), 0); Opc = PPC::CMPW; } } else if (LHS.getValueType() == MVT::i64) { uint64_t Imm; if (CC == ISD::SETEQ || CC == ISD::SETNE) { if (isInt64Immediate(RHS.getNode(), Imm)) { // SETEQ/SETNE comparison with 16-bit immediate, fold it. 
if (isUInt<16>(Imm)) return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, getI32Imm(Imm & 0xFFFF, dl)), 0); // If this is a 16-bit signed immediate, fold it. if (isInt<16>(Imm)) return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, getI32Imm(Imm & 0xFFFF, dl)), 0); // For non-equality comparisons, the default code would materialize the // constant, then compare against it, like this: // lis r2, 4660 // ori r2, r2, 22136 // cmpd cr0, r3, r2 // Since we are just comparing for equality, we can emit this instead: // xoris r0,r3,0x1234 // cmpldi cr0,r0,0x5678 // beq cr0,L6 if (isUInt<32>(Imm)) { SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS, getI64Imm(Imm >> 16, dl)), 0); return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor, getI64Imm(Imm & 0xFFFF, dl)), 0); } } Opc = PPC::CMPLD; } else if (ISD::isUnsignedIntSetCC(CC)) { if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm)) return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, getI64Imm(Imm & 0xFFFF, dl)), 0); Opc = PPC::CMPLD; } else { int16_t SImm; if (isIntS16Immediate(RHS, SImm)) return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, getI64Imm(SImm & 0xFFFF, dl)), 0); Opc = PPC::CMPD; } } else if (LHS.getValueType() == MVT::f32) { if (PPCSubTarget->hasSPE()) { switch (CC) { default: case ISD::SETEQ: case ISD::SETNE: Opc = PPC::EFSCMPEQ; break; case ISD::SETLT: case ISD::SETGE: case ISD::SETOLT: case ISD::SETOGE: case ISD::SETULT: case ISD::SETUGE: Opc = PPC::EFSCMPLT; break; case ISD::SETGT: case ISD::SETLE: case ISD::SETOGT: case ISD::SETOLE: case ISD::SETUGT: case ISD::SETULE: Opc = PPC::EFSCMPGT; break; } } else Opc = PPC::FCMPUS; } else if (LHS.getValueType() == MVT::f64) { if (PPCSubTarget->hasSPE()) { switch (CC) { default: case ISD::SETEQ: case ISD::SETNE: Opc = PPC::EFDCMPEQ; break; case ISD::SETLT: case ISD::SETGE: case ISD::SETOLT: case ISD::SETOGE: case ISD::SETULT: case ISD::SETUGE: Opc = PPC::EFDCMPLT; break; case ISD::SETGT: case ISD::SETLE: case ISD::SETOGT: case ISD::SETOLE: case ISD::SETUGT: case ISD::SETULE: Opc = PPC::EFDCMPGT; break; } } else Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD; } else { assert(LHS.getValueType() == MVT::f128 && "Unknown vt!"); assert(PPCSubTarget->hasVSX() && "__float128 requires VSX"); Opc = PPC::XSCMPUQP; } return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); } static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) { switch (CC) { case ISD::SETUEQ: case ISD::SETONE: case ISD::SETOLE: case ISD::SETOGE: llvm_unreachable("Should be lowered by legalize!"); default: llvm_unreachable("Unknown condition!"); case ISD::SETOEQ: case ISD::SETEQ: return PPC::PRED_EQ; case ISD::SETUNE: case ISD::SETNE: return PPC::PRED_NE; case ISD::SETOLT: case ISD::SETLT: return PPC::PRED_LT; case ISD::SETULE: case ISD::SETLE: return PPC::PRED_LE; case ISD::SETOGT: case ISD::SETGT: return PPC::PRED_GT; case ISD::SETUGE: case ISD::SETGE: return PPC::PRED_GE; case ISD::SETO: return PPC::PRED_NU; case ISD::SETUO: return PPC::PRED_UN; // These two are invalid for floating point. Assume we have int. case ISD::SETULT: return PPC::PRED_LT; case ISD::SETUGT: return PPC::PRED_GT; } } /// getCRIdxForSetCC - Return the index of the condition register field /// associated with the SetCC condition, and whether or not the field is /// treated as inverted. That is, lt = 0; ge = 0 inverted. 
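/// For example (illustrative only):
///   bool Inv;
///   unsigned Idx = getCRIdxForSetCC(ISD::SETGT, Inv); // Idx == 1, Inv == false
///   Idx = getCRIdxForSetCC(ISD::SETLE, Inv);          // Idx == 1, Inv == true
/// i.e. 'le' reuses the GT bit of the CR field with the result inverted.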
static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) { Invert = false; switch (CC) { default: llvm_unreachable("Unknown condition!"); case ISD::SETOLT: case ISD::SETLT: return 0; // Bit #0 = SETOLT case ISD::SETOGT: case ISD::SETGT: return 1; // Bit #1 = SETOGT case ISD::SETOEQ: case ISD::SETEQ: return 2; // Bit #2 = SETOEQ case ISD::SETUO: return 3; // Bit #3 = SETUO case ISD::SETUGE: case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE case ISD::SETULE: case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE case ISD::SETUNE: case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO case ISD::SETUEQ: case ISD::SETOGE: case ISD::SETOLE: case ISD::SETONE: llvm_unreachable("Invalid branch code: should be expanded by legalize"); // These are invalid for floating point. Assume integer. case ISD::SETULT: return 0; case ISD::SETUGT: return 1; } } // getVCmpInst: return the vector compare instruction for the specified // vector type and condition code. Since this is for altivec specific code, // only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32). static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool HasVSX, bool &Swap, bool &Negate) { Swap = false; Negate = false; if (VecVT.isFloatingPoint()) { /* Handle some cases by swapping input operands. */ switch (CC) { case ISD::SETLE: CC = ISD::SETGE; Swap = true; break; case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break; case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break; case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break; default: break; } /* Handle some cases by negating the result. */ switch (CC) { case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break; case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break; case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break; default: break; } /* We have instructions implementing the remaining cases. */ switch (CC) { case ISD::SETEQ: case ISD::SETOEQ: if (VecVT == MVT::v4f32) return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP; else if (VecVT == MVT::v2f64) return PPC::XVCMPEQDP; break; case ISD::SETGT: case ISD::SETOGT: if (VecVT == MVT::v4f32) return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP; else if (VecVT == MVT::v2f64) return PPC::XVCMPGTDP; break; case ISD::SETGE: case ISD::SETOGE: if (VecVT == MVT::v4f32) return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP; else if (VecVT == MVT::v2f64) return PPC::XVCMPGEDP; break; default: break; } llvm_unreachable("Invalid floating-point vector compare condition"); } else { /* Handle some cases by swapping input operands. */ switch (CC) { case ISD::SETGE: CC = ISD::SETLE; Swap = true; break; case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break; default: break; } /* Handle some cases by negating the result. */ switch (CC) { case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break; case ISD::SETLE: CC = ISD::SETGT; Negate = true; break; case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break; default: break; } /* We have instructions implementing the remaining cases. 
*/ switch (CC) { case ISD::SETEQ: case ISD::SETUEQ: if (VecVT == MVT::v16i8) return PPC::VCMPEQUB; else if (VecVT == MVT::v8i16) return PPC::VCMPEQUH; else if (VecVT == MVT::v4i32) return PPC::VCMPEQUW; else if (VecVT == MVT::v2i64) return PPC::VCMPEQUD; break; case ISD::SETGT: if (VecVT == MVT::v16i8) return PPC::VCMPGTSB; else if (VecVT == MVT::v8i16) return PPC::VCMPGTSH; else if (VecVT == MVT::v4i32) return PPC::VCMPGTSW; else if (VecVT == MVT::v2i64) return PPC::VCMPGTSD; break; case ISD::SETUGT: if (VecVT == MVT::v16i8) return PPC::VCMPGTUB; else if (VecVT == MVT::v8i16) return PPC::VCMPGTUH; else if (VecVT == MVT::v4i32) return PPC::VCMPGTUW; else if (VecVT == MVT::v2i64) return PPC::VCMPGTUD; break; default: break; } llvm_unreachable("Invalid integer vector compare condition"); } } bool PPCDAGToDAGISel::trySETCC(SDNode *N) { SDLoc dl(N); unsigned Imm; ISD::CondCode CC = cast(N->getOperand(2))->get(); EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); if (!PPCSubTarget->useCRBits() && isInt32Immediate(N->getOperand(1), Imm)) { // We can codegen setcc op, imm very efficiently compared to a brcond. // Check for those cases here. // setcc op, 0 if (Imm == 0) { SDValue Op = N->getOperand(0); switch (CC) { default: break; case ISD::SETEQ: { Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0); SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl), getI32Imm(31, dl) }; CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return true; } case ISD::SETNE: { if (isPPC64) break; SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, Op, getI32Imm(~0U, dl)), 0); CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1)); return true; } case ISD::SETLT: { SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return true; } case ISD::SETGT: { SDValue T = SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0); T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0); SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return true; } } } else if (Imm == ~0U) { // setcc op, -1 SDValue Op = N->getOperand(0); switch (CC) { default: break; case ISD::SETEQ: if (isPPC64) break; Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, Op, getI32Imm(1, dl)), 0); CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(CurDAG->getMachineNode(PPC::LI, dl, MVT::i32, getI32Imm(0, dl)), 0), Op.getValue(1)); return true; case ISD::SETNE: { if (isPPC64) break; Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0); SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, Op, getI32Imm(~0U, dl)); CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op, SDValue(AD, 1)); return true; } case ISD::SETLT: { SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op, getI32Imm(1, dl)), 0); SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD, Op), 0); SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return true; } case ISD::SETGT: { SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl)); 
return true; } } } } SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); // Altivec Vector compare instructions do not set any CR register by default and // vector compare operations return the same type as the operands. if (LHS.getValueType().isVector()) { if (PPCSubTarget->hasQPX() || PPCSubTarget->hasSPE()) return false; EVT VecVT = LHS.getValueType(); bool Swap, Negate; unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC, PPCSubTarget->hasVSX(), Swap, Negate); if (Swap) std::swap(LHS, RHS); EVT ResVT = VecVT.changeVectorElementTypeToInteger(); if (Negate) { SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0); CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR, ResVT, VCmp, VCmp); return true; } CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS); return true; } if (PPCSubTarget->useCRBits()) return false; bool Inv; unsigned Idx = getCRIdxForSetCC(CC, Inv); SDValue CCReg = SelectCC(LHS, RHS, CC, dl); SDValue IntCR; // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that // The correct compare instruction is already set by SelectCC() if (PPCSubTarget->hasSPE() && LHS.getValueType().isFloatingPoint()) { Idx = 1; } // Force the ccreg into CR7. SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32); SDValue InFlag(nullptr, 0); // Null incoming flag value. CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg, InFlag).getValue(1); IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, CCReg), 0); SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl), getI32Imm(31, dl), getI32Imm(31, dl) }; if (!Inv) { CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return true; } // Get the specified bit. SDValue Tmp = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl)); return true; } /// Does this node represent a load/store node whose address can be represented /// with a register plus an immediate that's a multiple of \p Val: bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const { LoadSDNode *LDN = dyn_cast(N); StoreSDNode *STN = dyn_cast(N); SDValue AddrOp; if (LDN) AddrOp = LDN->getOperand(1); else if (STN) AddrOp = STN->getOperand(2); // If the address points a frame object or a frame object with an offset, // we need to check the object alignment. short Imm = 0; if (FrameIndexSDNode *FI = dyn_cast( AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) : AddrOp)) { // If op0 is a frame index that is under aligned, we can't do it either, // because it is translated to r31 or r1 + slot + offset. We won't know the // slot number until the stack frame is finalized. const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo(); unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex()); if ((SlotAlign % Val) != 0) return false; // If we have an offset, we need further check on the offset. if (AddrOp.getOpcode() != ISD::ADD) return true; } if (AddrOp.getOpcode() == ISD::ADD) return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val); // If the address comes from the outside, the offset will be zero. return AddrOp.getOpcode() == ISD::CopyFromReg; } void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { // Transfer memoperands. 
MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); } static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, bool &NeedSwapOps, bool &IsUnCmp) { assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here."); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDValue TrueRes = N->getOperand(2); SDValue FalseRes = N->getOperand(3); ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes); if (!TrueConst) return false; assert((N->getSimpleValueType(0) == MVT::i64 || N->getSimpleValueType(0) == MVT::i32) && "Expecting either i64 or i32 here."); // We are looking for any of: // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1) // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1) // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq) // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq) int64_t TrueResVal = TrueConst->getSExtValue(); if ((TrueResVal < -1 || TrueResVal > 1) || (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) || (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) || (TrueResVal == 0 && (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ))) return false; bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC; SDValue SetOrSelCC = InnerIsSel ? FalseRes : FalseRes.getOperand(0); if (SetOrSelCC.getOpcode() != ISD::SETCC && SetOrSelCC.getOpcode() != ISD::SELECT_CC) return false; // Without this setb optimization, the outer SELECT_CC will be manually // selected to a SELECT_CC_I4/SELECT_CC_I8 pseudo, and the expand-isel-pseudos // pass then transforms the pseudo instruction into an isel instruction. When // the result has more than one use (e.g. zext/sext), the current optimization // merely replaces the isel with a setb without any significant gain. Since // setb has a longer latency than the original isel, we should avoid this. // Another point is that setb requires the comparison to always be kept, which // can break the opportunity to eliminate the comparison in the future. if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse())) return false; SDValue InnerLHS = SetOrSelCC.getOperand(0); SDValue InnerRHS = SetOrSelCC.getOperand(1); ISD::CondCode InnerCC = cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get(); // If the inner comparison is a select_cc, make sure the true/false values are // 1/-1 and canonicalize it if needed. if (InnerIsSel) { ConstantSDNode *SelCCTrueConst = dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2)); ConstantSDNode *SelCCFalseConst = dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3)); if (!SelCCTrueConst || !SelCCFalseConst) return false; int64_t SelCCTVal = SelCCTrueConst->getSExtValue(); int64_t SelCCFVal = SelCCFalseConst->getSExtValue(); // The values must be -1/1 (requiring a swap) or 1/-1. if (SelCCTVal == -1 && SelCCFVal == 1) { std::swap(InnerLHS, InnerRHS); } else if (SelCCTVal != 1 || SelCCFVal != -1) return false; } // Canonicalize unsigned case if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) { IsUnCmp = true; InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT; } bool InnerSwapped = false; if (LHS == InnerRHS && RHS == InnerLHS) InnerSwapped = true; else if (LHS != InnerLHS || RHS != InnerRHS) return false; switch (CC) { // (select_cc lhs, rhs, 0, \ // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq) case ISD::SETEQ: if (!InnerIsSel) return false; if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT) return false; NeedSwapOps = (InnerCC == ISD::SETGT) ?
InnerSwapped : !InnerSwapped; break; // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt) // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt) // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt) // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt) // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt) // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt) case ISD::SETULT: if (!IsUnCmp && InnerCC != ISD::SETNE) return false; IsUnCmp = true; LLVM_FALLTHROUGH; case ISD::SETLT: if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) || (InnerCC == ISD::SETLT && InnerSwapped)) NeedSwapOps = (TrueResVal == 1); else return false; break; // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt) // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt) // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt) // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt) // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt) // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt) case ISD::SETUGT: if (!IsUnCmp && InnerCC != ISD::SETNE) return false; IsUnCmp = true; LLVM_FALLTHROUGH; case ISD::SETGT: if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) || (InnerCC == ISD::SETGT && InnerSwapped)) NeedSwapOps = (TrueResVal == -1); else return false; break; default: return false; } LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: "); LLVM_DEBUG(N->dump()); return true; } // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. void PPCDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); if (N->isMachineOpcode()) { N->setNodeId(-1); return; // Already selected. } // In case any misguided DAG-level optimizations form an ADD with a // TargetConstant operand, crash here instead of miscompiling (by selecting // an r+r add instead of some kind of r+i add). if (N->getOpcode() == ISD::ADD && N->getOperand(1).getOpcode() == ISD::TargetConstant) llvm_unreachable("Invalid ADD with TargetConstant operand"); // Try matching complex bit permutations before doing anything else. if (tryBitPermutation(N)) return; // Try to emit integer compares as GPR-only sequences (i.e. no use of CR). 
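// For instance, (zext (setcc %a, %b, seteq)) can be selected as a short
// cntlzw/srwi sequence entirely in GPRs rather than cmpw + mfocrf + rlwinm,
// avoiding the transfer of CR bits into a GPR.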
if (tryIntCompareInGPR(N)) return; switch (N->getOpcode()) { default: break; case ISD::Constant: if (N->getValueType(0) == MVT::i64) { ReplaceNode(N, selectI64Imm(CurDAG, N)); return; } break; case ISD::SETCC: if (trySETCC(N)) return; break; case PPCISD::CALL: { const Module *M = MF->getFunction().getParent(); if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 || - !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() || - M->getPICLevel() == PICLevel::SmallPIC) + (!TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt()) || + !PPCSubTarget->isTargetELF() || M->getPICLevel() == PICLevel::SmallPIC) break; SDValue Op = N->getOperand(1); if (GlobalAddressSDNode *GA = dyn_cast(Op)) { if (GA->getTargetFlags() == PPCII::MO_PLT) getGlobalBaseReg(); } else if (ExternalSymbolSDNode *ES = dyn_cast(Op)) { if (ES->getTargetFlags() == PPCII::MO_PLT) getGlobalBaseReg(); } } break; case PPCISD::GlobalBaseReg: ReplaceNode(N, getGlobalBaseReg()); return; case ISD::FrameIndex: selectFrameIndex(N, N); return; case PPCISD::MFOCRF: { SDValue InFlag = N->getOperand(1); ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, N->getOperand(0), InFlag)); return; } case PPCISD::READ_TIME_BASE: ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32, MVT::Other, N->getOperand(0))); return; case PPCISD::SRA_ADDZE: { SDValue N0 = N->getOperand(0); SDValue ShiftAmt = CurDAG->getTargetConstant(*cast(N->getOperand(1))-> getConstantIntValue(), dl, N->getValueType(0)); if (N->getValueType(0) == MVT::i64) { SDNode *Op = CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue, N0, ShiftAmt); CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0), SDValue(Op, 1)); return; } else { assert(N->getValueType(0) == MVT::i32 && "Expecting i64 or i32 in PPCISD::SRA_ADDZE"); SDNode *Op = CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue, N0, ShiftAmt); CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0), SDValue(Op, 1)); return; } } case ISD::STORE: { // Change TLS initial-exec D-form stores to X-form stores. StoreSDNode *ST = cast(N); if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() && ST->getAddressingMode() != ISD::PRE_INC) if (tryTLSXFormStore(ST)) return; break; } case ISD::LOAD: { // Handle preincrement loads. LoadSDNode *LD = cast(N); EVT LoadedVT = LD->getMemoryVT(); // Normal loads are handled by code generated from the .td file. if (LD->getAddressingMode() != ISD::PRE_INC) { // Change TLS initial-exec D-form loads to X-form loads. if (EnableTLSOpt && PPCSubTarget->isELFv2ABI()) if (tryTLSXFormLoad(LD)) return; break; } SDValue Offset = LD->getOffset(); if (Offset.getOpcode() == ISD::TargetConstant || Offset.getOpcode() == ISD::TargetGlobalAddress) { unsigned Opcode; bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; if (LD->getValueType(0) != MVT::i64) { // Handle PPC32 integer and normal FP loads. assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); switch (LoadedVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Invalid PPC load type!"); case MVT::f64: Opcode = PPC::LFDU; break; case MVT::f32: Opcode = PPC::LFSU; break; case MVT::i32: Opcode = PPC::LWZU; break; case MVT::i16: Opcode = isSExt ? 
PPC::LHAU : PPC::LHZU; break; case MVT::i1: case MVT::i8: Opcode = PPC::LBZU; break; } } else { assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!"); assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); switch (LoadedVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Invalid PPC load type!"); case MVT::i64: Opcode = PPC::LDU; break; case MVT::i32: Opcode = PPC::LWZU8; break; case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break; case MVT::i1: case MVT::i8: Opcode = PPC::LBZU8; break; } } SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Offset, Base, Chain }; SDNode *MN = CurDAG->getMachineNode( Opcode, dl, LD->getValueType(0), PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); transferMemOperands(N, MN); ReplaceNode(N, MN); return; } else { unsigned Opcode; bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; if (LD->getValueType(0) != MVT::i64) { // Handle PPC32 integer and normal FP loads. assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); switch (LoadedVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Invalid PPC load type!"); case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX case MVT::f64: Opcode = PPC::LFDUX; break; case MVT::f32: Opcode = PPC::LFSUX; break; case MVT::i32: Opcode = PPC::LWZUX; break; case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break; case MVT::i1: case MVT::i8: Opcode = PPC::LBZUX; break; } } else { assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!"); assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && "Invalid sext update load"); switch (LoadedVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Invalid PPC load type!"); case MVT::i64: Opcode = PPC::LDUX; break; case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break; case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break; case MVT::i1: case MVT::i8: Opcode = PPC::LBZUX8; break; } } SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Base, Offset, Chain }; SDNode *MN = CurDAG->getMachineNode( Opcode, dl, LD->getValueType(0), PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); transferMemOperands(N, MN); ReplaceNode(N, MN); return; } } case ISD::AND: { unsigned Imm, Imm2, SH, MB, ME; uint64_t Imm64; // If this is an and of a value rotated between 0 and 31 bits and then and'd // with a mask, emit rlwinm if (isInt32Immediate(N->getOperand(1), Imm) && isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) { SDValue Val = N->getOperand(0).getOperand(0); SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return; } // If this is just a masked value where the input is not handled above, and // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm if (isInt32Immediate(N->getOperand(1), Imm) && isRunOfOnes(Imm, MB, ME) && N->getOperand(0).getOpcode() != ISD::ROTL) { SDValue Val = N->getOperand(0); SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return; } // If this is a 64-bit zero-extension mask, emit rldicl. 
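// For example, (and x, 0xFFFF) becomes (rldicl x, 0, 48), i.e. rotate by zero
// and clear the upper 48 bits, leaving only the low 16 bits of x.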
if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && isMask_64(Imm64)) { SDValue Val = N->getOperand(0); MB = 64 - countTrailingOnes(Imm64); SH = 0; if (Val.getOpcode() == ISD::ANY_EXTEND) { auto Op0 = Val.getOperand(0); if ( Op0.getOpcode() == ISD::SRL && isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) { auto ResultType = Val.getNode()->getValueType(0); auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType); SDValue IDVal (ImDef, 0); Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType, IDVal, Op0.getOperand(0), getI32Imm(1, dl)), 0); SH = 64 - Imm; } } // If the operand is a logical right shift, we can fold it into this // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) // for n <= mb. The right shift is really a left rotate followed by a // mask, and this mask is a more-restrictive sub-mask of the mask implied // by the shift. if (Val.getOpcode() == ISD::SRL && isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) { assert(Imm < 64 && "Illegal shift amount"); Val = Val.getOperand(0); SH = 64 - Imm; } SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) }; CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); return; } // If this is a negated 64-bit zero-extension mask, // i.e. the immediate is a sequence of ones from most significant side // and all zero for reminder, we should use rldicr. if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && isMask_64(~Imm64)) { SDValue Val = N->getOperand(0); MB = 63 - countTrailingOnes(~Imm64); SH = 0; SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) }; CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); return; } // AND X, 0 -> 0, not "rlwinm 32". if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) { ReplaceUses(SDValue(N, 0), N->getOperand(1)); return; } // ISD::OR doesn't get all the bitfield insertion fun. // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a // bitfield insert. if (isInt32Immediate(N->getOperand(1), Imm) && N->getOperand(0).getOpcode() == ISD::OR && isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) { // The idea here is to check whether this is equivalent to: // (c1 & m) | (x & ~m) // where m is a run-of-ones mask. The logic here is that, for each bit in // c1 and c2: // - if both are 1, then the output will be 1. // - if both are 0, then the output will be 0. // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will // come from x. // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will // be 0. // If that last condition is never the case, then we can form m from the // bits that are the same between c1 and c2. unsigned MB, ME; if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) { SDValue Ops[] = { N->getOperand(0).getOperand(0), N->getOperand(0).getOperand(1), getI32Imm(0, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); return; } } // Other cases are autogenerated. break; } case ISD::OR: { if (N->getValueType(0) == MVT::i32) if (tryBitfieldInsert(N)) return; int16_t Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0)); // If this is equivalent to an add, then we can fold it with the // FrameIndex calculation. 
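// The bitfield-insert recognition above claims that (and (or x, c1), c2)
// equals (c1 & m) | (x & ~m) when m = ~(c1 ^ c2) is a run of ones and no bit
// is set in c1 but clear in c2, which is exactly the insert-under-mask form
// RLWIMI computes.  A standalone C++14 spot check of that equivalence for one
// qualifying constant pair; the constants and the helper name are
// illustrative only.
#include <cstdint>

constexpr bool CheckBitfieldInsertEquivalence() {
  const uint32_t C1 = 0x0000A500u;   // OR immediate (a subset of C2)
  const uint32_t C2 = 0xFFFFA5FFu;   // AND immediate
  const uint32_t M  = ~(C1 ^ C2);    // 0x0000FF00, a run of ones
  // Check a spread of sample values for x.
  for (uint64_t I = 0; I <= 0xFFFFFFFFull; I += 0x01010101ull) {
    uint32_t X = static_cast<uint32_t>(I);
    if (((X | C1) & C2) != ((C1 & M) | (X & ~M)))
      return false;
  }
  return true;
}
static_assert(CheckBitfieldInsertEquivalence(),
              "(and (or x, c1), c2) == insert of c1 under the mask ~(c1^c2)");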
if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) { selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); return; } } // OR with a 32-bit immediate can be handled by ori + oris // without creating an immediate in a GPR. uint64_t Imm64 = 0; bool IsPPC64 = PPCSubTarget->isPPC64(); if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) && (Imm64 & ~0xFFFFFFFFuLL) == 0) { // If ImmHi (ImmHi) is zero, only one ori (oris) is generated later. uint64_t ImmHi = Imm64 >> 16; uint64_t ImmLo = Imm64 & 0xFFFF; if (ImmHi != 0 && ImmLo != 0) { SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, N->getOperand(0), getI16Imm(ImmLo, dl)); SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)}; CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1); return; } } // Other cases are autogenerated. break; } case ISD::XOR: { // XOR with a 32-bit immediate can be handled by xori + xoris // without creating an immediate in a GPR. uint64_t Imm64 = 0; bool IsPPC64 = PPCSubTarget->isPPC64(); if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) && (Imm64 & ~0xFFFFFFFFuLL) == 0) { // If ImmHi (ImmHi) is zero, only one xori (xoris) is generated later. uint64_t ImmHi = Imm64 >> 16; uint64_t ImmLo = Imm64 & 0xFFFF; if (ImmHi != 0 && ImmLo != 0) { SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, N->getOperand(0), getI16Imm(ImmLo, dl)); SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)}; CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1); return; } } break; } case ISD::ADD: { int16_t Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); return; } break; } case ISD::SHL: { unsigned Imm, SH, MB, ME; if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) && isRotateAndMask(N, Imm, true, SH, MB, ME)) { SDValue Ops[] = { N->getOperand(0).getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return; } // Other cases are autogenerated. break; } case ISD::SRL: { unsigned Imm, SH, MB, ME; if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) && isRotateAndMask(N, Imm, true, SH, MB, ME)) { SDValue Ops[] = { N->getOperand(0).getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl), getI32Imm(ME, dl) }; CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); return; } // Other cases are autogenerated. break; } // FIXME: Remove this once the ANDI glue bug is fixed: case PPCISD::ANDIo_1_EQ_BIT: case PPCISD::ANDIo_1_GT_BIT: { if (!ANDIGlueBug) break; EVT InVT = N->getOperand(0).getValueType(); assert((InVT == MVT::i64 || InVT == MVT::i32) && "Invalid input type for ANDIo_1_EQ_BIT"); unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo; SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue, N->getOperand(0), CurDAG->getTargetConstant(1, dl, InVT)), 0); SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); SDValue SRIdxVal = CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt, dl, MVT::i32); CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg, SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */); return; } case ISD::SELECT_CC: { ISD::CondCode CC = cast(N->getOperand(4))->get(); EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); // If this is a select of i1 operands, we'll pattern match it. 
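// The ORI8+ORIS8 and XORI8+XORIS8 splits above rely on a 32-bit unsigned
// immediate being the disjoint union of its low 16 bits (the ori/xori
// operand) and its high 16 bits shifted left by 16 (the oris/xoris operand).
// A standalone C++ sketch; the helper name is illustrative only.
#include <cstdint>

constexpr bool CheckOriOrisSplit(uint64_t X, uint64_t Imm32) {
  // Imm32 is assumed to fit in 32 unsigned bits, as checked above.
  return ((X | (Imm32 & 0xFFFF)) | ((Imm32 >> 16) << 16)) == (X | Imm32) &&
         ((X ^ (Imm32 & 0xFFFF)) ^ ((Imm32 >> 16) << 16)) == (X ^ Imm32);
}
static_assert(CheckOriOrisSplit(0x123456789ABCDEF0ull, 0xDEADBEEFull),
              "ori+oris / xori+xoris cover a 32-bit immediate");
static_assert(CheckOriOrisSplit(0xFFFFFFFFFFFFFFFFull, 0x00010001ull),
              "ori+oris / xori+xoris cover a 32-bit immediate");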
if (PPCSubTarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1) break; if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) { bool NeedSwapOps = false; bool IsUnCmp = false; if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); if (NeedSwapOps) std::swap(LHS, RHS); // Make use of SelectCC to generate the comparison to set CR bits, for // equality comparisons having one literal operand, SelectCC probably // doesn't need to materialize the whole literal and just use xoris to // check it first, it leads the following comparison result can't // exactly represent GT/LT relationship. So to avoid this we specify // SETGT/SETUGT here instead of SETEQ. SDValue GenCC = SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl); CurDAG->SelectNodeTo( N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB, N->getValueType(0), GenCC); NumP9Setb++; return; } } // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc if (!isPPC64) if (ConstantSDNode *N1C = dyn_cast(N->getOperand(1))) if (ConstantSDNode *N2C = dyn_cast(N->getOperand(2))) if (ConstantSDNode *N3C = dyn_cast(N->getOperand(3))) if (N1C->isNullValue() && N3C->isNullValue() && N2C->getZExtValue() == 1ULL && CC == ISD::SETNE && // FIXME: Implement this optzn for PPC64. N->getValueType(0) == MVT::i32) { SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, N->getOperand(0), getI32Imm(~0U, dl)); CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0), N->getOperand(0), SDValue(Tmp, 1)); return; } SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl); if (N->getValueType(0) == MVT::i1) { // An i1 select is: (c & t) | (!c & f). bool Inv; unsigned Idx = getCRIdxForSetCC(CC, Inv); unsigned SRI; switch (Idx) { default: llvm_unreachable("Invalid CC index"); case 0: SRI = PPC::sub_lt; break; case 1: SRI = PPC::sub_gt; break; case 2: SRI = PPC::sub_eq; break; case 3: SRI = PPC::sub_un; break; } SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg); SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1, CCBit, CCBit), 0); SDValue C = Inv ? NotCCBit : CCBit, NotC = Inv ? 
CCBit : NotCCBit; SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, C, N->getOperand(2)), 0); SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, NotC, N->getOperand(3)), 0); CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF); return; } unsigned BROpc = getPredicateForSetCC(CC); unsigned SelectCCOp; if (N->getValueType(0) == MVT::i32) SelectCCOp = PPC::SELECT_CC_I4; else if (N->getValueType(0) == MVT::i64) SelectCCOp = PPC::SELECT_CC_I8; else if (N->getValueType(0) == MVT::f32) { if (PPCSubTarget->hasP8Vector()) SelectCCOp = PPC::SELECT_CC_VSSRC; else if (PPCSubTarget->hasSPE()) SelectCCOp = PPC::SELECT_CC_SPE4; else SelectCCOp = PPC::SELECT_CC_F4; } else if (N->getValueType(0) == MVT::f64) { if (PPCSubTarget->hasVSX()) SelectCCOp = PPC::SELECT_CC_VSFRC; else if (PPCSubTarget->hasSPE()) SelectCCOp = PPC::SELECT_CC_SPE; else SelectCCOp = PPC::SELECT_CC_F8; } else if (N->getValueType(0) == MVT::f128) SelectCCOp = PPC::SELECT_CC_F16; else if (PPCSubTarget->hasSPE()) SelectCCOp = PPC::SELECT_CC_SPE; else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64) SelectCCOp = PPC::SELECT_CC_QFRC; else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32) SelectCCOp = PPC::SELECT_CC_QSRC; else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1) SelectCCOp = PPC::SELECT_CC_QBRC; else if (N->getValueType(0) == MVT::v2f64 || N->getValueType(0) == MVT::v2i64) SelectCCOp = PPC::SELECT_CC_VSRC; else SelectCCOp = PPC::SELECT_CC_VRRC; SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3), getI32Imm(BROpc, dl) }; CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops); return; } case ISD::VECTOR_SHUFFLE: if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 || N->getValueType(0) == MVT::v2i64)) { ShuffleVectorSDNode *SVN = cast(N); SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1), Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1); unsigned DM[2]; for (int i = 0; i < 2; ++i) if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2) DM[i] = 0; else DM[i] = 1; if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 && Op1.getOpcode() == ISD::SCALAR_TO_VECTOR && isa(Op1.getOperand(0))) { LoadSDNode *LD = cast(Op1.getOperand(0)); SDValue Base, Offset; if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() && (LD->getMemoryVT() == MVT::f64 || LD->getMemoryVT() == MVT::i64) && SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { SDValue Chain = LD->getChain(); SDValue Ops[] = { Base, Offset, Chain }; MachineMemOperand *MemOp = LD->getMemOperand(); SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX, N->getValueType(0), Ops); CurDAG->setNodeMemRefs(cast(NewN), {MemOp}); return; } } // For little endian, we must swap the input operands and adjust // the mask elements (reverse and invert them). if (PPCSubTarget->isLittleEndian()) { std::swap(Op1, Op2); unsigned tmp = DM[0]; DM[0] = 1 - DM[1]; DM[1] = 1 - tmp; } SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl, MVT::i32); SDValue Ops[] = { Op1, Op2, DMV }; CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops); return; } break; case PPCISD::BDNZ: case PPCISD::BDZ: { bool IsPPC64 = PPCSubTarget->isPPC64(); SDValue Ops[] = { N->getOperand(1), N->getOperand(0) }; CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ), MVT::Other, Ops); return; } case PPCISD::COND_BRANCH: { // Op #0 is the Chain. // Op #1 is the PPC::PRED_* number. 
// Op #2 is the CR# // Op #3 is the Dest MBB // Op #4 is the Flag. // Prevent PPC::PRED_* from being selected into LI. unsigned PCC = cast(N->getOperand(1))->getZExtValue(); if (EnableBranchHint) PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3)); SDValue Pred = getI32Imm(PCC, dl); SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3), N->getOperand(0), N->getOperand(4) }; CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); return; } case ISD::BR_CC: { ISD::CondCode CC = cast(N->getOperand(1))->get(); unsigned PCC = getPredicateForSetCC(CC); if (N->getOperand(2).getValueType() == MVT::i1) { unsigned Opc; bool Swap; switch (PCC) { default: llvm_unreachable("Unexpected Boolean-operand predicate"); case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break; case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break; case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break; case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break; case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break; case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break; } // A signed comparison of i1 values produces the opposite result to an // unsigned one if the condition code includes less-than or greater-than. // This is because 1 is the most negative signed i1 number and the most // positive unsigned i1 number. The CR-logical operations used for such // comparisons are non-commutative so for signed comparisons vs. unsigned // ones, the input operands just need to be swapped. if (ISD::isSignedIntSetCC(CC)) Swap = !Swap; SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1, N->getOperand(Swap ? 3 : 2), N->getOperand(Swap ? 2 : 3)), 0); CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4), N->getOperand(0)); return; } if (EnableBranchHint) PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4)); SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl); SDValue Ops[] = { getI32Imm(PCC, dl), CondCode, N->getOperand(4), N->getOperand(0) }; CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); return; } case ISD::BRIND: { // FIXME: Should custom lower this. SDValue Chain = N->getOperand(0); SDValue Target = N->getOperand(1); unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8; unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8; Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target, Chain), 0); CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); return; } case PPCISD::TOC_ENTRY: { assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) && "Only supported for 64-bit ABI and 32-bit SVR4"); if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) { SDValue GA = N->getOperand(0); SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA, N->getOperand(1)); transferMemOperands(N, MN); ReplaceNode(N, MN); return; } // For medium and large code model, we generate two instructions as // described below. Otherwise we allow SelectCodeCommon to handle this, // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. CodeModel::Model CModel = TM.getCodeModel(); if (CModel != CodeModel::Medium && CModel != CodeModel::Large) break; // The first source operand is a TargetGlobalAddress or a TargetJumpTable. 
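// The i1 BR_CC comment above notes that a signed comparison of i1 values is
// the opposite of the unsigned one for orderings involving less-than or
// greater-than, because the bit pattern 1 is -1 when signed and 1 when
// unsigned; swapping the operands of the non-commutative CR operation
// compensates.  A standalone C++14 check of that claim; the helper names are
// illustrative only.

// Signed interpretation of a single bit: the pattern 1 denotes -1.
constexpr int SExt1(unsigned B) { return B ? -1 : 0; }

constexpr bool CheckI1CompareSwap() {
  for (unsigned A = 0; A <= 1; ++A)
    for (unsigned B = 0; B <= 1; ++B) {
      // Signed "A < B" on i1 coincides with unsigned "B < A", and likewise
      // for <=, so signed predicates just swap the CR operation's inputs.
      if ((SExt1(A) < SExt1(B)) != (B < A))
        return false;
      if ((SExt1(A) <= SExt1(B)) != (B <= A))
        return false;
    }
  return true;
}
static_assert(CheckI1CompareSwap(),
              "signed i1 compare == unsigned i1 compare with operands swapped");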
// If it must be toc-referenced according to PPCSubTarget, we generate: // LDtocL(@sym, ADDIStocHA(%x2, @sym)) // Otherwise we generate: // ADDItocL(ADDIStocHA(%x2, @sym), @sym) SDValue GA = N->getOperand(0); SDValue TOCbase = N->getOperand(1); SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64, TOCbase, GA); if (PPCLowering->isAccessedAsGotIndirect(GA)) { // If it is access as got-indirect, we need an extra LD to load // the address. SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, SDValue(Tmp, 0)); transferMemOperands(N, MN); ReplaceNode(N, MN); return; } // Build the address relative to the TOC-pointer.. ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, SDValue(Tmp, 0), GA)); return; } case PPCISD::PPC32_PICGOT: // Generate a PIC-safe GOT reference. assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() && "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::i32); return; case PPCISD::VADD_SPLAT: { // This expands into one of three sequences, depending on whether // the first operand is odd or even, positive or negative. assert(isa(N->getOperand(0)) && isa(N->getOperand(1)) && "Invalid operand on VADD_SPLAT!"); int Elt = N->getConstantOperandVal(0); int EltSize = N->getConstantOperandVal(1); unsigned Opc1, Opc2, Opc3; EVT VT; if (EltSize == 1) { Opc1 = PPC::VSPLTISB; Opc2 = PPC::VADDUBM; Opc3 = PPC::VSUBUBM; VT = MVT::v16i8; } else if (EltSize == 2) { Opc1 = PPC::VSPLTISH; Opc2 = PPC::VADDUHM; Opc3 = PPC::VSUBUHM; VT = MVT::v8i16; } else { assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!"); Opc1 = PPC::VSPLTISW; Opc2 = PPC::VADDUWM; Opc3 = PPC::VSUBUWM; VT = MVT::v4i32; } if ((Elt & 1) == 0) { // Elt is even, in the range [-32,-18] + [16,30]. // // Convert: VADD_SPLAT elt, size // Into: tmp = VSPLTIS[BHW] elt // VADDU[BHW]M tmp, tmp // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4 SDValue EltVal = getI32Imm(Elt >> 1, dl); SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); SDValue TmpVal = SDValue(Tmp, 0); ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal)); return; } else if (Elt > 0) { // Elt is odd and positive, in the range [17,31]. // // Convert: VADD_SPLAT elt, size // Into: tmp1 = VSPLTIS[BHW] elt-16 // tmp2 = VSPLTIS[BHW] -16 // VSUBU[BHW]M tmp1, tmp2 SDValue EltVal = getI32Imm(Elt - 16, dl); SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); EltVal = getI32Imm(-16, dl); SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0), SDValue(Tmp2, 0))); return; } else { // Elt is odd and negative, in the range [-31,-17]. // // Convert: VADD_SPLAT elt, size // Into: tmp1 = VSPLTIS[BHW] elt+16 // tmp2 = VSPLTIS[BHW] -16 // VADDU[BHW]M tmp1, tmp2 SDValue EltVal = getI32Imm(Elt + 16, dl); SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); EltVal = getI32Imm(-16, dl); SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0), SDValue(Tmp2, 0))); return; } } } SelectCode(N); } // If the target supports the cmpb instruction, do the idiom recognition here. // We don't do this as a DAG combine because we don't want to do it as nodes // are being combined (because we might miss part of the eventual idiom). 
We // don't want to do it during instruction selection because we want to reuse // the logic for lowering the masking operations already part of the // instruction selector. SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { SDLoc dl(N); assert(N->getOpcode() == ISD::OR && "Only OR nodes are supported for CMPB"); SDValue Res; if (!PPCSubTarget->hasCMPB()) return Res; if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) return Res; EVT VT = N->getValueType(0); SDValue RHS, LHS; bool BytesFound[8] = {false, false, false, false, false, false, false, false}; uint64_t Mask = 0, Alt = 0; auto IsByteSelectCC = [this](SDValue O, unsigned &b, uint64_t &Mask, uint64_t &Alt, SDValue &LHS, SDValue &RHS) { if (O.getOpcode() != ISD::SELECT_CC) return false; ISD::CondCode CC = cast(O.getOperand(4))->get(); if (!isa(O.getOperand(2)) || !isa(O.getOperand(3))) return false; uint64_t PM = O.getConstantOperandVal(2); uint64_t PAlt = O.getConstantOperandVal(3); for (b = 0; b < 8; ++b) { uint64_t Mask = UINT64_C(0xFF) << (8*b); if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt) break; } if (b == 8) return false; Mask |= PM; Alt |= PAlt; if (!isa(O.getOperand(1)) || O.getConstantOperandVal(1) != 0) { SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1); if (Op0.getOpcode() == ISD::TRUNCATE) Op0 = Op0.getOperand(0); if (Op1.getOpcode() == ISD::TRUNCATE) Op1 = Op1.getOperand(0); if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL && Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ && isa(Op0.getOperand(1))) { unsigned Bits = Op0.getValueSizeInBits(); if (b != Bits/8-1) return false; if (Op0.getConstantOperandVal(1) != Bits-8) return false; LHS = Op0.getOperand(0); RHS = Op1.getOperand(0); return true; } // When we have small integers (i16 to be specific), the form present // post-legalization uses SETULT in the SELECT_CC for the // higher-order byte, depending on the fact that the // even-higher-order bytes are known to all be zero, for example: // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult // (so when the second byte is the same, because all higher-order // bits from bytes 3 and 4 are known to be zero, the result of the // xor can be at most 255) if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT && isa(O.getOperand(1))) { uint64_t ULim = O.getConstantOperandVal(1); if (ULim != (UINT64_C(1) << b*8)) return false; // Now we need to make sure that the upper bytes are known to be // zero. 
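// The SETULT form described above compares the highest interesting byte of
// two small integers by testing whether their XOR is below 2^(8*b); that test
// is only valid when every bit above the compared byte is known to be zero,
// which is what the MaskedValueIsZero check that follows enforces.  A
// standalone C++ illustration for 16-bit operands (b = 1); the helper name is
// illustrative only.
#include <cstdint>

// Both inputs must have every bit above bit 15 known to be zero.
constexpr bool HighByteEqualViaSetult(uint32_t L, uint32_t R) {
  return (L ^ R) < 256u;   // select_cc (xor L, R), 256, ..., setult
}
static_assert(HighByteEqualViaSetult(0x12AB, 0x12CD), "same high byte");
static_assert(!HighByteEqualViaSetult(0x12AB, 0x13AB), "differs in the high byte");
static_assert(!HighByteEqualViaSetult(0x00AB, 0x80AB), "differs in the high byte");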
unsigned Bits = Op0.getValueSizeInBits(); if (!CurDAG->MaskedValueIsZero( Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8))) return false; LHS = Op0.getOperand(0); RHS = Op0.getOperand(1); return true; } return false; } if (CC != ISD::SETEQ) return false; SDValue Op = O.getOperand(0); if (Op.getOpcode() == ISD::AND) { if (!isa(Op.getOperand(1))) return false; if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b))) return false; SDValue XOR = Op.getOperand(0); if (XOR.getOpcode() == ISD::TRUNCATE) XOR = XOR.getOperand(0); if (XOR.getOpcode() != ISD::XOR) return false; LHS = XOR.getOperand(0); RHS = XOR.getOperand(1); return true; } else if (Op.getOpcode() == ISD::SRL) { if (!isa(Op.getOperand(1))) return false; unsigned Bits = Op.getValueSizeInBits(); if (b != Bits/8-1) return false; if (Op.getConstantOperandVal(1) != Bits-8) return false; SDValue XOR = Op.getOperand(0); if (XOR.getOpcode() == ISD::TRUNCATE) XOR = XOR.getOperand(0); if (XOR.getOpcode() != ISD::XOR) return false; LHS = XOR.getOperand(0); RHS = XOR.getOperand(1); return true; } return false; }; SmallVector Queue(1, SDValue(N, 0)); while (!Queue.empty()) { SDValue V = Queue.pop_back_val(); for (const SDValue &O : V.getNode()->ops()) { unsigned b; uint64_t M = 0, A = 0; SDValue OLHS, ORHS; if (O.getOpcode() == ISD::OR) { Queue.push_back(O); } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) { if (!LHS) { LHS = OLHS; RHS = ORHS; BytesFound[b] = true; Mask |= M; Alt |= A; } else if ((LHS == ORHS && RHS == OLHS) || (RHS == ORHS && LHS == OLHS)) { BytesFound[b] = true; Mask |= M; Alt |= A; } else { return Res; } } else { return Res; } } } unsigned LastB = 0, BCnt = 0; for (unsigned i = 0; i < 8; ++i) if (BytesFound[LastB]) { ++BCnt; LastB = i; } if (!LastB || BCnt < 2) return Res; // Because we'll be zero-extending the output anyway if don't have a specific // value for each input byte (via the Mask), we can 'anyext' the inputs. if (LHS.getValueType() != VT) { LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT); RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT); } Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS); bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1); if (NonTrivialMask && !Alt) { // Res = Mask & CMPB Res = CurDAG->getNode(ISD::AND, dl, VT, Res, CurDAG->getConstant(Mask, dl, VT)); } else if (Alt) { // Res = (CMPB & Mask) | (~CMPB & Alt) // Which, as suggested here: // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge // can be written as: // Res = Alt ^ ((Alt ^ Mask) & CMPB) // useful because the (Alt ^ Mask) can be pre-computed. Res = CurDAG->getNode(ISD::AND, dl, VT, Res, CurDAG->getConstant(Mask ^ Alt, dl, VT)); Res = CurDAG->getNode(ISD::XOR, dl, VT, Res, CurDAG->getConstant(Alt, dl, VT)); } return Res; } // When CR bit registers are enabled, an extension of an i1 variable to a i32 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus // involves constant materialization of a 0 or a 1 or both. If the result of // the extension is then operated upon by some operator that can be constant // folded with a constant 0 or 1, and that constant can be materialized using // only one instruction (like a zero or one), then we should fold in those // operations with the select. 
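// combineToCMPB above materializes the byte comparison with a CMPB node
// (0xFF per equal byte) and then rewrites the masked merge
// (CMPB & Mask) | (~CMPB & Alt) as Alt ^ ((Alt ^ Mask) & CMPB) so that
// Alt ^ Mask can be pre-computed.  A standalone C++ sketch of both facts;
// CmpB and CheckMaskedMerge are illustrative helpers, not LLVM APIs.
#include <cstdint>

// Byte-wise compare-equal, modelling the cmpb result: 0xFF per equal byte.
constexpr uint64_t CmpB(uint64_t A, uint64_t B, unsigned Byte = 0) {
  return Byte == 8
             ? 0
             : ((((A >> (8 * Byte)) & 0xFF) == ((B >> (8 * Byte)) & 0xFF)
                     ? (0xFFull << (8 * Byte))
                     : 0) |
                CmpB(A, B, Byte + 1));
}

// The masked-merge rewrite used above.
constexpr bool CheckMaskedMerge(uint64_t C, uint64_t Mask, uint64_t Alt) {
  return ((C & Mask) | (~C & Alt)) == (Alt ^ ((Alt ^ Mask) & C));
}

static_assert(CmpB(0x1122334455667788ull, 0x1199339955669988ull) ==
                  0xFF00FF00FFFF00FFull,
              "cmpb marks equal bytes with 0xFF");
static_assert(CheckMaskedMerge(0xFF00FF00FFFF00FFull, 0x0102030405060708ull,
                               0xA0B0C0D0E0F00010ull),
              "(C & Mask) | (~C & Alt) == Alt ^ ((Alt ^ Mask) & C)");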
void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) { if (!PPCSubTarget->useCRBits()) return; if (N->getOpcode() != ISD::ZERO_EXTEND && N->getOpcode() != ISD::SIGN_EXTEND && N->getOpcode() != ISD::ANY_EXTEND) return; if (N->getOperand(0).getValueType() != MVT::i1) return; if (!N->hasOneUse()) return; SDLoc dl(N); EVT VT = N->getValueType(0); SDValue Cond = N->getOperand(0); SDValue ConstTrue = CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT); SDValue ConstFalse = CurDAG->getConstant(0, dl, VT); do { SDNode *User = *N->use_begin(); if (User->getNumOperands() != 2) break; auto TryFold = [this, N, User, dl](SDValue Val) { SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1); SDValue O0 = UserO0.getNode() == N ? Val : UserO0; SDValue O1 = UserO1.getNode() == N ? Val : UserO1; return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl, User->getValueType(0), O0.getNode(), O1.getNode()); }; // FIXME: When the semantics of the interaction between select and undef // are clearly defined, it may turn out to be unnecessary to break here. SDValue TrueRes = TryFold(ConstTrue); if (!TrueRes || TrueRes.isUndef()) break; SDValue FalseRes = TryFold(ConstFalse); if (!FalseRes || FalseRes.isUndef()) break; // For us to materialize these using one instruction, we must be able to // represent them as signed 16-bit integers. uint64_t True = cast(TrueRes)->getZExtValue(), False = cast(FalseRes)->getZExtValue(); if (!isInt<16>(True) || !isInt<16>(False)) break; // We can replace User with a new SELECT node, and try again to see if we // can fold the select with its user. Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes); N = User; ConstTrue = TrueRes; ConstFalse = FalseRes; } while (N->hasOneUse()); } void PPCDAGToDAGISel::PreprocessISelDAG() { SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); bool MadeChange = false; while (Position != CurDAG->allnodes_begin()) { SDNode *N = &*--Position; if (N->use_empty()) continue; SDValue Res; switch (N->getOpcode()) { default: break; case ISD::OR: Res = combineToCMPB(N); break; } if (!Res) foldBoolExts(Res, N); if (Res) { LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: "); LLVM_DEBUG(N->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\nNew: "); LLVM_DEBUG(Res.getNode()->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\n"); CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); MadeChange = true; } } if (MadeChange) CurDAG->RemoveDeadNodes(); } /// PostprocessISelDAG - Perform some late peephole optimizations /// on the DAG representation. void PPCDAGToDAGISel::PostprocessISelDAG() { // Skip peepholes at -O0. if (TM.getOptLevel() == CodeGenOpt::None) return; PeepholePPC64(); PeepholeCROps(); PeepholePPC64ZExt(); } // Check if all users of this node will become isel where the second operand // is the constant zero. If this is so, and if we can negate the condition, // then we can flip the true and false operands. This will allow the zero to // be folded with the isel so that we don't need to materialize a register // containing zero. 
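// The CR-bit peephole below rewrites a CR-logic node whose input is a known
// complement (a crnor x, x) into the complementary operation, and, when every
// user is a select against zero, inverts the node so that the zero can be
// folded by the isel after swapping the select operands.  A standalone C++14
// check of a few of the Boolean identities involved; the helpers mirror the
// CR operations but are illustrative only.
constexpr bool CRNAND(bool A, bool B) { return !(A && B); }
constexpr bool CRNOR (bool A, bool B) { return !(A || B); }
constexpr bool CREQV (bool A, bool B) { return A == B; }
constexpr bool CRXOR (bool A, bool B) { return A != B; }
constexpr bool CRANDC(bool A, bool B) { return A && !B; }
constexpr bool CRORC (bool A, bool B) { return A || !B; }
constexpr bool Select(bool C, bool T, bool F) { return C ? T : F; }

constexpr bool CheckCRPeepholeIdentities() {
  for (int Xi = 0; Xi <= 1; ++Xi)
    for (int Yi = 0; Yi <= 1; ++Yi)
      for (int Vi = 0; Vi <= 1; ++Vi) {
        bool X = Xi != 0, Y = Yi != 0, V = Vi != 0;
        if (CRNAND(!X, Y) != CRORC(X, Y)) return false;  // nand(~x,y)=orc(x,y)
        if (CRNOR(!X, Y) != CRANDC(X, Y)) return false;  // nor(~x,y)=andc(x,y)
        if (CRANDC(!X, Y) != CRNOR(X, Y)) return false;  // andc(~x,y)=nor(x,y)
        if (CREQV(!X, Y) != CRXOR(X, Y)) return false;   // eqv(~x,y)=xor(x,y)
        // Flipping the condition moves the zero into the operand slot that
        // the isel can fold.
        if (Select(X, V, false) != Select(!X, false, V)) return false;
      }
  return true;
}
static_assert(CheckCRPeepholeIdentities(), "CR peephole Boolean identities");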
bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (!User->isMachineOpcode()) return false; if (User->getMachineOpcode() != PPC::SELECT_I4 && User->getMachineOpcode() != PPC::SELECT_I8) return false; SDNode *Op2 = User->getOperand(2).getNode(); if (!Op2->isMachineOpcode()) return false; if (Op2->getMachineOpcode() != PPC::LI && Op2->getMachineOpcode() != PPC::LI8) return false; ConstantSDNode *C = dyn_cast(Op2->getOperand(0)); if (!C) return false; if (!C->isNullValue()) return false; } return true; } void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) { SmallVector ToReplace; for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE; ++UI) { SDNode *User = *UI; assert((User->getMachineOpcode() == PPC::SELECT_I4 || User->getMachineOpcode() == PPC::SELECT_I8) && "Must have all select users"); ToReplace.push_back(User); } for (SmallVector::iterator UI = ToReplace.begin(), UE = ToReplace.end(); UI != UE; ++UI) { SDNode *User = *UI; SDNode *ResNode = CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User), User->getValueType(0), User->getOperand(0), User->getOperand(2), User->getOperand(1)); LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); LLVM_DEBUG(User->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\nNew: "); LLVM_DEBUG(ResNode->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\n"); ReplaceUses(User, ResNode); } } void PPCDAGToDAGISel::PeepholeCROps() { bool IsModified; do { IsModified = false; for (SDNode &Node : CurDAG->allnodes()) { MachineSDNode *MachineNode = dyn_cast(&Node); if (!MachineNode || MachineNode->use_empty()) continue; SDNode *ResNode = MachineNode; bool Op1Set = false, Op1Unset = false, Op1Not = false, Op2Set = false, Op2Unset = false, Op2Not = false; unsigned Opcode = MachineNode->getMachineOpcode(); switch (Opcode) { default: break; case PPC::CRAND: case PPC::CRNAND: case PPC::CROR: case PPC::CRXOR: case PPC::CRNOR: case PPC::CREQV: case PPC::CRANDC: case PPC::CRORC: { SDValue Op = MachineNode->getOperand(1); if (Op.isMachineOpcode()) { if (Op.getMachineOpcode() == PPC::CRSET) Op2Set = true; else if (Op.getMachineOpcode() == PPC::CRUNSET) Op2Unset = true; else if (Op.getMachineOpcode() == PPC::CRNOR && Op.getOperand(0) == Op.getOperand(1)) Op2Not = true; } LLVM_FALLTHROUGH; } case PPC::BC: case PPC::BCn: case PPC::SELECT_I4: case PPC::SELECT_I8: case PPC::SELECT_F4: case PPC::SELECT_F8: case PPC::SELECT_QFRC: case PPC::SELECT_QSRC: case PPC::SELECT_QBRC: case PPC::SELECT_SPE: case PPC::SELECT_SPE4: case PPC::SELECT_VRRC: case PPC::SELECT_VSFRC: case PPC::SELECT_VSSRC: case PPC::SELECT_VSRC: { SDValue Op = MachineNode->getOperand(0); if (Op.isMachineOpcode()) { if (Op.getMachineOpcode() == PPC::CRSET) Op1Set = true; else if (Op.getMachineOpcode() == PPC::CRUNSET) Op1Unset = true; else if (Op.getMachineOpcode() == PPC::CRNOR && Op.getOperand(0) == Op.getOperand(1)) Op1Not = true; } } break; } bool SelectSwap = false; switch (Opcode) { default: break; case PPC::CRAND: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) // x & x = x ResNode = MachineNode->getOperand(0).getNode(); else if (Op1Set) // 1 & y = y ResNode = MachineNode->getOperand(1).getNode(); else if (Op2Set) // x & 1 = x ResNode = MachineNode->getOperand(0).getNode(); else if (Op1Unset || Op2Unset) // x & 0 = 0 & y = 0 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), MVT::i1); else if (Op1Not) // ~x & y = andc(y, x) ResNode = CurDAG->getMachineNode(PPC::CRANDC, 
SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), MachineNode->getOperand(0). getOperand(0)); else if (Op2Not) // x & ~y = andc(x, y) ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). getOperand(0)); else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1)); SelectSwap = true; } break; case PPC::CRNAND: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) // nand(x, x) -> nor(x, x) ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(0)); else if (Op1Set) // nand(1, y) -> nor(y, y) ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), MachineNode->getOperand(1)); else if (Op2Set) // nand(x, 1) -> nor(x, x) ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(0)); else if (Op1Unset || Op2Unset) // nand(x, 0) = nand(0, y) = 1 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), MVT::i1); else if (Op1Not) // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y) ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0). getOperand(0), MachineNode->getOperand(1)); else if (Op2Not) // nand(x, ~y) = ~x | y = orc(y, x) ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1). getOperand(0), MachineNode->getOperand(0)); else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1)); SelectSwap = true; } break; case PPC::CROR: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) // x | x = x ResNode = MachineNode->getOperand(0).getNode(); else if (Op1Set || Op2Set) // x | 1 = 1 | y = 1 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), MVT::i1); else if (Op1Unset) // 0 | y = y ResNode = MachineNode->getOperand(1).getNode(); else if (Op2Unset) // x | 0 = x ResNode = MachineNode->getOperand(0).getNode(); else if (Op1Not) // ~x | y = orc(y, x) ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), MachineNode->getOperand(0). getOperand(0)); else if (Op2Not) // x | ~y = orc(x, y) ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). 
getOperand(0)); else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1)); SelectSwap = true; } break; case PPC::CRXOR: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) // xor(x, x) = 0 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), MVT::i1); else if (Op1Set) // xor(1, y) -> nor(y, y) ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), MachineNode->getOperand(1)); else if (Op2Set) // xor(x, 1) -> nor(x, x) ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(0)); else if (Op1Unset) // xor(0, y) = y ResNode = MachineNode->getOperand(1).getNode(); else if (Op2Unset) // xor(x, 0) = x ResNode = MachineNode->getOperand(0).getNode(); else if (Op1Not) // xor(~x, y) = eqv(x, y) ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0). getOperand(0), MachineNode->getOperand(1)); else if (Op2Not) // xor(x, ~y) = eqv(x, y) ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). getOperand(0)); else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1)); SelectSwap = true; } break; case PPC::CRNOR: if (Op1Set || Op2Set) // nor(1, y) -> 0 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), MVT::i1); else if (Op1Unset) // nor(0, y) = ~y -> nor(y, y) ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), MachineNode->getOperand(1)); else if (Op2Unset) // nor(x, 0) = ~x ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(0)); else if (Op1Not) // nor(~x, y) = andc(x, y) ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0). getOperand(0), MachineNode->getOperand(1)); else if (Op2Not) // nor(x, ~y) = andc(y, x) ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1). getOperand(0), MachineNode->getOperand(0)); else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1)); SelectSwap = true; } break; case PPC::CREQV: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) // eqv(x, x) = 1 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), MVT::i1); else if (Op1Set) // eqv(1, y) = y ResNode = MachineNode->getOperand(1).getNode(); else if (Op2Set) // eqv(x, 1) = x ResNode = MachineNode->getOperand(0).getNode(); else if (Op1Unset) // eqv(0, y) = ~y -> nor(y, y) ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), MachineNode->getOperand(1)); else if (Op2Unset) // eqv(x, 0) = ~x ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(0)); else if (Op1Not) // eqv(~x, y) = xor(x, y) ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0). 
getOperand(0), MachineNode->getOperand(1)); else if (Op2Not) // eqv(x, ~y) = xor(x, y) ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). getOperand(0)); else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1)); SelectSwap = true; } break; case PPC::CRANDC: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) // andc(x, x) = 0 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), MVT::i1); else if (Op1Set) // andc(1, y) = ~y ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), MachineNode->getOperand(1)); else if (Op1Unset || Op2Set) // andc(0, y) = andc(x, 1) = 0 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), MVT::i1); else if (Op2Unset) // andc(x, 0) = x ResNode = MachineNode->getOperand(0).getNode(); else if (Op1Not) // andc(~x, y) = ~(x | y) = nor(x, y) ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0). getOperand(0), MachineNode->getOperand(1)); else if (Op2Not) // andc(x, ~y) = x & y ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). getOperand(0)); else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), MachineNode->getOperand(0)); SelectSwap = true; } break; case PPC::CRORC: if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) // orc(x, x) = 1 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), MVT::i1); else if (Op1Set || Op2Unset) // orc(1, y) = orc(x, 0) = 1 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), MVT::i1); else if (Op2Set) // orc(x, 1) = x ResNode = MachineNode->getOperand(0).getNode(); else if (Op1Unset) // orc(0, y) = ~y ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), MachineNode->getOperand(1)); else if (Op1Not) // orc(~x, y) = ~(x & y) = nand(x, y) ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0). getOperand(0), MachineNode->getOperand(1)); else if (Op2Not) // orc(x, ~y) = x | y ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(0), MachineNode->getOperand(1). getOperand(0)); else if (AllUsersSelectZero(MachineNode)) { ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), MVT::i1, MachineNode->getOperand(1), MachineNode->getOperand(0)); SelectSwap = true; } break; case PPC::SELECT_I4: case PPC::SELECT_I8: case PPC::SELECT_F4: case PPC::SELECT_F8: case PPC::SELECT_QFRC: case PPC::SELECT_QSRC: case PPC::SELECT_QBRC: case PPC::SELECT_SPE: case PPC::SELECT_SPE4: case PPC::SELECT_VRRC: case PPC::SELECT_VSFRC: case PPC::SELECT_VSSRC: case PPC::SELECT_VSRC: if (Op1Set) ResNode = MachineNode->getOperand(1).getNode(); else if (Op1Unset) ResNode = MachineNode->getOperand(2).getNode(); else if (Op1Not) ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(), SDLoc(MachineNode), MachineNode->getValueType(0), MachineNode->getOperand(0). getOperand(0), MachineNode->getOperand(2), MachineNode->getOperand(1)); break; case PPC::BC: case PPC::BCn: if (Op1Not) ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? 
PPC::BCn : PPC::BC, SDLoc(MachineNode), MVT::Other, MachineNode->getOperand(0). getOperand(0), MachineNode->getOperand(1), MachineNode->getOperand(2)); // FIXME: Handle Op1Set, Op1Unset here too. break; } // If we're inverting this node because it is used only by selects that // we'd like to swap, then swap the selects before the node replacement. if (SelectSwap) SwapAllSelectUsers(MachineNode); if (ResNode != MachineNode) { LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); LLVM_DEBUG(MachineNode->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\nNew: "); LLVM_DEBUG(ResNode->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\n"); ReplaceUses(MachineNode, ResNode); IsModified = true; } } if (IsModified) CurDAG->RemoveDeadNodes(); } while (IsModified); } // Gather the set of 32-bit operations that are known to have their // higher-order 32 bits zero, where ToPromote contains all such operations. static bool PeepholePPC64ZExtGather(SDValue Op32, SmallPtrSetImpl &ToPromote) { if (!Op32.isMachineOpcode()) return false; // First, check for the "frontier" instructions (those that will clear the // higher-order 32 bits. // For RLWINM and RLWNM, we need to make sure that the mask does not wrap // around. If it does not, then these instructions will clear the // higher-order bits. if ((Op32.getMachineOpcode() == PPC::RLWINM || Op32.getMachineOpcode() == PPC::RLWNM) && Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) { ToPromote.insert(Op32.getNode()); return true; } // SLW and SRW always clear the higher-order bits. if (Op32.getMachineOpcode() == PPC::SLW || Op32.getMachineOpcode() == PPC::SRW) { ToPromote.insert(Op32.getNode()); return true; } // For LI and LIS, we need the immediate to be positive (so that it is not // sign extended). if (Op32.getMachineOpcode() == PPC::LI || Op32.getMachineOpcode() == PPC::LIS) { if (!isUInt<15>(Op32.getConstantOperandVal(0))) return false; ToPromote.insert(Op32.getNode()); return true; } // LHBRX and LWBRX always clear the higher-order bits. if (Op32.getMachineOpcode() == PPC::LHBRX || Op32.getMachineOpcode() == PPC::LWBRX) { ToPromote.insert(Op32.getNode()); return true; } // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended. if (Op32.getMachineOpcode() == PPC::CNTLZW || Op32.getMachineOpcode() == PPC::CNTTZW) { ToPromote.insert(Op32.getNode()); return true; } // Next, check for those instructions we can look through. // Assuming the mask does not wrap around, then the higher-order bits are // taken directly from the first operand. if (Op32.getMachineOpcode() == PPC::RLWIMI && Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) { SmallPtrSet ToPromote1; if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1)) return false; ToPromote.insert(Op32.getNode()); ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); return true; } // For OR, the higher-order bits are zero if that is true for both operands. // For SELECT_I4, the same is true (but the relevant operand numbers are // shifted by 1). if (Op32.getMachineOpcode() == PPC::OR || Op32.getMachineOpcode() == PPC::SELECT_I4) { unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 
1 : 0; SmallPtrSet ToPromote1; if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1)) return false; if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1)) return false; ToPromote.insert(Op32.getNode()); ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); return true; } // For ORI and ORIS, we need the higher-order bits of the first operand to be // zero, and also for the constant to be positive (so that it is not sign // extended). if (Op32.getMachineOpcode() == PPC::ORI || Op32.getMachineOpcode() == PPC::ORIS) { SmallPtrSet ToPromote1; if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1)) return false; if (!isUInt<15>(Op32.getConstantOperandVal(1))) return false; ToPromote.insert(Op32.getNode()); ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); return true; } // The higher-order bits of AND are zero if that is true for at least one of // the operands. if (Op32.getMachineOpcode() == PPC::AND) { SmallPtrSet ToPromote1, ToPromote2; bool Op0OK = PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1); bool Op1OK = PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2); if (!Op0OK && !Op1OK) return false; ToPromote.insert(Op32.getNode()); if (Op0OK) ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); if (Op1OK) ToPromote.insert(ToPromote2.begin(), ToPromote2.end()); return true; } // For ANDI and ANDIS, the higher-order bits are zero if either that is true // of the first operand, or if the second operand is positive (so that it is // not sign extended). if (Op32.getMachineOpcode() == PPC::ANDIo || Op32.getMachineOpcode() == PPC::ANDISo) { SmallPtrSet ToPromote1; bool Op0OK = PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1); bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1)); if (!Op0OK && !Op1OK) return false; ToPromote.insert(Op32.getNode()); if (Op0OK) ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); return true; } return false; } void PPCDAGToDAGISel::PeepholePPC64ZExt() { if (!PPCSubTarget->isPPC64()) return; // When we zero-extend from i32 to i64, we use a pattern like this: // def : Pat<(i64 (zext i32:$in)), // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32), // 0, 32)>; // There are several 32-bit shift/rotate instructions, however, that will // clear the higher-order bits of their output, rendering the RLDICL // unnecessary. When that happens, we remove it here, and redefine the // relevant 32-bit operation to be a 64-bit operation. SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); bool MadeChange = false; while (Position != CurDAG->allnodes_begin()) { SDNode *N = &*--Position; // Skip dead nodes and any non-machine opcodes. if (N->use_empty() || !N->isMachineOpcode()) continue; if (N->getMachineOpcode() != PPC::RLDICL) continue; if (N->getConstantOperandVal(1) != 0 || N->getConstantOperandVal(2) != 32) continue; SDValue ISR = N->getOperand(0); if (!ISR.isMachineOpcode() || ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG) continue; if (!ISR.hasOneUse()) continue; if (ISR.getConstantOperandVal(2) != PPC::sub_32) continue; SDValue IDef = ISR.getOperand(0); if (!IDef.isMachineOpcode() || IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF) continue; // We now know that we're looking at a canonical i32 -> i64 zext. See if we // can get rid of it. SDValue Op32 = ISR->getOperand(1); if (!Op32.isMachineOpcode()) continue; // There are some 32-bit instructions that always clear the high-order 32 // bits, there are also some instructions (like AND) that we can look // through. 
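// The peephole above removes the canonical i32 -> i64 zero-extension (RLDICL
// with SH = 0, MB = 32) when the 32-bit producer already leaves the high 32
// bits clear; for LI/LIS that holds only for non-negative immediates, hence
// the isUInt<15> checks in the gather routine.  A standalone C++ sketch of
// that reasoning; the helper names are illustrative only.
#include <cstdint>

// rldicl rD, rS, 0, 32 keeps only the low 32 bits: the canonical i32 -> i64
// zero-extension this peephole tries to delete.
constexpr uint64_t RldiclZext(uint64_t X) { return X & 0xFFFFFFFFull; }

// A 64-bit register written by "li" holds the sign-extended 16-bit immediate,
// so its high 32 bits are already clear exactly when the immediate is
// non-negative.
constexpr uint64_t LiResult(int16_t Imm) {
  return static_cast<uint64_t>(static_cast<int64_t>(Imm));
}
static_assert(RldiclZext(LiResult(0x7ABC)) == LiResult(0x7ABC),
              "positive li immediate: the zero-extension is a no-op");
static_assert(RldiclZext(LiResult(-1)) != LiResult(-1),
              "negative li immediate: the high bits are not already clear");

// srw-style results are computed in 32 bits and are likewise already
// zero-extended when viewed as a 64-bit value.
constexpr uint64_t SrwResult(uint32_t X, unsigned SH) { return X >> (SH & 31); }
static_assert(RldiclZext(SrwResult(0xDEADBEEFu, 4)) == SrwResult(0xDEADBEEFu, 4),
              "srw result has no high bits to clear");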
SmallPtrSet ToPromote; if (!PeepholePPC64ZExtGather(Op32, ToPromote)) continue; // If the ToPromote set contains nodes that have uses outside of the set // (except for the original INSERT_SUBREG), then abort the transformation. bool OutsideUse = false; for (SDNode *PN : ToPromote) { for (SDNode *UN : PN->uses()) { if (!ToPromote.count(UN) && UN != ISR.getNode()) { OutsideUse = true; break; } } if (OutsideUse) break; } if (OutsideUse) continue; MadeChange = true; // We now know that this zero extension can be removed by promoting to // nodes in ToPromote to 64-bit operations, where for operations in the // frontier of the set, we need to insert INSERT_SUBREGs for their // operands. for (SDNode *PN : ToPromote) { unsigned NewOpcode; switch (PN->getMachineOpcode()) { default: llvm_unreachable("Don't know the 64-bit variant of this instruction"); case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break; case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break; case PPC::SLW: NewOpcode = PPC::SLW8; break; case PPC::SRW: NewOpcode = PPC::SRW8; break; case PPC::LI: NewOpcode = PPC::LI8; break; case PPC::LIS: NewOpcode = PPC::LIS8; break; case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break; case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break; case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break; case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break; case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break; case PPC::OR: NewOpcode = PPC::OR8; break; case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break; case PPC::ORI: NewOpcode = PPC::ORI8; break; case PPC::ORIS: NewOpcode = PPC::ORIS8; break; case PPC::AND: NewOpcode = PPC::AND8; break; case PPC::ANDIo: NewOpcode = PPC::ANDIo8; break; case PPC::ANDISo: NewOpcode = PPC::ANDISo8; break; } // Note: During the replacement process, the nodes will be in an // inconsistent state (some instructions will have operands with values // of the wrong type). Once done, however, everything should be right // again. SmallVector Ops; for (const SDValue &V : PN->ops()) { if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 && !isa(V)) { SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) }; SDNode *ReplOp = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V), ISR.getNode()->getVTList(), ReplOpOps); Ops.push_back(SDValue(ReplOp, 0)); } else { Ops.push_back(V); } } // Because all to-be-promoted nodes only have users that are other // promoted nodes (or the original INSERT_SUBREG), we can safely replace // the i32 result value type with i64. SmallVector NewVTs; SDVTList VTs = PN->getVTList(); for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i) if (VTs.VTs[i] == MVT::i32) NewVTs.push_back(MVT::i64); else NewVTs.push_back(VTs.VTs[i]); LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: "); LLVM_DEBUG(PN->dump(CurDAG)); CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops); LLVM_DEBUG(dbgs() << "\nNew: "); LLVM_DEBUG(PN->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\n"); } // Now we replace the original zero extend and its associated INSERT_SUBREG // with the value feeding the INSERT_SUBREG (which has now been promoted to // return an i64). LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: "); LLVM_DEBUG(N->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\nNew: "); LLVM_DEBUG(Op32.getNode()->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\n"); ReplaceUses(N, Op32.getNode()); } if (MadeChange) CurDAG->RemoveDeadNodes(); } void PPCDAGToDAGISel::PeepholePPC64() { // These optimizations are currently supported only for 64-bit SVR4. 
if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64()) return; SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); while (Position != CurDAG->allnodes_begin()) { SDNode *N = &*--Position; // Skip dead nodes and any non-machine opcodes. if (N->use_empty() || !N->isMachineOpcode()) continue; unsigned FirstOp; unsigned StorageOpcode = N->getMachineOpcode(); bool RequiresMod4Offset = false; switch (StorageOpcode) { default: continue; case PPC::LWA: case PPC::LD: case PPC::DFLOADf64: case PPC::DFLOADf32: RequiresMod4Offset = true; LLVM_FALLTHROUGH; case PPC::LBZ: case PPC::LBZ8: case PPC::LFD: case PPC::LFS: case PPC::LHA: case PPC::LHA8: case PPC::LHZ: case PPC::LHZ8: case PPC::LWZ: case PPC::LWZ8: FirstOp = 0; break; case PPC::STD: case PPC::DFSTOREf64: case PPC::DFSTOREf32: RequiresMod4Offset = true; LLVM_FALLTHROUGH; case PPC::STB: case PPC::STB8: case PPC::STFD: case PPC::STFS: case PPC::STH: case PPC::STH8: case PPC::STW: case PPC::STW8: FirstOp = 1; break; } // If this is a load or store with a zero offset, or within the alignment, // we may be able to fold an add-immediate into the memory operation. // The check against alignment is below, as it can't occur until we check // the arguments to N if (!isa(N->getOperand(FirstOp))) continue; SDValue Base = N->getOperand(FirstOp + 1); if (!Base.isMachineOpcode()) continue; unsigned Flags = 0; bool ReplaceFlags = true; // When the feeding operation is an add-immediate of some sort, // determine whether we need to add relocation information to the // target flags on the immediate operand when we fold it into the // load instruction. // // For something like ADDItocL, the relocation information is // inferred from the opcode; when we process it in the AsmPrinter, // we add the necessary relocation there. A load, though, can receive // relocation from various flavors of ADDIxxx, so we need to carry // the relocation information in the target flags. switch (Base.getMachineOpcode()) { default: continue; case PPC::ADDI8: case PPC::ADDI: // In some cases (such as TLS) the relocation information // is already in place on the operand, so copying the operand // is sufficient. ReplaceFlags = false; // For these cases, the immediate may not be divisible by 4, in // which case the fold is illegal for DS-form instructions. (The // other cases provide aligned addresses and are always safe.) if (RequiresMod4Offset && (!isa(Base.getOperand(1)) || Base.getConstantOperandVal(1) % 4 != 0)) continue; break; case PPC::ADDIdtprelL: Flags = PPCII::MO_DTPREL_LO; break; case PPC::ADDItlsldL: Flags = PPCII::MO_TLSLD_LO; break; case PPC::ADDItocL: Flags = PPCII::MO_TOC_LO; break; } SDValue ImmOpnd = Base.getOperand(1); // On PPC64, the TOC base pointer is guaranteed by the ABI only to have // 8-byte alignment, and so we can only use offsets less than 8 (otherwise, // we might have needed different @ha relocation values for the offset // pointers). int MaxDisplacement = 7; if (GlobalAddressSDNode *GA = dyn_cast(ImmOpnd)) { const GlobalValue *GV = GA->getGlobal(); MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement); } bool UpdateHBase = false; SDValue HBase = Base.getOperand(0); int Offset = N->getConstantOperandVal(FirstOp); if (ReplaceFlags) { if (Offset < 0 || Offset > MaxDisplacement) { // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only // one use, then we can do this for any offset, we just need to also // update the offset (i.e. the symbol addend) on the addis also. 
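// The displacement folding above allows only offsets in
// [0, min(alignment - 1, 7)] when a relocation is involved: the TOC pointer
// guarantees just 8-byte alignment, and a larger addend could change the @ha
// half of the @ha/@l split, which would also require rewriting the addis.  A
// standalone C++14 sketch of that @ha/@l arithmetic; Ha and Lo are
// illustrative helpers, not the real relocation processing.
#include <cstdint>

// @l is the sign-extended low 16 bits; @ha is the high part adjusted so that
// (Ha << 16) + Lo reconstructs the address despite Lo being signed.
constexpr int64_t Lo(uint64_t X) {
  return static_cast<int64_t>((X & 0xFFFF) ^ 0x8000) - 0x8000;
}
constexpr uint64_t Ha(uint64_t X) { return (X + 0x8000) >> 16; }

static_assert((Ha(0x12345678ull) << 16) + Lo(0x12345678ull) == 0x12345678ull,
              "@ha/@l reconstruct the address");
static_assert((Ha(0x1234FFF8ull) << 16) + Lo(0x1234FFF8ull) == 0x1234FFF8ull,
              "@ha/@l reconstruct the address when @l is negative");

// For an 8-byte-aligned symbol, displacements 0..7 never change the @ha part,
// so only the low-part addend has to be updated when folding them.
constexpr bool HaStableForSmallOffsets(uint64_t AlignedSym) {
  for (uint64_t Off = 0; Off <= 7; ++Off)
    if (Ha(AlignedSym + Off) != Ha(AlignedSym))
      return false;
  return true;
}
static_assert(HaStableForSmallOffsets(0x0000000012344FF8ull),
              "offsets 0..7 keep the @ha value");
static_assert(HaStableForSmallOffsets(0x000000001234FFF8ull),
              "offsets 0..7 keep the @ha value near a 64KiB boundary");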
if (Base.getMachineOpcode() != PPC::ADDItocL) continue; if (!HBase.isMachineOpcode() || HBase.getMachineOpcode() != PPC::ADDIStocHA) continue; if (!Base.hasOneUse() || !HBase.hasOneUse()) continue; SDValue HImmOpnd = HBase.getOperand(1); if (HImmOpnd != ImmOpnd) continue; UpdateHBase = true; } } else { // If we're directly folding the addend from an addi instruction, then: // 1. In general, the offset on the memory access must be zero. // 2. If the addend is a constant, then it can be combined with a // non-zero offset, but only if the result meets the encoding // requirements. if (auto *C = dyn_cast(ImmOpnd)) { Offset += C->getSExtValue(); if (RequiresMod4Offset && (Offset % 4) != 0) continue; if (!isInt<16>(Offset)) continue; ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd), ImmOpnd.getValueType()); } else if (Offset != 0) { continue; } } // We found an opportunity. Reverse the operands from the add // immediate and substitute them into the load or store. If // needed, update the target flags for the immediate operand to // reflect the necessary relocation information. LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); LLVM_DEBUG(Base->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\nN: "); LLVM_DEBUG(N->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\n"); // If the relocation information isn't already present on the // immediate operand, add it now. if (ReplaceFlags) { if (GlobalAddressSDNode *GA = dyn_cast(ImmOpnd)) { SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); // We can't perform this optimization for data whose alignment // is insufficient for the instruction encoding. if (GV->getAlignment() < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) { LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n"); continue; } ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags); } else if (ConstantPoolSDNode *CP = dyn_cast(ImmOpnd)) { const Constant *C = CP->getConstVal(); ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlignment(), Offset, Flags); } } if (FirstOp == 1) // Store (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, Base.getOperand(0), N->getOperand(3)); else // Load (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0), N->getOperand(2)); if (UpdateHBase) (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0), ImmOpnd); // The add-immediate may now be dead, in which case remove it. if (Base.getNode()->use_empty()) CurDAG->RemoveDeadNode(Base.getNode()); } } /// createPPCISelDag - This pass converts a legalized DAG into a /// PowerPC-specific DAG, ready for instruction scheduling. /// FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM, CodeGenOpt::Level OptLevel) { return new PPCDAGToDAGISel(TM, OptLevel); } Index: vendor/llvm/dist-release_80/lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- vendor/llvm/dist-release_80/lib/Target/PowerPC/PPCInstrInfo.td (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/PowerPC/PPCInstrInfo.td (revision 348932) @@ -1,4958 +1,4960 @@ //===-- PPCInstrInfo.td - The PowerPC Instruction Set ------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file describes the subset of the 32-bit PowerPC instruction set, as used // by the PowerPC instruction selector. 
// //===----------------------------------------------------------------------===// include "PPCInstrFormats.td" //===----------------------------------------------------------------------===// // PowerPC specific type constraints. // def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx SDTCisVT<0, f64>, SDTCisPtrTy<1> ]>; def SDT_PPClfiwx : SDTypeProfile<1, 1, [ // lfiw[az]x SDTCisVT<0, f64>, SDTCisPtrTy<1> ]>; def SDT_PPCLxsizx : SDTypeProfile<1, 2, [ SDTCisVT<0, f64>, SDTCisPtrTy<1>, SDTCisPtrTy<2> ]>; def SDT_PPCstxsix : SDTypeProfile<0, 3, [ SDTCisVT<0, f64>, SDTCisPtrTy<1>, SDTCisPtrTy<2> ]>; def SDT_PPCcv_fp_to_int : SDTypeProfile<1, 1, [ SDTCisFP<0>, SDTCisFP<1> ]>; def SDT_PPCstore_scal_int_from_vsr : SDTypeProfile<0, 3, [ SDTCisVT<0, f64>, SDTCisPtrTy<1>, SDTCisPtrTy<2> ]>; def SDT_PPCVexts : SDTypeProfile<1, 2, [ SDTCisVT<0, f64>, SDTCisVT<1, f64>, SDTCisPtrTy<2> ]>; def SDT_PPCSExtVElems : SDTypeProfile<1, 1, [ SDTCisVec<0>, SDTCisVec<1> ]>; def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; def SDT_PPCvperm : SDTypeProfile<1, 3, [ SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2> ]>; def SDT_PPCVecSplat : SDTypeProfile<1, 2, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<2> ]>; def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3> ]>; def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> ]>; def SDT_PPCVecReverse: SDTypeProfile<1, 1, [ SDTCisVec<0>, SDTCisVec<1> ]>; def SDT_PPCxxpermdi: SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> ]>; def SDT_PPCvcmp : SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32> ]>; def SDT_PPCcondbr : SDTypeProfile<0, 3, [ SDTCisVT<0, i32>, SDTCisVT<2, OtherVT> ]>; def SDT_PPClbrx : SDTypeProfile<1, 2, [ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> ]>; def SDT_PPCstbrx : SDTypeProfile<0, 3, [ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> ]>; def SDT_PPCTC_ret : SDTypeProfile<0, 2, [ SDTCisPtrTy<0>, SDTCisVT<1, i32> ]>; def tocentry32 : Operand { let MIOperandInfo = (ops i32imm:$imm); } def SDT_PPCqvfperm : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVec<3> ]>; def SDT_PPCqvgpci : SDTypeProfile<1, 1, [ SDTCisVec<0>, SDTCisInt<1> ]>; def SDT_PPCqvaligni : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3> ]>; def SDT_PPCqvesplati : SDTypeProfile<1, 2, [ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2> ]>; def SDT_PPCqbflt : SDTypeProfile<1, 1, [ SDTCisVec<0>, SDTCisVec<1> ]>; def SDT_PPCqvlfsb : SDTypeProfile<1, 1, [ SDTCisVec<0>, SDTCisPtrTy<1> ]>; def SDT_PPCextswsli : SDTypeProfile<1, 2, [ // extswsli SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisInt<2> ]>; //===----------------------------------------------------------------------===// // PowerPC specific DAG Nodes. 
// def PPCfre : SDNode<"PPCISD::FRE", SDTFPUnaryOp, []>; def PPCfrsqrte: SDNode<"PPCISD::FRSQRTE", SDTFPUnaryOp, []>; def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>; def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>; def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>; def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>; def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>; def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>; def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>; def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>; def PPCcv_fp_to_uint_in_vsr: SDNode<"PPCISD::FP_TO_UINT_IN_VSR", SDT_PPCcv_fp_to_int, []>; def PPCcv_fp_to_sint_in_vsr: SDNode<"PPCISD::FP_TO_SINT_IN_VSR", SDT_PPCcv_fp_to_int, []>; def PPCstore_scal_int_from_vsr: SDNode<"PPCISD::ST_VSR_SCAL_INT", SDT_PPCstore_scal_int_from_vsr, [SDNPHasChain, SDNPMayStore]>; def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx, [SDNPHasChain, SDNPMayStore]>; def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPClxsizx : SDNode<"PPCISD::LXSIZX", SDT_PPCLxsizx, [SDNPHasChain, SDNPMayLoad]>; def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix, [SDNPHasChain, SDNPMayStore]>; def PPCVexts : SDNode<"PPCISD::VEXTS", SDT_PPCVexts, []>; def PPCSExtVElems : SDNode<"PPCISD::SExtVElems", SDT_PPCSExtVElems, []>; // Extract FPSCR (not modeled at the DAG level). def PPCmffs : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>; // Perform FADD in round-to-zero mode. def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>; def PPCfsel : SDNode<"PPCISD::FSEL", // Type constraint for fsel. 
SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisFP<0>, SDTCisVT<1, f64>]>, []>; def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>; def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>; def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad, SDNPMemOperand]>; def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>; def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>; def PPCppc32GOT : SDNode<"PPCISD::PPC32_GOT", SDTIntLeaf, []>; def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>; def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp, [SDNPMayLoad]>; def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>; def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>; def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>; def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>; def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR", SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>; def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>; def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>; def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>; def PPCaddiTlsldLAddr : SDNode<"PPCISD::ADDI_TLSLD_L_ADDR", SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>; def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>; def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>; def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>; def PPCxxreverse : SDNode<"PPCISD::XXREVERSE", SDT_PPCVecReverse, []>; def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>; def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>; def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>; def PPCqvgpci : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>; def PPCqvaligni : SDNode<"PPCISD::QVALIGNI", SDT_PPCqvaligni, []>; def PPCqvesplati : SDNode<"PPCISD::QVESPLATI", SDT_PPCqvesplati, []>; def PPCqbflt : SDNode<"PPCISD::QBFLT", SDT_PPCqbflt, []>; def PPCqvlfsb : SDNode<"PPCISD::QVLFSb", SDT_PPCqvlfsb, [SDNPHasChain, SDNPMayLoad]>; def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>; // These nodes represent the 32-bit PPC shifts that operate on 6-bit shift // amounts. These nodes are generated by the multi-precision shift code. def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>; def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>; def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>; def PPCextswsli : SDNode<"PPCISD::EXTSWSLI" , SDT_PPCextswsli>; // Move 2 i64 values into a VSX register def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128", SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisSameSizeAs<1,2>, SDTCisSameAs<1,2>]>, []>; // These are target-independent nodes, but have target-specific formats. 
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart, [SDNPHasChain, SDNPOutGlue]>; def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; def PPCbctrl_load_toc : SDNode<"PPCISD::BCTRL_LOAD_TOC", SDTypeProfile<0, 1, []>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP", SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>, [SDNPHasChain, SDNPSideEffect]>; def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPSideEffect]>; def SDT_PPCsc : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def PPCsc : SDNode<"PPCISD::SC", SDT_PPCsc, [SDNPHasChain, SDNPSideEffect]>; def PPCclrbhrb : SDNode<"PPCISD::CLRBHRB", SDTNone, [SDNPHasChain, SDNPSideEffect]>; def PPCmfbhrbe : SDNode<"PPCISD::MFBHRBE", SDTIntBinOp, [SDNPHasChain]>; def PPCrfebb : SDNode<"PPCISD::RFEBB", SDT_PPCsc, [SDNPHasChain, SDNPSideEffect]>; def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>; def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr, [SDNPHasChain, SDNPOptInGlue]>; // PPC-specific atomic operations. def PPCatomicCmpSwap_8 : SDNode<"PPCISD::ATOMIC_CMP_SWAP_8", SDTAtomic3, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def PPCatomicCmpSwap_16 : SDNode<"PPCISD::ATOMIC_CMP_SWAP_16", SDTAtomic3, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx, [SDNPHasChain, SDNPMayStore]>; // Instructions to set/unset CR bit 6 for SVR4 vararg calls def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; // Instructions to support dynamic alloca. def SDTDynOp : SDTypeProfile<1, 2, []>; def SDTDynAreaOp : SDTypeProfile<1, 1, []>; def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>; def PPCdynareaoffset : SDNode<"PPCISD::DYNAREAOFFSET", SDTDynAreaOp, [SDNPHasChain]>; //===----------------------------------------------------------------------===// // PowerPC specific transformation functions and pattern fragments. // def SHL32 : SDNodeXFormgetZExtValue(), SDLoc(N)); }]>; def SRL32 : SDNodeXFormgetZExtValue() ? 
getI32Imm(32 - N->getZExtValue(), SDLoc(N)) : getI32Imm(0, SDLoc(N)); }]>; def LO16 : SDNodeXFormgetZExtValue(), SDLoc(N)); }]>; def HI16 : SDNodeXFormgetZExtValue() >> 16, SDLoc(N)); }]>; def HA16 : SDNodeXFormgetZExtValue(); return getI32Imm((Val - (signed short)Val) >> 16, SDLoc(N)); }]>; def MB : SDNodeXFormgetZExtValue(), mb, me); return getI32Imm(mb, SDLoc(N)); }]>; def ME : SDNodeXFormgetZExtValue(), mb, me); return getI32Imm(me, SDLoc(N)); }]>; def maskimm32 : PatLeaf<(imm), [{ // maskImm predicate - True if immediate is a run of ones. unsigned mb, me; if (N->getValueType(0) == MVT::i32) return isRunOfOnes((unsigned)N->getZExtValue(), mb, me); else return false; }]>; def imm32SExt16 : Operand, ImmLeaf; def imm64SExt16 : Operand, ImmLeaf; def immZExt16 : PatLeaf<(imm), [{ // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended // field. Used by instructions like 'ori'. return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue(); }], LO16>; def immAnyExt8 : ImmLeaf(Imm) || isUInt<8>(Imm); }]>; def immSExt5NonZero : ImmLeaf(Imm); }]>; // imm16Shifted* - These match immediates where the low 16-bits are zero. There // are two forms: imm16ShiftedSExt and imm16ShiftedZExt. These two forms are // identical in 32-bit mode, but in 64-bit mode, they return true if the // immediate fits into a sign/zero extended 32-bit immediate (with the low bits // clear). def imm16ShiftedZExt : PatLeaf<(imm), [{ // imm16ShiftedZExt predicate - True if only bits in the top 16-bits of the // immediate are set. Used by instructions like 'xoris'. return (N->getZExtValue() & ~uint64_t(0xFFFF0000)) == 0; }], HI16>; def imm16ShiftedSExt : PatLeaf<(imm), [{ // imm16ShiftedSExt predicate - True if only bits in the top 16-bits of the // immediate are set. Used by instructions like 'addis'. Identical to // imm16ShiftedZExt in 32-bit mode. if (N->getZExtValue() & 0xFFFF) return false; if (N->getValueType(0) == MVT::i32) return true; // For 64-bit, make sure it is sext right. return N->getZExtValue() == (uint64_t)(int)N->getZExtValue(); }], HI16>; def imm64ZExt32 : Operand, ImmLeaf(Imm); }]>; // Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require // restricted memrix (4-aligned) constants are alignment sensitive. If these // offsets are hidden behind TOC entries than the values of the lower-order // bits cannot be checked directly. As a result, we need to also incorporate // an alignment check into the relevant patterns. def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast(N)->getAlignment() >= 4; }]>; def aligned4store : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{ return cast(N)->getAlignment() >= 4; }]>; def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ return cast(N)->getAlignment() >= 4; }]>; def aligned4pre_store : PatFrag< (ops node:$val, node:$base, node:$offset), (pre_store node:$val, node:$base, node:$offset), [{ return cast(N)->getAlignment() >= 4; }]>; def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast(N)->getAlignment() < 4; }]>; def unaligned4store : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{ return cast(N)->getAlignment() < 4; }]>; def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ return cast(N)->getAlignment() < 4; }]>; // This is a somewhat weaker condition than actually checking for 16-byte // alignment. 
It is simply checking that the displacement can be represented // as an immediate that is a multiple of 16 (i.e. the requirements for DQ-Form // instructions). def quadwOffsetLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return isOffsetMultipleOf(N, 16); }]>; def quadwOffsetStore : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{ return isOffsetMultipleOf(N, 16); }]>; def nonQuadwOffsetLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return !isOffsetMultipleOf(N, 16); }]>; def nonQuadwOffsetStore : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{ return !isOffsetMultipleOf(N, 16); }]>; //===----------------------------------------------------------------------===// // PowerPC Flag Definitions. class isPPC64 { bit PPC64 = 1; } class isDOT { bit RC = 1; } class RegConstraint { string Constraints = C; } class NoEncode { string DisableEncoding = E; } //===----------------------------------------------------------------------===// // PowerPC Operand Definitions. // In the default PowerPC assembler syntax, registers are specified simply // by number, so they cannot be distinguished from immediate values (without // looking at the opcode). This means that the default operand matching logic // for the asm parser does not work, and we need to specify custom matchers. // Since those can only be specified with RegisterOperand classes and not // directly on the RegisterClass, all instructions patterns used by the asm // parser need to use a RegisterOperand (instead of a RegisterClass) for // all their register operands. // For this purpose, we define one RegisterOperand for each RegisterClass, // using the same name as the class, just in lower case. def PPCRegGPRCAsmOperand : AsmOperandClass { let Name = "RegGPRC"; let PredicateMethod = "isRegNumber"; } def gprc : RegisterOperand { let ParserMatchClass = PPCRegGPRCAsmOperand; } def PPCRegG8RCAsmOperand : AsmOperandClass { let Name = "RegG8RC"; let PredicateMethod = "isRegNumber"; } def g8rc : RegisterOperand { let ParserMatchClass = PPCRegG8RCAsmOperand; } def PPCRegGPRCNoR0AsmOperand : AsmOperandClass { let Name = "RegGPRCNoR0"; let PredicateMethod = "isRegNumber"; } def gprc_nor0 : RegisterOperand { let ParserMatchClass = PPCRegGPRCNoR0AsmOperand; } def PPCRegG8RCNoX0AsmOperand : AsmOperandClass { let Name = "RegG8RCNoX0"; let PredicateMethod = "isRegNumber"; } def g8rc_nox0 : RegisterOperand { let ParserMatchClass = PPCRegG8RCNoX0AsmOperand; } def PPCRegF8RCAsmOperand : AsmOperandClass { let Name = "RegF8RC"; let PredicateMethod = "isRegNumber"; } def f8rc : RegisterOperand { let ParserMatchClass = PPCRegF8RCAsmOperand; } def PPCRegF4RCAsmOperand : AsmOperandClass { let Name = "RegF4RC"; let PredicateMethod = "isRegNumber"; } def f4rc : RegisterOperand { let ParserMatchClass = PPCRegF4RCAsmOperand; } def PPCRegVRRCAsmOperand : AsmOperandClass { let Name = "RegVRRC"; let PredicateMethod = "isRegNumber"; } def vrrc : RegisterOperand { let ParserMatchClass = PPCRegVRRCAsmOperand; } def PPCRegVFRCAsmOperand : AsmOperandClass { let Name = "RegVFRC"; let PredicateMethod = "isRegNumber"; } def vfrc : RegisterOperand { let ParserMatchClass = PPCRegVFRCAsmOperand; } def PPCRegCRBITRCAsmOperand : AsmOperandClass { let Name = "RegCRBITRC"; let PredicateMethod = "isCRBitNumber"; } def crbitrc : RegisterOperand { let ParserMatchClass = PPCRegCRBITRCAsmOperand; } def PPCRegCRRCAsmOperand : AsmOperandClass { let Name = "RegCRRC"; let PredicateMethod = "isCCRegNumber"; } def crrc : RegisterOperand { let 
ParserMatchClass = PPCRegCRRCAsmOperand; } def crrc0 : RegisterOperand { let ParserMatchClass = PPCRegCRRCAsmOperand; } def PPCRegSPERCAsmOperand : AsmOperandClass { let Name = "RegSPERC"; let PredicateMethod = "isRegNumber"; } def sperc : RegisterOperand { let ParserMatchClass = PPCRegSPERCAsmOperand; } def PPCRegSPE4RCAsmOperand : AsmOperandClass { let Name = "RegSPE4RC"; let PredicateMethod = "isRegNumber"; } def spe4rc : RegisterOperand { let ParserMatchClass = PPCRegSPE4RCAsmOperand; } def PPCU1ImmAsmOperand : AsmOperandClass { let Name = "U1Imm"; let PredicateMethod = "isU1Imm"; let RenderMethod = "addImmOperands"; } def u1imm : Operand { let PrintMethod = "printU1ImmOperand"; let ParserMatchClass = PPCU1ImmAsmOperand; } def PPCU2ImmAsmOperand : AsmOperandClass { let Name = "U2Imm"; let PredicateMethod = "isU2Imm"; let RenderMethod = "addImmOperands"; } def u2imm : Operand { let PrintMethod = "printU2ImmOperand"; let ParserMatchClass = PPCU2ImmAsmOperand; } def PPCATBitsAsHintAsmOperand : AsmOperandClass { let Name = "ATBitsAsHint"; let PredicateMethod = "isATBitsAsHint"; let RenderMethod = "addImmOperands"; // Irrelevant, predicate always fails. } def atimm : Operand { let PrintMethod = "printATBitsAsHint"; let ParserMatchClass = PPCATBitsAsHintAsmOperand; } def PPCU3ImmAsmOperand : AsmOperandClass { let Name = "U3Imm"; let PredicateMethod = "isU3Imm"; let RenderMethod = "addImmOperands"; } def u3imm : Operand { let PrintMethod = "printU3ImmOperand"; let ParserMatchClass = PPCU3ImmAsmOperand; } def PPCU4ImmAsmOperand : AsmOperandClass { let Name = "U4Imm"; let PredicateMethod = "isU4Imm"; let RenderMethod = "addImmOperands"; } def u4imm : Operand { let PrintMethod = "printU4ImmOperand"; let ParserMatchClass = PPCU4ImmAsmOperand; } def PPCS5ImmAsmOperand : AsmOperandClass { let Name = "S5Imm"; let PredicateMethod = "isS5Imm"; let RenderMethod = "addImmOperands"; } def s5imm : Operand { let PrintMethod = "printS5ImmOperand"; let ParserMatchClass = PPCS5ImmAsmOperand; let DecoderMethod = "decodeSImmOperand<5>"; } def PPCU5ImmAsmOperand : AsmOperandClass { let Name = "U5Imm"; let PredicateMethod = "isU5Imm"; let RenderMethod = "addImmOperands"; } def u5imm : Operand { let PrintMethod = "printU5ImmOperand"; let ParserMatchClass = PPCU5ImmAsmOperand; let DecoderMethod = "decodeUImmOperand<5>"; } def PPCU6ImmAsmOperand : AsmOperandClass { let Name = "U6Imm"; let PredicateMethod = "isU6Imm"; let RenderMethod = "addImmOperands"; } def u6imm : Operand { let PrintMethod = "printU6ImmOperand"; let ParserMatchClass = PPCU6ImmAsmOperand; let DecoderMethod = "decodeUImmOperand<6>"; } def PPCU7ImmAsmOperand : AsmOperandClass { let Name = "U7Imm"; let PredicateMethod = "isU7Imm"; let RenderMethod = "addImmOperands"; } def u7imm : Operand { let PrintMethod = "printU7ImmOperand"; let ParserMatchClass = PPCU7ImmAsmOperand; let DecoderMethod = "decodeUImmOperand<7>"; } def PPCU8ImmAsmOperand : AsmOperandClass { let Name = "U8Imm"; let PredicateMethod = "isU8Imm"; let RenderMethod = "addImmOperands"; } def u8imm : Operand { let PrintMethod = "printU8ImmOperand"; let ParserMatchClass = PPCU8ImmAsmOperand; let DecoderMethod = "decodeUImmOperand<8>"; } def PPCU10ImmAsmOperand : AsmOperandClass { let Name = "U10Imm"; let PredicateMethod = "isU10Imm"; let RenderMethod = "addImmOperands"; } def u10imm : Operand { let PrintMethod = "printU10ImmOperand"; let ParserMatchClass = PPCU10ImmAsmOperand; let DecoderMethod = "decodeUImmOperand<10>"; } def PPCU12ImmAsmOperand : AsmOperandClass { let Name = "U12Imm"; let 
PredicateMethod = "isU12Imm"; let RenderMethod = "addImmOperands"; } def u12imm : Operand { let PrintMethod = "printU12ImmOperand"; let ParserMatchClass = PPCU12ImmAsmOperand; let DecoderMethod = "decodeUImmOperand<12>"; } def PPCS16ImmAsmOperand : AsmOperandClass { let Name = "S16Imm"; let PredicateMethod = "isS16Imm"; let RenderMethod = "addS16ImmOperands"; } def s16imm : Operand { let PrintMethod = "printS16ImmOperand"; let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCS16ImmAsmOperand; let DecoderMethod = "decodeSImmOperand<16>"; } def PPCU16ImmAsmOperand : AsmOperandClass { let Name = "U16Imm"; let PredicateMethod = "isU16Imm"; let RenderMethod = "addU16ImmOperands"; } def u16imm : Operand { let PrintMethod = "printU16ImmOperand"; let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCU16ImmAsmOperand; let DecoderMethod = "decodeUImmOperand<16>"; } def PPCS17ImmAsmOperand : AsmOperandClass { let Name = "S17Imm"; let PredicateMethod = "isS17Imm"; let RenderMethod = "addS16ImmOperands"; } def s17imm : Operand { // This operand type is used for addis/lis to allow the assembler parser // to accept immediates in the range -65536..65535 for compatibility with // the GNU assembler. The operand is treated as 16-bit otherwise. let PrintMethod = "printS16ImmOperand"; let EncoderMethod = "getImm16Encoding"; let ParserMatchClass = PPCS17ImmAsmOperand; let DecoderMethod = "decodeSImmOperand<16>"; } def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>; def PPCDirectBrAsmOperand : AsmOperandClass { let Name = "DirectBr"; let PredicateMethod = "isDirectBr"; let RenderMethod = "addBranchTargetOperands"; } def directbrtarget : Operand { let PrintMethod = "printBranchOperand"; let EncoderMethod = "getDirectBrEncoding"; let ParserMatchClass = PPCDirectBrAsmOperand; } def absdirectbrtarget : Operand { let PrintMethod = "printAbsBranchOperand"; let EncoderMethod = "getAbsDirectBrEncoding"; let ParserMatchClass = PPCDirectBrAsmOperand; } def PPCCondBrAsmOperand : AsmOperandClass { let Name = "CondBr"; let PredicateMethod = "isCondBr"; let RenderMethod = "addBranchTargetOperands"; } def condbrtarget : Operand { let PrintMethod = "printBranchOperand"; let EncoderMethod = "getCondBrEncoding"; let ParserMatchClass = PPCCondBrAsmOperand; } def abscondbrtarget : Operand { let PrintMethod = "printAbsBranchOperand"; let EncoderMethod = "getAbsCondBrEncoding"; let ParserMatchClass = PPCCondBrAsmOperand; } def calltarget : Operand { let PrintMethod = "printBranchOperand"; let EncoderMethod = "getDirectBrEncoding"; + let DecoderMethod = "DecodePCRel24BranchTarget"; let ParserMatchClass = PPCDirectBrAsmOperand; + let OperandType = "OPERAND_PCREL"; } def abscalltarget : Operand { let PrintMethod = "printAbsBranchOperand"; let EncoderMethod = "getAbsDirectBrEncoding"; let ParserMatchClass = PPCDirectBrAsmOperand; } def PPCCRBitMaskOperand : AsmOperandClass { let Name = "CRBitMask"; let PredicateMethod = "isCRBitMask"; } def crbitm: Operand { let PrintMethod = "printcrbitm"; let EncoderMethod = "get_crbitm_encoding"; let DecoderMethod = "decodeCRBitMOperand"; let ParserMatchClass = PPCCRBitMaskOperand; } // Address operands // A version of ptr_rc which excludes R0 (or X0 in 64-bit mode). def PPCRegGxRCNoR0Operand : AsmOperandClass { let Name = "RegGxRCNoR0"; let PredicateMethod = "isRegNumber"; } def ptr_rc_nor0 : Operand, PointerLikeRegClass<1> { let ParserMatchClass = PPCRegGxRCNoR0Operand; } // A version of ptr_rc usable with the asm parser. 
def PPCRegGxRCOperand : AsmOperandClass { let Name = "RegGxRC"; let PredicateMethod = "isRegNumber"; } def ptr_rc_idx : Operand, PointerLikeRegClass<0> { let ParserMatchClass = PPCRegGxRCOperand; } def PPCDispRIOperand : AsmOperandClass { let Name = "DispRI"; let PredicateMethod = "isS16Imm"; let RenderMethod = "addS16ImmOperands"; } def dispRI : Operand { let ParserMatchClass = PPCDispRIOperand; } def PPCDispRIXOperand : AsmOperandClass { let Name = "DispRIX"; let PredicateMethod = "isS16ImmX4"; let RenderMethod = "addImmOperands"; } def dispRIX : Operand { let ParserMatchClass = PPCDispRIXOperand; } def PPCDispRIX16Operand : AsmOperandClass { let Name = "DispRIX16"; let PredicateMethod = "isS16ImmX16"; let RenderMethod = "addImmOperands"; } def dispRIX16 : Operand { let ParserMatchClass = PPCDispRIX16Operand; } def PPCDispSPE8Operand : AsmOperandClass { let Name = "DispSPE8"; let PredicateMethod = "isU8ImmX8"; let RenderMethod = "addImmOperands"; } def dispSPE8 : Operand { let ParserMatchClass = PPCDispSPE8Operand; } def PPCDispSPE4Operand : AsmOperandClass { let Name = "DispSPE4"; let PredicateMethod = "isU7ImmX4"; let RenderMethod = "addImmOperands"; } def dispSPE4 : Operand { let ParserMatchClass = PPCDispSPE4Operand; } def PPCDispSPE2Operand : AsmOperandClass { let Name = "DispSPE2"; let PredicateMethod = "isU6ImmX2"; let RenderMethod = "addImmOperands"; } def dispSPE2 : Operand { let ParserMatchClass = PPCDispSPE2Operand; } def memri : Operand { let PrintMethod = "printMemRegImm"; let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIEncoding"; let DecoderMethod = "decodeMemRIOperands"; } def memrr : Operand { let PrintMethod = "printMemRegReg"; let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc_idx:$offreg); } def memrix : Operand { // memri where the imm is 4-aligned. let PrintMethod = "printMemRegImm"; let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIXEncoding"; let DecoderMethod = "decodeMemRIXOperands"; } def memrix16 : Operand { // memri, imm is 16-aligned, 12-bit, Inst{16:27} let PrintMethod = "printMemRegImm"; let MIOperandInfo = (ops dispRIX16:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIX16Encoding"; let DecoderMethod = "decodeMemRIX16Operands"; } def spe8dis : Operand { // SPE displacement where the imm is 8-aligned. let PrintMethod = "printMemRegImm"; let MIOperandInfo = (ops dispSPE8:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getSPE8DisEncoding"; let DecoderMethod = "decodeSPE8Operands"; } def spe4dis : Operand { // SPE displacement where the imm is 4-aligned. let PrintMethod = "printMemRegImm"; let MIOperandInfo = (ops dispSPE4:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getSPE4DisEncoding"; let DecoderMethod = "decodeSPE4Operands"; } def spe2dis : Operand { // SPE displacement where the imm is 2-aligned. let PrintMethod = "printMemRegImm"; let MIOperandInfo = (ops dispSPE2:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getSPE2DisEncoding"; let DecoderMethod = "decodeSPE2Operands"; } // A single-register address. This is used with the SjLj // pseudo-instructions which tranlates to LD/LWZ. These instructions requires // G8RC_NOX0 registers. 
def memr : Operand { let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg); } def PPCTLSRegOperand : AsmOperandClass { let Name = "TLSReg"; let PredicateMethod = "isTLSReg"; let RenderMethod = "addTLSRegOperands"; } def tlsreg32 : Operand { let EncoderMethod = "getTLSRegEncoding"; let ParserMatchClass = PPCTLSRegOperand; } def tlsgd32 : Operand {} def tlscall32 : Operand { let PrintMethod = "printTLSCall"; let MIOperandInfo = (ops calltarget:$func, tlsgd32:$sym); let EncoderMethod = "getTLSCallEncoding"; } // PowerPC Predicate operand. def pred : Operand { let PrintMethod = "printPredicateOperand"; let MIOperandInfo = (ops i32imm:$bibo, crrc:$reg); } // Define PowerPC specific addressing mode. def iaddr : ComplexPattern; def xaddr : ComplexPattern; def xoaddr : ComplexPattern; def ixaddr : ComplexPattern; // "std" def iqaddr : ComplexPattern; // "stxv" // The address in a single register. This is used with the SjLj // pseudo-instructions. def addr : ComplexPattern; /// This is just the offset part of iaddr, used for preinc. def iaddroff : ComplexPattern; //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. def In32BitMode : Predicate<"!PPCSubTarget->isPPC64()">; def In64BitMode : Predicate<"PPCSubTarget->isPPC64()">; def IsBookE : Predicate<"PPCSubTarget->isBookE()">; def IsNotBookE : Predicate<"!PPCSubTarget->isBookE()">; def HasOnlyMSYNC : Predicate<"PPCSubTarget->hasOnlyMSYNC()">; def HasSYNC : Predicate<"!PPCSubTarget->hasOnlyMSYNC()">; def IsPPC4xx : Predicate<"PPCSubTarget->isPPC4xx()">; def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">; def IsE500 : Predicate<"PPCSubTarget->isE500()">; def HasSPE : Predicate<"PPCSubTarget->hasSPE()">; def HasICBT : Predicate<"PPCSubTarget->hasICBT()">; def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">; def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">; def HasBPERMD : Predicate<"PPCSubTarget->hasBPERMD()">; def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">; def IsISA3_0 : Predicate<"PPCSubTarget->isISA3_0()">; def HasFPU : Predicate<"PPCSubTarget->hasFPU()">; //===----------------------------------------------------------------------===// // PowerPC Multiclass Definitions. 
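Editor's illustration, not part of the patch: a small standalone helper showing the displacement constraints behind the iaddr/ixaddr/iqaddr complex patterns and the aligned4*/quadwOffset* fragments above. D-form accepts any signed 16-bit byte offset, DS-form ("std") additionally requires a multiple of 4, and DQ-form ("stxv") a multiple of 16. The enum and function names are hypothetical, not LLVM APIs.

#include <cstdint>
#include <limits>

enum class DispForm { None, D, DS, DQ };

// Returns the most constrained displacement form `disp` satisfies.
DispForm classifyDisplacement(int32_t disp) {
  if (disp < std::numeric_limits<int16_t>::min() ||
      disp > std::numeric_limits<int16_t>::max())
    return DispForm::None; // does not fit a 16-bit displacement field
  if (disp % 16 == 0)
    return DispForm::DQ;   // 16-byte multiple: DQ-form (and DS/D) eligible
  if (disp % 4 == 0)
    return DispForm::DS;   // 4-byte multiple: DS-form (and D) eligible
  return DispForm::D;      // byte-granular D-form only
}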
multiclass XForm_6r opcode, bits<10> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { let BaseName = asmbase in { def NAME : XForm_6, RecFormRel; let Defs = [CR0] in def o : XForm_6, isDOT, RecFormRel; } } multiclass XForm_6rc opcode, bits<10> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { let BaseName = asmbase in { let Defs = [CARRY] in def NAME : XForm_6, RecFormRel; let Defs = [CARRY, CR0] in def o : XForm_6, isDOT, RecFormRel; } } multiclass XForm_10rc opcode, bits<10> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { let BaseName = asmbase in { let Defs = [CARRY] in def NAME : XForm_10, RecFormRel; let Defs = [CARRY, CR0] in def o : XForm_10, isDOT, RecFormRel; } } multiclass XForm_11r opcode, bits<10> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { let BaseName = asmbase in { def NAME : XForm_11, RecFormRel; let Defs = [CR0] in def o : XForm_11, isDOT, RecFormRel; } } multiclass XOForm_1r opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { let BaseName = asmbase in { def NAME : XOForm_1, RecFormRel; let Defs = [CR0] in def o : XOForm_1, isDOT, RecFormRel; } } // Multiclass for instructions for which the non record form is not cracked // and the record form is cracked (i.e. divw, mullw, etc.) multiclass XOForm_1rcr opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { let BaseName = asmbase in { def NAME : XOForm_1, RecFormRel; let Defs = [CR0] in def o : XOForm_1, isDOT, RecFormRel, PPC970_DGroup_First, PPC970_DGroup_Cracked; } } multiclass XOForm_1rc opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { let BaseName = asmbase in { let Defs = [CARRY] in def NAME : XOForm_1, RecFormRel; let Defs = [CARRY, CR0] in def o : XOForm_1, isDOT, RecFormRel; } } multiclass XOForm_3r opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { let BaseName = asmbase in { def NAME : XOForm_3, RecFormRel; let Defs = [CR0] in def o : XOForm_3, isDOT, RecFormRel; } } multiclass XOForm_3rc opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { let BaseName = asmbase in { let Defs = [CARRY] in def NAME : XOForm_3, RecFormRel; let Defs = [CARRY, CR0] in def o : XOForm_3, isDOT, RecFormRel; } } multiclass MForm_2r opcode, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { let BaseName = asmbase in { def NAME : MForm_2, RecFormRel; let Defs = [CR0] in def o : MForm_2, isDOT, RecFormRel; } } multiclass MDForm_1r opcode, bits<3> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { let BaseName = asmbase in { def NAME : MDForm_1, RecFormRel; let Defs = [CR0] in def o : MDForm_1, isDOT, RecFormRel; } } multiclass MDSForm_1r opcode, bits<4> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { let BaseName = asmbase in { def NAME : MDSForm_1, RecFormRel; let Defs = [CR0] in def o : MDSForm_1, isDOT, RecFormRel; } } multiclass XSForm_1rc opcode, bits<9> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { let BaseName = asmbase in { let Defs = [CARRY] in def NAME : XSForm_1, RecFormRel; let Defs = [CARRY, CR0] in 
def o : XSForm_1, isDOT, RecFormRel; } } multiclass XSForm_1r opcode, bits<9> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { let BaseName = asmbase in { def NAME : XSForm_1, RecFormRel; let Defs = [CR0] in def o : XSForm_1, isDOT, RecFormRel; } } multiclass XForm_26r opcode, bits<10> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { let BaseName = asmbase in { def NAME : XForm_26, RecFormRel; let Defs = [CR1] in def o : XForm_26, isDOT, RecFormRel; } } multiclass XForm_28r opcode, bits<10> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { let BaseName = asmbase in { def NAME : XForm_28, RecFormRel; let Defs = [CR1] in def o : XForm_28, isDOT, RecFormRel; } } multiclass AForm_1r opcode, bits<5> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { let BaseName = asmbase in { def NAME : AForm_1, RecFormRel; let Defs = [CR1] in def o : AForm_1, isDOT, RecFormRel; } } multiclass AForm_2r opcode, bits<5> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { let BaseName = asmbase in { def NAME : AForm_2, RecFormRel; let Defs = [CR1] in def o : AForm_2, isDOT, RecFormRel; } } multiclass AForm_3r opcode, bits<5> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list pattern> { let BaseName = asmbase in { def NAME : AForm_3, RecFormRel; let Defs = [CR1] in def o : AForm_3, isDOT, RecFormRel; } } //===----------------------------------------------------------------------===// // PowerPC Instruction Definitions. // Pseudo instructions: let hasCtrlDep = 1 in { let Defs = [R1], Uses = [R1] in { def ADJCALLSTACKDOWN : PPCEmitTimePseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "#ADJCALLSTACKDOWN $amt1 $amt2", [(callseq_start timm:$amt1, timm:$amt2)]>; def ADJCALLSTACKUP : PPCEmitTimePseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "#ADJCALLSTACKUP $amt1 $amt2", [(callseq_end timm:$amt1, timm:$amt2)]>; } def UPDATE_VRSAVE : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$rS), "UPDATE_VRSAVE $rD, $rS", []>; } let Defs = [R1], Uses = [R1] in def DYNALLOC : PPCEmitTimePseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC", [(set i32:$result, (PPCdynalloc i32:$negsize, iaddr:$fpsi))]>; def DYNAREAOFFSET : PPCEmitTimePseudo<(outs i32imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET", [(set i32:$result, (PPCdynareaoffset iaddr:$fpsi))]>; // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after // instruction selection into a branch sequence. let PPC970_Single = 1 in { // Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes // because either operand might become the first operand in an isel, and // that operand cannot be r0. 
def SELECT_CC_I4 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins crrc:$cond, gprc_nor0:$T, gprc_nor0:$F, i32imm:$BROPC), "#SELECT_CC_I4", []>; def SELECT_CC_I8 : PPCCustomInserterPseudo<(outs g8rc:$dst), (ins crrc:$cond, g8rc_nox0:$T, g8rc_nox0:$F, i32imm:$BROPC), "#SELECT_CC_I8", []>; def SELECT_CC_F4 : PPCCustomInserterPseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F, i32imm:$BROPC), "#SELECT_CC_F4", []>; def SELECT_CC_F8 : PPCCustomInserterPseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F, i32imm:$BROPC), "#SELECT_CC_F8", []>; def SELECT_CC_F16 : PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F, i32imm:$BROPC), "#SELECT_CC_F16", []>; def SELECT_CC_VRRC: PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F, i32imm:$BROPC), "#SELECT_CC_VRRC", []>; // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition // register bit directly. def SELECT_I4 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins crbitrc:$cond, gprc_nor0:$T, gprc_nor0:$F), "#SELECT_I4", [(set i32:$dst, (select i1:$cond, i32:$T, i32:$F))]>; def SELECT_I8 : PPCCustomInserterPseudo<(outs g8rc:$dst), (ins crbitrc:$cond, g8rc_nox0:$T, g8rc_nox0:$F), "#SELECT_I8", [(set i64:$dst, (select i1:$cond, i64:$T, i64:$F))]>; let Predicates = [HasFPU] in { def SELECT_F4 : PPCCustomInserterPseudo<(outs f4rc:$dst), (ins crbitrc:$cond, f4rc:$T, f4rc:$F), "#SELECT_F4", [(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>; def SELECT_F8 : PPCCustomInserterPseudo<(outs f8rc:$dst), (ins crbitrc:$cond, f8rc:$T, f8rc:$F), "#SELECT_F8", [(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>; def SELECT_F16 : PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crbitrc:$cond, vrrc:$T, vrrc:$F), "#SELECT_F16", [(set f128:$dst, (select i1:$cond, f128:$T, f128:$F))]>; } def SELECT_VRRC: PPCCustomInserterPseudo<(outs vrrc:$dst), (ins crbitrc:$cond, vrrc:$T, vrrc:$F), "#SELECT_VRRC", [(set v4i32:$dst, (select i1:$cond, v4i32:$T, v4i32:$F))]>; } // SPILL_CR - Indicate that we're dumping the CR register, so we'll need to // scavenge a register for it. let mayStore = 1 in { def SPILL_CR : PPCEmitTimePseudo<(outs), (ins crrc:$cond, memri:$F), "#SPILL_CR", []>; def SPILL_CRBIT : PPCEmitTimePseudo<(outs), (ins crbitrc:$cond, memri:$F), "#SPILL_CRBIT", []>; } // RESTORE_CR - Indicate that we're restoring the CR register (previously // spilled), so we'll need to scavenge a register for it. 
let mayLoad = 1 in { def RESTORE_CR : PPCEmitTimePseudo<(outs crrc:$cond), (ins memri:$F), "#RESTORE_CR", []>; def RESTORE_CRBIT : PPCEmitTimePseudo<(outs crbitrc:$cond), (ins memri:$F), "#RESTORE_CRBIT", []>; } let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { let isReturn = 1, Uses = [LR, RM] in def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB, [(retflag)]>, Requires<[In32BitMode]>; let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in { def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, []>; let isCodeGenOnly = 1 in { def BCCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond), "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB, []>; def BCCTR : XLForm_2_br2<19, 528, 12, 0, (outs), (ins crbitrc:$bi), "bcctr 12, $bi, 0", IIC_BrB, []>; def BCCTRn : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$bi), "bcctr 4, $bi, 0", IIC_BrB, []>; } } } let Defs = [LR] in def MovePCtoLR : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR", []>, PPC970_Unit_BRU; let Defs = [LR] in def MoveGOTtoLR : PPCEmitTimePseudo<(outs), (ins), "#MoveGOTtoLR", []>, PPC970_Unit_BRU; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { let isBarrier = 1 in { def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst), "b $dst", IIC_BrB, [(br bb:$dst)]>; def BA : IForm<18, 1, 0, (outs), (ins absdirectbrtarget:$dst), "ba $dst", IIC_BrB, []>; } // BCC represents an arbitrary conditional branch on a predicate. // FIXME: should be able to write a pattern for PPCcondbranch, but can't use // a two-value operand where a dag node expects two operands. :( let isCodeGenOnly = 1 in { class BCC_class : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst), "b${cond:cc}${cond:pm} ${cond:reg}, $dst" /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>; def BCC : BCC_class; // The same as BCC, except that it's not a terminator. Used for introducing // control flow dependency without creating new blocks. 
let isTerminator = 0 in def CTRL_DEP : BCC_class; def BCCA : BForm<16, 1, 0, (outs), (ins pred:$cond, abscondbrtarget:$dst), "b${cond:cc}a${cond:pm} ${cond:reg}, $dst">; let isReturn = 1, Uses = [LR, RM] in def BCCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond), "b${cond:cc}lr${cond:pm} ${cond:reg}", IIC_BrB, []>; } let isCodeGenOnly = 1 in { let Pattern = [(brcond i1:$bi, bb:$dst)] in def BC : BForm_4<16, 12, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst), "bc 12, $bi, $dst">; let Pattern = [(brcond (not i1:$bi), bb:$dst)] in def BCn : BForm_4<16, 4, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst), "bc 4, $bi, $dst">; let isReturn = 1, Uses = [LR, RM] in def BCLR : XLForm_2_br2<19, 16, 12, 0, (outs), (ins crbitrc:$bi), "bclr 12, $bi, 0", IIC_BrB, []>; def BCLRn : XLForm_2_br2<19, 16, 4, 0, (outs), (ins crbitrc:$bi), "bclr 4, $bi, 0", IIC_BrB, []>; } let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in { def BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins), "bdzlr", IIC_BrB, []>; def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins), "bdnzlr", IIC_BrB, []>; def BDZLRp : XLForm_2_ext<19, 16, 27, 0, 0, (outs), (ins), "bdzlr+", IIC_BrB, []>; def BDNZLRp: XLForm_2_ext<19, 16, 25, 0, 0, (outs), (ins), "bdnzlr+", IIC_BrB, []>; def BDZLRm : XLForm_2_ext<19, 16, 26, 0, 0, (outs), (ins), "bdzlr-", IIC_BrB, []>; def BDNZLRm: XLForm_2_ext<19, 16, 24, 0, 0, (outs), (ins), "bdnzlr-", IIC_BrB, []>; } let Defs = [CTR], Uses = [CTR] in { def BDZ : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), "bdz $dst">; def BDNZ : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), "bdnz $dst">; def BDZA : BForm_1<16, 18, 1, 0, (outs), (ins abscondbrtarget:$dst), "bdza $dst">; def BDNZA : BForm_1<16, 16, 1, 0, (outs), (ins abscondbrtarget:$dst), "bdnza $dst">; def BDZp : BForm_1<16, 27, 0, 0, (outs), (ins condbrtarget:$dst), "bdz+ $dst">; def BDNZp: BForm_1<16, 25, 0, 0, (outs), (ins condbrtarget:$dst), "bdnz+ $dst">; def BDZAp : BForm_1<16, 27, 1, 0, (outs), (ins abscondbrtarget:$dst), "bdza+ $dst">; def BDNZAp: BForm_1<16, 25, 1, 0, (outs), (ins abscondbrtarget:$dst), "bdnza+ $dst">; def BDZm : BForm_1<16, 26, 0, 0, (outs), (ins condbrtarget:$dst), "bdz- $dst">; def BDNZm: BForm_1<16, 24, 0, 0, (outs), (ins condbrtarget:$dst), "bdnz- $dst">; def BDZAm : BForm_1<16, 26, 1, 0, (outs), (ins abscondbrtarget:$dst), "bdza- $dst">; def BDNZAm: BForm_1<16, 24, 1, 0, (outs), (ins abscondbrtarget:$dst), "bdnza- $dst">; } } // The unconditional BCL used by the SjLj setjmp code. let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7 in { let Defs = [LR], Uses = [RM] in { def BCLalways : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst), "bcl 20, 31, $dst">; } } let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func), "bl $func", IIC_BrB, []>; // See Pat patterns below. 
def BLA : IForm<18, 1, 1, (outs), (ins abscalltarget:$func), "bla $func", IIC_BrB, [(PPCcall (i32 imm:$func))]>; let isCodeGenOnly = 1 in { def BL_TLS : IForm<18, 0, 1, (outs), (ins tlscall32:$func), "bl $func", IIC_BrB, []>; def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst), "b${cond:cc}l${cond:pm} ${cond:reg}, $dst">; def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst), "b${cond:cc}la${cond:pm} ${cond:reg}, $dst">; def BCL : BForm_4<16, 12, 0, 1, (outs), (ins crbitrc:$bi, condbrtarget:$dst), "bcl 12, $bi, $dst">; def BCLn : BForm_4<16, 4, 0, 1, (outs), (ins crbitrc:$bi, condbrtarget:$dst), "bcl 4, $bi, $dst">; } } let Uses = [CTR, RM] in { def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), "bctrl", IIC_BrB, [(PPCbctrl)]>, Requires<[In32BitMode]>; let isCodeGenOnly = 1 in { def BCCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond), "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB, []>; def BCCTRL : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$bi), "bcctrl 12, $bi, 0", IIC_BrB, []>; def BCCTRLn : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$bi), "bcctrl 4, $bi, 0", IIC_BrB, []>; } } let Uses = [LR, RM] in { def BLRL : XLForm_2_ext<19, 16, 20, 0, 1, (outs), (ins), "blrl", IIC_BrB, []>; let isCodeGenOnly = 1 in { def BCCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond), "b${cond:cc}lrl${cond:pm} ${cond:reg}", IIC_BrB, []>; def BCLRL : XLForm_2_br2<19, 16, 12, 1, (outs), (ins crbitrc:$bi), "bclrl 12, $bi, 0", IIC_BrB, []>; def BCLRLn : XLForm_2_br2<19, 16, 4, 1, (outs), (ins crbitrc:$bi), "bclrl 4, $bi, 0", IIC_BrB, []>; } } let Defs = [CTR], Uses = [CTR, RM] in { def BDZL : BForm_1<16, 18, 0, 1, (outs), (ins condbrtarget:$dst), "bdzl $dst">; def BDNZL : BForm_1<16, 16, 0, 1, (outs), (ins condbrtarget:$dst), "bdnzl $dst">; def BDZLA : BForm_1<16, 18, 1, 1, (outs), (ins abscondbrtarget:$dst), "bdzla $dst">; def BDNZLA : BForm_1<16, 16, 1, 1, (outs), (ins abscondbrtarget:$dst), "bdnzla $dst">; def BDZLp : BForm_1<16, 27, 0, 1, (outs), (ins condbrtarget:$dst), "bdzl+ $dst">; def BDNZLp: BForm_1<16, 25, 0, 1, (outs), (ins condbrtarget:$dst), "bdnzl+ $dst">; def BDZLAp : BForm_1<16, 27, 1, 1, (outs), (ins abscondbrtarget:$dst), "bdzla+ $dst">; def BDNZLAp: BForm_1<16, 25, 1, 1, (outs), (ins abscondbrtarget:$dst), "bdnzla+ $dst">; def BDZLm : BForm_1<16, 26, 0, 1, (outs), (ins condbrtarget:$dst), "bdzl- $dst">; def BDNZLm: BForm_1<16, 24, 0, 1, (outs), (ins condbrtarget:$dst), "bdnzl- $dst">; def BDZLAm : BForm_1<16, 26, 1, 1, (outs), (ins abscondbrtarget:$dst), "bdzla- $dst">; def BDNZLAm: BForm_1<16, 24, 1, 1, (outs), (ins abscondbrtarget:$dst), "bdnzla- $dst">; } let Defs = [CTR], Uses = [CTR, LR, RM] in { def BDZLRL : XLForm_2_ext<19, 16, 18, 0, 1, (outs), (ins), "bdzlrl", IIC_BrB, []>; def BDNZLRL : XLForm_2_ext<19, 16, 16, 0, 1, (outs), (ins), "bdnzlrl", IIC_BrB, []>; def BDZLRLp : XLForm_2_ext<19, 16, 27, 0, 1, (outs), (ins), "bdzlrl+", IIC_BrB, []>; def BDNZLRLp: XLForm_2_ext<19, 16, 25, 0, 1, (outs), (ins), "bdnzlrl+", IIC_BrB, []>; def BDZLRLm : XLForm_2_ext<19, 16, 26, 0, 1, (outs), (ins), "bdzlrl-", IIC_BrB, []>; def BDNZLRLm: XLForm_2_ext<19, 16, 24, 0, 1, (outs), (ins), "bdnzlrl-", IIC_BrB, []>; } } let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in def TCRETURNdi :PPCEmitTimePseudo< (outs), (ins calltarget:$dst, i32imm:$offset), "#TC_RETURNd $dst $offset", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in def TCRETURNai :PPCEmitTimePseudo<(outs), (ins 
abscalltarget:$func, i32imm:$offset), "#TC_RETURNa $func $offset", [(PPCtc_return (i32 imm:$func), imm:$offset)]>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in def TCRETURNri : PPCEmitTimePseudo<(outs), (ins CTRRC:$dst, i32imm:$offset), "#TC_RETURNr $dst $offset", []>; let isCodeGenOnly = 1 in { let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, []>, Requires<[In32BitMode]>; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst), "b $dst", IIC_BrB, []>; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst), "ba $dst", IIC_BrB, []>; } // While longjmp is a control-flow barrier (fallthrough isn't allowed), setjmp // is not. let hasSideEffects = 1 in { let Defs = [CTR] in def EH_SjLj_SetJmp32 : PPCCustomInserterPseudo<(outs gprc:$dst), (ins memr:$buf), "#EH_SJLJ_SETJMP32", [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, Requires<[In32BitMode]>; } let hasSideEffects = 1, isBarrier = 1 in { let isTerminator = 1 in def EH_SjLj_LongJmp32 : PPCCustomInserterPseudo<(outs), (ins memr:$buf), "#EH_SJLJ_LONGJMP32", [(PPCeh_sjlj_longjmp addr:$buf)]>, Requires<[In32BitMode]>; } // This pseudo is never removed from the function, as it serves as // a terminator. Size is set to 0 to prevent the builtin assembler // from emitting it. let isBranch = 1, isTerminator = 1, Size = 0 in { def EH_SjLj_Setup : PPCEmitTimePseudo<(outs), (ins directbrtarget:$dst), "#EH_SjLj_Setup\t$dst", []>; } // System call. let PPC970_Unit = 7 in { def SC : SCForm<17, 1, (outs), (ins i32imm:$lev), "sc $lev", IIC_BrB, [(PPCsc (i32 imm:$lev))]>; } // Branch history rolling buffer. def CLRBHRB : XForm_0<31, 430, (outs), (ins), "clrbhrb", IIC_BrB, [(PPCclrbhrb)]>, PPC970_DGroup_Single; // The $dmy argument used for MFBHRBE is not needed; however, including // it avoids automatic generation of PPCFastISel::fastEmit_i(), which // interferes with necessary special handling (see PPCFastISel.cpp). def MFBHRBE : XFXForm_3p<31, 302, (outs gprc:$rD), (ins u10imm:$imm, u10imm:$dmy), "mfbhrbe $rD, $imm", IIC_BrB, [(set i32:$rD, (PPCmfbhrbe imm:$imm, imm:$dmy))]>, PPC970_DGroup_First; def RFEBB : XLForm_S<19, 146, (outs), (ins u1imm:$imm), "rfebb $imm", IIC_BrB, [(PPCrfebb (i32 imm:$imm))]>, PPC970_DGroup_Single; // DCB* instructions. 
def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), "dcba $dst", IIC_LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>, PPC970_DGroup_Single; def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst), "dcbi $dst", IIC_LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>, PPC970_DGroup_Single; def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst), "dcbst $dst", IIC_LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>, PPC970_DGroup_Single; def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst), "dcbz $dst", IIC_LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>, PPC970_DGroup_Single; def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst", IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>, PPC970_DGroup_Single; def DCBF : DCB_Form_hint<86, (outs), (ins u5imm:$TH, memrr:$dst), "dcbf $dst, $TH", IIC_LdStDCBF, []>, PPC970_DGroup_Single; let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in { def DCBT : DCB_Form_hint<278, (outs), (ins u5imm:$TH, memrr:$dst), "dcbt $dst, $TH", IIC_LdStDCBF, []>, PPC970_DGroup_Single; def DCBTST : DCB_Form_hint<246, (outs), (ins u5imm:$TH, memrr:$dst), "dcbtst $dst, $TH", IIC_LdStDCBF, []>, PPC970_DGroup_Single; } // hasSideEffects = 0 def ICBLC : XForm_icbt<31, 230, (outs), (ins u4imm:$CT, memrr:$src), "icblc $CT, $src", IIC_LdStStore>, Requires<[HasICBT]>; def ICBLQ : XForm_icbt<31, 198, (outs), (ins u4imm:$CT, memrr:$src), "icblq. $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>; def ICBT : XForm_icbt<31, 22, (outs), (ins u4imm:$CT, memrr:$src), "icbt $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>; def ICBTLS : XForm_icbt<31, 486, (outs), (ins u4imm:$CT, memrr:$src), "icbtls $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>; def : Pat<(int_ppc_dcbt xoaddr:$dst), (DCBT 0, xoaddr:$dst)>; def : Pat<(int_ppc_dcbtst xoaddr:$dst), (DCBTST 0, xoaddr:$dst)>; def : Pat<(int_ppc_dcbf xoaddr:$dst), (DCBF 0, xoaddr:$dst)>; def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)), (DCBT 0, xoaddr:$dst)>; // data prefetch for loads def : Pat<(prefetch xoaddr:$dst, (i32 1), imm, (i32 1)), (DCBTST 0, xoaddr:$dst)>; // data prefetch for stores def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 0)), (ICBT 0, xoaddr:$dst)>, Requires<[HasICBT]>; // inst prefetch (for read) // Atomic operations // FIXME: some of these might be used with constant operands. This will result // in constant materialization instructions that may be redundant. We currently // clean this up in PPCMIPeephole with calls to // PPCInstrInfo::convertToImmediateForm() but we should probably not emit them // in the first place. 
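Editor's note, not part of the patch: the FIXME above concerns read-modify-write atomics whose operand is a constant. Because the ATOMIC_LOAD_* pseudos below take their increment in a GPR (gprc:$incr), a constant such as 1 is first materialized into a register and, per the comment, only cleaned up later by PPCMIPeephole. A trivial C++ example of code that reaches this path, for illustration only:

#include <atomic>
#include <cstdint>

// The constant increment is not an immediate operand of the atomic pseudo;
// it is materialized into a register first (see the FIXME above).
std::uint32_t bump(std::atomic<std::uint32_t> &counter) {
  return counter.fetch_add(1u, std::memory_order_relaxed);
}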
let Defs = [CR0] in { def ATOMIC_LOAD_ADD_I8 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I8", [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_SUB_I8 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I8", [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_AND_I8 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I8", [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_OR_I8 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I8", [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_XOR_I8 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "ATOMIC_LOAD_XOR_I8", [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_NAND_I8 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8", [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_MIN_I8 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I8", [(set i32:$dst, (atomic_load_min_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_MAX_I8 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I8", [(set i32:$dst, (atomic_load_max_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_UMIN_I8 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I8", [(set i32:$dst, (atomic_load_umin_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_UMAX_I8 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I8", [(set i32:$dst, (atomic_load_umax_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_ADD_I16 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16", [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_SUB_I16 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I16", [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_AND_I16 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I16", [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_OR_I16 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I16", [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_XOR_I16 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I16", [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_NAND_I16 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16", [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_MIN_I16 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I16", [(set i32:$dst, (atomic_load_min_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_MAX_I16 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I16", [(set i32:$dst, (atomic_load_max_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_UMIN_I16 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I16", [(set i32:$dst, (atomic_load_umin_16 xoaddr:$ptr, i32:$incr))]>; 
def ATOMIC_LOAD_UMAX_I16 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I16", [(set i32:$dst, (atomic_load_umax_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_ADD_I32 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32", [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_SUB_I32 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I32", [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_AND_I32 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I32", [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_OR_I32 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I32", [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_XOR_I32 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I32", [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_NAND_I32 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32", [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_MIN_I32 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I32", [(set i32:$dst, (atomic_load_min_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_MAX_I32 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I32", [(set i32:$dst, (atomic_load_max_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_UMIN_I32 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I32", [(set i32:$dst, (atomic_load_umin_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_UMAX_I32 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I32", [(set i32:$dst, (atomic_load_umax_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_CMP_SWAP_I8 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8", [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>; def ATOMIC_CMP_SWAP_I16 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new", [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>; def ATOMIC_CMP_SWAP_I32 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new", [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>; def ATOMIC_SWAP_I8 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_i8", [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>; def ATOMIC_SWAP_I16 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I16", [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>; def ATOMIC_SWAP_I32 : PPCCustomInserterPseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I32", [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>; } def : Pat<(PPCatomicCmpSwap_8 xoaddr:$ptr, i32:$old, i32:$new), (ATOMIC_CMP_SWAP_I8 xoaddr:$ptr, i32:$old, i32:$new)>; def : Pat<(PPCatomicCmpSwap_16 xoaddr:$ptr, i32:$old, i32:$new), (ATOMIC_CMP_SWAP_I16 xoaddr:$ptr, i32:$old, i32:$new)>; // Instructions to support atomic operations let mayLoad = 1, mayStore = 0, 
hasSideEffects = 0 in { def LBARX : XForm_1_memOp<31, 52, (outs gprc:$rD), (ins memrr:$src), "lbarx $rD, $src", IIC_LdStLWARX, []>, Requires<[HasPartwordAtomics]>; def LHARX : XForm_1_memOp<31, 116, (outs gprc:$rD), (ins memrr:$src), "lharx $rD, $src", IIC_LdStLWARX, []>, Requires<[HasPartwordAtomics]>; def LWARX : XForm_1_memOp<31, 20, (outs gprc:$rD), (ins memrr:$src), "lwarx $rD, $src", IIC_LdStLWARX, []>; // Instructions to support lock versions of atomics // (EH=1 - see Power ISA 2.07 Book II 4.4.2) def LBARXL : XForm_1_memOp<31, 52, (outs gprc:$rD), (ins memrr:$src), "lbarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT, Requires<[HasPartwordAtomics]>; def LHARXL : XForm_1_memOp<31, 116, (outs gprc:$rD), (ins memrr:$src), "lharx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT, Requires<[HasPartwordAtomics]>; def LWARXL : XForm_1_memOp<31, 20, (outs gprc:$rD), (ins memrr:$src), "lwarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT; // The atomic instructions use the destination register as well as the next one // or two registers in order (modulo 31). let hasExtraSrcRegAllocReq = 1 in def LWAT : X_RD5_RS5_IM5<31, 582, (outs gprc:$rD), (ins gprc:$rA, u5imm:$FC), "lwat $rD, $rA, $FC", IIC_LdStLoad>, Requires<[IsISA3_0]>; } let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in { def STBCX : XForm_1_memOp<31, 694, (outs), (ins gprc:$rS, memrr:$dst), "stbcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT, Requires<[HasPartwordAtomics]>; def STHCX : XForm_1_memOp<31, 726, (outs), (ins gprc:$rS, memrr:$dst), "sthcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT, Requires<[HasPartwordAtomics]>; def STWCX : XForm_1_memOp<31, 150, (outs), (ins gprc:$rS, memrr:$dst), "stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT; } let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in def STWAT : X_RD5_RS5_IM5<31, 710, (outs), (ins gprc:$rS, gprc:$rA, u5imm:$FC), "stwat $rS, $rA, $FC", IIC_LdStStore>, Requires<[IsISA3_0]>; let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>; def TWI : DForm_base<3, (outs), (ins u5imm:$to, gprc:$rA, s16imm:$imm), "twi $to, $rA, $imm", IIC_IntTrapW, []>; def TW : XForm_1<31, 4, (outs), (ins u5imm:$to, gprc:$rA, gprc:$rB), "tw $to, $rA, $rB", IIC_IntTrapW, []>; def TDI : DForm_base<2, (outs), (ins u5imm:$to, g8rc:$rA, s16imm:$imm), "tdi $to, $rA, $imm", IIC_IntTrapD, []>; def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB), "td $to, $rA, $rB", IIC_IntTrapD, []>; //===----------------------------------------------------------------------===// // PPC32 Load Instructions. // // Unindexed (r+i) Loads. let PPC970_Unit = 2 in { def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src), "lbz $rD, $src", IIC_LdStLoad, [(set i32:$rD, (zextloadi8 iaddr:$src))]>; def LHA : DForm_1<42, (outs gprc:$rD), (ins memri:$src), "lha $rD, $src", IIC_LdStLHA, [(set i32:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; def LHZ : DForm_1<40, (outs gprc:$rD), (ins memri:$src), "lhz $rD, $src", IIC_LdStLoad, [(set i32:$rD, (zextloadi16 iaddr:$src))]>; def LWZ : DForm_1<32, (outs gprc:$rD), (ins memri:$src), "lwz $rD, $src", IIC_LdStLoad, [(set i32:$rD, (load iaddr:$src))]>; let Predicates = [HasFPU] in { def LFS : DForm_1<48, (outs f4rc:$rD), (ins memri:$src), "lfs $rD, $src", IIC_LdStLFD, [(set f32:$rD, (load iaddr:$src))]>; def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src), "lfd $rD, $src", IIC_LdStLFD, [(set f64:$rD, (load iaddr:$src))]>; } // Unindexed (r+i) Loads with Update (preinc). 
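// Note (informal, for orientation; not from the upstream comments): the "u"
// (with Update) forms compute EA = rA + d, load from EA, and then write EA
// back into rA, e.g.
//   lwzu r4, 4(r3)    # r4 = *(int *)(r3 + 4); r3 += 4
// which is why each definition below carries a second $ea_result output tied
// to the address register.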
let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in { def LBZU : DForm_1<35, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lbzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHAU : DForm_1<43, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhau $rD, $addr", IIC_LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU : DForm_1<41, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU : DForm_1<33, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lwzu $rD, $addr", IIC_LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; let Predicates = [HasFPU] in { def LFSU : DForm_1<49, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lfsu $rD, $addr", IIC_LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LFDU : DForm_1<51, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lfdu $rD, $addr", IIC_LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; } // Indexed (r+r) Loads with Update (preinc). def LBZUX : XForm_1_memOp<31, 119, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lbzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LHAUX : XForm_1_memOp<31, 375, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhaux $rD, $addr", IIC_LdStLHAUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LHZUX : XForm_1_memOp<31, 311, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LWZUX : XForm_1_memOp<31, 55, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwzux $rD, $addr", IIC_LdStLoadUpdX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; let Predicates = [HasFPU] in { def LFSUX : XForm_1_memOp<31, 567, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lfsux $rD, $addr", IIC_LdStLFDUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; def LFDUX : XForm_1_memOp<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lfdux $rD, $addr", IIC_LdStLFDUX, []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; } } } // Indexed (r+r) Loads. 
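// Note (informal): the indexed forms compute EA = (rA|0) + rB, so the typical
// use is an access with a register offset, e.g.
//   lwzx r5, r3, r4   # r5 = *(int *)(r3 + r4)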
// let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { def LBZX : XForm_1_memOp<31, 87, (outs gprc:$rD), (ins memrr:$src), "lbzx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (zextloadi8 xaddr:$src))]>; def LHAX : XForm_1_memOp<31, 343, (outs gprc:$rD), (ins memrr:$src), "lhax $rD, $src", IIC_LdStLHA, [(set i32:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; def LHZX : XForm_1_memOp<31, 279, (outs gprc:$rD), (ins memrr:$src), "lhzx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (zextloadi16 xaddr:$src))]>; def LWZX : XForm_1_memOp<31, 23, (outs gprc:$rD), (ins memrr:$src), "lwzx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (load xaddr:$src))]>; def LHBRX : XForm_1_memOp<31, 790, (outs gprc:$rD), (ins memrr:$src), "lhbrx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>; def LWBRX : XForm_1_memOp<31, 534, (outs gprc:$rD), (ins memrr:$src), "lwbrx $rD, $src", IIC_LdStLoad, [(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>; let Predicates = [HasFPU] in { def LFSX : XForm_25_memOp<31, 535, (outs f4rc:$frD), (ins memrr:$src), "lfsx $frD, $src", IIC_LdStLFD, [(set f32:$frD, (load xaddr:$src))]>; def LFDX : XForm_25_memOp<31, 599, (outs f8rc:$frD), (ins memrr:$src), "lfdx $frD, $src", IIC_LdStLFD, [(set f64:$frD, (load xaddr:$src))]>; def LFIWAX : XForm_25_memOp<31, 855, (outs f8rc:$frD), (ins memrr:$src), "lfiwax $frD, $src", IIC_LdStLFD, [(set f64:$frD, (PPClfiwax xoaddr:$src))]>; def LFIWZX : XForm_25_memOp<31, 887, (outs f8rc:$frD), (ins memrr:$src), "lfiwzx $frD, $src", IIC_LdStLFD, [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>; } } // Load Multiple def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src), "lmw $rD, $src", IIC_LdStLMW, []>; //===----------------------------------------------------------------------===// // PPC32 Store Instructions. // // Unindexed (r+i) Stores. let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$dst), "stb $rS, $dst", IIC_LdStStore, [(truncstorei8 i32:$rS, iaddr:$dst)]>; def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$dst), "sth $rS, $dst", IIC_LdStStore, [(truncstorei16 i32:$rS, iaddr:$dst)]>; def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$dst), "stw $rS, $dst", IIC_LdStStore, [(store i32:$rS, iaddr:$dst)]>; let Predicates = [HasFPU] in { def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst), "stfs $rS, $dst", IIC_LdStSTFD, [(store f32:$rS, iaddr:$dst)]>; def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst), "stfd $rS, $dst", IIC_LdStSTFD, [(store f64:$rS, iaddr:$dst)]>; } } // Unindexed (r+i) Stores with Update (preinc). 
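// Note (informal): as with the load-with-update forms, the "u" stores write
// the effective address back into the base register; the classic use is the
// stack-frame push in a function prologue, e.g.
//   stwu r1, -32(r1)  # store the back-chain at r1-32 and decrement r1 by 32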
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), "stbu $rS, $dst", IIC_LdStSTU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), "sthu $rS, $dst", IIC_LdStSTU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst), "stwu $rS, $dst", IIC_LdStSTU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; let Predicates = [HasFPU] in { def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst), "stfsu $rS, $dst", IIC_LdStSTFDU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; def STFDU : DForm_1<55, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memri:$dst), "stfdu $rS, $dst", IIC_LdStSTFDU, []>, RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; } } // Patterns to match the pre-inc stores. We can't put the patterns on // the instruction definitions directly as ISel wants the address base // and offset to be separate operands, not a single complex operand. def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff), (STBU $rS, iaddroff:$ptroff, $ptrreg)>; def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff), (STHU $rS, iaddroff:$ptroff, $ptrreg)>; def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff), (STWU $rS, iaddroff:$ptroff, $ptrreg)>; def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iaddroff:$ptroff), (STFSU $rS, iaddroff:$ptroff, $ptrreg)>; def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), (STFDU $rS, iaddroff:$ptroff, $ptrreg)>; // Indexed (r+r) Stores. let PPC970_Unit = 2 in { def STBX : XForm_8_memOp<31, 215, (outs), (ins gprc:$rS, memrr:$dst), "stbx $rS, $dst", IIC_LdStStore, [(truncstorei8 i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX : XForm_8_memOp<31, 407, (outs), (ins gprc:$rS, memrr:$dst), "sthx $rS, $dst", IIC_LdStStore, [(truncstorei16 i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX : XForm_8_memOp<31, 151, (outs), (ins gprc:$rS, memrr:$dst), "stwx $rS, $dst", IIC_LdStStore, [(store i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHBRX: XForm_8_memOp<31, 918, (outs), (ins gprc:$rS, memrr:$dst), "sthbrx $rS, $dst", IIC_LdStStore, [(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>, PPC970_DGroup_Cracked; def STWBRX: XForm_8_memOp<31, 662, (outs), (ins gprc:$rS, memrr:$dst), "stwbrx $rS, $dst", IIC_LdStStore, [(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>, PPC970_DGroup_Cracked; let Predicates = [HasFPU] in { def STFIWX: XForm_28_memOp<31, 983, (outs), (ins f8rc:$frS, memrr:$dst), "stfiwx $frS, $dst", IIC_LdStSTFD, [(PPCstfiwx f64:$frS, xoaddr:$dst)]>; def STFSX : XForm_28_memOp<31, 663, (outs), (ins f4rc:$frS, memrr:$dst), "stfsx $frS, $dst", IIC_LdStSTFD, [(store f32:$frS, xaddr:$dst)]>; def STFDX : XForm_28_memOp<31, 727, (outs), (ins f8rc:$frS, memrr:$dst), "stfdx $frS, $dst", IIC_LdStSTFD, [(store f64:$frS, xaddr:$dst)]>; } } // Indexed (r+r) Stores with Update (preinc). 
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { def STBUX : XForm_8_memOp<31, 247, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), "stbux $rS, $dst", IIC_LdStSTUX, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STHUX : XForm_8_memOp<31, 439, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), "sthux $rS, $dst", IIC_LdStSTUX, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STWUX : XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst), "stwux $rS, $dst", IIC_LdStSTUX, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; let Predicates = [HasFPU] in { def STFSUX: XForm_8_memOp<31, 695, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memrr:$dst), "stfsux $rS, $dst", IIC_LdStSTFDU, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STFDUX: XForm_8_memOp<31, 759, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memrr:$dst), "stfdux $rS, $dst", IIC_LdStSTFDU, []>, RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; } } // Patterns to match the pre-inc stores. We can't put the patterns on // the instruction definitions directly as ISel wants the address base // and offset to be separate operands, not a single complex operand. def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff), (STBUX $rS, $ptrreg, $ptroff)>; def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff), (STHUX $rS, $ptrreg, $ptroff)>; def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iPTR:$ptroff), (STWUX $rS, $ptrreg, $ptroff)>; let Predicates = [HasFPU] in { def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iPTR:$ptroff), (STFSUX $rS, $ptrreg, $ptroff)>; def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff), (STFDUX $rS, $ptrreg, $ptroff)>; } // Store Multiple def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst), "stmw $rS, $dst", IIC_LdStLMW, []>; def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L), "sync $L", IIC_LdStSync, []>; let isCodeGenOnly = 1 in { def MSYNC : XForm_24_sync<31, 598, (outs), (ins), "msync", IIC_LdStSync, []> { let L = 0; } } def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[HasSYNC]>; def : Pat<(int_ppc_lwsync), (SYNC 1)>, Requires<[HasSYNC]>; def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[HasOnlyMSYNC]>; def : Pat<(int_ppc_lwsync), (MSYNC)>, Requires<[HasOnlyMSYNC]>; //===----------------------------------------------------------------------===// // PPC32 Arithmetic Instructions. // let PPC970_Unit = 1 in { // FXU Operations. def ADDI : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$imm), "addi $rD, $rA, $imm", IIC_IntSimple, [(set i32:$rD, (add i32:$rA, imm32SExt16:$imm))]>; let BaseName = "addic" in { let Defs = [CARRY] in def ADDIC : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), "addic $rD, $rA, $imm", IIC_IntGeneral, [(set i32:$rD, (addc i32:$rA, imm32SExt16:$imm))]>, RecFormRel, PPC970_DGroup_Cracked; let Defs = [CARRY, CR0] in def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), "addic. 
$rD, $rA, $imm", IIC_IntGeneral, []>, isDOT, RecFormRel; } def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, s17imm:$imm), "addis $rD, $rA, $imm", IIC_IntSimple, [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>; let isCodeGenOnly = 1 in def LA : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$sym), "la $rD, $sym($rA)", IIC_IntGeneral, [(set i32:$rD, (add i32:$rA, (PPClo tglobaladdr:$sym, 0)))]>; def MULLI : DForm_2< 7, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), "mulli $rD, $rA, $imm", IIC_IntMulLI, [(set i32:$rD, (mul i32:$rA, imm32SExt16:$imm))]>; let Defs = [CARRY] in def SUBFIC : DForm_2< 8, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm), "subfic $rD, $rA, $imm", IIC_IntGeneral, [(set i32:$rD, (subc imm32SExt16:$imm, i32:$rA))]>; let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI : DForm_2_r0<14, (outs gprc:$rD), (ins s16imm:$imm), "li $rD, $imm", IIC_IntSimple, [(set i32:$rD, imm32SExt16:$imm)]>; def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins s17imm:$imm), "lis $rD, $imm", IIC_IntSimple, [(set i32:$rD, imm16ShiftedSExt:$imm)]>; } } let PPC970_Unit = 1 in { // FXU Operations. let Defs = [CR0] in { def ANDIo : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "andi. $dst, $src1, $src2", IIC_IntGeneral, [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>, isDOT; def ANDISo : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "andis. $dst, $src1, $src2", IIC_IntGeneral, [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>, isDOT; } def ORI : DForm_4<24, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "ori $dst, $src1, $src2", IIC_IntSimple, [(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>; def ORIS : DForm_4<25, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "oris $dst, $src1, $src2", IIC_IntSimple, [(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>; def XORI : DForm_4<26, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "xori $dst, $src1, $src2", IIC_IntSimple, [(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>; def XORIS : DForm_4<27, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2), "xoris $dst, $src1, $src2", IIC_IntSimple, [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>; def NOP : DForm_4_zero<24, (outs), (ins), "nop", IIC_IntSimple, []>; let isCodeGenOnly = 1 in { // The POWER6 and POWER7 have special group-terminating nops. def NOP_GT_PWR6 : DForm_4_fixedreg_zero<24, 1, (outs), (ins), "ori 1, 1, 0", IIC_IntSimple, []>; def NOP_GT_PWR7 : DForm_4_fixedreg_zero<24, 2, (outs), (ins), "ori 2, 2, 0", IIC_IntSimple, []>; } let isCompare = 1, hasSideEffects = 0 in { def CMPWI : DForm_5_ext<11, (outs crrc:$crD), (ins gprc:$rA, s16imm:$imm), "cmpwi $crD, $rA, $imm", IIC_IntCompare>; def CMPLWI : DForm_6_ext<10, (outs crrc:$dst), (ins gprc:$src1, u16imm:$src2), "cmplwi $dst, $src1, $src2", IIC_IntCompare>; def CMPRB : X_BF3_L1_RS5_RS5<31, 192, (outs crbitrc:$BF), (ins u1imm:$L, g8rc:$rA, g8rc:$rB), "cmprb $BF, $L, $rA, $rB", IIC_IntCompare, []>, Requires<[IsISA3_0]>; } } let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations. 
let isCommutable = 1 in { defm NAND : XForm_6r<31, 476, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), "nand", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>; defm AND : XForm_6r<31, 28, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), "and", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (and i32:$rS, i32:$rB))]>; } // isCommutable defm ANDC : XForm_6r<31, 60, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), "andc", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>; let isCommutable = 1 in { defm OR : XForm_6r<31, 444, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), "or", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (or i32:$rS, i32:$rB))]>; defm NOR : XForm_6r<31, 124, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), "nor", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>; } // isCommutable defm ORC : XForm_6r<31, 412, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), "orc", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>; let isCommutable = 1 in { defm EQV : XForm_6r<31, 284, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), "eqv", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>; defm XOR : XForm_6r<31, 316, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), "xor", "$rA, $rS, $rB", IIC_IntSimple, [(set i32:$rA, (xor i32:$rS, i32:$rB))]>; } // isCommutable defm SLW : XForm_6r<31, 24, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), "slw", "$rA, $rS, $rB", IIC_IntGeneral, [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>; defm SRW : XForm_6r<31, 536, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), "srw", "$rA, $rS, $rB", IIC_IntGeneral, [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>; defm SRAW : XForm_6rc<31, 792, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), "sraw", "$rA, $rS, $rB", IIC_IntShift, [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>; } let PPC970_Unit = 1 in { // FXU Operations. let hasSideEffects = 0 in { defm SRAWI : XForm_10rc<31, 824, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH), "srawi", "$rA, $rS, $SH", IIC_IntShift, [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>; defm CNTLZW : XForm_11r<31, 26, (outs gprc:$rA), (ins gprc:$rS), "cntlzw", "$rA, $rS", IIC_IntGeneral, [(set i32:$rA, (ctlz i32:$rS))]>; defm CNTTZW : XForm_11r<31, 538, (outs gprc:$rA), (ins gprc:$rS), "cnttzw", "$rA, $rS", IIC_IntGeneral, [(set i32:$rA, (cttz i32:$rS))]>, Requires<[IsISA3_0]>; defm EXTSB : XForm_11r<31, 954, (outs gprc:$rA), (ins gprc:$rS), "extsb", "$rA, $rS", IIC_IntSimple, [(set i32:$rA, (sext_inreg i32:$rS, i8))]>; defm EXTSH : XForm_11r<31, 922, (outs gprc:$rA), (ins gprc:$rS), "extsh", "$rA, $rS", IIC_IntSimple, [(set i32:$rA, (sext_inreg i32:$rS, i16))]>; let isCommutable = 1 in def CMPB : XForm_6<31, 508, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB), "cmpb $rA, $rS, $rB", IIC_IntGeneral, [(set i32:$rA, (PPCcmpb i32:$rS, i32:$rB))]>; } let isCompare = 1, hasSideEffects = 0 in { def CMPW : XForm_16_ext<31, 0, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB), "cmpw $crD, $rA, $rB", IIC_IntCompare>; def CMPLW : XForm_16_ext<31, 32, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB), "cmplw $crD, $rA, $rB", IIC_IntCompare>; } } let PPC970_Unit = 3, Predicates = [HasFPU] in { // FPU Operations. 
//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB), // "fcmpo $crD, $fA, $fB", IIC_FPCompare>; let isCompare = 1, hasSideEffects = 0 in { def FCMPUS : XForm_17<63, 0, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB), "fcmpu $crD, $fA, $fB", IIC_FPCompare>; let Interpretation64Bit = 1, isCodeGenOnly = 1 in def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB), "fcmpu $crD, $fA, $fB", IIC_FPCompare>; } def FTDIV: XForm_17<63, 128, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB), "ftdiv $crD, $fA, $fB", IIC_FPCompare>; def FTSQRT: XForm_17a<63, 160, (outs crrc:$crD), (ins f8rc:$fB), "ftsqrt $crD, $fB", IIC_FPCompare>; let Uses = [RM] in { let hasSideEffects = 0 in { defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB), "fctiw", "$frD, $frB", IIC_FPGeneral, []>; defm FCTIWU : XForm_26r<63, 142, (outs f8rc:$frD), (ins f8rc:$frB), "fctiwu", "$frD, $frB", IIC_FPGeneral, []>; defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB), "fctiwz", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfctiwz f64:$frB))]>; defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB), "frsp", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fpround f64:$frB))]>; let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB), "frin", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (fround f64:$frB))]>; defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB), "frin", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fround f32:$frB))]>; } let hasSideEffects = 0 in { let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIPD : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB), "frip", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (fceil f64:$frB))]>; defm FRIPS : XForm_26r<63, 456, (outs f4rc:$frD), (ins f4rc:$frB), "frip", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fceil f32:$frB))]>; let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIZD : XForm_26r<63, 424, (outs f8rc:$frD), (ins f8rc:$frB), "friz", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (ftrunc f64:$frB))]>; defm FRIZS : XForm_26r<63, 424, (outs f4rc:$frD), (ins f4rc:$frB), "friz", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (ftrunc f32:$frB))]>; let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FRIMD : XForm_26r<63, 488, (outs f8rc:$frD), (ins f8rc:$frB), "frim", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (ffloor f64:$frB))]>; defm FRIMS : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB), "frim", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (ffloor f32:$frB))]>; defm FSQRT : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB), "fsqrt", "$frD, $frB", IIC_FPSqrtD, [(set f64:$frD, (fsqrt f64:$frB))]>; defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB), "fsqrts", "$frD, $frB", IIC_FPSqrtS, [(set f32:$frD, (fsqrt f32:$frB))]>; } } } /// Note that FMR is defined as pseudo-ops on the PPC970 because they are /// often coalesced away and we don't want the dispatch group builder to think /// that they will fill slots (which could cause the load of a LSU reject to /// sneak into a d-group with a store). let hasSideEffects = 0, Predicates = [HasFPU] in defm FMR : XForm_26r<63, 72, (outs f4rc:$frD), (ins f4rc:$frB), "fmr", "$frD, $frB", IIC_FPGeneral, []>, // (set f32:$frD, f32:$frB) PPC970_Unit_Pseudo; let PPC970_Unit = 3, hasSideEffects = 0, Predicates = [HasFPU] in { // FPU Operations. // These are artificially split into two different forms, for 4/8 byte FP. 
defm FABSS : XForm_26r<63, 264, (outs f4rc:$frD), (ins f4rc:$frB), "fabs", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fabs f32:$frB))]>; let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FABSD : XForm_26r<63, 264, (outs f8rc:$frD), (ins f8rc:$frB), "fabs", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (fabs f64:$frB))]>; defm FNABSS : XForm_26r<63, 136, (outs f4rc:$frD), (ins f4rc:$frB), "fnabs", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fneg (fabs f32:$frB)))]>; let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FNABSD : XForm_26r<63, 136, (outs f8rc:$frD), (ins f8rc:$frB), "fnabs", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (fneg (fabs f64:$frB)))]>; defm FNEGS : XForm_26r<63, 40, (outs f4rc:$frD), (ins f4rc:$frB), "fneg", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (fneg f32:$frB))]>; let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FNEGD : XForm_26r<63, 40, (outs f8rc:$frD), (ins f8rc:$frB), "fneg", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (fneg f64:$frB))]>; defm FCPSGNS : XForm_28r<63, 8, (outs f4rc:$frD), (ins f4rc:$frA, f4rc:$frB), "fcpsgn", "$frD, $frA, $frB", IIC_FPGeneral, [(set f32:$frD, (fcopysign f32:$frB, f32:$frA))]>; let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$frD), (ins f8rc:$frA, f8rc:$frB), "fcpsgn", "$frD, $frA, $frB", IIC_FPGeneral, [(set f64:$frD, (fcopysign f64:$frB, f64:$frA))]>; // Reciprocal estimates. defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB), "fre", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfre f64:$frB))]>; defm FRES : XForm_26r<59, 24, (outs f4rc:$frD), (ins f4rc:$frB), "fres", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (PPCfre f32:$frB))]>; defm FRSQRTE : XForm_26r<63, 26, (outs f8rc:$frD), (ins f8rc:$frB), "frsqrte", "$frD, $frB", IIC_FPGeneral, [(set f64:$frD, (PPCfrsqrte f64:$frB))]>; defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB), "frsqrtes", "$frD, $frB", IIC_FPGeneral, [(set f32:$frD, (PPCfrsqrte f32:$frB))]>; } // XL-Form instructions. condition register logical ops. // let hasSideEffects = 0 in def MCRF : XLForm_3<19, 0, (outs crrc:$BF), (ins crrc:$BFA), "mcrf $BF, $BFA", IIC_BrMCR>, PPC970_DGroup_First, PPC970_Unit_CRU; // FIXME: According to the ISA (section 2.5.1 of version 2.06), the // condition-register logical instructions have preferred forms. Specifically, // it is preferred that the bit specified by the BT field be in the same // condition register as that specified by the bit BB. We might want to account // for this via hinting the register allocator and anti-dep breakers, or we // could constrain the register class to force this constraint and then loosen // it during register allocation via convertToThreeAddress or some similar // mechanism. 
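// Note (informal sketch, not from the upstream source): a typical use of the
// CR logical ops below is fusing two comparisons into one branch, e.g. for
// "(a < b) && (c == d)", roughly:
//   cmpw  cr0, r3, r4
//   cmpw  cr1, r5, r6
//   crand 20, 0, 6      # cr5.lt = cr0.lt & cr1.eq
//   bt    20, target    # one branch on the combined condition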
let isCommutable = 1 in { def CRAND : XLForm_1<19, 257, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), "crand $CRD, $CRA, $CRB", IIC_BrCR, [(set i1:$CRD, (and i1:$CRA, i1:$CRB))]>; def CRNAND : XLForm_1<19, 225, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), "crnand $CRD, $CRA, $CRB", IIC_BrCR, [(set i1:$CRD, (not (and i1:$CRA, i1:$CRB)))]>; def CROR : XLForm_1<19, 449, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), "cror $CRD, $CRA, $CRB", IIC_BrCR, [(set i1:$CRD, (or i1:$CRA, i1:$CRB))]>; def CRXOR : XLForm_1<19, 193, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), "crxor $CRD, $CRA, $CRB", IIC_BrCR, [(set i1:$CRD, (xor i1:$CRA, i1:$CRB))]>; def CRNOR : XLForm_1<19, 33, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), "crnor $CRD, $CRA, $CRB", IIC_BrCR, [(set i1:$CRD, (not (or i1:$CRA, i1:$CRB)))]>; def CREQV : XLForm_1<19, 289, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), "creqv $CRD, $CRA, $CRB", IIC_BrCR, [(set i1:$CRD, (not (xor i1:$CRA, i1:$CRB)))]>; } // isCommutable def CRANDC : XLForm_1<19, 129, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), "crandc $CRD, $CRA, $CRB", IIC_BrCR, [(set i1:$CRD, (and i1:$CRA, (not i1:$CRB)))]>; def CRORC : XLForm_1<19, 417, (outs crbitrc:$CRD), (ins crbitrc:$CRA, crbitrc:$CRB), "crorc $CRD, $CRA, $CRB", IIC_BrCR, [(set i1:$CRD, (or i1:$CRA, (not i1:$CRB)))]>; let isCodeGenOnly = 1 in { def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins), "creqv $dst, $dst, $dst", IIC_BrCR, [(set i1:$dst, 1)]>; def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins), "crxor $dst, $dst, $dst", IIC_BrCR, [(set i1:$dst, 0)]>; let Defs = [CR1EQ], CRD = 6 in { def CR6SET : XLForm_1_ext<19, 289, (outs), (ins), "creqv 6, 6, 6", IIC_BrCR, [(PPCcr6set)]>; def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), "crxor 6, 6, 6", IIC_BrCR, [(PPCcr6unset)]>; } } // XFX-Form instructions. Instructions that deal with SPRs. // def MFSPR : XFXForm_1<31, 339, (outs gprc:$RT), (ins i32imm:$SPR), "mfspr $RT, $SPR", IIC_SprMFSPR>; def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT), "mtspr $SPR, $RT", IIC_SprMTSPR>; def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR), "mftb $RT, $SPR", IIC_SprMFTB>; def MFPMR : XFXForm_1<31, 334, (outs gprc:$RT), (ins i32imm:$SPR), "mfpmr $RT, $SPR", IIC_SprMFPMR>; def MTPMR : XFXForm_1<31, 462, (outs), (ins i32imm:$SPR, gprc:$RT), "mtpmr $SPR, $RT", IIC_SprMTPMR>; // A pseudo-instruction used to implement the read of the 64-bit cycle counter // on a 32-bit target. 
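// Note (informal): ReadTB below is expanded by the custom inserter into the
// usual retry loop, roughly:
//   loop: mftbu r_hi        # read the upper half of the time base
//         mftb  r_lo        # read the lower half
//         mftbu r_tmp       # re-read the upper half
//         cmpw  r_tmp, r_hi
//         bne-  loop        # retry if the upper half ticked over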
let hasSideEffects = 1 in def ReadTB : PPCCustomInserterPseudo<(outs gprc:$lo, gprc:$hi), (ins), "#ReadTB", []>; let Uses = [CTR] in { def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins), "mfctr $rT", IIC_SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in { def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS), "mtctr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR] in { let Pattern = [(int_ppc_mtctr i32:$rS)] in def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS), "mtctr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Defs = [LR] in { def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS), "mtlr $rS", IIC_SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let Uses = [LR] in { def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins), "mflr $rT", IIC_SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } let isCodeGenOnly = 1 in { // Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed // like a GPR on the PPC970. As such, copies in and out have the same // performance characteristics as an OR instruction. def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins gprc:$rS), "mtspr 256, $rS", IIC_IntGeneral>, PPC970_DGroup_Single, PPC970_Unit_FXU; def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins), "mfspr $rT, 256", IIC_IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; def MTVRSAVEv : XFXForm_7_ext<31, 467, 256, (outs VRSAVERC:$reg), (ins gprc:$rS), "mtspr 256, $rS", IIC_IntGeneral>, PPC970_DGroup_Single, PPC970_Unit_FXU; def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins VRSAVERC:$reg), "mfspr $rT, 256", IIC_IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; } // Aliases for mtvrsave/mfvrsave to mfspr/mtspr. def : InstAlias<"mtvrsave $rS", (MTVRSAVE gprc:$rS)>; def : InstAlias<"mfvrsave $rS", (MFVRSAVE gprc:$rS)>; // SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register, // so we'll need to scavenge a register for it. let mayStore = 1 in def SPILL_VRSAVE : PPCEmitTimePseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F), "#SPILL_VRSAVE", []>; // RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously // spilled), so we'll need to scavenge a register for it. let mayLoad = 1 in def RESTORE_VRSAVE : PPCEmitTimePseudo<(outs VRSAVERC:$vrsave), (ins memri:$F), "#RESTORE_VRSAVE", []>; let hasSideEffects = 0 in { // mtocrf's input needs to be prepared by shifting by an amount dependent // on the cr register selected. Thus, post-ra anti-dep breaking must not // later change that register assignment. let hasExtraDefRegAllocReq = 1 in { def MTOCRF: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins gprc:$ST), "mtocrf $FXM, $ST", IIC_BrMCRX>, PPC970_DGroup_First, PPC970_Unit_CRU; // Similarly to mtocrf, the mask for mtcrf must be prepared in a way that // is dependent on the cr fields being set. def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS), "mtcrf $FXM, $rS", IIC_BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; } // hasExtraDefRegAllocReq = 1 // mfocrf's input needs to be prepared by shifting by an amount dependent // on the cr register selected. Thus, post-ra anti-dep breaking must not // later change that register assignment. 
let hasExtraSrcRegAllocReq = 1 in { def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM), "mfocrf $rT, $FXM", IIC_SprMFCRF>, PPC970_DGroup_First, PPC970_Unit_CRU; // Similarly to mfocrf, the mask for mfcrf must be prepared in a way that // is dependent on the cr fields being copied. def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins), "mfcr $rT", IIC_SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; } // hasExtraSrcRegAllocReq = 1 def MCRXRX : X_BF3<31, 576, (outs crrc:$BF), (ins), "mcrxrx $BF", IIC_BrMCRX>, Requires<[IsISA3_0]>; } // hasSideEffects = 0 let Predicates = [HasFPU] in { // Custom inserter instruction to perform FADD in round-to-zero mode. let Uses = [RM] in { def FADDrtz: PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "", [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>; } // The above pseudo gets expanded to make use of the following instructions // to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level. let Uses = [RM], Defs = [RM] in { def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM), "mtfsb0 $FM", IIC_IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM), "mtfsb1 $FM", IIC_IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; let isCodeGenOnly = 1 in def MTFSFb : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT), "mtfsf $FM, $rT", IIC_IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; } let Uses = [RM] in { def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins), "mffs $rT", IIC_IntMFFS, [(set f64:$rT, (PPCmffs))]>, PPC970_DGroup_Single, PPC970_Unit_FPU; let Defs = [CR1] in def MFFSo : XForm_42<63, 583, (outs f8rc:$rT), (ins), "mffs. $rT", IIC_IntMFFS, []>, isDOT; def MFFSCE : X_FRT5_XO2_XO3_XO10<63, 0, 1, 583, (outs f8rc:$rT), (ins), "mffsce $rT", IIC_IntMFFS, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; def MFFSCDRN : X_FRT5_XO2_XO3_FRB5_XO10<63, 2, 4, 583, (outs f8rc:$rT), (ins f8rc:$FRB), "mffscdrn $rT, $FRB", IIC_IntMFFS, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; def MFFSCDRNI : X_FRT5_XO2_XO3_DRM3_XO10<63, 2, 5, 583, (outs f8rc:$rT), (ins u3imm:$DRM), "mffscdrni $rT, $DRM", IIC_IntMFFS, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; def MFFSCRN : X_FRT5_XO2_XO3_FRB5_XO10<63, 2, 6, 583, (outs f8rc:$rT), (ins f8rc:$FRB), "mffscrn $rT, $FRB", IIC_IntMFFS, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; def MFFSCRNI : X_FRT5_XO2_XO3_RM2_X10<63, 2, 7, 583, (outs f8rc:$rT), (ins u2imm:$RM), "mffscrni $rT, $RM", IIC_IntMFFS, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; def MFFSL : X_FRT5_XO2_XO3_XO10<63, 3, 0, 583, (outs f8rc:$rT), (ins), "mffsl $rT", IIC_IntMFFS, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; } } let Predicates = [IsISA3_0] in { def MODSW : XForm_8<31, 779, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "modsw $rT, $rA, $rB", IIC_IntDivW, [(set i32:$rT, (srem i32:$rA, i32:$rB))]>; def MODUW : XForm_8<31, 267, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "moduw $rT, $rA, $rB", IIC_IntDivW, [(set i32:$rT, (urem i32:$rA, i32:$rB))]>; } let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations. // XO-Form instructions. 
Arithmetic instructions that can set overflow bit let isCommutable = 1 in defm ADD4 : XOForm_1r<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "add", "$rT, $rA, $rB", IIC_IntSimple, [(set i32:$rT, (add i32:$rA, i32:$rB))]>; let isCodeGenOnly = 1 in def ADD4TLS : XOForm_1<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, tlsreg32:$rB), "add $rT, $rA, $rB", IIC_IntSimple, [(set i32:$rT, (add i32:$rA, tglobaltlsaddr:$rB))]>; let isCommutable = 1 in defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "addc", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (addc i32:$rA, i32:$rB))]>, PPC970_DGroup_Cracked; defm DIVW : XOForm_1rcr<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "divw", "$rT, $rA, $rB", IIC_IntDivW, [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>; defm DIVWU : XOForm_1rcr<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "divwu", "$rT, $rA, $rB", IIC_IntDivW, [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>; def DIVWE : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "divwe $rT, $rA, $rB", IIC_IntDivW, [(set i32:$rT, (int_ppc_divwe gprc:$rA, gprc:$rB))]>, Requires<[HasExtDiv]>; let Defs = [CR0] in def DIVWEo : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "divwe. $rT, $rA, $rB", IIC_IntDivW, []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, Requires<[HasExtDiv]>; def DIVWEU : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "divweu $rT, $rA, $rB", IIC_IntDivW, [(set i32:$rT, (int_ppc_divweu gprc:$rA, gprc:$rB))]>, Requires<[HasExtDiv]>; let Defs = [CR0] in def DIVWEUo : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "divweu. $rT, $rA, $rB", IIC_IntDivW, []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, Requires<[HasExtDiv]>; let isCommutable = 1 in { defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "mulhw", "$rT, $rA, $rB", IIC_IntMulHW, [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>; defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "mulhwu", "$rT, $rA, $rB", IIC_IntMulHWU, [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>; defm MULLW : XOForm_1r<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "mullw", "$rT, $rA, $rB", IIC_IntMulHW, [(set i32:$rT, (mul i32:$rA, i32:$rB))]>; } // isCommutable defm SUBF : XOForm_1r<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "subf", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (sub i32:$rB, i32:$rA))]>; defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "subfc", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (subc i32:$rB, i32:$rA))]>, PPC970_DGroup_Cracked; defm NEG : XOForm_3r<31, 104, 0, (outs gprc:$rT), (ins gprc:$rA), "neg", "$rT, $rA", IIC_IntSimple, [(set i32:$rT, (ineg i32:$rA))]>; let Uses = [CARRY] in { let isCommutable = 1 in defm ADDE : XOForm_1rc<31, 138, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "adde", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (adde i32:$rA, i32:$rB))]>; defm ADDME : XOForm_3rc<31, 234, 0, (outs gprc:$rT), (ins gprc:$rA), "addme", "$rT, $rA", IIC_IntGeneral, [(set i32:$rT, (adde i32:$rA, -1))]>; defm ADDZE : XOForm_3rc<31, 202, 0, (outs gprc:$rT), (ins gprc:$rA), "addze", "$rT, $rA", IIC_IntGeneral, [(set i32:$rT, (adde i32:$rA, 0))]>; defm SUBFE : XOForm_1rc<31, 136, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "subfe", "$rT, $rA, $rB", IIC_IntGeneral, [(set i32:$rT, (sube i32:$rB, i32:$rA))]>; defm SUBFME : XOForm_3rc<31, 232, 0, (outs gprc:$rT), (ins gprc:$rA), "subfme", "$rT, $rA", IIC_IntGeneral, [(set 
i32:$rT, (sube -1, i32:$rA))]>; defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA), "subfze", "$rT, $rA", IIC_IntGeneral, [(set i32:$rT, (sube 0, i32:$rA))]>; } } // A-Form instructions. Most of the instructions executed in the FPU are of // this type. // let PPC970_Unit = 3, hasSideEffects = 0, Predicates = [HasFPU] in { // FPU Operations. let Uses = [RM] in { let isCommutable = 1 in { defm FMADD : AForm_1r<63, 29, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), "fmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused, [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>; defm FMADDS : AForm_1r<59, 29, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), "fmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>; defm FMSUB : AForm_1r<63, 28, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), "fmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused, [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>; defm FMSUBS : AForm_1r<59, 28, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), "fmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>; defm FNMADD : AForm_1r<63, 31, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), "fnmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused, [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>; defm FNMADDS : AForm_1r<59, 31, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), "fnmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>; defm FNMSUB : AForm_1r<63, 30, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), "fnmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused, [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB))))]>; defm FNMSUBS : AForm_1r<59, 30, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB), "fnmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB))))]>; } // isCommutable } // FSEL is artificially split into 4 and 8-byte forms for the result. To avoid // having 4 of these, force the comparison to always be an 8-byte double (code // should use an FMRSD if the input comparison value really wants to be a float) // and 4/8 byte forms for the result and operand type.. 
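// Note (informal): fsel implements FRT = (FRA >= 0.0) ? FRC : FRB (FRB is also
// chosen when FRA is a NaN), which is why only the comparison operand $FRA is
// pinned to f8rc in both forms below.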
let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm FSELD : AForm_1r<63, 23, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB), "fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>; defm FSELS : AForm_1r<63, 23, (outs f4rc:$FRT), (ins f8rc:$FRA, f4rc:$FRC, f4rc:$FRB), "fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral, [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>; let Uses = [RM] in { let isCommutable = 1 in { defm FADD : AForm_2r<63, 21, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "fadd", "$FRT, $FRA, $FRB", IIC_FPAddSub, [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>; defm FADDS : AForm_2r<59, 21, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), "fadds", "$FRT, $FRA, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>; } // isCommutable defm FDIV : AForm_2r<63, 18, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "fdiv", "$FRT, $FRA, $FRB", IIC_FPDivD, [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>; defm FDIVS : AForm_2r<59, 18, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), "fdivs", "$FRT, $FRA, $FRB", IIC_FPDivS, [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>; let isCommutable = 1 in { defm FMUL : AForm_3r<63, 25, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC), "fmul", "$FRT, $FRA, $FRC", IIC_FPFused, [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>; defm FMULS : AForm_3r<59, 25, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC), "fmuls", "$FRT, $FRA, $FRC", IIC_FPGeneral, [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>; } // isCommutable defm FSUB : AForm_2r<63, 20, (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "fsub", "$FRT, $FRA, $FRB", IIC_FPAddSub, [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>; defm FSUBS : AForm_2r<59, 20, (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB), "fsubs", "$FRT, $FRA, $FRB", IIC_FPGeneral, [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>; } } let hasSideEffects = 0 in { let PPC970_Unit = 1 in { // FXU Operations. let isSelect = 1 in def ISEL : AForm_4<31, 15, (outs gprc:$rT), (ins gprc_nor0:$rA, gprc:$rB, crbitrc:$cond), "isel $rT, $rA, $rB, $cond", IIC_IntISEL, []>; } let PPC970_Unit = 1 in { // FXU Operations. // M-Form instructions. rotate and mask instructions. // let isCommutable = 1 in { // RLWIMI can be commuted if the rotate amount is zero. defm RLWIMI : MForm_2r<20, (outs gprc:$rA), (ins gprc:$rSi, gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME", IIC_IntRotate, []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">; } let BaseName = "rlwinm" in { def RLWINM : MForm_2<21, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), "rlwinm $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral, []>, RecFormRel; let Defs = [CR0] in def RLWINMo : MForm_2<21, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), "rlwinm. $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral, []>, isDOT, RecFormRel, PPC970_DGroup_Cracked; } defm RLWNM : MForm_2r<23, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB, u5imm:$MB, u5imm:$ME), "rlwnm", "$rA, $rS, $rB, $MB, $ME", IIC_IntGeneral, []>; } } // hasSideEffects = 0 //===----------------------------------------------------------------------===// // PowerPC Instruction Patterns // // Arbitrary immediate support. Implement in terms of LIS/ORI. def : Pat<(i32 imm:$imm), (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>; // Implement the 'not' operation with the NOR instruction. def i32not : OutPatFrag<(ops node:$in), (NOR $in, $in)>; def : Pat<(not i32:$in), (i32not $in)>; // ADD an arbitrary immediate. 
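// Note (informal): the low half is added with addi, which sign-extends its
// 16-bit operand, so the high half must use HA16 ("high-adjusted": HI16 plus
// one when bit 15 of the immediate is set) for the pair to reproduce the full
// 32-bit constant, e.g. for 0x00018000:
//   addi  rT, rA, -32768    # adds 0xffff8000
//   addis rT, rT, 2         # adds 0x00020000; net effect is +0x00018000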
def : Pat<(add i32:$in, imm:$imm), (ADDIS (ADDI $in, (LO16 imm:$imm)), (HA16 imm:$imm))>; // OR an arbitrary immediate. def : Pat<(or i32:$in, imm:$imm), (ORIS (ORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>; // XOR an arbitrary immediate. def : Pat<(xor i32:$in, imm:$imm), (XORIS (XORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>; // SUBFIC def : Pat<(sub imm32SExt16:$imm, i32:$in), (SUBFIC $in, imm:$imm)>; // SHL/SRL def : Pat<(shl i32:$in, (i32 imm:$imm)), (RLWINM $in, imm:$imm, 0, (SHL32 imm:$imm))>; def : Pat<(srl i32:$in, (i32 imm:$imm)), (RLWINM $in, (SRL32 imm:$imm), imm:$imm, 31)>; // ROTL def : Pat<(rotl i32:$in, i32:$sh), (RLWNM $in, $sh, 0, 31)>; def : Pat<(rotl i32:$in, (i32 imm:$imm)), (RLWINM $in, imm:$imm, 0, 31)>; // RLWNM def : Pat<(and (rotl i32:$in, i32:$sh), maskimm32:$imm), (RLWNM $in, $sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>; // Calls def : Pat<(PPCcall (i32 tglobaladdr:$dst)), (BL tglobaladdr:$dst)>; def : Pat<(PPCcall (i32 texternalsym:$dst)), (BL texternalsym:$dst)>; def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), (TCRETURNdi tglobaladdr:$dst, imm:$imm)>; def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm), (TCRETURNdi texternalsym:$dst, imm:$imm)>; def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm), (TCRETURNri CTRRC:$dst, imm:$imm)>; // Hi and Lo for Darwin Global Addresses. def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>; def : Pat<(PPClo tglobaladdr:$in, 0), (LI tglobaladdr:$in)>; def : Pat<(PPChi tconstpool:$in, 0), (LIS tconstpool:$in)>; def : Pat<(PPClo tconstpool:$in, 0), (LI tconstpool:$in)>; def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>; def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>; def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>; def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>; def : Pat<(PPChi tglobaltlsaddr:$g, i32:$in), (ADDIS $in, tglobaltlsaddr:$g)>; def : Pat<(PPClo tglobaltlsaddr:$g, i32:$in), (ADDI $in, tglobaltlsaddr:$g)>; def : Pat<(add i32:$in, (PPChi tglobaladdr:$g, 0)), (ADDIS $in, tglobaladdr:$g)>; def : Pat<(add i32:$in, (PPChi tconstpool:$g, 0)), (ADDIS $in, tconstpool:$g)>; def : Pat<(add i32:$in, (PPChi tjumptable:$g, 0)), (ADDIS $in, tjumptable:$g)>; def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)), (ADDIS $in, tblockaddress:$g)>; // Support for thread-local storage. def PPC32GOT: PPCEmitTimePseudo<(outs gprc:$rD), (ins), "#PPC32GOT", [(set i32:$rD, (PPCppc32GOT))]>; // Get the _GLOBAL_OFFSET_TABLE_ in PIC mode. // This uses two output registers, the first as the real output, the second as a // temporary register, used internally in code generation. def PPC32PICGOT: PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT", []>, NoEncode<"$rT">; def LDgotTprelL32: PPCEmitTimePseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg), "#LDgotTprelL32", [(set i32:$rD, (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>; def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g), (ADD4TLS $in, tglobaltlsaddr:$g)>; def ADDItlsgdL32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), "#ADDItlsgdL32", [(set i32:$rD, (PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>; // LR is a true define, while the rest of the Defs are clobbers. R3 is // explicitly defined when this op is created, so not mentioned here. 
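// Note (informal): GETtlsADDR32 below is late-expanded into the actual call to
// __tls_get_addr for the general-dynamic TLS model, which is why LR and the
// usual call-clobbered registers appear in its Defs list.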
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in def GETtlsADDR32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), "GETtlsADDR32", [(set i32:$rD, (PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>; // Combined op for ADDItlsgdL32 and GETtlsADDR32, late expanded. R3 and LR // are true defines while the rest of the Defs are clobbers. let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in def ADDItlsgdLADDR32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym), "#ADDItlsgdLADDR32", [(set i32:$rD, (PPCaddiTlsgdLAddr i32:$reg, tglobaltlsaddr:$disp, tglobaltlsaddr:$sym))]>; def ADDItlsldL32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), "#ADDItlsldL32", [(set i32:$rD, (PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>; // LR is a true define, while the rest of the Defs are clobbers. R3 is // explicitly defined when this op is created, so not mentioned here. let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in def GETtlsldADDR32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), "GETtlsldADDR32", [(set i32:$rD, (PPCgetTlsldAddr i32:$reg, tglobaltlsaddr:$sym))]>; // Combined op for ADDItlsldL32 and GETtlsADDR32, late expanded. R3 and LR // are true defines while the rest of the Defs are clobbers. let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in def ADDItlsldLADDR32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym), "#ADDItlsldLADDR32", [(set i32:$rD, (PPCaddiTlsldLAddr i32:$reg, tglobaltlsaddr:$disp, tglobaltlsaddr:$sym))]>; def ADDIdtprelL32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), "#ADDIdtprelL32", [(set i32:$rD, (PPCaddiDtprelL i32:$reg, tglobaltlsaddr:$disp))]>; def ADDISdtprelHA32 : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), "#ADDISdtprelHA32", [(set i32:$rD, (PPCaddisDtprelHA i32:$reg, tglobaltlsaddr:$disp))]>; // Support for Position-independent code def LWZtoc : PPCEmitTimePseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg), "#LWZtoc", [(set i32:$rD, (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>; // Get Global (GOT) Base Register offset, from the word immediately preceding // the function label. def UpdateGBR : PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>; // Standard shifts. These are represented separately from the real shifts above // so that we can distinguish between shifts that allow 5-bit and 6-bit shift // amounts. 
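// Note (informal): the distinction matters for out-of-range amounts: slw/srw
// and sraw interpret a 6-bit amount (a shift by 32..63 yields 0, or all sign
// bits for sraw), while the rlwinm/rlwnm-based shift and rotate patterns above
// only look at the low 5 bits of the amount.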
def : Pat<(sra i32:$rS, i32:$rB), (SRAW $rS, $rB)>; def : Pat<(srl i32:$rS, i32:$rB), (SRW $rS, $rB)>; def : Pat<(shl i32:$rS, i32:$rB), (SLW $rS, $rB)>; def : Pat<(zextloadi1 iaddr:$src), (LBZ iaddr:$src)>; def : Pat<(zextloadi1 xaddr:$src), (LBZX xaddr:$src)>; def : Pat<(extloadi1 iaddr:$src), (LBZ iaddr:$src)>; def : Pat<(extloadi1 xaddr:$src), (LBZX xaddr:$src)>; def : Pat<(extloadi8 iaddr:$src), (LBZ iaddr:$src)>; def : Pat<(extloadi8 xaddr:$src), (LBZX xaddr:$src)>; def : Pat<(extloadi16 iaddr:$src), (LHZ iaddr:$src)>; def : Pat<(extloadi16 xaddr:$src), (LHZX xaddr:$src)>; let Predicates = [HasFPU] in { def : Pat<(f64 (extloadf32 iaddr:$src)), (COPY_TO_REGCLASS (LFS iaddr:$src), F8RC)>; def : Pat<(f64 (extloadf32 xaddr:$src)), (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>; def : Pat<(f64 (fpextend f32:$src)), (COPY_TO_REGCLASS $src, F8RC)>; } // Only seq_cst fences require the heavyweight sync (SYNC 0). // All others can use the lightweight sync (SYNC 1). // source: http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html // The rule for seq_cst is duplicated to work with both 64 bits and 32 bits // versions of Power. def : Pat<(atomic_fence (i64 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>; def : Pat<(atomic_fence (i32 7), (imm)), (SYNC 0)>, Requires<[HasSYNC]>; def : Pat<(atomic_fence (imm), (imm)), (SYNC 1)>, Requires<[HasSYNC]>; def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[HasOnlyMSYNC]>; let Predicates = [HasFPU] in { // Additional FNMSUB patterns: -a*c + b == -(a*c - b) def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B), (FNMSUB $A, $C, $B)>; def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B), (FNMSUB $A, $C, $B)>; def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B), (FNMSUBS $A, $C, $B)>; def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B), (FNMSUBS $A, $C, $B)>; // FCOPYSIGN's operand types need not agree. def : Pat<(fcopysign f64:$frB, f32:$frA), (FCPSGND (COPY_TO_REGCLASS $frA, F8RC), $frB)>; def : Pat<(fcopysign f32:$frB, f64:$frA), (FCPSGNS (COPY_TO_REGCLASS $frA, F4RC), $frB)>; } include "PPCInstrAltivec.td" include "PPCInstrSPE.td" include "PPCInstr64Bit.td" include "PPCInstrVSX.td" include "PPCInstrQPX.td" include "PPCInstrHTM.td" def crnot : OutPatFrag<(ops node:$in), (CRNOR $in, $in)>; def : Pat<(not i1:$in), (crnot $in)>; // Patterns for arithmetic i1 operations. def : Pat<(add i1:$a, i1:$b), (CRXOR $a, $b)>; def : Pat<(sub i1:$a, i1:$b), (CRXOR $a, $b)>; def : Pat<(mul i1:$a, i1:$b), (CRAND $a, $b)>; // We're sometimes asked to materialize i1 -1, which is just 1 in this case // (-1 is used to mean all bits set). def : Pat<(i1 -1), (CRSET)>; // i1 extensions, implemented in terms of isel. def : Pat<(i32 (zext i1:$in)), (SELECT_I4 $in, (LI 1), (LI 0))>; def : Pat<(i32 (sext i1:$in)), (SELECT_I4 $in, (LI -1), (LI 0))>; def : Pat<(i64 (zext i1:$in)), (SELECT_I8 $in, (LI8 1), (LI8 0))>; def : Pat<(i64 (sext i1:$in)), (SELECT_I8 $in, (LI8 -1), (LI8 0))>; // FIXME: We should choose either a zext or a sext based on other constants // already around. def : Pat<(i32 (anyext i1:$in)), (SELECT_I4 $in, (LI 1), (LI 0))>; def : Pat<(i64 (anyext i1:$in)), (SELECT_I8 $in, (LI8 1), (LI8 0))>; // match setcc on i1 variables. 
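// Note (informal): i1 values live in CR bits, so comparing two of them reduces
// to a single CR logical op; in the tables below the signed rows treat "true"
// as -1 and the unsigned rows treat "true" as 1, which is why the operand
// order differs between the signed and unsigned patterns.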
// CRANDC is:
//   1 1 : F
//   1 0 : T
//   0 1 : F
//   0 0 : F
//
// LT is:
//  -1 -1 : F
//  -1  0 : T
//   0 -1 : F
//   0  0 : F
//
// ULT is:
//   1 1 : F
//   1 0 : F
//   0 1 : T
//   0 0 : F
def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLT)),
          (CRANDC $s1, $s2)>;
def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULT)),
          (CRANDC $s2, $s1)>;

// CRORC is:
//   1 1 : T
//   1 0 : T
//   0 1 : F
//   0 0 : T
//
// LE is:
//  -1 -1 : T
//  -1  0 : T
//   0 -1 : F
//   0  0 : T
//
// ULE is:
//   1 1 : T
//   1 0 : F
//   0 1 : T
//   0 0 : T
def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLE)),
          (CRORC $s1, $s2)>;
def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULE)),
          (CRORC $s2, $s1)>;

def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETEQ)),
          (CREQV $s1, $s2)>;

// GE is:
//  -1 -1 : T
//  -1  0 : F
//   0 -1 : T
//   0  0 : T
//
// UGE is:
//   1 1 : T
//   1 0 : T
//   0 1 : F
//   0 0 : T
def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGE)),
          (CRORC $s2, $s1)>;
def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGE)),
          (CRORC $s1, $s2)>;

// GT is:
//  -1 -1 : F
//  -1  0 : F
//   0 -1 : T
//   0  0 : F
//
// UGT is:
//   1 1 : F
//   1 0 : T
//   0 1 : F
//   0 0 : F
def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGT)),
          (CRANDC $s2, $s1)>;
def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGT)),
          (CRANDC $s1, $s2)>;

def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETNE)),
          (CRXOR $s1, $s2)>;

// match setcc on non-i1 (non-vector) variables. Note that SETUEQ, SETOGE,
// SETOLE, SETONE, SETULT and SETUGT should be expanded by legalize for
// floating-point types.

multiclass CRNotPat<dag pattern, dag result> {
  def : Pat<pattern, result>;
  def : Pat<(not pattern), result>;

  // We can also fold the crnot into an extension:
  def : Pat<(i32 (zext pattern)),
            (SELECT_I4 result, (LI 0), (LI 1))>;
  def : Pat<(i32 (sext pattern)),
            (SELECT_I4 result, (LI 0), (LI -1))>;

  // We can also fold the crnot into an extension:
  def : Pat<(i64 (zext pattern)),
            (SELECT_I8 result, (LI8 0), (LI8 1))>;
  def : Pat<(i64 (sext pattern)),
            (SELECT_I8 result, (LI8 0), (LI8 -1))>;

  // FIXME: We should choose either a zext or a sext based on other constants
  // already around.
  def : Pat<(i32 (anyext pattern)),
            (SELECT_I4 result, (LI 0), (LI 1))>;
  def : Pat<(i64 (anyext pattern)),
            (SELECT_I8 result, (LI8 0), (LI8 1))>;
}

// FIXME: Because of what seems like a bug in TableGen's type-inference code,
// we need to write imm:$imm in the output patterns below, not just $imm, or
// else the resulting matcher will not correctly add the immediate operand
// (making it a register operand instead).

// extended SETCC.
multiclass ExtSetCCPat<CondCode cc, PatFrag pfrag,
                       OutPatFrag rfrag, OutPatFrag rfrag8> {
  def : Pat<(i32 (zext (i1 (pfrag i32:$s1, cc)))),
            (rfrag $s1)>;
  def : Pat<(i64 (zext (i1 (pfrag i64:$s1, cc)))),
            (rfrag8 $s1)>;
  def : Pat<(i64 (zext (i1 (pfrag i32:$s1, cc)))),
            (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1), sub_32)>;
  def : Pat<(i32 (zext (i1 (pfrag i64:$s1, cc)))),
            (EXTRACT_SUBREG (rfrag8 $s1), sub_32)>;
  def : Pat<(i32 (anyext (i1 (pfrag i32:$s1, cc)))),
            (rfrag $s1)>;
  def : Pat<(i64 (anyext (i1 (pfrag i64:$s1, cc)))),
            (rfrag8 $s1)>;
  def : Pat<(i64 (anyext (i1 (pfrag i32:$s1, cc)))),
            (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1), sub_32)>;
  def : Pat<(i32 (anyext (i1 (pfrag i64:$s1, cc)))),
            (EXTRACT_SUBREG (rfrag8 $s1), sub_32)>;
}

// Note that we do all inversions below with i(32|64)not, instead of using
// (xori x, 1) because on the A2 nor has single-cycle latency while xori
// has 2-cycle latency.
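// Note (informal, on the first pair below): cntlzw returns 32 exactly when its
// input is zero, and (RLWINM x, 27, 31, 31) is (x >> 5) & 1, so the seteq-0
// pattern produces 1 iff the input was 0; the setne-0 form simply inverts the
// count first.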
defm : ExtSetCCPat<SETEQ,
                   PatFrag<(ops node:$in, node:$cc),
                           (setcc $in, 0, $cc)>,
                   OutPatFrag<(ops node:$in),
                              (RLWINM (CNTLZW $in), 27, 31, 31)>,
                   OutPatFrag<(ops node:$in),
                              (RLDICL (CNTLZD $in), 58, 63)> >;

defm : ExtSetCCPat<SETNE,
                   PatFrag<(ops node:$in, node:$cc),
                           (setcc $in, 0, $cc)>,
                   OutPatFrag<(ops node:$in),
                              (RLWINM (i32not (CNTLZW $in)), 27, 31, 31)>,
                   OutPatFrag<(ops node:$in),
                              (RLDICL (i64not (CNTLZD $in)), 58, 63)> >;

defm : ExtSetCCPat<SETLT,
                   PatFrag<(ops node:$in, node:$cc),
                           (setcc $in, 0, $cc)>,
                   OutPatFrag<(ops node:$in),
                              (RLWINM $in, 1, 31, 31)>,
                   OutPatFrag<(ops node:$in),
                              (RLDICL $in, 1, 63)> >;

defm : ExtSetCCPat<SETGE,
                   PatFrag<(ops node:$in, node:$cc),
                           (setcc $in, 0, $cc)>,
                   OutPatFrag<(ops node:$in),
                              (RLWINM (i32not $in), 1, 31, 31)>,
                   OutPatFrag<(ops node:$in),
                              (RLDICL (i64not $in), 1, 63)> >;

defm : ExtSetCCPat<SETGT,
                   PatFrag<(ops node:$in, node:$cc),
                           (setcc $in, 0, $cc)>,
                   OutPatFrag<(ops node:$in),
                              (RLWINM (ANDC (NEG $in), $in), 1, 31, 31)>,
                   OutPatFrag<(ops node:$in),
                              (RLDICL (ANDC8 (NEG8 $in), $in), 1, 63)> >;

defm : ExtSetCCPat<SETLE,
                   PatFrag<(ops node:$in, node:$cc),
                           (setcc $in, 0, $cc)>,
                   OutPatFrag<(ops node:$in),
                              (RLWINM (ORC $in, (NEG $in)), 1, 31, 31)>,
                   OutPatFrag<(ops node:$in),
                              (RLDICL (ORC8 $in, (NEG8 $in)), 1, 63)> >;

defm : ExtSetCCPat<SETLT,
                   PatFrag<(ops node:$in, node:$cc),
                           (setcc $in, -1, $cc)>,
                   OutPatFrag<(ops node:$in),
                              (RLWINM (AND $in, (ADDI $in, 1)), 1, 31, 31)>,
                   OutPatFrag<(ops node:$in),
                              (RLDICL (AND8 $in, (ADDI8 $in, 1)), 1, 63)> >;

defm : ExtSetCCPat<SETGE,
                   PatFrag<(ops node:$in, node:$cc),
                           (setcc $in, -1, $cc)>,
                   OutPatFrag<(ops node:$in),
                              (RLWINM (NAND $in, (ADDI $in, 1)), 1, 31, 31)>,
                   OutPatFrag<(ops node:$in),
                              (RLDICL (NAND8 $in, (ADDI8 $in, 1)), 1, 63)> >;

defm : ExtSetCCPat<SETGT,
                   PatFrag<(ops node:$in, node:$cc),
                           (setcc $in, -1, $cc)>,
                   OutPatFrag<(ops node:$in),
                              (RLWINM (i32not $in), 1, 31, 31)>,
                   OutPatFrag<(ops node:$in),
                              (RLDICL (i64not $in), 1, 63)> >;

defm : ExtSetCCPat<SETLE,
                   PatFrag<(ops node:$in, node:$cc),
                           (setcc $in, -1, $cc)>,
                   OutPatFrag<(ops node:$in),
                              (RLWINM $in, 1, 31, 31)>,
                   OutPatFrag<(ops node:$in),
                              (RLDICL $in, 1, 63)> >;

// An extended SETCC with shift amount.
multiclass ExtSetCCShiftPat<CondCode cc, PatFrag pfrag,
                            OutPatFrag rfrag, OutPatFrag rfrag8> {
  def : Pat<(i32 (zext (i1 (pfrag i32:$s1, i32:$sa, cc)))),
            (rfrag $s1, $sa)>;
  def : Pat<(i64 (zext (i1 (pfrag i64:$s1, i32:$sa, cc)))),
            (rfrag8 $s1, $sa)>;
  def : Pat<(i64 (zext (i1 (pfrag i32:$s1, i32:$sa, cc)))),
            (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1, $sa), sub_32)>;
  def : Pat<(i32 (zext (i1 (pfrag i64:$s1, i32:$sa, cc)))),
            (EXTRACT_SUBREG (rfrag8 $s1, $sa), sub_32)>;

  def : Pat<(i32 (anyext (i1 (pfrag i32:$s1, i32:$sa, cc)))),
            (rfrag $s1, $sa)>;
  def : Pat<(i64 (anyext (i1 (pfrag i64:$s1, i32:$sa, cc)))),
            (rfrag8 $s1, $sa)>;
  def : Pat<(i64 (anyext (i1 (pfrag i32:$s1, i32:$sa, cc)))),
            (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1, $sa), sub_32)>;
  def : Pat<(i32 (anyext (i1 (pfrag i64:$s1, i32:$sa, cc)))),
            (EXTRACT_SUBREG (rfrag8 $s1, $sa), sub_32)>;
}

defm : ExtSetCCShiftPat<SETNE,
                        PatFrag<(ops node:$in, node:$sa, node:$cc),
                                (setcc (and (srl $in, $sa), 1), 0, $cc)>,
                        OutPatFrag<(ops node:$in, node:$sa),
                                   (RLWNM $in, (SUBFIC $sa, 32), 31, 31)>,
                        OutPatFrag<(ops node:$in, node:$sa),
                                   (RLDCL $in, (SUBFIC $sa, 64), 63)> >;

defm : ExtSetCCShiftPat<SETEQ,
                        PatFrag<(ops node:$in, node:$sa, node:$cc),
                                (setcc (and (srl $in, $sa), 1), 0, $cc)>,
                        OutPatFrag<(ops node:$in, node:$sa),
                                   (RLWNM (i32not $in), (SUBFIC $sa, 32), 31, 31)>,
                        OutPatFrag<(ops node:$in, node:$sa),
                                   (RLDCL (i64not $in), (SUBFIC $sa, 64), 63)> >;

// SETCC for i32.
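// Illustrative note (added commentary): in the i32 patterns below,
// imm32SExt16 restricts a pattern to constants that fit the signed 16-bit
// field of cmpwi, and immZExt16 to constants that fit the unsigned 16-bit
// field of cmplwi; constants that fit neither are handled by the XORIS
// equality trick further down or by materializing the constant first.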
def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULT)), (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>; def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLT)), (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>; def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGT)), (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>; def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGT)), (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>; def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETEQ)), (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>; def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETEQ)), (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>; // For non-equality comparisons, the default code would materialize the // constant, then compare against it, like this: // lis r2, 4660 // ori r2, r2, 22136 // cmpw cr0, r3, r2 // beq cr0,L6 // Since we are just comparing for equality, we can emit this instead: // xoris r0,r3,0x1234 // cmplwi cr0,r0,0x5678 // beq cr0,L6 def : Pat<(i1 (setcc i32:$s1, imm:$imm, SETEQ)), (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), (LO16 imm:$imm)), sub_eq)>; defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)), (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>; defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)), (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>; defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)), (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>; defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)), (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>; defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)), (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>; defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)), (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>; defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)), (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)), (LO16 imm:$imm)), sub_eq)>; def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETULT)), (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETLT)), (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>; def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETUGT)), (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>; def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETGT)), (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>; def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETEQ)), (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)), (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>; defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)), (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>; defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)), (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)), (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)), (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>; // SETCC for i64. 
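// Illustrative worked example (added commentary) for the XORIS/CMPLWI
// equality trick used above and reused for i64 below: lis 4660 / ori 22136
// materializes 0x1234_5678, since 4660 = 0x1234 and 22136 = 0x5678.
// xoris r0, r3, 0x1234 clears the high halfword exactly when it matches, so
// the result equals 0x0000_5678 iff r3 == 0x12345678, which a single
// cmplwi/cmpldi against the LO16 part can then test.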
def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULT)), (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>; def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETLT)), (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_lt)>; def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGT)), (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_gt)>; def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGT)), (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_gt)>; def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETEQ)), (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_eq)>; def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETEQ)), (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_eq)>; // For non-equality comparisons, the default code would materialize the // constant, then compare against it, like this: // lis r2, 4660 // ori r2, r2, 22136 // cmpd cr0, r3, r2 // beq cr0,L6 // Since we are just comparing for equality, we can emit this instead: // xoris r0,r3,0x1234 // cmpldi cr0,r0,0x5678 // beq cr0,L6 def : Pat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETEQ)), (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)), (LO16 imm:$imm)), sub_eq)>; defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGE)), (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>; defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGE)), (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_lt)>; defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULE)), (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_gt)>; defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETLE)), (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_gt)>; defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETNE)), (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_eq)>; defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETNE)), (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_eq)>; defm : CRNotPat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)), (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)), (LO16 imm:$imm)), sub_eq)>; def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)), (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)), (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>; def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)), (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>; def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)), (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>; def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)), (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETUGE)), (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>; defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETGE)), (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>; defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETULE)), (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETLE)), (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETNE)), (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>; // SETCC for f32. 
let Predicates = [HasFPU] in { def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)), (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETLT)), (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOGT)), (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETGT)), (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOEQ)), (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)), (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETUO)), (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)), (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)), (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>; defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)), (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)), (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)), (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)), (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>; defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)), (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>; // SETCC for f64. def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETLT)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOGT)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETGT)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOEQ)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETUO)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>; defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>; defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)), (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>; // SETCC for f128. 
def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOLT)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETLT)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOGT)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETGT)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETOEQ)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETEQ)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; def : Pat<(i1 (setcc f128:$s1, f128:$s2, SETUO)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>; defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUGE)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETGE)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_lt)>; defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETULE)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETLE)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETUNE)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETNE)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_eq)>; defm : CRNotPat<(i1 (setcc f128:$s1, f128:$s2, SETO)), (EXTRACT_SUBREG (XSCMPUQP $s1, $s2), sub_un)>; } // This must be in this file because it relies on patterns defined in this file // after the inclusion of the instruction sets. let Predicates = [HasSPE] in { // SETCC for f32. def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)), (EXTRACT_SUBREG (EFSCMPLT $s1, $s2), sub_gt)>; def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETLT)), (EXTRACT_SUBREG (EFSCMPLT $s1, $s2), sub_gt)>; def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOGT)), (EXTRACT_SUBREG (EFSCMPGT $s1, $s2), sub_gt)>; def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETGT)), (EXTRACT_SUBREG (EFSCMPGT $s1, $s2), sub_gt)>; def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOEQ)), (EXTRACT_SUBREG (EFSCMPEQ $s1, $s2), sub_gt)>; def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)), (EXTRACT_SUBREG (EFSCMPEQ $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)), (EXTRACT_SUBREG (EFSCMPLT $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)), (EXTRACT_SUBREG (EFSCMPLT $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)), (EXTRACT_SUBREG (EFSCMPGT $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)), (EXTRACT_SUBREG (EFSCMPGT $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)), (EXTRACT_SUBREG (EFSCMPEQ $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)), (EXTRACT_SUBREG (EFSCMPEQ $s1, $s2), sub_gt)>; // SETCC for f64. 
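// Illustrative note (added commentary): the SPE compare instructions
// efscmp*/efdcmp* record their result in the GT position of the target CR
// field, which is why the f32 patterns above and the f64 patterns below all
// extract sub_gt; the relation being tested is selected by choosing among
// EFDCMPLT/EFDCMPGT/EFDCMPEQ, with the inverted predicates going through
// CRNotPat.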
def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)), (EXTRACT_SUBREG (EFDCMPLT $s1, $s2), sub_gt)>; def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETLT)), (EXTRACT_SUBREG (EFDCMPLT $s1, $s2), sub_gt)>; def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOGT)), (EXTRACT_SUBREG (EFDCMPGT $s1, $s2), sub_gt)>; def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETGT)), (EXTRACT_SUBREG (EFDCMPGT $s1, $s2), sub_gt)>; def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOEQ)), (EXTRACT_SUBREG (EFDCMPEQ $s1, $s2), sub_gt)>; def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)), (EXTRACT_SUBREG (EFDCMPEQ $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)), (EXTRACT_SUBREG (EFDCMPLT $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)), (EXTRACT_SUBREG (EFDCMPLT $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)), (EXTRACT_SUBREG (EFDCMPGT $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)), (EXTRACT_SUBREG (EFDCMPGT $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)), (EXTRACT_SUBREG (EFDCMPEQ $s1, $s2), sub_gt)>; defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)), (EXTRACT_SUBREG (EFDCMPEQ $s1, $s2), sub_gt)>; } // match select on i1 variables: def : Pat<(i1 (select i1:$cond, i1:$tval, i1:$fval)), (CROR (CRAND $cond , $tval), (CRAND (crnot $cond), $fval))>; // match selectcc on i1 variables: // select (lhs == rhs), tval, fval is: // ((lhs == rhs) & tval) | (!(lhs == rhs) & fval) def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLT)), (CROR (CRAND (CRANDC $lhs, $rhs), $tval), (CRAND (CRORC $rhs, $lhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETULT)), (CROR (CRAND (CRANDC $rhs, $lhs), $tval), (CRAND (CRORC $lhs, $rhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLE)), (CROR (CRAND (CRORC $lhs, $rhs), $tval), (CRAND (CRANDC $rhs, $lhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETULE)), (CROR (CRAND (CRORC $rhs, $lhs), $tval), (CRAND (CRANDC $lhs, $rhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETEQ)), (CROR (CRAND (CREQV $lhs, $rhs), $tval), (CRAND (CRXOR $lhs, $rhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGE)), (CROR (CRAND (CRORC $rhs, $lhs), $tval), (CRAND (CRANDC $lhs, $rhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETUGE)), (CROR (CRAND (CRORC $lhs, $rhs), $tval), (CRAND (CRANDC $rhs, $lhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGT)), (CROR (CRAND (CRANDC $rhs, $lhs), $tval), (CRAND (CRORC $lhs, $rhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETUGT)), (CROR (CRAND (CRANDC $lhs, $rhs), $tval), (CRAND (CRORC $rhs, $lhs), $fval))>; def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETNE)), (CROR (CRAND (CREQV $lhs, $rhs), $fval), (CRAND (CRXOR $lhs, $rhs), $tval))>; // match selectcc on i1 variables with non-i1 output. 
def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLT)), (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETULT)), (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLE)), (SELECT_I4 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETULE)), (SELECT_I4 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETEQ)), (SELECT_I4 (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGE)), (SELECT_I4 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETUGE)), (SELECT_I4 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGT)), (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETUGT)), (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETNE)), (SELECT_I4 (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLT)), (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETULT)), (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLE)), (SELECT_I8 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETULE)), (SELECT_I8 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETEQ)), (SELECT_I8 (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGE)), (SELECT_I8 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETUGE)), (SELECT_I8 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGT)), (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETUGT)), (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETNE)), (SELECT_I8 (CRXOR $lhs, $rhs), $tval, $fval)>; let Predicates = [HasFPU] in { def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)), (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)), (SELECT_F4 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)), (SELECT_F4 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)), (SELECT_F4 (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)), (SELECT_F4 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)), (SELECT_F4 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)), (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)), (SELECT_F4 (CRANDC $lhs, $rhs), $tval, 
$fval)>; def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), (SELECT_F4 (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)), (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)), (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)), (SELECT_F8 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)), (SELECT_F8 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)), (SELECT_F8 (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)), (SELECT_F8 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)), (SELECT_F8 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)), (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)), (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), (SELECT_F8 (CRXOR $lhs, $rhs), $tval, $fval)>; } def : Pat<(f128 (selectcc i1:$lhs, i1:$rhs, f128:$tval, f128:$fval, SETLT)), (SELECT_F16 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f128 (selectcc i1:$lhs, i1:$rhs, f128:$tval, f128:$fval, SETULT)), (SELECT_F16 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f128 (selectcc i1:$lhs, i1:$rhs, f128:$tval, f128:$fval, SETLE)), (SELECT_F16 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f128 (selectcc i1:$lhs, i1:$rhs, f128:$tval, f128:$fval, SETULE)), (SELECT_F16 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f128 (selectcc i1:$lhs, i1:$rhs, f128:$tval, f128:$fval, SETEQ)), (SELECT_F16 (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(f128 (selectcc i1:$lhs, i1:$rhs, f128:$tval, f128:$fval, SETGE)), (SELECT_F16 (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(f128 (selectcc i1:$lhs, i1:$rhs, f128:$tval, f128:$fval, SETUGE)), (SELECT_F16 (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(f128 (selectcc i1:$lhs, i1:$rhs, f128:$tval, f128:$fval, SETGT)), (SELECT_F16 (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(f128 (selectcc i1:$lhs, i1:$rhs, f128:$tval, f128:$fval, SETUGT)), (SELECT_F16 (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(f128 (selectcc i1:$lhs, i1:$rhs, f128:$tval, f128:$fval, SETNE)), (SELECT_F16 (CRXOR $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLT)), (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETULT)), (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLE)), (SELECT_VRRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETULE)), (SELECT_VRRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETEQ)), (SELECT_VRRC (CREQV $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGE)), (SELECT_VRRC (CRORC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGE)), (SELECT_VRRC (CRORC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, 
i1:$rhs, v4i32:$tval, v4i32:$fval, SETGT)), (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETUGT)), (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>; def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)), (SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>; def ANDIo_1_EQ_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in), "#ANDIo_1_EQ_BIT", [(set i1:$dst, (trunc (not i32:$in)))]>; def ANDIo_1_GT_BIT : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins gprc:$in), "#ANDIo_1_GT_BIT", [(set i1:$dst, (trunc i32:$in))]>; def ANDIo_1_EQ_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in), "#ANDIo_1_EQ_BIT8", [(set i1:$dst, (trunc (not i64:$in)))]>; def ANDIo_1_GT_BIT8 : PPCCustomInserterPseudo<(outs crbitrc:$dst), (ins g8rc:$in), "#ANDIo_1_GT_BIT8", [(set i1:$dst, (trunc i64:$in))]>; def : Pat<(i1 (not (trunc i32:$in))), (ANDIo_1_EQ_BIT $in)>; def : Pat<(i1 (not (trunc i64:$in))), (ANDIo_1_EQ_BIT8 $in)>; //===----------------------------------------------------------------------===// // PowerPC Instructions used for assembler/disassembler only // // FIXME: For B=0 or B > 8, the registers following RT are used. // WARNING: Do not add patterns for this instruction without fixing this. def LSWI : XForm_base_r3xo_memOp<31, 597, (outs gprc:$RT), (ins gprc:$A, u5imm:$B), "lswi $RT, $A, $B", IIC_LdStLoad, []>; // FIXME: For B=0 or B > 8, the registers following RT are used. // WARNING: Do not add patterns for this instruction without fixing this. def STSWI : XForm_base_r3xo_memOp<31, 725, (outs), (ins gprc:$RT, gprc:$A, u5imm:$B), "stswi $RT, $A, $B", IIC_LdStLoad, []>; def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins), "isync", IIC_SprISYNC, []>; def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src), "icbi $src", IIC_LdStICBI, []>; // We used to have EIEIO as value but E[0-9A-Z] is a reserved name def EnforceIEIO : XForm_24_eieio<31, 854, (outs), (ins), "eieio", IIC_LdStLoad, []>; def WAIT : XForm_24_sync<31, 30, (outs), (ins i32imm:$L), "wait $L", IIC_LdStLoad, []>; def MBAR : XForm_mbar<31, 854, (outs), (ins u5imm:$MO), "mbar $MO", IIC_LdStLoad>, Requires<[IsBookE]>; def MTSR: XForm_sr<31, 210, (outs), (ins gprc:$RS, u4imm:$SR), "mtsr $SR, $RS", IIC_SprMTSR>; def MFSR: XForm_sr<31, 595, (outs gprc:$RS), (ins u4imm:$SR), "mfsr $RS, $SR", IIC_SprMFSR>; def MTSRIN: XForm_srin<31, 242, (outs), (ins gprc:$RS, gprc:$RB), "mtsrin $RS, $RB", IIC_SprMTSR>; def MFSRIN: XForm_srin<31, 659, (outs gprc:$RS), (ins gprc:$RB), "mfsrin $RS, $RB", IIC_SprMFSR>; def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L), "mtmsr $RS, $L", IIC_SprMTMSR>; def WRTEE: XForm_mtmsr<31, 131, (outs), (ins gprc:$RS), "wrtee $RS", IIC_SprMTMSR>, Requires<[IsBookE]> { let L = 0; } def WRTEEI: I<31, (outs), (ins i1imm:$E), "wrteei $E", IIC_SprMTMSR>, Requires<[IsBookE]> { bits<1> E; let Inst{16} = E; let Inst{21-30} = 163; } def DCCCI : XForm_tlb<454, (outs), (ins gprc:$A, gprc:$B), "dccci $A, $B", IIC_LdStLoad>, Requires<[IsPPC4xx]>; def ICCCI : XForm_tlb<966, (outs), (ins gprc:$A, gprc:$B), "iccci $A, $B", IIC_LdStLoad>, Requires<[IsPPC4xx]>; def : InstAlias<"dci 0", (DCCCI R0, R0)>, Requires<[IsPPC4xx]>; def : InstAlias<"dccci", (DCCCI R0, R0)>, Requires<[IsPPC4xx]>; def : InstAlias<"ici 0", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>; def : InstAlias<"iccci", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>; def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins), "mfmsr $RT", IIC_SprMFMSR, []>; def MTMSRD : 
XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, i32imm:$L), "mtmsrd $RS, $L", IIC_SprMTMSRD>; def MCRFS : XLForm_3<63, 64, (outs crrc:$BF), (ins crrc:$BFA), "mcrfs $BF, $BFA", IIC_BrMCR>; def MTFSFI : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W), "mtfsfi $BF, $U, $W", IIC_IntMFFS>; def MTFSFIo : XLForm_4<63, 134, (outs crrc:$BF), (ins i32imm:$U, i32imm:$W), "mtfsfi. $BF, $U, $W", IIC_IntMFFS>, isDOT; def : InstAlias<"mtfsfi $BF, $U", (MTFSFI crrc:$BF, i32imm:$U, 0)>; def : InstAlias<"mtfsfi. $BF, $U", (MTFSFIo crrc:$BF, i32imm:$U, 0)>; let Predicates = [HasFPU] in { def MTFSF : XFLForm_1<63, 711, (outs), (ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W), "mtfsf $FLM, $FRB, $L, $W", IIC_IntMFFS, []>; def MTFSFo : XFLForm_1<63, 711, (outs), (ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W), "mtfsf. $FLM, $FRB, $L, $W", IIC_IntMFFS, []>, isDOT; def : InstAlias<"mtfsf $FLM, $FRB", (MTFSF i32imm:$FLM, f8rc:$FRB, 0, 0)>; def : InstAlias<"mtfsf. $FLM, $FRB", (MTFSFo i32imm:$FLM, f8rc:$FRB, 0, 0)>; } def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB), "slbie $RB", IIC_SprSLBIE, []>; def SLBMTE : XForm_26<31, 402, (outs), (ins gprc:$RS, gprc:$RB), "slbmte $RS, $RB", IIC_SprSLBMTE, []>; def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB), "slbmfee $RT, $RB", IIC_SprSLBMFEE, []>; def SLBMFEV : XLForm_1_gen<31, 851, (outs gprc:$RT), (ins gprc:$RB), "slbmfev $RT, $RB", IIC_SprSLBMFEV, []>; def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>; def TLBIA : XForm_0<31, 370, (outs), (ins), "tlbia", IIC_SprTLBIA, []>; def TLBSYNC : XForm_0<31, 566, (outs), (ins), "tlbsync", IIC_SprTLBSYNC, []>; def TLBIEL : XForm_16b<31, 274, (outs), (ins gprc:$RB), "tlbiel $RB", IIC_SprTLBIEL, []>; def TLBLD : XForm_16b<31, 978, (outs), (ins gprc:$RB), "tlbld $RB", IIC_LdStLoad, []>, Requires<[IsPPC6xx]>; def TLBLI : XForm_16b<31, 1010, (outs), (ins gprc:$RB), "tlbli $RB", IIC_LdStLoad, []>, Requires<[IsPPC6xx]>; def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB), "tlbie $RB,$RS", IIC_SprTLBIE, []>; def TLBSX : XForm_tlb<914, (outs), (ins gprc:$A, gprc:$B), "tlbsx $A, $B", IIC_LdStLoad>, Requires<[IsBookE]>; def TLBIVAX : XForm_tlb<786, (outs), (ins gprc:$A, gprc:$B), "tlbivax $A, $B", IIC_LdStLoad>, Requires<[IsBookE]>; def TLBRE : XForm_24_eieio<31, 946, (outs), (ins), "tlbre", IIC_LdStLoad, []>, Requires<[IsBookE]>; def TLBWE : XForm_24_eieio<31, 978, (outs), (ins), "tlbwe", IIC_LdStLoad, []>, Requires<[IsBookE]>; def TLBRE2 : XForm_tlbws<31, 946, (outs gprc:$RS), (ins gprc:$A, i1imm:$WS), "tlbre $RS, $A, $WS", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>; def TLBWE2 : XForm_tlbws<31, 978, (outs), (ins gprc:$RS, gprc:$A, i1imm:$WS), "tlbwe $RS, $A, $WS", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>; def TLBSX2 : XForm_base_r3xo<31, 914, (outs), (ins gprc:$RST, gprc:$A, gprc:$B), "tlbsx $RST, $A, $B", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>; def TLBSX2D : XForm_base_r3xo<31, 914, (outs), (ins gprc:$RST, gprc:$A, gprc:$B), "tlbsx. 
$RST, $A, $B", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>, isDOT; def RFID : XForm_0<19, 18, (outs), (ins), "rfid", IIC_IntRFID, []>; def RFI : XForm_0<19, 50, (outs), (ins), "rfi", IIC_SprRFI, []>, Requires<[IsBookE]>; def RFCI : XForm_0<19, 51, (outs), (ins), "rfci", IIC_BrB, []>, Requires<[IsBookE]>; def RFDI : XForm_0<19, 39, (outs), (ins), "rfdi", IIC_BrB, []>, Requires<[IsE500]>; def RFMCI : XForm_0<19, 38, (outs), (ins), "rfmci", IIC_BrB, []>, Requires<[IsE500]>; def MFDCR : XFXForm_1<31, 323, (outs gprc:$RT), (ins i32imm:$SPR), "mfdcr $RT, $SPR", IIC_SprMFSPR>, Requires<[IsPPC4xx]>; def MTDCR : XFXForm_1<31, 451, (outs), (ins gprc:$RT, i32imm:$SPR), "mtdcr $SPR, $RT", IIC_SprMTSPR>, Requires<[IsPPC4xx]>; def HRFID : XLForm_1_np<19, 274, (outs), (ins), "hrfid", IIC_BrB, []>; def NAP : XLForm_1_np<19, 434, (outs), (ins), "nap", IIC_BrB, []>; def ATTN : XForm_attn<0, 256, (outs), (ins), "attn", IIC_BrB>; def LBZCIX : XForm_base_r3xo_memOp<31, 853, (outs gprc:$RST), (ins gprc:$A, gprc:$B), "lbzcix $RST, $A, $B", IIC_LdStLoad, []>; def LHZCIX : XForm_base_r3xo_memOp<31, 821, (outs gprc:$RST), (ins gprc:$A, gprc:$B), "lhzcix $RST, $A, $B", IIC_LdStLoad, []>; def LWZCIX : XForm_base_r3xo_memOp<31, 789, (outs gprc:$RST), (ins gprc:$A, gprc:$B), "lwzcix $RST, $A, $B", IIC_LdStLoad, []>; def LDCIX : XForm_base_r3xo_memOp<31, 885, (outs gprc:$RST), (ins gprc:$A, gprc:$B), "ldcix $RST, $A, $B", IIC_LdStLoad, []>; def STBCIX : XForm_base_r3xo_memOp<31, 981, (outs), (ins gprc:$RST, gprc:$A, gprc:$B), "stbcix $RST, $A, $B", IIC_LdStLoad, []>; def STHCIX : XForm_base_r3xo_memOp<31, 949, (outs), (ins gprc:$RST, gprc:$A, gprc:$B), "sthcix $RST, $A, $B", IIC_LdStLoad, []>; def STWCIX : XForm_base_r3xo_memOp<31, 917, (outs), (ins gprc:$RST, gprc:$A, gprc:$B), "stwcix $RST, $A, $B", IIC_LdStLoad, []>; def STDCIX : XForm_base_r3xo_memOp<31, 1013, (outs), (ins gprc:$RST, gprc:$A, gprc:$B), "stdcix $RST, $A, $B", IIC_LdStLoad, []>; // External PID Load Store Instructions def LBEPX : XForm_1<31, 95, (outs gprc:$rD), (ins memrr:$src), "lbepx $rD, $src", IIC_LdStLoad, []>, Requires<[IsE500]>; def LFDEPX : XForm_25<31, 607, (outs f8rc:$frD), (ins memrr:$src), "lfdepx $frD, $src", IIC_LdStLFD, []>, Requires<[IsE500]>; def LHEPX : XForm_1<31, 287, (outs gprc:$rD), (ins memrr:$src), "lhepx $rD, $src", IIC_LdStLoad, []>, Requires<[IsE500]>; def LWEPX : XForm_1<31, 31, (outs gprc:$rD), (ins memrr:$src), "lwepx $rD, $src", IIC_LdStLoad, []>, Requires<[IsE500]>; def STBEPX : XForm_8<31, 223, (outs), (ins gprc:$rS, memrr:$dst), "stbepx $rS, $dst", IIC_LdStStore, []>, Requires<[IsE500]>; def STFDEPX : XForm_28_memOp<31, 735, (outs), (ins f8rc:$frS, memrr:$dst), "stfdepx $frS, $dst", IIC_LdStSTFD, []>, Requires<[IsE500]>; def STHEPX : XForm_8<31, 415, (outs), (ins gprc:$rS, memrr:$dst), "sthepx $rS, $dst", IIC_LdStStore, []>, Requires<[IsE500]>; def STWEPX : XForm_8<31, 159, (outs), (ins gprc:$rS, memrr:$dst), "stwepx $rS, $dst", IIC_LdStStore, []>, Requires<[IsE500]>; def DCBFEP : DCB_Form<127, 0, (outs), (ins memrr:$dst), "dcbfep $dst", IIC_LdStDCBF, []>, Requires<[IsE500]>; def DCBSTEP : DCB_Form<63, 0, (outs), (ins memrr:$dst), "dcbstep $dst", IIC_LdStDCBF, []>, Requires<[IsE500]>; def DCBTEP : DCB_Form_hint<319, (outs), (ins memrr:$dst, u5imm:$TH), "dcbtep $TH, $dst", IIC_LdStDCBF, []>, Requires<[IsE500]>; def DCBTSTEP : DCB_Form_hint<255, (outs), (ins memrr:$dst, u5imm:$TH), "dcbtstep $TH, $dst", IIC_LdStDCBF, []>, Requires<[IsE500]>; def DCBZEP : DCB_Form<1023, 0, (outs), (ins memrr:$dst), "dcbzep $dst", 
IIC_LdStDCBF, []>, Requires<[IsE500]>; def DCBZLEP : DCB_Form<1023, 1, (outs), (ins memrr:$dst), "dcbzlep $dst", IIC_LdStDCBF, []>, Requires<[IsE500]>; def ICBIEP : XForm_1a<31, 991, (outs), (ins memrr:$src), "icbiep $src", IIC_LdStICBI, []>, Requires<[IsE500]>; //===----------------------------------------------------------------------===// // PowerPC Assembler Instruction Aliases // // Pseudo-instructions for alternate assembly syntax (never used by codegen). // These are aliases that require C++ handling to convert to the target // instruction, while InstAliases can be handled directly by tblgen. class PPCAsmPseudo : Instruction { let Namespace = "PPC"; bit PPC64 = 0; // Default value, override with isPPC64 let OutOperandList = (outs); let InOperandList = iops; let Pattern = []; let AsmString = asm; let isAsmParserOnly = 1; let isPseudo = 1; let hasNoSchedulingInfo = 1; } def : InstAlias<"sc", (SC 0)>; def : InstAlias<"sync", (SYNC 0)>, Requires<[HasSYNC]>; def : InstAlias<"msync", (SYNC 0), 0>, Requires<[HasSYNC]>; def : InstAlias<"lwsync", (SYNC 1)>, Requires<[HasSYNC]>; def : InstAlias<"ptesync", (SYNC 2)>, Requires<[HasSYNC]>; def : InstAlias<"wait", (WAIT 0)>; def : InstAlias<"waitrsv", (WAIT 1)>; def : InstAlias<"waitimpl", (WAIT 2)>; def : InstAlias<"mbar", (MBAR 0)>, Requires<[IsBookE]>; def DCBTx : PPCAsmPseudo<"dcbt $dst", (ins memrr:$dst)>; def DCBTSTx : PPCAsmPseudo<"dcbtst $dst", (ins memrr:$dst)>; def DCBTCT : PPCAsmPseudo<"dcbtct $dst, $TH", (ins memrr:$dst, u5imm:$TH)>; def DCBTDS : PPCAsmPseudo<"dcbtds $dst, $TH", (ins memrr:$dst, u5imm:$TH)>; def DCBTT : PPCAsmPseudo<"dcbtt $dst", (ins memrr:$dst)>; def DCBTSTCT : PPCAsmPseudo<"dcbtstct $dst, $TH", (ins memrr:$dst, u5imm:$TH)>; def DCBTSTDS : PPCAsmPseudo<"dcbtstds $dst, $TH", (ins memrr:$dst, u5imm:$TH)>; def DCBTSTT : PPCAsmPseudo<"dcbtstt $dst", (ins memrr:$dst)>; def DCBFx : PPCAsmPseudo<"dcbf $dst", (ins memrr:$dst)>; def DCBFL : PPCAsmPseudo<"dcbfl $dst", (ins memrr:$dst)>; def DCBFLP : PPCAsmPseudo<"dcbflp $dst", (ins memrr:$dst)>; def : InstAlias<"crset $bx", (CREQV crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>; def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>; def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>; def : InstAlias<"crnot $bx, $by", (CRNOR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>; def : InstAlias<"mtxer $Rx", (MTSPR 1, gprc:$Rx)>; def : InstAlias<"mfxer $Rx", (MFSPR gprc:$Rx, 1)>; def : InstAlias<"mfrtcu $Rx", (MFSPR gprc:$Rx, 4)>; def : InstAlias<"mfrtcl $Rx", (MFSPR gprc:$Rx, 5)>; def : InstAlias<"mtdscr $Rx", (MTSPR 17, gprc:$Rx)>; def : InstAlias<"mfdscr $Rx", (MFSPR gprc:$Rx, 17)>; def : InstAlias<"mtdsisr $Rx", (MTSPR 18, gprc:$Rx)>; def : InstAlias<"mfdsisr $Rx", (MFSPR gprc:$Rx, 18)>; def : InstAlias<"mtdar $Rx", (MTSPR 19, gprc:$Rx)>; def : InstAlias<"mfdar $Rx", (MFSPR gprc:$Rx, 19)>; def : InstAlias<"mtdec $Rx", (MTSPR 22, gprc:$Rx)>; def : InstAlias<"mfdec $Rx", (MFSPR gprc:$Rx, 22)>; def : InstAlias<"mtsdr1 $Rx", (MTSPR 25, gprc:$Rx)>; def : InstAlias<"mfsdr1 $Rx", (MFSPR gprc:$Rx, 25)>; def : InstAlias<"mtsrr0 $Rx", (MTSPR 26, gprc:$Rx)>; def : InstAlias<"mfsrr0 $Rx", (MFSPR gprc:$Rx, 26)>; def : InstAlias<"mtsrr1 $Rx", (MTSPR 27, gprc:$Rx)>; def : InstAlias<"mfsrr1 $Rx", (MFSPR gprc:$Rx, 27)>; def : InstAlias<"mtsrr2 $Rx", (MTSPR 990, gprc:$Rx)>, Requires<[IsPPC4xx]>; def : InstAlias<"mfsrr2 $Rx", (MFSPR gprc:$Rx, 990)>, Requires<[IsPPC4xx]>; def : InstAlias<"mtsrr3 $Rx", (MTSPR 991, gprc:$Rx)>, Requires<[IsPPC4xx]>; def : 
InstAlias<"mfsrr3 $Rx", (MFSPR gprc:$Rx, 991)>, Requires<[IsPPC4xx]>; def : InstAlias<"mtcfar $Rx", (MTSPR 28, gprc:$Rx)>; def : InstAlias<"mfcfar $Rx", (MFSPR gprc:$Rx, 28)>; def : InstAlias<"mtamr $Rx", (MTSPR 29, gprc:$Rx)>; def : InstAlias<"mfamr $Rx", (MFSPR gprc:$Rx, 29)>; def : InstAlias<"mtpid $Rx", (MTSPR 48, gprc:$Rx)>, Requires<[IsBookE]>; def : InstAlias<"mfpid $Rx", (MFSPR gprc:$Rx, 48)>, Requires<[IsBookE]>; def : InstAlias<"mftb $Rx", (MFTB gprc:$Rx, 268)>; def : InstAlias<"mftbl $Rx", (MFTB gprc:$Rx, 268)>; def : InstAlias<"mftbu $Rx", (MFTB gprc:$Rx, 269)>; def : InstAlias<"mttbl $Rx", (MTSPR 284, gprc:$Rx)>; def : InstAlias<"mttbu $Rx", (MTSPR 285, gprc:$Rx)>; def : InstAlias<"mftblo $Rx", (MFSPR gprc:$Rx, 989)>, Requires<[IsPPC4xx]>; def : InstAlias<"mttblo $Rx", (MTSPR 989, gprc:$Rx)>, Requires<[IsPPC4xx]>; def : InstAlias<"mftbhi $Rx", (MFSPR gprc:$Rx, 988)>, Requires<[IsPPC4xx]>; def : InstAlias<"mttbhi $Rx", (MTSPR 988, gprc:$Rx)>, Requires<[IsPPC4xx]>; def : InstAlias<"xnop", (XORI R0, R0, 0)>; def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>; def : InstAlias<"mr. $rA, $rB", (OR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>; def : InstAlias<"not $rA, $rB", (NOR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>; def : InstAlias<"not. $rA, $rB", (NOR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>; def : InstAlias<"mtcr $rA", (MTCRF8 255, g8rc:$rA)>; foreach BATR = 0-3 in { def : InstAlias<"mtdbatu "#BATR#", $Rx", (MTSPR !add(BATR, !add(BATR, 536)), gprc:$Rx)>, Requires<[IsPPC6xx]>; def : InstAlias<"mfdbatu $Rx, "#BATR, (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 536)))>, Requires<[IsPPC6xx]>; def : InstAlias<"mtdbatl "#BATR#", $Rx", (MTSPR !add(BATR, !add(BATR, 537)), gprc:$Rx)>, Requires<[IsPPC6xx]>; def : InstAlias<"mfdbatl $Rx, "#BATR, (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 537)))>, Requires<[IsPPC6xx]>; def : InstAlias<"mtibatu "#BATR#", $Rx", (MTSPR !add(BATR, !add(BATR, 528)), gprc:$Rx)>, Requires<[IsPPC6xx]>; def : InstAlias<"mfibatu $Rx, "#BATR, (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 528)))>, Requires<[IsPPC6xx]>; def : InstAlias<"mtibatl "#BATR#", $Rx", (MTSPR !add(BATR, !add(BATR, 529)), gprc:$Rx)>, Requires<[IsPPC6xx]>; def : InstAlias<"mfibatl $Rx, "#BATR, (MFSPR gprc:$Rx, !add(BATR, !add(BATR, 529)))>, Requires<[IsPPC6xx]>; } foreach BR = 0-7 in { def : InstAlias<"mfbr"#BR#" $Rx", (MFDCR gprc:$Rx, !add(BR, 0x80))>, Requires<[IsPPC4xx]>; def : InstAlias<"mtbr"#BR#" $Rx", (MTDCR gprc:$Rx, !add(BR, 0x80))>, Requires<[IsPPC4xx]>; } def : InstAlias<"mtdccr $Rx", (MTSPR 1018, gprc:$Rx)>, Requires<[IsPPC4xx]>; def : InstAlias<"mfdccr $Rx", (MFSPR gprc:$Rx, 1018)>, Requires<[IsPPC4xx]>; def : InstAlias<"mticcr $Rx", (MTSPR 1019, gprc:$Rx)>, Requires<[IsPPC4xx]>; def : InstAlias<"mficcr $Rx", (MFSPR gprc:$Rx, 1019)>, Requires<[IsPPC4xx]>; def : InstAlias<"mtdear $Rx", (MTSPR 981, gprc:$Rx)>, Requires<[IsPPC4xx]>; def : InstAlias<"mfdear $Rx", (MFSPR gprc:$Rx, 981)>, Requires<[IsPPC4xx]>; def : InstAlias<"mtesr $Rx", (MTSPR 980, gprc:$Rx)>, Requires<[IsPPC4xx]>; def : InstAlias<"mfesr $Rx", (MFSPR gprc:$Rx, 980)>, Requires<[IsPPC4xx]>; def : InstAlias<"mfspefscr $Rx", (MFSPR gprc:$Rx, 512)>; def : InstAlias<"mtspefscr $Rx", (MTSPR 512, gprc:$Rx)>; def : InstAlias<"mttcr $Rx", (MTSPR 986, gprc:$Rx)>, Requires<[IsPPC4xx]>; def : InstAlias<"mftcr $Rx", (MFSPR gprc:$Rx, 986)>, Requires<[IsPPC4xx]>; def LAx : PPCAsmPseudo<"la $rA, $addr", (ins gprc:$rA, memri:$addr)>; def SUBI : PPCAsmPseudo<"subi $rA, $rB, $imm", (ins gprc:$rA, gprc:$rB, s16imm:$imm)>; def SUBIS : PPCAsmPseudo<"subis $rA, $rB, $imm", 
(ins gprc:$rA, gprc:$rB, s16imm:$imm)>; def SUBIC : PPCAsmPseudo<"subic $rA, $rB, $imm", (ins gprc:$rA, gprc:$rB, s16imm:$imm)>; def SUBICo : PPCAsmPseudo<"subic. $rA, $rB, $imm", (ins gprc:$rA, gprc:$rB, s16imm:$imm)>; def : InstAlias<"sub $rA, $rB, $rC", (SUBF8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>; def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>; def : InstAlias<"subc $rA, $rB, $rC", (SUBFC8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>; def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>; def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>; def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>; def : InstAlias<"mfasr $RT", (MFSPR gprc:$RT, 280)>; def : InstAlias<"mtasr $RT", (MTSPR 280, gprc:$RT)>; foreach SPRG = 0-3 in { def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 272))>; def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 272))>; def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>; def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 272), gprc:$RT)>; } foreach SPRG = 4-7 in { def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR gprc:$RT, !add(SPRG, 256))>, Requires<[IsBookE]>; def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR gprc:$RT, !add(SPRG, 256))>, Requires<[IsBookE]>; def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>, Requires<[IsBookE]>; def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR !add(SPRG, 256), gprc:$RT)>, Requires<[IsBookE]>; } def : InstAlias<"mtasr $RS", (MTSPR 280, gprc:$RS)>; def : InstAlias<"mfdec $RT", (MFSPR gprc:$RT, 22)>; def : InstAlias<"mtdec $RT", (MTSPR 22, gprc:$RT)>; def : InstAlias<"mfpvr $RT", (MFSPR gprc:$RT, 287)>; def : InstAlias<"mfsdr1 $RT", (MFSPR gprc:$RT, 25)>; def : InstAlias<"mtsdr1 $RT", (MTSPR 25, gprc:$RT)>; def : InstAlias<"mfsrr0 $RT", (MFSPR gprc:$RT, 26)>; def : InstAlias<"mfsrr1 $RT", (MFSPR gprc:$RT, 27)>; def : InstAlias<"mtsrr0 $RT", (MTSPR 26, gprc:$RT)>; def : InstAlias<"mtsrr1 $RT", (MTSPR 27, gprc:$RT)>; def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>; def : InstAlias<"tlbrehi $RS, $A", (TLBRE2 gprc:$RS, gprc:$A, 0)>, Requires<[IsPPC4xx]>; def : InstAlias<"tlbrelo $RS, $A", (TLBRE2 gprc:$RS, gprc:$A, 1)>, Requires<[IsPPC4xx]>; def : InstAlias<"tlbwehi $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 0)>, Requires<[IsPPC4xx]>; def : InstAlias<"tlbwelo $RS, $A", (TLBWE2 gprc:$RS, gprc:$A, 1)>, Requires<[IsPPC4xx]>; def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b", (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; def EXTLWIo : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b", (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; def EXTRWI : PPCAsmPseudo<"extrwi $rA, $rS, $n, $b", (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; def EXTRWIo : PPCAsmPseudo<"extrwi. $rA, $rS, $n, $b", (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; def INSLWI : PPCAsmPseudo<"inslwi $rA, $rS, $n, $b", (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; def INSLWIo : PPCAsmPseudo<"inslwi. $rA, $rS, $n, $b", (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; def INSRWI : PPCAsmPseudo<"insrwi $rA, $rS, $n, $b", (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; def INSRWIo : PPCAsmPseudo<"insrwi. $rA, $rS, $n, $b", (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>; def ROTRWI : PPCAsmPseudo<"rotrwi $rA, $rS, $n", (ins gprc:$rA, gprc:$rS, u5imm:$n)>; def ROTRWIo : PPCAsmPseudo<"rotrwi. $rA, $rS, $n", (ins gprc:$rA, gprc:$rS, u5imm:$n)>; def SLWI : PPCAsmPseudo<"slwi $rA, $rS, $n", (ins gprc:$rA, gprc:$rS, u5imm:$n)>; def SLWIo : PPCAsmPseudo<"slwi. 
$rA, $rS, $n", (ins gprc:$rA, gprc:$rS, u5imm:$n)>; def SRWI : PPCAsmPseudo<"srwi $rA, $rS, $n", (ins gprc:$rA, gprc:$rS, u5imm:$n)>; def SRWIo : PPCAsmPseudo<"srwi. $rA, $rS, $n", (ins gprc:$rA, gprc:$rS, u5imm:$n)>; def CLRRWI : PPCAsmPseudo<"clrrwi $rA, $rS, $n", (ins gprc:$rA, gprc:$rS, u5imm:$n)>; def CLRRWIo : PPCAsmPseudo<"clrrwi. $rA, $rS, $n", (ins gprc:$rA, gprc:$rS, u5imm:$n)>; def CLRLSLWI : PPCAsmPseudo<"clrlslwi $rA, $rS, $b, $n", (ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>; def CLRLSLWIo : PPCAsmPseudo<"clrlslwi. $rA, $rS, $b, $n", (ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>; def : InstAlias<"rotlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>; def : InstAlias<"rotlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>; def : InstAlias<"rotlw $rA, $rS, $rB", (RLWNM gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>; def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNMo gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>; def : InstAlias<"clrlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>; def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>; def : InstAlias<"cntlzw $rA, $rS", (CNTLZW gprc:$rA, gprc:$rS)>; def : InstAlias<"cntlzw. $rA, $rS", (CNTLZWo gprc:$rA, gprc:$rS)>; // The POWER variant def : MnemonicAlias<"cntlz", "cntlzw">; def : MnemonicAlias<"cntlz.", "cntlzw.">; def EXTLDI : PPCAsmPseudo<"extldi $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; def EXTLDIo : PPCAsmPseudo<"extldi. $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; def EXTRDI : PPCAsmPseudo<"extrdi $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; def EXTRDIo : PPCAsmPseudo<"extrdi. $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; def INSRDI : PPCAsmPseudo<"insrdi $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; def INSRDIo : PPCAsmPseudo<"insrdi. $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; def ROTRDI : PPCAsmPseudo<"rotrdi $rA, $rS, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; def ROTRDIo : PPCAsmPseudo<"rotrdi. $rA, $rS, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; def SLDI : PPCAsmPseudo<"sldi $rA, $rS, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; def SLDIo : PPCAsmPseudo<"sldi. $rA, $rS, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; def SRDI : PPCAsmPseudo<"srdi $rA, $rS, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; def SRDIo : PPCAsmPseudo<"srdi. $rA, $rS, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; def CLRRDI : PPCAsmPseudo<"clrrdi $rA, $rS, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; def CLRRDIo : PPCAsmPseudo<"clrrdi. $rA, $rS, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>; def CLRLSLDI : PPCAsmPseudo<"clrlsldi $rA, $rS, $b, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$b, u6imm:$n)>; def CLRLSLDIo : PPCAsmPseudo<"clrlsldi. $rA, $rS, $b, $n", (ins g8rc:$rA, g8rc:$rS, u6imm:$b, u6imm:$n)>; def SUBPCIS : PPCAsmPseudo<"subpcis $RT, $D", (ins g8rc:$RT, s16imm:$D)>; def : InstAlias<"rotldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>; def : InstAlias<"rotldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>; def : InstAlias<"rotld $rA, $rS, $rB", (RLDCL g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>; def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>; def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>; def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL_32_64 g8rc:$rA, gprc:$rS, 0, u6imm:$n)>; def : InstAlias<"clrldi. 
$rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>; def : InstAlias<"lnia $RT", (ADDPCIS g8rc:$RT, 0)>; def RLWINMbm : PPCAsmPseudo<"rlwinm $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; def RLWINMobm : PPCAsmPseudo<"rlwinm. $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; def RLWIMIbm : PPCAsmPseudo<"rlwimi $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; def RLWIMIobm : PPCAsmPseudo<"rlwimi. $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; def RLWNMbm : PPCAsmPseudo<"rlwnm $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; def RLWNMobm : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; // These generic branch instruction forms are used for the assembler parser only. // Defs and Uses are conservative, since we don't know the BO value. let PPC970_Unit = 7, isBranch = 1 in { let Defs = [CTR], Uses = [CTR, RM] in { def gBC : BForm_3<16, 0, 0, (outs), (ins u5imm:$bo, crbitrc:$bi, condbrtarget:$dst), "bc $bo, $bi, $dst">; def gBCA : BForm_3<16, 1, 0, (outs), (ins u5imm:$bo, crbitrc:$bi, abscondbrtarget:$dst), "bca $bo, $bi, $dst">; let isAsmParserOnly = 1 in { def gBCat : BForm_3_at<16, 0, 0, (outs), (ins u5imm:$bo, atimm:$at, crbitrc:$bi, condbrtarget:$dst), "bc$at $bo, $bi, $dst">; def gBCAat : BForm_3_at<16, 1, 0, (outs), (ins u5imm:$bo, atimm:$at, crbitrc:$bi, abscondbrtarget:$dst), "bca$at $bo, $bi, $dst">; } // isAsmParserOnly = 1 } let Defs = [LR, CTR], Uses = [CTR, RM] in { def gBCL : BForm_3<16, 0, 1, (outs), (ins u5imm:$bo, crbitrc:$bi, condbrtarget:$dst), "bcl $bo, $bi, $dst">; def gBCLA : BForm_3<16, 1, 1, (outs), (ins u5imm:$bo, crbitrc:$bi, abscondbrtarget:$dst), "bcla $bo, $bi, $dst">; let isAsmParserOnly = 1 in { def gBCLat : BForm_3_at<16, 0, 1, (outs), (ins u5imm:$bo, atimm:$at, crbitrc:$bi, condbrtarget:$dst), "bcl$at $bo, $bi, $dst">; def gBCLAat : BForm_3_at<16, 1, 1, (outs), (ins u5imm:$bo, atimm:$at, crbitrc:$bi, abscondbrtarget:$dst), "bcla$at $bo, $bi, $dst">; } // // isAsmParserOnly = 1 } let Defs = [CTR], Uses = [CTR, LR, RM] in def gBCLR : XLForm_2<19, 16, 0, (outs), (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), "bclr $bo, $bi, $bh", IIC_BrB, []>; let Defs = [LR, CTR], Uses = [CTR, LR, RM] in def gBCLRL : XLForm_2<19, 16, 1, (outs), (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), "bclrl $bo, $bi, $bh", IIC_BrB, []>; let Defs = [CTR], Uses = [CTR, LR, RM] in def gBCCTR : XLForm_2<19, 528, 0, (outs), (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), "bcctr $bo, $bi, $bh", IIC_BrB, []>; let Defs = [LR, CTR], Uses = [CTR, LR, RM] in def gBCCTRL : XLForm_2<19, 528, 1, (outs), (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh), "bcctrl $bo, $bi, $bh", IIC_BrB, []>; } multiclass BranchSimpleMnemonicAT { def : InstAlias<"bc"#pm#" $bo, $bi, $dst", (gBCat u5imm:$bo, at, crbitrc:$bi, condbrtarget:$dst)>; def : InstAlias<"bca"#pm#" $bo, $bi, $dst", (gBCAat u5imm:$bo, at, crbitrc:$bi, condbrtarget:$dst)>; def : InstAlias<"bcl"#pm#" $bo, $bi, $dst", (gBCLat u5imm:$bo, at, crbitrc:$bi, condbrtarget:$dst)>; def : InstAlias<"bcla"#pm#" $bo, $bi, $dst", (gBCLAat u5imm:$bo, at, crbitrc:$bi, condbrtarget:$dst)>; } defm : BranchSimpleMnemonicAT<"+", 3>; defm : BranchSimpleMnemonicAT<"-", 2>; def : InstAlias<"bclr $bo, $bi", (gBCLR u5imm:$bo, crbitrc:$bi, 0)>; def : InstAlias<"bclrl $bo, $bi", (gBCLRL u5imm:$bo, crbitrc:$bi, 0)>; def : InstAlias<"bcctr $bo, $bi", (gBCCTR u5imm:$bo, crbitrc:$bi, 0)>; def : InstAlias<"bcctrl $bo, $bi", (gBCCTRL u5imm:$bo, crbitrc:$bi, 0)>; multiclass 
BranchSimpleMnemonic1<string name, string pm, int bo> {
  def : InstAlias<"b"#name#pm#" $bi, $dst",
                  (gBC bo, crbitrc:$bi, condbrtarget:$dst)>;
  def : InstAlias<"b"#name#"a"#pm#" $bi, $dst",
                  (gBCA bo, crbitrc:$bi, abscondbrtarget:$dst)>;
  def : InstAlias<"b"#name#"lr"#pm#" $bi",
                  (gBCLR bo, crbitrc:$bi, 0)>;
  def : InstAlias<"b"#name#"l"#pm#" $bi, $dst",
                  (gBCL bo, crbitrc:$bi, condbrtarget:$dst)>;
  def : InstAlias<"b"#name#"la"#pm#" $bi, $dst",
                  (gBCLA bo, crbitrc:$bi, abscondbrtarget:$dst)>;
  def : InstAlias<"b"#name#"lrl"#pm#" $bi",
                  (gBCLRL bo, crbitrc:$bi, 0)>;
}
multiclass BranchSimpleMnemonic2<string name, string pm, int bo>
  : BranchSimpleMnemonic1<name, pm, bo> {
  def : InstAlias<"b"#name#"ctr"#pm#" $bi",
                  (gBCCTR bo, crbitrc:$bi, 0)>;
  def : InstAlias<"b"#name#"ctrl"#pm#" $bi",
                  (gBCCTRL bo, crbitrc:$bi, 0)>;
}
defm : BranchSimpleMnemonic2<"t", "", 12>;
defm : BranchSimpleMnemonic2<"f", "", 4>;
defm : BranchSimpleMnemonic2<"t", "-", 14>;
defm : BranchSimpleMnemonic2<"f", "-", 6>;
defm : BranchSimpleMnemonic2<"t", "+", 15>;
defm : BranchSimpleMnemonic2<"f", "+", 7>;
defm : BranchSimpleMnemonic1<"dnzt", "", 8>;
defm : BranchSimpleMnemonic1<"dnzf", "", 0>;
defm : BranchSimpleMnemonic1<"dzt", "", 10>;
defm : BranchSimpleMnemonic1<"dzf", "", 2>;

multiclass BranchExtendedMnemonicPM<string name, string pm, int bibo> {
  def : InstAlias<"b"#name#pm#" $cc, $dst",
                  (BCC bibo, crrc:$cc, condbrtarget:$dst)>;
  def : InstAlias<"b"#name#pm#" $dst",
                  (BCC bibo, CR0, condbrtarget:$dst)>;

  def : InstAlias<"b"#name#"a"#pm#" $cc, $dst",
                  (BCCA bibo, crrc:$cc, abscondbrtarget:$dst)>;
  def : InstAlias<"b"#name#"a"#pm#" $dst",
                  (BCCA bibo, CR0, abscondbrtarget:$dst)>;

  def : InstAlias<"b"#name#"lr"#pm#" $cc",
                  (BCCLR bibo, crrc:$cc)>;
  def : InstAlias<"b"#name#"lr"#pm,
                  (BCCLR bibo, CR0)>;

  def : InstAlias<"b"#name#"ctr"#pm#" $cc",
                  (BCCCTR bibo, crrc:$cc)>;
  def : InstAlias<"b"#name#"ctr"#pm,
                  (BCCCTR bibo, CR0)>;

  def : InstAlias<"b"#name#"l"#pm#" $cc, $dst",
                  (BCCL bibo, crrc:$cc, condbrtarget:$dst)>;
  def : InstAlias<"b"#name#"l"#pm#" $dst",
                  (BCCL bibo, CR0, condbrtarget:$dst)>;

  def : InstAlias<"b"#name#"la"#pm#" $cc, $dst",
                  (BCCLA bibo, crrc:$cc, abscondbrtarget:$dst)>;
  def : InstAlias<"b"#name#"la"#pm#" $dst",
                  (BCCLA bibo, CR0, abscondbrtarget:$dst)>;

  def : InstAlias<"b"#name#"lrl"#pm#" $cc",
                  (BCCLRL bibo, crrc:$cc)>;
  def : InstAlias<"b"#name#"lrl"#pm,
                  (BCCLRL bibo, CR0)>;

  def : InstAlias<"b"#name#"ctrl"#pm#" $cc",
                  (BCCCTRL bibo, crrc:$cc)>;
  def : InstAlias<"b"#name#"ctrl"#pm,
                  (BCCCTRL bibo, CR0)>;
}
multiclass BranchExtendedMnemonic<string name, int bibo> {
  defm : BranchExtendedMnemonicPM<name, "", bibo>;
  defm : BranchExtendedMnemonicPM<name, "-", !add(bibo, 2)>;
  defm : BranchExtendedMnemonicPM<name, "+", !add(bibo, 3)>;
}
defm : BranchExtendedMnemonic<"lt", 12>;
defm : BranchExtendedMnemonic<"gt", 44>;
defm : BranchExtendedMnemonic<"eq", 76>;
defm : BranchExtendedMnemonic<"un", 108>;
defm : BranchExtendedMnemonic<"so", 108>;
defm : BranchExtendedMnemonic<"ge", 4>;
defm : BranchExtendedMnemonic<"nl", 4>;
defm : BranchExtendedMnemonic<"le", 36>;
defm : BranchExtendedMnemonic<"ng", 36>;
defm : BranchExtendedMnemonic<"ne", 68>;
defm : BranchExtendedMnemonic<"nu", 100>;
defm : BranchExtendedMnemonic<"ns", 100>;
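// Illustrative note (added commentary) on the values passed above: the
// integer given to BranchExtendedMnemonic packs the CR-bit index and the BO
// field as (bit << 5) | BO, with BO = 12 meaning "branch if the bit is set"
// and BO = 4 "branch if the bit is clear".  For example "lt" is
// (0 << 5) | 12 = 12, "gt" is (1 << 5) | 12 = 44, and "ne" is
// (2 << 5) | 4 = 68.  The "+"/"-" variants produced by
// BranchExtendedMnemonicPM add 3 or 2 to BO for the static prediction hint,
// matching the at values used for the bc+/bc- forms earlier in this file.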
def : InstAlias<"cmpwi $rA, $imm", (CMPWI CR0, gprc:$rA, s16imm:$imm)>;
def : InstAlias<"cmpw $rA, $rB", (CMPW CR0, gprc:$rA, gprc:$rB)>;
def : InstAlias<"cmplwi $rA, $imm", (CMPLWI CR0, gprc:$rA, u16imm:$imm)>;
def : InstAlias<"cmplw $rA, $rB", (CMPLW CR0, gprc:$rA, gprc:$rB)>;
def : InstAlias<"cmpdi $rA, $imm", (CMPDI CR0, g8rc:$rA, s16imm64:$imm)>;
def : InstAlias<"cmpd $rA, $rB", (CMPD CR0, g8rc:$rA, g8rc:$rB)>;
def : InstAlias<"cmpldi $rA, $imm", (CMPLDI CR0, g8rc:$rA, u16imm64:$imm)>;
def : InstAlias<"cmpld $rA, $rB", (CMPLD CR0, g8rc:$rA, g8rc:$rB)>;
def : InstAlias<"cmpi $bf, 0, $rA, $imm",
                (CMPWI crrc:$bf, gprc:$rA, s16imm:$imm)>;
def : InstAlias<"cmp $bf, 0, $rA, $rB",
                (CMPW crrc:$bf, gprc:$rA, gprc:$rB)>;
def : InstAlias<"cmpli $bf, 0, $rA, $imm",
                (CMPLWI crrc:$bf, gprc:$rA, u16imm:$imm)>;
def : InstAlias<"cmpl $bf, 0, $rA, $rB",
                (CMPLW crrc:$bf, gprc:$rA, gprc:$rB)>;
def : InstAlias<"cmpi $bf, 1, $rA, $imm",
                (CMPDI crrc:$bf, g8rc:$rA, s16imm64:$imm)>;
def : InstAlias<"cmp $bf, 1, $rA, $rB",
                (CMPD crrc:$bf, g8rc:$rA, g8rc:$rB)>;
def : InstAlias<"cmpli $bf, 1, $rA, $imm",
                (CMPLDI crrc:$bf, g8rc:$rA, u16imm64:$imm)>;
def : InstAlias<"cmpl $bf, 1, $rA, $rB",
                (CMPLD crrc:$bf, g8rc:$rA, g8rc:$rB)>;

multiclass TrapExtendedMnemonic<string name, int to> {
  def : InstAlias<"td"#name#"i $rA, $imm", (TDI to, g8rc:$rA, s16imm:$imm)>;
  def : InstAlias<"td"#name#" $rA, $rB", (TD to, g8rc:$rA, g8rc:$rB)>;
  def : InstAlias<"tw"#name#"i $rA, $imm", (TWI to, gprc:$rA, s16imm:$imm)>;
  def : InstAlias<"tw"#name#" $rA, $rB", (TW to, gprc:$rA, gprc:$rB)>;
}
defm : TrapExtendedMnemonic<"lt", 16>;
defm : TrapExtendedMnemonic<"le", 20>;
defm : TrapExtendedMnemonic<"eq", 4>;
defm : TrapExtendedMnemonic<"ge", 12>;
defm : TrapExtendedMnemonic<"gt", 8>;
defm : TrapExtendedMnemonic<"nl", 12>;
defm : TrapExtendedMnemonic<"ne", 24>;
defm : TrapExtendedMnemonic<"ng", 20>;
defm : TrapExtendedMnemonic<"llt", 2>;
defm : TrapExtendedMnemonic<"lle", 6>;
defm : TrapExtendedMnemonic<"lge", 5>;
defm : TrapExtendedMnemonic<"lgt", 1>;
defm : TrapExtendedMnemonic<"lnl", 5>;
defm : TrapExtendedMnemonic<"lng", 6>;
defm : TrapExtendedMnemonic<"u", 31>;

// Atomic loads
def : Pat<(atomic_load_8 iaddr:$src), (LBZ memri:$src)>;
def : Pat<(atomic_load_16 iaddr:$src), (LHZ memri:$src)>;
def : Pat<(atomic_load_32 iaddr:$src), (LWZ memri:$src)>;
def : Pat<(atomic_load_8 xaddr:$src), (LBZX memrr:$src)>;
def : Pat<(atomic_load_16 xaddr:$src), (LHZX memrr:$src)>;
def : Pat<(atomic_load_32 xaddr:$src), (LWZX memrr:$src)>;

// Atomic stores
def : Pat<(atomic_store_8 iaddr:$ptr, i32:$val), (STB gprc:$val, memri:$ptr)>;
def : Pat<(atomic_store_16 iaddr:$ptr, i32:$val), (STH gprc:$val, memri:$ptr)>;
def : Pat<(atomic_store_32 iaddr:$ptr, i32:$val), (STW gprc:$val, memri:$ptr)>;
def : Pat<(atomic_store_8 xaddr:$ptr, i32:$val), (STBX gprc:$val, memrr:$ptr)>;
def : Pat<(atomic_store_16 xaddr:$ptr, i32:$val), (STHX gprc:$val, memrr:$ptr)>;
def : Pat<(atomic_store_32 xaddr:$ptr, i32:$val), (STWX gprc:$val, memrr:$ptr)>;

let Predicates = [IsISA3_0] in {

// Copy-Paste Facility
// We prefix 'CP' to COPY due to name conflict in Target.td. We also prefix to
// PASTE for naming consistency.
let mayLoad = 1 in def CP_COPY : X_L1_RA5_RB5<31, 774, "copy" , gprc, IIC_LdStCOPY, []>; let mayStore = 1 in def CP_PASTE : X_L1_RA5_RB5<31, 902, "paste" , gprc, IIC_LdStPASTE, []>; let mayStore = 1, Defs = [CR0] in def CP_PASTEo : X_L1_RA5_RB5<31, 902, "paste.", gprc, IIC_LdStPASTE, []>, isDOT; def CP_COPYx : PPCAsmPseudo<"copy $rA, $rB" , (ins gprc:$rA, gprc:$rB)>; def CP_PASTEx : PPCAsmPseudo<"paste $rA, $rB", (ins gprc:$rA, gprc:$rB)>; def CP_COPY_FIRST : PPCAsmPseudo<"copy_first $rA, $rB", (ins gprc:$rA, gprc:$rB)>; def CP_PASTE_LAST : PPCAsmPseudo<"paste_last $rA, $rB", (ins gprc:$rA, gprc:$rB)>; def CP_ABORT : XForm_0<31, 838, (outs), (ins), "cp_abort", IIC_SprABORT, []>; // Message Synchronize def MSGSYNC : XForm_0<31, 886, (outs), (ins), "msgsync", IIC_SprMSGSYNC, []>; // Power-Saving Mode Instruction: def STOP : XForm_0<19, 370, (outs), (ins), "stop", IIC_SprSTOP, []>; } // IsISA3_0 // Fast 32-bit reverse bits algorithm: // Step 1: 1-bit swap (swap odd 1-bit and even 1-bit): // n = ((n >> 1) & 0x55555555) | ((n << 1) & 0xAAAAAAAA); // Step 2: 2-bit swap (swap odd 2-bit and even 2-bit): // n = ((n >> 2) & 0x33333333) | ((n << 2) & 0xCCCCCCCC); // Step 3: 4-bit swap (swap odd 4-bit and even 4-bit): // n = ((n >> 4) & 0x0F0F0F0F) | ((n << 4) & 0xF0F0F0F0); // Step 4: byte reverse (Suppose n = [B1,B2,B3,B4]): // Step 4.1: Put B4,B2 in the right position (rotate left 3 bytes): // n' = (n rotl 24); After which n' = [B4, B1, B2, B3] // Step 4.2: Insert B3 to the right position: // n' = rlwimi n', n, 8, 8, 15; After which n' = [B4, B3, B2, B3] // Step 4.3: Insert B1 to the right position: // n' = rlwimi n', n, 8, 24, 31; After which n' = [B4, B3, B2, B1] def MaskValues { dag Lo1 = (ORI (LIS 0x5555), 0x5555); dag Hi1 = (ORI (LIS 0xAAAA), 0xAAAA); dag Lo2 = (ORI (LIS 0x3333), 0x3333); dag Hi2 = (ORI (LIS 0xCCCC), 0xCCCC); dag Lo4 = (ORI (LIS 0x0F0F), 0x0F0F); dag Hi4 = (ORI (LIS 0xF0F0), 0xF0F0); } def Shift1 { dag Right = (RLWINM $A, 31, 1, 31); dag Left = (RLWINM $A, 1, 0, 30); } def Swap1 { dag Bit = (OR (AND Shift1.Right, MaskValues.Lo1), (AND Shift1.Left, MaskValues.Hi1)); } def Shift2 { dag Right = (RLWINM Swap1.Bit, 30, 2, 31); dag Left = (RLWINM Swap1.Bit, 2, 0, 29); } def Swap2 { dag Bits = (OR (AND Shift2.Right, MaskValues.Lo2), (AND Shift2.Left, MaskValues.Hi2)); } def Shift4 { dag Right = (RLWINM Swap2.Bits, 28, 4, 31); dag Left = (RLWINM Swap2.Bits, 4, 0, 27); } def Swap4 { dag Bits = (OR (AND Shift4.Right, MaskValues.Lo4), (AND Shift4.Left, MaskValues.Hi4)); } def Rotate { dag Left3Bytes = (RLWINM Swap4.Bits, 24, 0, 31); } def RotateInsertByte3 { dag Left = (RLWIMI Rotate.Left3Bytes, Swap4.Bits, 8, 8, 15); } def RotateInsertByte1 { dag Left = (RLWIMI RotateInsertByte3.Left, Swap4.Bits, 8, 24, 31); } def : Pat<(i32 (bitreverse i32:$A)), (RLDICL_32 RotateInsertByte1.Left, 0, 32)>; // Fast 64-bit reverse bits algorithm: // Step 1: 1-bit swap (swap odd 1-bit and even 1-bit): // n = ((n >> 1) & 0x5555555555555555) | ((n << 1) & 0xAAAAAAAAAAAAAAAA); // Step 2: 2-bit swap (swap odd 2-bit and even 2-bit): // n = ((n >> 2) & 0x3333333333333333) | ((n << 2) & 0xCCCCCCCCCCCCCCCC); // Step 3: 4-bit swap (swap odd 4-bit and even 4-bit): // n = ((n >> 4) & 0x0F0F0F0F0F0F0F0F) | ((n << 4) & 0xF0F0F0F0F0F0F0F0); // Step 4: byte reverse (Suppose n = [B0,B1,B2,B3,B4,B5,B6,B7]): // Apply the same byte reverse algorithm mentioned above for the fast 32-bit // reverse to both the high 32 bit and low 32 bit of the 64 bit value. And // then OR them together to get the final result. 
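// Editor's sketch (not part of the upstream source): in C-like terms, the
// 64-bit lowering below byte-reverses each 32-bit half with the same
// rotlwi/rlwimi sequence as steps 4.1-4.3 above and then swaps the halves,
// roughly (byte_reverse32 is a hypothetical helper naming that sequence):
//   uint32_t lo = byte_reverse32((uint32_t) n);         // B4..B7 -> B7 B6 B5 B4
//   uint32_t hi = byte_reverse32((uint32_t)(n >> 32));  // B0..B3 -> B3 B2 B1 B0
//   n = ((uint64_t)lo << 32) | hi;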
def MaskValues64 { dag Lo1 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo1, sub_32)); dag Hi1 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi1, sub_32)); dag Lo2 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo2, sub_32)); dag Hi2 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi2, sub_32)); dag Lo4 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo4, sub_32)); dag Hi4 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi4, sub_32)); } def DWMaskValues { dag Lo1 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo1, 32, 31), 0x5555), 0x5555); dag Hi1 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi1, 32, 31), 0xAAAA), 0xAAAA); dag Lo2 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo2, 32, 31), 0x3333), 0x3333); dag Hi2 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi2, 32, 31), 0xCCCC), 0xCCCC); dag Lo4 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo4, 32, 31), 0x0F0F), 0x0F0F); dag Hi4 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi4, 32, 31), 0xF0F0), 0xF0F0); } def DWSwapInByte { dag Swap1 = (OR8 (AND8 (RLDICL $A, 63, 1), DWMaskValues.Lo1), (AND8 (RLDICR $A, 1, 62), DWMaskValues.Hi1)); dag Swap2 = (OR8 (AND8 (RLDICL Swap1, 62, 2), DWMaskValues.Lo2), (AND8 (RLDICR Swap1, 2, 61), DWMaskValues.Hi2)); dag Swap4 = (OR8 (AND8 (RLDICL Swap2, 60, 4), DWMaskValues.Lo4), (AND8 (RLDICR Swap2, 4, 59), DWMaskValues.Hi4)); } // Intra-byte swap is done, now start inter-byte swap. def DWBytes4567 { dag Word = (i32 (EXTRACT_SUBREG DWSwapInByte.Swap4, sub_32)); } def DWBytes7456 { dag Word = (RLWINM DWBytes4567.Word, 24, 0, 31); } def DWBytes7656 { dag Word = (RLWIMI DWBytes7456.Word, DWBytes4567.Word, 8, 8, 15); } // B7 B6 B5 B4 in the right order def DWBytes7654 { dag Word = (RLWIMI DWBytes7656.Word, DWBytes4567.Word, 8, 24, 31); dag DWord = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), Word, sub_32)); } def DWBytes0123 { dag Word = (i32 (EXTRACT_SUBREG (RLDICL DWSwapInByte.Swap4, 32, 32), sub_32)); } def DWBytes3012 { dag Word = (RLWINM DWBytes0123.Word, 24, 0, 31); } def DWBytes3212 { dag Word = (RLWIMI DWBytes3012.Word, DWBytes0123.Word, 8, 8, 15); } // B3 B2 B1 B0 in the right order def DWBytes3210 { dag Word = (RLWIMI DWBytes3212.Word, DWBytes0123.Word, 8, 24, 31); dag DWord = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), Word, sub_32)); } // Now both high word and low word are reversed, next // swap the high word and low word. def : Pat<(i64 (bitreverse i64:$A)), (OR8 (RLDICR DWBytes7654.DWord, 32, 31), DWBytes3210.DWord)>; Index: vendor/llvm/dist-release_80/lib/Target/PowerPC/PPCSubtarget.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/Target/PowerPC/PPCSubtarget.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/PowerPC/PPCSubtarget.cpp (revision 348932) @@ -1,231 +1,234 @@ //===-- PowerPCSubtarget.cpp - PPC Subtarget Information ------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the PPC specific subclass of TargetSubtargetInfo. 
// //===----------------------------------------------------------------------===// #include "PPCSubtarget.h" #include "PPC.h" #include "PPCRegisterInfo.h" #include "PPCTargetMachine.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetMachine.h" #include using namespace llvm; #define DEBUG_TYPE "ppc-subtarget" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "PPCGenSubtargetInfo.inc" static cl::opt UseSubRegLiveness("ppc-track-subreg-liveness", cl::desc("Enable subregister liveness tracking for PPC"), cl::Hidden); static cl::opt QPXStackUnaligned("qpx-stack-unaligned", cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"), cl::Hidden); PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { initializeEnvironment(); initSubtargetFeatures(CPU, FS); return *this; } PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const PPCTargetMachine &TM) : PPCGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT), IsPPC64(TargetTriple.getArch() == Triple::ppc64 || TargetTriple.getArch() == Triple::ppc64le), TM(TM), FrameLowering(initializeSubtargetDependencies(CPU, FS)), InstrInfo(*this), TLInfo(TM, *this) {} void PPCSubtarget::initializeEnvironment() { StackAlignment = 16; DarwinDirective = PPC::DIR_NONE; HasMFOCRF = false; Has64BitSupport = false; Use64BitRegs = false; UseCRBits = false; HasHardFloat = false; HasAltivec = false; HasSPE = false; HasFPU = false; HasQPX = false; HasVSX = false; HasP8Vector = false; HasP8Altivec = false; HasP8Crypto = false; HasP9Vector = false; HasP9Altivec = false; HasFCPSGN = false; HasFSQRT = false; HasFRE = false; HasFRES = false; HasFRSQRTE = false; HasFRSQRTES = false; HasRecipPrec = false; HasSTFIWX = false; HasLFIWAX = false; HasFPRND = false; HasFPCVT = false; HasISEL = false; HasBPERMD = false; HasExtDiv = false; HasCMPB = false; HasLDBRX = false; IsBookE = false; HasOnlyMSYNC = false; IsPPC4xx = false; IsPPC6xx = false; IsE500 = false; FeatureMFTB = false; DeprecatedDST = false; HasLazyResolverStubs = false; HasICBT = false; HasInvariantFunctionDescriptors = false; HasPartwordAtomics = false; HasDirectMove = false; IsQPXStackUnaligned = false; HasHTM = false; HasFusion = false; HasFloat128 = false; IsISA3_0 = false; UseLongCalls = false; SecurePlt = false; HasPOPCNTD = POPCNTD_Unavailable; } void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { // Determine default and user specified characteristics std::string CPUName = CPU; if (CPUName.empty() || CPU == "generic") { // If cross-compiling with -march=ppc64le without -mcpu if (TargetTriple.getArch() == Triple::ppc64le) CPUName = "ppc64le"; else CPUName = "generic"; } // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUName); // Parse features string. ParseSubtargetFeatures(CPUName, FS); // If the user requested use of 64-bit regs, but the cpu selected doesn't // support it, ignore. if (IsPPC64 && has64BitSupport()) Use64BitRegs = true; // Set up darwin-specific properties. 
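// Editor's note (background, not upstream code): "secure PLT" refers to the
// read-only, GOT-based PLT ABI for 32-bit PowerPC ELF (what -msecure-plt
// selects), as opposed to the traditional BSS-PLT scheme that needs memory
// that is both writable and executable at run time. The change just below
// presumably defaults to it on NetBSD and OpenBSD because those systems
// enforce W^X.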
if (isDarwin()) HasLazyResolverStubs = true; + if (TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD()) + SecurePlt = true; + if (HasSPE && IsPPC64) report_fatal_error( "SPE is only supported for 32-bit targets.\n", false); if (HasSPE && (HasAltivec || HasQPX || HasVSX || HasFPU)) report_fatal_error( "SPE and traditional floating point cannot both be enabled.\n", false); // If not SPE, set standard FPU if (!HasSPE) HasFPU = true; // QPX requires a 32-byte aligned stack. Note that we need to do this if // we're compiling for a BG/Q system regardless of whether or not QPX // is enabled because external functions will assume this alignment. IsQPXStackUnaligned = QPXStackUnaligned; StackAlignment = getPlatformStackAlignment(); // Determine endianness. // FIXME: Part of the TargetMachine. IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le); } /// Return true if accesses to the specified global have to go through a dyld /// lazy resolution stub. This means that an extra load is required to get the /// address of the global. bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const { if (!HasLazyResolverStubs) return false; if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) return true; // 32 bit macho has no relocation for a-b if a is undefined, even if b is in // the section that is being relocated. This means we have to use o load even // for GVs that are known to be local to the dso. if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) return true; return false; } bool PPCSubtarget::enableMachineScheduler() const { return true; } // This overrides the PostRAScheduler bit in the SchedModel for each CPU. bool PPCSubtarget::enablePostRAScheduler() const { return true; } PPCGenSubtargetInfo::AntiDepBreakMode PPCSubtarget::getAntiDepBreakMode() const { return TargetSubtargetInfo::ANTIDEP_ALL; } void PPCSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const { CriticalPathRCs.clear(); CriticalPathRCs.push_back(isPPC64() ? &PPC::G8RCRegClass : &PPC::GPRCRegClass); } void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const { // The GenericScheduler that we use defaults to scheduling bottom up only. // We want to schedule from both the top and the bottom and so we set // OnlyBottomUp to false. // We want to do bi-directional scheduling since it provides a more balanced // schedule leading to better performance. Policy.OnlyBottomUp = false; // Spilling is generally expensive on all PPC cores, so always enable // register-pressure tracking. Policy.ShouldTrackPressure = true; } bool PPCSubtarget::useAA() const { return true; } bool PPCSubtarget::enableSubRegLiveness() const { return UseSubRegLiveness; } unsigned char PPCSubtarget::classifyGlobalReference(const GlobalValue *GV) const { // Note that currently we don't generate non-pic references. // If a caller wants that, this will have to be updated. // Large code model always uses the TOC even for local symbols. 
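// Editor's note (illustrative sketch, not upstream code): the returns below
// amount to a small decision table, where MO_NLP_FLAG is assumed to mean
// "access indirectly through the TOC / non-lazy pointer":
//   CodeModel::Large                      -> MO_PIC_FLAG | MO_NLP_FLAG
//   dso_local per shouldAssumeDSOLocal()  -> MO_PIC_FLAG            (direct)
//   anything else (maybe preemptible)     -> MO_PIC_FLAG | MO_NLP_FLAG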
if (TM.getCodeModel() == CodeModel::Large) return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG; if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) return PPCII::MO_PIC_FLAG; return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG; } bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); } bool PPCSubtarget::isPPC64() const { return TM.isPPC64(); } Index: vendor/llvm/dist-release_80/lib/Target/Sparc/SparcRegisterInfo.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/Target/Sparc/SparcRegisterInfo.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/Sparc/SparcRegisterInfo.cpp (revision 348932) @@ -1,241 +1,241 @@ //===-- SparcRegisterInfo.cpp - SPARC Register Information ----------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file contains the SPARC implementation of the TargetRegisterInfo class. // //===----------------------------------------------------------------------===// #include "SparcRegisterInfo.h" #include "Sparc.h" #include "SparcMachineFunctionInfo.h" #include "SparcSubtarget.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; #define GET_REGINFO_TARGET_DESC #include "SparcGenRegisterInfo.inc" static cl::opt ReserveAppRegisters("sparc-reserve-app-registers", cl::Hidden, cl::init(false), cl::desc("Reserve application registers (%g2-%g4)")); SparcRegisterInfo::SparcRegisterInfo() : SparcGenRegisterInfo(SP::O7) {} const MCPhysReg* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_SaveList; } const uint32_t * SparcRegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { return CSR_RegMask; } const uint32_t* SparcRegisterInfo::getRTCallPreservedMask(CallingConv::ID CC) const { return RTCSR_RegMask; } BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); const SparcSubtarget &Subtarget = MF.getSubtarget(); // FIXME: G1 reserved for now for large imm generation by frame code. Reserved.set(SP::G1); // G1-G4 can be used in applications. if (ReserveAppRegisters) { Reserved.set(SP::G2); Reserved.set(SP::G3); Reserved.set(SP::G4); } // G5 is not reserved in 64 bit mode. if (!Subtarget.is64Bit()) Reserved.set(SP::G5); Reserved.set(SP::O6); Reserved.set(SP::I6); Reserved.set(SP::I7); Reserved.set(SP::G0); Reserved.set(SP::G6); Reserved.set(SP::G7); // Also reserve the register pair aliases covering the above // registers, with the same conditions. Reserved.set(SP::G0_G1); if (ReserveAppRegisters) Reserved.set(SP::G2_G3); if (ReserveAppRegisters || !Subtarget.is64Bit()) Reserved.set(SP::G4_G5); Reserved.set(SP::O6_O7); Reserved.set(SP::I6_I7); Reserved.set(SP::G6_G7); // Unaliased double registers are not available in non-V9 targets. 
if (!Subtarget.isV9()) { for (unsigned n = 0; n != 16; ++n) { for (MCRegAliasIterator AI(SP::D16 + n, this, true); AI.isValid(); ++AI) Reserved.set(*AI); } } // Reserve ASR1-ASR31 for (unsigned n = 0; n < 31; n++) Reserved.set(SP::ASR1 + n); return Reserved; } const TargetRegisterClass* SparcRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { const SparcSubtarget &Subtarget = MF.getSubtarget(); return Subtarget.is64Bit() ? &SP::I64RegsRegClass : &SP::IntRegsRegClass; } static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II, MachineInstr &MI, const DebugLoc &dl, unsigned FIOperandNum, int Offset, unsigned FramePtr) { // Replace frame index with a frame pointer reference. if (Offset >= -4096 && Offset <= 4095) { // If the offset is small enough to fit in the immediate field, directly // encode it. MI.getOperand(FIOperandNum).ChangeToRegister(FramePtr, false); MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); return; } const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); // FIXME: it would be better to scavenge a register here instead of // reserving G1 all of the time. if (Offset >= 0) { // Emit nonnegaive immediates with sethi + or. // sethi %hi(Offset), %g1 // add %g1, %fp, %g1 // Insert G1+%lo(offset) into the user. BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1) .addImm(HI22(Offset)); // Emit G1 = G1 + I6 BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1) .addReg(FramePtr); // Insert: G1+%lo(offset) into the user. MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false); MI.getOperand(FIOperandNum + 1).ChangeToImmediate(LO10(Offset)); return; } // Emit Negative numbers with sethi + xor // sethi %hix(Offset), %g1 // xor %g1, %lox(offset), %g1 // add %g1, %fp, %g1 // Insert: G1 + 0 into the user. BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1) .addImm(HIX22(Offset)); BuildMI(*MI.getParent(), II, dl, TII.get(SP::XORri), SP::G1) .addReg(SP::G1).addImm(LOX10(Offset)); BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1) .addReg(FramePtr); // Insert: G1+%lo(offset) into the user. 
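// Editor's sketch (not upstream code): for a positive offset outside the
// 13-bit signed range checked above, the sequence just built splits it as
//   Offset == (HI22(Offset) << 10) | LO10(Offset)
// e.g. (hypothetical value) Offset = 0x12345:
//   sethi %hi(0x12345), %g1    ! %g1 = 0x12000
//   add   %g1, %fp, %g1
//   ...   [%g1 + 0x345]        ! LO10 folded into the user instruction
// The negative path above instead materializes the whole offset with
// sethi %hix / xor %lox, so the user's immediate becomes 0.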
MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false); MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0); } void SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); MachineInstr &MI = *II; DebugLoc dl = MI.getDebugLoc(); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); MachineFunction &MF = *MI.getParent()->getParent(); const SparcSubtarget &Subtarget = MF.getSubtarget(); const SparcFrameLowering *TFI = getFrameLowering(MF); unsigned FrameReg; int Offset; Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg); Offset += MI.getOperand(FIOperandNum + 1).getImm(); if (!Subtarget.isV9() || !Subtarget.hasHardQuad()) { if (MI.getOpcode() == SP::STQFri) { const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); unsigned SrcReg = MI.getOperand(2).getReg(); unsigned SrcEvenReg = getSubReg(SrcReg, SP::sub_even64); unsigned SrcOddReg = getSubReg(SrcReg, SP::sub_odd64); MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(SP::STDFri)) .addReg(FrameReg).addImm(0).addReg(SrcEvenReg); - replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg); + replaceFI(MF, *StMI, *StMI, dl, 0, Offset, FrameReg); MI.setDesc(TII.get(SP::STDFri)); MI.getOperand(2).setReg(SrcOddReg); Offset += 8; } else if (MI.getOpcode() == SP::LDQFri) { const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); unsigned DestReg = MI.getOperand(0).getReg(); unsigned DestEvenReg = getSubReg(DestReg, SP::sub_even64); unsigned DestOddReg = getSubReg(DestReg, SP::sub_odd64); MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(SP::LDDFri), DestEvenReg) .addReg(FrameReg).addImm(0); - replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg); + replaceFI(MF, *StMI, *StMI, dl, 1, Offset, FrameReg); MI.setDesc(TII.get(SP::LDDFri)); MI.getOperand(0).setReg(DestOddReg); Offset += 8; } } replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FrameReg); } unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return SP::I6; } // Sparc has no architectural need for stack realignment support, // except that LLVM unfortunately currently implements overaligned // stack objects by depending upon stack realignment support. // If that ever changes, this can probably be deleted. bool SparcRegisterInfo::canRealignStack(const MachineFunction &MF) const { if (!TargetRegisterInfo::canRealignStack(MF)) return false; // Sparc always has a fixed frame pointer register, so don't need to // worry about needing to reserve it. [even if we don't have a frame // pointer for our frame, it still cannot be used for other things, // or register window traps will be SADNESS.] // If there's a reserved call frame, we can use SP to access locals. if (getFrameLowering(MF)->hasReservedCallFrame(MF)) return true; // Otherwise, we'd need a base pointer, but those aren't implemented // for SPARC at the moment. return false; } Index: vendor/llvm/dist-release_80/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp (revision 348932) @@ -1,1229 +1,1232 @@ //=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// /// /// \file /// This file implements the WebAssemblyTargetLowering class. /// //===----------------------------------------------------------------------===// #include "WebAssemblyISelLowering.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyTargetMachine.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/WasmEHFuncInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; #define DEBUG_TYPE "wasm-lower" WebAssemblyTargetLowering::WebAssemblyTargetLowering( const TargetMachine &TM, const WebAssemblySubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32; // Booleans always contain 0 or 1. setBooleanContents(ZeroOrOneBooleanContent); // Except in SIMD vectors setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // WebAssembly does not produce floating-point exceptions on normal floating // point operations. setHasFloatingPointExceptions(false); // We don't know the microarchitecture here, so just reduce register pressure. setSchedulingPreference(Sched::RegPressure); // Tell ISel that we have a stack pointer. setStackPointerRegisterToSaveRestore( Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32); // Set up the register classes. addRegisterClass(MVT::i32, &WebAssembly::I32RegClass); addRegisterClass(MVT::i64, &WebAssembly::I64RegClass); addRegisterClass(MVT::f32, &WebAssembly::F32RegClass); addRegisterClass(MVT::f64, &WebAssembly::F64RegClass); if (Subtarget->hasSIMD128()) { addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass); addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass); addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass); addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass); if (Subtarget->hasUnimplementedSIMD128()) { addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass); addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass); } } // Compute derived properties from the register classes. computeRegisterProperties(Subtarget->getRegisterInfo()); setOperationAction(ISD::GlobalAddress, MVTPtr, Custom); setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom); setOperationAction(ISD::JumpTable, MVTPtr, Custom); setOperationAction(ISD::BlockAddress, MVTPtr, Custom); setOperationAction(ISD::BRIND, MVT::Other, Custom); // Take the default expansion for va_arg, va_copy, and va_end. There is no // default action for va_start, so we do that custom. setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::Other, Expand); setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::VAEND, MVT::Other, Expand); for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) { // Don't expand the floating-point types to constant pools. setOperationAction(ISD::ConstantFP, T, Legal); // Expand floating-point comparisons. 
for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE, ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE}) setCondCodeAction(CC, T, Expand); // Expand floating-point library function operators. for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA}) setOperationAction(Op, T, Expand); // Note supported floating-point library function operators that otherwise // default to expand. for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT}) setOperationAction(Op, T, Legal); // Support minimum and maximum, which otherwise default to expand. setOperationAction(ISD::FMINIMUM, T, Legal); setOperationAction(ISD::FMAXIMUM, T, Legal); // WebAssembly currently has no builtin f16 support. setOperationAction(ISD::FP16_TO_FP, T, Expand); setOperationAction(ISD::FP_TO_FP16, T, Expand); setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand); setTruncStoreAction(T, MVT::f16, Expand); } // Support saturating add for i8x16 and i16x8 if (Subtarget->hasSIMD128()) for (auto T : {MVT::v16i8, MVT::v8i16}) for (auto Op : {ISD::SADDSAT, ISD::UADDSAT}) setOperationAction(Op, T, Legal); // Expand unavailable integer operations. for (auto Op : {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU, ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) { for (auto T : {MVT::i32, MVT::i64}) { setOperationAction(Op, T, Expand); } if (Subtarget->hasSIMD128()) { for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) { setOperationAction(Op, T, Expand); } if (Subtarget->hasUnimplementedSIMD128()) { setOperationAction(Op, MVT::v2i64, Expand); } } } // There is no i64x2.mul instruction setOperationAction(ISD::MUL, MVT::v2i64, Expand); // We have custom shuffle lowering to expose the shuffle mask if (Subtarget->hasSIMD128()) { for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) { setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom); } if (Subtarget->hasUnimplementedSIMD128()) { setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); } } // Custom lowering since wasm shifts must have a scalar shift amount if (Subtarget->hasSIMD128()) { for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32}) for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL}) setOperationAction(Op, T, Custom); if (Subtarget->hasUnimplementedSIMD128()) for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL}) setOperationAction(Op, MVT::v2i64, Custom); } // There are no select instructions for vectors if (Subtarget->hasSIMD128()) for (auto Op : {ISD::VSELECT, ISD::SELECT_CC, ISD::SELECT}) { for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) setOperationAction(Op, T, Expand); if (Subtarget->hasUnimplementedSIMD128()) for (auto T : {MVT::v2i64, MVT::v2f64}) setOperationAction(Op, T, Expand); } // As a special case, these operators use the type to mean the type to // sign-extend from. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (!Subtarget->hasSignExt()) { // Sign extends are legal only when extending a vector extract auto Action = Subtarget->hasSIMD128() ? Custom : Expand; for (auto T : {MVT::i8, MVT::i16, MVT::i32}) setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action); } for (auto T : MVT::integer_vector_valuetypes()) setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand); // Dynamic stack allocation: use the default expansion. 
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand); setOperationAction(ISD::FrameIndex, MVT::i32, Custom); setOperationAction(ISD::CopyToReg, MVT::Other, Custom); // Expand these forms; we pattern-match the forms that we can handle in isel. for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) for (auto Op : {ISD::BR_CC, ISD::SELECT_CC}) setOperationAction(Op, T, Expand); // We have custom switch handling. setOperationAction(ISD::BR_JT, MVT::Other, Custom); // WebAssembly doesn't have: // - Floating-point extending loads. // - Floating-point truncating stores. // - i1 extending loads. // - extending/truncating SIMD loads/stores setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); for (auto T : MVT::integer_valuetypes()) for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD}) setLoadExtAction(Ext, T, MVT::i1, Promote); if (Subtarget->hasSIMD128()) { for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64}) { for (auto MemT : MVT::vector_valuetypes()) { if (MVT(T) != MemT) { setTruncStoreAction(T, MemT, Expand); for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD}) setLoadExtAction(Ext, T, MemT, Expand); } } } } // Expand additional SIMD ops that V8 hasn't implemented yet if (Subtarget->hasSIMD128() && !Subtarget->hasUnimplementedSIMD128()) { setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); setOperationAction(ISD::FDIV, MVT::v4f32, Expand); } // Custom lower lane accesses to expand out variable indices if (Subtarget->hasSIMD128()) { for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) { setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom); } if (Subtarget->hasUnimplementedSIMD128()) { for (auto T : {MVT::v2i64, MVT::v2f64}) { setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom); } } } // Trap lowers to wasm unreachable setOperationAction(ISD::TRAP, MVT::Other, Legal); // Exception handling intrinsics setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setMaxAtomicSizeInBitsSupported(64); } TargetLowering::AtomicExpansionKind WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { // We have wasm instructions for these switch (AI->getOperation()) { case AtomicRMWInst::Add: case AtomicRMWInst::Sub: case AtomicRMWInst::And: case AtomicRMWInst::Or: case AtomicRMWInst::Xor: case AtomicRMWInst::Xchg: return AtomicExpansionKind::None; default: break; } return AtomicExpansionKind::CmpXChg; } FastISel *WebAssemblyTargetLowering::createFastISel( FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const { return WebAssembly::createFastISel(FuncInfo, LibInfo); } bool WebAssemblyTargetLowering::isOffsetFoldingLegal( const GlobalAddressSDNode * /*GA*/) const { // All offsets can be folded. return true; } MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/, EVT VT) const { unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1); if (BitWidth > 1 && BitWidth < 8) BitWidth = 8; if (BitWidth > 64) { // The shift will be lowered to a libcall, and compiler-rt libcalls expect // the count to be an i32. 
BitWidth = 32; assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) && "32-bit shift counts ought to be enough for anyone"); } MVT Result = MVT::getIntegerVT(BitWidth); assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE && "Unable to represent scalar shift amount type"); return Result; } // Lower an fp-to-int conversion operator from the LLVM opcode, which has an // undefined result on invalid/overflow, to the WebAssembly opcode, which // traps on invalid/overflow. static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool IsUnsigned, bool Int64, bool Float64, unsigned LoweredOpcode) { MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); unsigned OutReg = MI.getOperand(0).getReg(); unsigned InReg = MI.getOperand(1).getReg(); unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32; unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32; unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32; unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32; unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32; unsigned Eqz = WebAssembly::EQZ_I32; unsigned And = WebAssembly::AND_I32; int64_t Limit = Int64 ? INT64_MIN : INT32_MIN; int64_t Substitute = IsUnsigned ? 0 : Limit; double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit; auto &Context = BB->getParent()->getFunction().getContext(); Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context); const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = ++BB->getIterator(); F->insert(It, FalseMBB); F->insert(It, TrueMBB); F->insert(It, DoneMBB); // Transfer the remainder of BB and its successor edges to DoneMBB. DoneMBB->splice(DoneMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); DoneMBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(TrueMBB); BB->addSuccessor(FalseMBB); TrueMBB->addSuccessor(DoneMBB); FalseMBB->addSuccessor(DoneMBB); unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg; Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg)); TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg)); MI.eraseFromParent(); // For signed numbers, we can do a single comparison to determine whether // fabs(x) is within range. if (IsUnsigned) { Tmp0 = InReg; } else { BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg); } BuildMI(BB, DL, TII.get(FConst), Tmp1) .addFPImm(cast(ConstantFP::get(Ty, CmpVal))); BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1); // For unsigned numbers, we have to do a separate comparison with zero. 
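// Editor's sketch (illustrative only, not upstream code): the CFG diamond
// built by this function computes, in C-like terms,
//   signed:   out = (fabs(in) < CmpVal)        ? trunc(in) : Limit;
//   unsigned: out = (in < CmpVal && in >= 0.0) ? trunc(in) : 0;
// with CmpVal = -(double)Limit (signed) or -(double)Limit * 2.0 (unsigned),
// and trunc standing for the trapping LoweredOpcode conversion. Because LT/GE
// are ordered comparisons, a NaN input also fails the check and takes the
// Substitute value instead of reaching the trapping wasm trunc.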
if (IsUnsigned) { Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg)); unsigned SecondCmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); unsigned AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); BuildMI(BB, DL, TII.get(FConst), Tmp1) .addFPImm(cast(ConstantFP::get(Ty, 0.0))); BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1); BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg); CmpReg = AndReg; } BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg); // Create the CFG diamond to select between doing the conversion or using // the substitute value. BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg); BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg); BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB); BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute); BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg) .addReg(FalseReg) .addMBB(FalseMBB) .addReg(TrueReg) .addMBB(TrueMBB); return DoneMBB; } MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *BB) const { const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected instr type to insert"); case WebAssembly::FP_TO_SINT_I32_F32: return LowerFPToInt(MI, DL, BB, TII, false, false, false, WebAssembly::I32_TRUNC_S_F32); case WebAssembly::FP_TO_UINT_I32_F32: return LowerFPToInt(MI, DL, BB, TII, true, false, false, WebAssembly::I32_TRUNC_U_F32); case WebAssembly::FP_TO_SINT_I64_F32: return LowerFPToInt(MI, DL, BB, TII, false, true, false, WebAssembly::I64_TRUNC_S_F32); case WebAssembly::FP_TO_UINT_I64_F32: return LowerFPToInt(MI, DL, BB, TII, true, true, false, WebAssembly::I64_TRUNC_U_F32); case WebAssembly::FP_TO_SINT_I32_F64: return LowerFPToInt(MI, DL, BB, TII, false, false, true, WebAssembly::I32_TRUNC_S_F64); case WebAssembly::FP_TO_UINT_I32_F64: return LowerFPToInt(MI, DL, BB, TII, true, false, true, WebAssembly::I32_TRUNC_U_F64); case WebAssembly::FP_TO_SINT_I64_F64: return LowerFPToInt(MI, DL, BB, TII, false, true, true, WebAssembly::I64_TRUNC_S_F64); case WebAssembly::FP_TO_UINT_I64_F64: return LowerFPToInt(MI, DL, BB, TII, true, true, true, WebAssembly::I64_TRUNC_U_F64); llvm_unreachable("Unexpected instruction to emit with custom inserter"); } } const char * WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (static_cast(Opcode)) { case WebAssemblyISD::FIRST_NUMBER: break; #define HANDLE_NODETYPE(NODE) \ case WebAssemblyISD::NODE: \ return "WebAssemblyISD::" #NODE; #include "WebAssemblyISD.def" #undef HANDLE_NODETYPE } return nullptr; } std::pair WebAssemblyTargetLowering::getRegForInlineAsmConstraint( const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { // First, see if this is a constraint that directly corresponds to a // WebAssembly register class. 
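// Editor's note (illustrative example, not upstream code): in practice the
// single-letter handling below means that, e.g.,
//   asm("" : "=r"(x));
// resolves to I32RegClass for an integer x of 32 bits or less, I64RegClass
// for a 64-bit integer, and V128RegClass for a 128-bit vector when SIMD128 is
// enabled; everything else falls through to the generic TargetLowering logic.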
if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': assert(VT != MVT::iPTR && "Pointer MVT not expected here"); if (Subtarget->hasSIMD128() && VT.isVector()) { if (VT.getSizeInBits() == 128) return std::make_pair(0U, &WebAssembly::V128RegClass); } if (VT.isInteger() && !VT.isVector()) { if (VT.getSizeInBits() <= 32) return std::make_pair(0U, &WebAssembly::I32RegClass); if (VT.getSizeInBits() <= 64) return std::make_pair(0U, &WebAssembly::I64RegClass); } break; default: break; } } return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const { // Assume ctz is a relatively cheap operation. return true; } bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const { // Assume clz is a relatively cheap operation. return true; } bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const { // WebAssembly offsets are added as unsigned without wrapping. The // isLegalAddressingMode gives us no way to determine if wrapping could be // happening, so we approximate this by accepting only non-negative offsets. if (AM.BaseOffs < 0) return false; // WebAssembly has no scale register operands. if (AM.Scale != 0) return false; // Everything else is legal. return true; } bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses( EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/, bool *Fast) const { // WebAssembly supports unaligned accesses, though it should be declared // with the p2align attribute on loads and stores which do so, and there // may be a performance impact. We tell LLVM they're "fast" because // for the kinds of things that LLVM uses this for (merging adjacent stores // of constants, etc.), WebAssembly implementations will either want the // unaligned access or they'll split anyway. if (Fast) *Fast = true; return true; } bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const { // The current thinking is that wasm engines will perform this optimization, // so we can save on code size. return true; } EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, EVT VT) const { if (VT.isVector()) return VT.changeVectorElementTypeToInteger(); return TargetLowering::getSetCCResultType(DL, C, VT); } bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { case Intrinsic::wasm_atomic_notify: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i32; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = 4; // atomic.notify instruction does not really load the memory specified with // this argument, but MachineMemOperand should either be load or store, so // we set this to a load. // FIXME Volatile isn't really correct, but currently all LLVM atomic // instructions are treated as volatiles in the backend, so we should be // consistent. The same applies for wasm_atomic_wait intrinsics too. 
Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; return true; case Intrinsic::wasm_atomic_wait_i32: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i32; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = 4; Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; return true; case Intrinsic::wasm_atomic_wait_i64: Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = 8; Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; return true; default: return false; } } //===----------------------------------------------------------------------===// // WebAssembly Lowering private implementation. //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *msg) { MachineFunction &MF = DAG.getMachineFunction(); DAG.getContext()->diagnose( DiagnosticInfoUnsupported(MF.getFunction(), msg, DL.getDebugLoc())); } // Test whether the given calling convention is supported. static bool CallingConvSupported(CallingConv::ID CallConv) { // We currently support the language-independent target-independent // conventions. We don't yet have a way to annotate calls with properties like // "cold", and we don't have any call-clobbered registers, so these are mostly // all handled the same. return CallConv == CallingConv::C || CallConv == CallingConv::Fast || CallConv == CallingConv::Cold || CallConv == CallingConv::PreserveMost || CallConv == CallingConv::PreserveAll || CallConv == CallingConv::CXX_FAST_TLS; } SDValue WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc DL = CLI.DL; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; MachineFunction &MF = DAG.getMachineFunction(); auto Layout = MF.getDataLayout(); CallingConv::ID CallConv = CLI.CallConv; if (!CallingConvSupported(CallConv)) fail(DL, DAG, "WebAssembly doesn't support language-specific or target-specific " "calling conventions yet"); if (CLI.IsPatchPoint) fail(DL, DAG, "WebAssembly doesn't support patch point yet"); // WebAssembly doesn't currently support explicit tail calls. If they are // required, fail. Otherwise, just disable them. 
if ((CallConv == CallingConv::Fast && CLI.IsTailCall && MF.getTarget().Options.GuaranteedTailCallOpt) || (CLI.CS && CLI.CS.isMustTailCall())) fail(DL, DAG, "WebAssembly doesn't support tail call yet"); CLI.IsTailCall = false; SmallVectorImpl &Ins = CLI.Ins; if (Ins.size() > 1) fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet"); SmallVectorImpl &Outs = CLI.Outs; SmallVectorImpl &OutVals = CLI.OutVals; unsigned NumFixedArgs = 0; for (unsigned i = 0; i < Outs.size(); ++i) { const ISD::OutputArg &Out = Outs[i]; SDValue &OutVal = OutVals[i]; if (Out.Flags.isNest()) fail(DL, DAG, "WebAssembly hasn't implemented nest arguments"); if (Out.Flags.isInAlloca()) fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments"); if (Out.Flags.isInConsecutiveRegs()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments"); if (Out.Flags.isInConsecutiveRegsLast()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments"); if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) { auto &MFI = MF.getFrameInfo(); int FI = MFI.CreateStackObject(Out.Flags.getByValSize(), Out.Flags.getByValAlign(), /*isSS=*/false); SDValue SizeNode = DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32); SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout)); Chain = DAG.getMemcpy( Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getByValAlign(), /*isVolatile*/ false, /*AlwaysInline=*/false, /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo()); OutVal = FINode; } // Count the number of fixed args *after* legalization. NumFixedArgs += Out.IsFixed; } bool IsVarArg = CLI.IsVarArg; auto PtrVT = getPointerTy(Layout); // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); if (IsVarArg) { // Outgoing non-fixed arguments are placed in a buffer. First // compute their offsets and the total amount of buffer space needed. - for (SDValue Arg : - make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) { + for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) { + const ISD::OutputArg &Out = Outs[I]; + SDValue &Arg = OutVals[I]; EVT VT = Arg.getValueType(); assert(VT != MVT::iPTR && "Legalized args should be concrete"); Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + unsigned Align = std::max(Out.Flags.getOrigAlign(), + Layout.getABITypeAlignment(Ty)); unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), - Layout.getABITypeAlignment(Ty)); + Align); CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(), Offset, VT.getSimpleVT(), CCValAssign::Full)); } } unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); SDValue FINode; if (IsVarArg && NumBytes) { // For non-fixed arguments, next emit stores to store the argument values // to the stack buffer at the offsets computed above. 
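// Editor's sketch (illustrative, not upstream code): the stores emitted below
// use the offsets assigned above via
//   Offset = CCInfo.AllocateStack(TypeAllocSize(Ty),
//                                 max(Out.Flags.getOrigAlign(),
//                                     ABITypeAlignment(Ty)));
// so, for a hypothetical wasm32 call f(fmt, (int)a, (double)b), 'a' would land
// at offset 0 (size 4, align 4) and 'b' at offset 8 (size 8, align 8) in the
// vararg buffer. Taking the max with getOrigAlign() is the change made in this
// revision, so arguments the front end over-aligns keep that alignment in the
// buffer.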
int FI = MF.getFrameInfo().CreateStackObject(NumBytes, Layout.getStackAlignment(), /*isSS=*/false); unsigned ValNo = 0; SmallVector Chains; for (SDValue Arg : make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) { assert(ArgLocs[ValNo].getValNo() == ValNo && "ArgLocs should remain in order and only hold varargs args"); unsigned Offset = ArgLocs[ValNo++].getLocMemOffset(); FINode = DAG.getFrameIndex(FI, getPointerTy(Layout)); SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode, DAG.getConstant(Offset, DL, PtrVT)); Chains.push_back( DAG.getStore(Chain, DL, Arg, Add, MachinePointerInfo::getFixedStack(MF, FI, Offset), 0)); } if (!Chains.empty()) Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } else if (IsVarArg) { FINode = DAG.getIntPtrConstant(0, DL); } // Compute the operands for the CALLn node. SmallVector Ops; Ops.push_back(Chain); Ops.push_back(Callee); // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs // isn't reliable. Ops.append(OutVals.begin(), IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end()); // Add a pointer to the vararg buffer. if (IsVarArg) Ops.push_back(FINode); SmallVector InTys; for (const auto &In : Ins) { assert(!In.Flags.isByVal() && "byval is not valid for return values"); assert(!In.Flags.isNest() && "nest is not valid for return values"); if (In.Flags.isInAlloca()) fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values"); if (In.Flags.isInConsecutiveRegs()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values"); if (In.Flags.isInConsecutiveRegsLast()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs last return values"); // Ignore In.getOrigAlign() because all our arguments are passed in // registers. InTys.push_back(In.VT); } InTys.push_back(MVT::Other); SDVTList InTyList = DAG.getVTList(InTys); SDValue Res = DAG.getNode(Ins.empty() ? WebAssemblyISD::CALL0 : WebAssemblyISD::CALL1, DL, InTyList, Ops); if (Ins.empty()) { Chain = Res; } else { InVals.push_back(Res); Chain = Res.getValue(1); } return Chain; } bool WebAssemblyTargetLowering::CanLowerReturn( CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/, const SmallVectorImpl &Outs, LLVMContext & /*Context*/) const { // WebAssembly can't currently handle returning tuples. return Outs.size() <= 1; } SDValue WebAssemblyTargetLowering::LowerReturn( SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const { assert(Outs.size() <= 1 && "WebAssembly can only return up to one value"); if (!CallingConvSupported(CallConv)) fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); SmallVector RetOps(1, Chain); RetOps.append(OutVals.begin(), OutVals.end()); Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps); // Record the number and types of the return values. 
for (const ISD::OutputArg &Out : Outs) { assert(!Out.Flags.isByVal() && "byval is not valid for return values"); assert(!Out.Flags.isNest() && "nest is not valid for return values"); assert(Out.IsFixed && "non-fixed return value is not valid"); if (Out.Flags.isInAlloca()) fail(DL, DAG, "WebAssembly hasn't implemented inalloca results"); if (Out.Flags.isInConsecutiveRegs()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs results"); if (Out.Flags.isInConsecutiveRegsLast()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results"); } return Chain; } SDValue WebAssemblyTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const { if (!CallingConvSupported(CallConv)) fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); MachineFunction &MF = DAG.getMachineFunction(); auto *MFI = MF.getInfo(); // Set up the incoming ARGUMENTS value, which serves to represent the liveness // of the incoming values before they're represented by virtual registers. MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS); for (const ISD::InputArg &In : Ins) { if (In.Flags.isInAlloca()) fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments"); if (In.Flags.isNest()) fail(DL, DAG, "WebAssembly hasn't implemented nest arguments"); if (In.Flags.isInConsecutiveRegs()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments"); if (In.Flags.isInConsecutiveRegsLast()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments"); // Ignore In.getOrigAlign() because all our arguments are passed in // registers. InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT, DAG.getTargetConstant(InVals.size(), DL, MVT::i32)) : DAG.getUNDEF(In.VT)); // Record the number and types of arguments. MFI->addParam(In.VT); } // Varargs are copied into a buffer allocated by the caller, and a pointer to // the buffer is passed as an argument. if (IsVarArg) { MVT PtrVT = getPointerTy(MF.getDataLayout()); unsigned VarargVreg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT)); MFI->setVarargBufferVreg(VarargVreg); Chain = DAG.getCopyToReg( Chain, DL, VarargVreg, DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT, DAG.getTargetConstant(Ins.size(), DL, MVT::i32))); MFI->addParam(PtrVT); } // Record the number and types of arguments and results. SmallVector Params; SmallVector Results; ComputeSignatureVTs(MF.getFunction().getFunctionType(), MF.getFunction(), DAG.getTarget(), Params, Results); for (MVT VT : Results) MFI->addResult(VT); // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify // the param logic here with ComputeSignatureVTs assert(MFI->getParams().size() == Params.size() && std::equal(MFI->getParams().begin(), MFI->getParams().end(), Params.begin())); return Chain; } //===----------------------------------------------------------------------===// // Custom lowering hooks. 
//===----------------------------------------------------------------------===// SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); switch (Op.getOpcode()) { default: llvm_unreachable("unimplemented operation lowering"); return SDValue(); case ISD::FrameIndex: return LowerFrameIndex(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::BlockAddress: case ISD::BRIND: fail(DL, DAG, "WebAssembly hasn't implemented computed gotos"); return SDValue(); case ISD::RETURNADDR: // Probably nothing meaningful can be returned here. fail(DL, DAG, "WebAssembly hasn't implemented __builtin_return_address"); return SDValue(); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::CopyToReg: return LowerCopyToReg(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: case ISD::INSERT_VECTOR_ELT: return LowerAccessVectorElement(Op, DAG); case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG); case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::SHL: case ISD::SRA: case ISD::SRL: return LowerShift(Op, DAG); } } SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op, SelectionDAG &DAG) const { SDValue Src = Op.getOperand(2); if (isa(Src.getNode())) { // CopyToReg nodes don't support FrameIndex operands. Other targets select // the FI to some LEA-like instruction, but since we don't have that, we // need to insert some kind of instruction that can take an FI operand and // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy // local.copy between Op and its FI operand. SDValue Chain = Op.getOperand(0); SDLoc DL(Op); unsigned Reg = cast(Op.getOperand(1))->getReg(); EVT VT = Src.getValueType(); SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32 : WebAssembly::COPY_I64, DL, VT, Src), 0); return Op.getNode()->getNumValues() == 1 ? DAG.getCopyToReg(Chain, DL, Reg, Copy) : DAG.getCopyToReg(Chain, DL, Reg, Copy, Op.getNumOperands() == 4 ? Op.getOperand(3) : SDValue()); } return SDValue(); } SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const { int FI = cast(Op)->getIndex(); return DAG.getTargetFrameIndex(FI, Op.getValueType()); } SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // Non-zero depths are not supported by WebAssembly currently. Use the // legalizer's default expansion, which is to return 0 (what this function is // documented to do). 
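// Editor's note (illustrative example, not upstream code): assuming the usual
// clang/GCC builtin, this corresponds to
//   void *fp = __builtin_frame_address(0); // custom-lowered to a copy of the frame register
//   void *up = __builtin_frame_address(1); // depth > 0: default expansion yields 0
// since only depth 0 is supported on WebAssembly.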
if (Op.getConstantOperandVal(0) > 0) return SDValue(); DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); unsigned FP = Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction()); return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT); } SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); const auto *GA = cast(Op); EVT VT = Op.getValueType(); assert(GA->getTargetFlags() == 0 && "Unexpected target flags on generic GlobalAddressSDNode"); if (GA->getAddressSpace() != 0) fail(DL, DAG, "WebAssembly only expects the 0 address space"); return DAG.getNode( WebAssemblyISD::Wrapper, DL, VT, DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset())); } SDValue WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); const auto *ES = cast(Op); EVT VT = Op.getValueType(); assert(ES->getTargetFlags() == 0 && "Unexpected target flags on generic ExternalSymbolSDNode"); // Set the TargetFlags to 0x1 which indicates that this is a "function" // symbol rather than a data symbol. We do this unconditionally even though // we don't know anything about the symbol other than its name, because all // external symbols used in target-independent SelectionDAG code are for // functions. return DAG.getNode( WebAssemblyISD::Wrapper, DL, VT, DAG.getTargetExternalSymbol(ES->getSymbol(), VT, WebAssemblyII::MO_SYMBOL_FUNCTION)); } SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { // There's no need for a Wrapper node because we always incorporate a jump // table operand into a BR_TABLE instruction, rather than ever // materializing it in a register. const JumpTableSDNode *JT = cast(Op); return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(), JT->getTargetFlags()); } SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); SDValue Chain = Op.getOperand(0); const auto *JT = cast(Op.getOperand(1)); SDValue Index = Op.getOperand(2); assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags"); SmallVector Ops; Ops.push_back(Chain); Ops.push_back(Index); MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo(); const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs; // Add an operand for each case. for (auto MBB : MBBs) Ops.push_back(DAG.getBasicBlock(MBB)); // TODO: For now, we just pick something arbitrary for a default case for now. // We really want to sniff out the guard and put in the real default case (and // delete the guard). Ops.push_back(DAG.getBasicBlock(MBBs[0])); return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops); } SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout()); auto *MFI = DAG.getMachineFunction().getInfo(); const Value *SV = cast(Op.getOperand(2))->getValue(); SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL, MFI->getVarargBufferVreg(), PtrVT); return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1), MachinePointerInfo(SV), 0); } SDValue WebAssemblyTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); SDLoc DL(Op); switch (IntNo) { default: return {}; // Don't custom lower most intrinsics. 
  case Intrinsic::wasm_lsda: {
    MachineFunction &MF = DAG.getMachineFunction();
    EVT VT = Op.getValueType();
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    auto &Context = MF.getMMI().getContext();
    MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
                                            Twine(MF.getFunctionNumber()));
    return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                       DAG.getMCSymbol(S, PtrVT));
  }
  }
}

SDValue
WebAssemblyTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                               SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  SDLoc DL(Op);
  switch (IntNo) {
  default:
    return {}; // Don't custom lower most intrinsics.
  case Intrinsic::wasm_throw: {
    int Tag = cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
    switch (Tag) {
    case CPP_EXCEPTION: {
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
      const char *SymName = MF.createExternalSymbolName("__cpp_exception");
      SDValue SymNode =
          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                      DAG.getTargetExternalSymbol(
                          SymName, PtrVT, WebAssemblyII::MO_SYMBOL_EVENT));
      return DAG.getNode(WebAssemblyISD::THROW, DL,
                         MVT::Other, // outchain type
                         {
                             Op.getOperand(0), // inchain
                             SymNode,          // exception symbol
                             Op.getOperand(3)  // thrown value
                         });
    }
    default:
      llvm_unreachable("Invalid tag!");
    }
    break;
  }
  }
}

SDValue
WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // If sign extension operations are disabled, allow sext_inreg only if operand
  // is a vector extract. SIMD does not depend on sign extension operations, but
  // allowing sext_inreg in this context lets us have simple patterns to select
  // extract_lane_s instructions. Expanding sext_inreg everywhere would be
  // simpler in this file, but would necessitate large and brittle patterns to
  // undo the expansion and select extract_lane_s instructions.
  assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
  if (Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT)
    return Op;
  // Otherwise expand
  return SDValue();
}

SDValue
WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
  MVT VecType = Op.getOperand(0).getSimpleValueType();
  assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
  size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;

  // Space for two vector args and sixteen mask indices
  SDValue Ops[18];
  size_t OpIdx = 0;
  Ops[OpIdx++] = Op.getOperand(0);
  Ops[OpIdx++] = Op.getOperand(1);

  // Expand mask indices to byte indices and materialize them as operands
  for (size_t I = 0, Lanes = Mask.size(); I < Lanes; ++I) {
    for (size_t J = 0; J < LaneBytes; ++J) {
      // Lower undefs (represented by -1 in mask) to zero
      uint64_t ByteIndex =
          Mask[I] == -1 ? 0 : (uint64_t)Mask[I] * LaneBytes + J;
      Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
    }
  }

  return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
}

SDValue
WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Allow constant lane indices, expand variable lane indices
  SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
  if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef())
    return Op;
  else
    // Perform default expansion
    return SDValue();
}

static SDValue UnrollVectorShift(SDValue Op, SelectionDAG &DAG) {
  EVT LaneT = Op.getSimpleValueType().getVectorElementType();
  // 32-bit and 64-bit unrolled shifts will have proper semantics
  if (LaneT.bitsGE(MVT::i32))
    return DAG.UnrollVectorOp(Op.getNode());
  // Otherwise mask the shift value to get proper semantics from 32-bit shift
  SDLoc DL(Op);
  SDValue ShiftVal = Op.getOperand(1);
  uint64_t MaskVal = LaneT.getSizeInBits() - 1;
  SDValue MaskedShiftVal = DAG.getNode(
      ISD::AND,                    // mask opcode
      DL, ShiftVal.getValueType(), // masked value type
      ShiftVal,                    // original shift value operand
      DAG.getConstant(MaskVal, DL, ShiftVal.getValueType()) // mask operand
  );
  return DAG.UnrollVectorOp(
      DAG.getNode(Op.getOpcode(),        // original shift opcode
                  DL, Op.getValueType(), // original return type
                  Op.getOperand(0),      // original vector operand,
                  MaskedShiftVal         // new masked shift value operand
                  )
          .getNode());
}

SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Only manually lower vector shifts
  assert(Op.getSimpleValueType().isVector());

  // Expand all vector shifts until V8 fixes its implementation
  // TODO: remove this once V8 is fixed
  if (!Subtarget->hasUnimplementedSIMD128())
    return UnrollVectorShift(Op, DAG);

  // Unroll non-splat vector shifts
  BuildVectorSDNode *ShiftVec;
  SDValue SplatVal;
  if (!(ShiftVec = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) ||
      !(SplatVal = ShiftVec->getSplatValue()))
    return UnrollVectorShift(Op, DAG);

  // All splats except i64x2 const splats are handled by patterns
  ConstantSDNode *SplatConst = dyn_cast<ConstantSDNode>(SplatVal);
  if (!SplatConst || Op.getSimpleValueType() != MVT::v2i64)
    return Op;

  // i64x2 const splats are custom lowered to avoid unnecessary wraps
  unsigned Opcode;
  switch (Op.getOpcode()) {
  case ISD::SHL:
    Opcode = WebAssemblyISD::VEC_SHL;
    break;
  case ISD::SRA:
    Opcode = WebAssemblyISD::VEC_SHR_S;
    break;
  case ISD::SRL:
    Opcode = WebAssemblyISD::VEC_SHR_U;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }
  APInt Shift = SplatConst->getAPIntValue().zextOrTrunc(32);
  return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0),
                     DAG.getConstant(Shift, DL, MVT::i32));
}

//===----------------------------------------------------------------------===//
//                       WebAssembly Optimization Hooks
//===----------------------------------------------------------------------===//
Index: vendor/llvm/dist-release_80/lib/Target/X86/X86FastISel.cpp
===================================================================
--- vendor/llvm/dist-release_80/lib/Target/X86/X86FastISel.cpp (revision 348931)
+++ vendor/llvm/dist-release_80/lib/Target/X86/X86FastISel.cpp (revision 348932)
@@ -1,4038 +1,4038 @@
//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
// //===----------------------------------------------------------------------===// // // This file defines the X86-specific support for the FastISel class. Much // of the target-specific code is generated by tablegen in the file // X86GenFastISel.inc, which is #included here. // //===----------------------------------------------------------------------===// #include "X86.h" #include "X86CallingConv.h" #include "X86InstrBuilder.h" #include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; namespace { class X86FastISel final : public FastISel { /// Subtarget - Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 /// floating point ops. /// When SSE is available, use it for f32 operations. /// When SSE2 is available, use it for f64 operations. bool X86ScalarSSEf64; bool X86ScalarSSEf32; public: explicit X86FastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) : FastISel(funcInfo, libInfo) { Subtarget = &funcInfo.MF->getSubtarget(); X86ScalarSSEf64 = Subtarget->hasSSE2(); X86ScalarSSEf32 = Subtarget->hasSSE1(); } bool fastSelectInstruction(const Instruction *I) override; /// The specified machine instr operand is a vreg, and that /// vreg is being provided by the specified load instruction. If possible, /// try to fold the load as an operand to the instruction, returning true if /// possible. 
bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, const LoadInst *LI) override; bool fastLowerArguments() override; bool fastLowerCall(CallLoweringInfo &CLI) override; bool fastLowerIntrinsicCall(const IntrinsicInst *II) override; #include "X86GenFastISel.inc" private: bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT, const DebugLoc &DL); bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO, unsigned &ResultReg, unsigned Alignment = 1); bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM, MachineMemOperand *MMO = nullptr, bool Aligned = false); bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, X86AddressMode &AM, MachineMemOperand *MMO = nullptr, bool Aligned = false); bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, unsigned &ResultReg); bool X86SelectAddress(const Value *V, X86AddressMode &AM); bool X86SelectCallAddress(const Value *V, X86AddressMode &AM); bool X86SelectLoad(const Instruction *I); bool X86SelectStore(const Instruction *I); bool X86SelectRet(const Instruction *I); bool X86SelectCmp(const Instruction *I); bool X86SelectZExt(const Instruction *I); bool X86SelectSExt(const Instruction *I); bool X86SelectBranch(const Instruction *I); bool X86SelectShift(const Instruction *I); bool X86SelectDivRem(const Instruction *I); bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I); bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I); bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I); bool X86SelectSelect(const Instruction *I); bool X86SelectTrunc(const Instruction *I); bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc, const TargetRegisterClass *RC); bool X86SelectFPExt(const Instruction *I); bool X86SelectFPTrunc(const Instruction *I); bool X86SelectSIToFP(const Instruction *I); bool X86SelectUIToFP(const Instruction *I); bool X86SelectIntToFP(const Instruction *I, bool IsSigned); const X86InstrInfo *getInstrInfo() const { return Subtarget->getInstrInfo(); } const X86TargetMachine *getTargetMachine() const { return static_cast(&TM); } bool handleConstantAddresses(const Value *V, X86AddressMode &AM); unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT); unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT); unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT); unsigned fastMaterializeConstant(const Constant *C) override; unsigned fastMaterializeAlloca(const AllocaInst *C) override; unsigned fastMaterializeFloatZero(const ConstantFP *CF) override; /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is /// computed in an SSE register, not on the X87 floating point stack. bool isScalarFPTypeInSSEReg(EVT VT) const { return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 } bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false); bool IsMemcpySmall(uint64_t Len); bool TryEmitSmallMemcpy(X86AddressMode DestAM, X86AddressMode SrcAM, uint64_t Len); bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, const Value *Cond); const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB, X86AddressMode &AM); unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, unsigned Op2, bool Op2IsKill, unsigned Op3, bool Op3IsKill); }; } // end anonymous namespace. 
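// getX86SSEConditionCode returns a (predicate immediate, NeedSwap) pair: the
// immediate is the comparison-predicate encoding used by CMPSS/CMPSD-style
// compares, and NeedSwap tells the caller to commute the compare operands
// first. Predicates 8 (EQ_UQ) and 12 (NEQ_OQ) are only encodable with AVX's
// extended comparison predicates.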
static std::pair getX86SSEConditionCode(CmpInst::Predicate Predicate) { unsigned CC; bool NeedSwap = false; // SSE Condition code mapping: // 0 - EQ // 1 - LT // 2 - LE // 3 - UNORD // 4 - NEQ // 5 - NLT // 6 - NLE // 7 - ORD switch (Predicate) { default: llvm_unreachable("Unexpected predicate"); case CmpInst::FCMP_OEQ: CC = 0; break; case CmpInst::FCMP_OGT: NeedSwap = true; LLVM_FALLTHROUGH; case CmpInst::FCMP_OLT: CC = 1; break; case CmpInst::FCMP_OGE: NeedSwap = true; LLVM_FALLTHROUGH; case CmpInst::FCMP_OLE: CC = 2; break; case CmpInst::FCMP_UNO: CC = 3; break; case CmpInst::FCMP_UNE: CC = 4; break; case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH; case CmpInst::FCMP_UGE: CC = 5; break; case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH; case CmpInst::FCMP_UGT: CC = 6; break; case CmpInst::FCMP_ORD: CC = 7; break; case CmpInst::FCMP_UEQ: CC = 8; break; case CmpInst::FCMP_ONE: CC = 12; break; } return std::make_pair(CC, NeedSwap); } /// Adds a complex addressing mode to the given machine instr builder. /// Note, this will constrain the index register. If its not possible to /// constrain the given index register, then a new one will be created. The /// IndexReg field of the addressing mode will be updated to match in this case. const MachineInstrBuilder & X86FastISel::addFullAddress(const MachineInstrBuilder &MIB, X86AddressMode &AM) { // First constrain the index register. It needs to be a GR64_NOSP. AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg, MIB->getNumOperands() + X86::AddrIndexReg); return ::addFullAddress(MIB, AM); } /// Check if it is possible to fold the condition from the XALU intrinsic /// into the user. The condition code will only be updated on success. bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, const Value *Cond) { if (!isa(Cond)) return false; const auto *EV = cast(Cond); if (!isa(EV->getAggregateOperand())) return false; const auto *II = cast(EV->getAggregateOperand()); MVT RetVT; const Function *Callee = II->getCalledFunction(); Type *RetTy = cast(Callee->getReturnType())->getTypeAtIndex(0U); if (!isTypeLegal(RetTy, RetVT)) return false; if (RetVT != MVT::i32 && RetVT != MVT::i64) return false; X86::CondCode TmpCC; switch (II->getIntrinsicID()) { default: return false; case Intrinsic::sadd_with_overflow: case Intrinsic::ssub_with_overflow: case Intrinsic::smul_with_overflow: case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break; case Intrinsic::uadd_with_overflow: case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break; } // Check if both instructions are in the same basic block. if (II->getParent() != I->getParent()) return false; // Make sure nothing is in the way BasicBlock::const_iterator Start(I); BasicBlock::const_iterator End(II); for (auto Itr = std::prev(Start); Itr != End; --Itr) { // We only expect extractvalue instructions between the intrinsic and the // instruction to be selected. if (!isa(Itr)) return false; // Check that the extractvalue operand comes from the intrinsic. const auto *EVI = cast(Itr); if (EVI->getAggregateOperand() != II) return false; } CC = TmpCC; return true; } bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true); if (evt == MVT::Other || !evt.isSimple()) // Unhandled type. Halt "fast" selection and bail. return false; VT = evt.getSimpleVT(); // For now, require SSE/SSE2 for performing floating-point operations, // since x87 requires additional work. 
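  // f32/f64 without SSE1/SSE2 and all f80 values are rejected below, so FastISel
  // gives up and the x87-only floating-point cases are left to the DAG selector.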
if (VT == MVT::f64 && !X86ScalarSSEf64) return false; if (VT == MVT::f32 && !X86ScalarSSEf32) return false; // Similarly, no f80 support yet. if (VT == MVT::f80) return false; // We only handle legal types. For example, on x86-32 the instruction // selector contains all of the 64-bit instructions from x86-64, // under the assumption that i64 won't be used if the target doesn't // support it. return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT); } #include "X86GenCallingConv.inc" /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT. /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV. /// Return true and the result register by reference if it is possible. bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO, unsigned &ResultReg, unsigned Alignment) { bool HasSSE41 = Subtarget->hasSSE41(); bool HasAVX = Subtarget->hasAVX(); bool HasAVX2 = Subtarget->hasAVX2(); bool HasAVX512 = Subtarget->hasAVX512(); bool HasVLX = Subtarget->hasVLX(); bool IsNonTemporal = MMO && MMO->isNonTemporal(); // Get opcode and regclass of the output for the given load instruction. unsigned Opc = 0; const TargetRegisterClass *RC = nullptr; switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i1: case MVT::i8: Opc = X86::MOV8rm; RC = &X86::GR8RegClass; break; case MVT::i16: Opc = X86::MOV16rm; RC = &X86::GR16RegClass; break; case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break; case MVT::i64: // Must be in x86-64 mode. Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break; case MVT::f32: if (X86ScalarSSEf32) { Opc = HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm; RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass; } else { Opc = X86::LD_Fp32m; RC = &X86::RFP32RegClass; } break; case MVT::f64: if (X86ScalarSSEf64) { Opc = HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm; RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass; } else { Opc = X86::LD_Fp64m; RC = &X86::RFP64RegClass; } break; case MVT::f80: // No f80 support yet. return false; case MVT::v4f32: if (IsNonTemporal && Alignment >= 16 && HasSSE41) Opc = HasVLX ? X86::VMOVNTDQAZ128rm : HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm; else if (Alignment >= 16) Opc = HasVLX ? X86::VMOVAPSZ128rm : HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm; else Opc = HasVLX ? X86::VMOVUPSZ128rm : HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm; RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass; break; case MVT::v2f64: if (IsNonTemporal && Alignment >= 16 && HasSSE41) Opc = HasVLX ? X86::VMOVNTDQAZ128rm : HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm; else if (Alignment >= 16) Opc = HasVLX ? X86::VMOVAPDZ128rm : HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm; else Opc = HasVLX ? X86::VMOVUPDZ128rm : HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm; RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass; break; case MVT::v4i32: case MVT::v2i64: case MVT::v8i16: case MVT::v16i8: - if (IsNonTemporal && Alignment >= 16) + if (IsNonTemporal && Alignment >= 16 && HasSSE41) Opc = HasVLX ? X86::VMOVNTDQAZ128rm : HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm; else if (Alignment >= 16) Opc = HasVLX ? X86::VMOVDQA64Z128rm : HasAVX ? X86::VMOVDQArm : X86::MOVDQArm; else Opc = HasVLX ? X86::VMOVDQU64Z128rm : HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm; RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass; break; case MVT::v8f32: assert(HasAVX); if (IsNonTemporal && Alignment >= 32 && HasAVX2) Opc = HasVLX ? 
X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm; else if (IsNonTemporal && Alignment >= 16) return false; // Force split for X86::VMOVNTDQArm else if (Alignment >= 32) Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm; else Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm; RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass; break; case MVT::v4f64: assert(HasAVX); if (IsNonTemporal && Alignment >= 32 && HasAVX2) Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm; else if (IsNonTemporal && Alignment >= 16) return false; // Force split for X86::VMOVNTDQArm else if (Alignment >= 32) Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm; else Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm; RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass; break; case MVT::v8i32: case MVT::v4i64: case MVT::v16i16: case MVT::v32i8: assert(HasAVX); if (IsNonTemporal && Alignment >= 32 && HasAVX2) Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm; else if (IsNonTemporal && Alignment >= 16) return false; // Force split for X86::VMOVNTDQArm else if (Alignment >= 32) Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm; else Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm; RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass; break; case MVT::v16f32: assert(HasAVX512); if (IsNonTemporal && Alignment >= 64) Opc = X86::VMOVNTDQAZrm; else Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm; RC = &X86::VR512RegClass; break; case MVT::v8f64: assert(HasAVX512); if (IsNonTemporal && Alignment >= 64) Opc = X86::VMOVNTDQAZrm; else Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm; RC = &X86::VR512RegClass; break; case MVT::v8i64: case MVT::v16i32: case MVT::v32i16: case MVT::v64i8: assert(HasAVX512); // Note: There are a lot more choices based on type with AVX-512, but // there's really no advantage when the load isn't masked. if (IsNonTemporal && Alignment >= 64) Opc = X86::VMOVNTDQAZrm; else Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm; RC = &X86::VR512RegClass; break; } ResultReg = createResultReg(RC); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg); addFullAddress(MIB, AM); if (MMO) MIB->addMemOperand(*FuncInfo.MF, MMO); return true; } /// X86FastEmitStore - Emit a machine instruction to store a value Val of /// type VT. The address is either pre-computed, consisted of a base ptr, Ptr /// and a displacement offset, or a GlobalAddress, /// i.e. V. Return true if it is possible. bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, X86AddressMode &AM, MachineMemOperand *MMO, bool Aligned) { bool HasSSE1 = Subtarget->hasSSE1(); bool HasSSE2 = Subtarget->hasSSE2(); bool HasSSE4A = Subtarget->hasSSE4A(); bool HasAVX = Subtarget->hasAVX(); bool HasAVX512 = Subtarget->hasAVX512(); bool HasVLX = Subtarget->hasVLX(); bool IsNonTemporal = MMO && MMO->isNonTemporal(); // Get opcode and regclass of the output for the given store instruction. unsigned Opc = 0; switch (VT.getSimpleVT().SimpleTy) { case MVT::f80: // No f80 support yet. default: return false; case MVT::i1: { // Mask out all but lowest bit. unsigned AndResult = createResultReg(&X86::GR8RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::AND8ri), AndResult) .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1); ValReg = AndResult; LLVM_FALLTHROUGH; // handle i1 as i8. } case MVT::i8: Opc = X86::MOV8mr; break; case MVT::i16: Opc = X86::MOV16mr; break; case MVT::i32: Opc = (IsNonTemporal && HasSSE2) ? 
X86::MOVNTImr : X86::MOV32mr; break; case MVT::i64: // Must be in x86-64 mode. Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr; break; case MVT::f32: if (X86ScalarSSEf32) { if (IsNonTemporal && HasSSE4A) Opc = X86::MOVNTSS; else Opc = HasAVX512 ? X86::VMOVSSZmr : HasAVX ? X86::VMOVSSmr : X86::MOVSSmr; } else Opc = X86::ST_Fp32m; break; case MVT::f64: if (X86ScalarSSEf32) { if (IsNonTemporal && HasSSE4A) Opc = X86::MOVNTSD; else Opc = HasAVX512 ? X86::VMOVSDZmr : HasAVX ? X86::VMOVSDmr : X86::MOVSDmr; } else Opc = X86::ST_Fp64m; break; case MVT::x86mmx: Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr; break; case MVT::v4f32: if (Aligned) { if (IsNonTemporal) Opc = HasVLX ? X86::VMOVNTPSZ128mr : HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr; else Opc = HasVLX ? X86::VMOVAPSZ128mr : HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr; } else Opc = HasVLX ? X86::VMOVUPSZ128mr : HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr; break; case MVT::v2f64: if (Aligned) { if (IsNonTemporal) Opc = HasVLX ? X86::VMOVNTPDZ128mr : HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr; else Opc = HasVLX ? X86::VMOVAPDZ128mr : HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr; } else Opc = HasVLX ? X86::VMOVUPDZ128mr : HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr; break; case MVT::v4i32: case MVT::v2i64: case MVT::v8i16: case MVT::v16i8: if (Aligned) { if (IsNonTemporal) Opc = HasVLX ? X86::VMOVNTDQZ128mr : HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr; else Opc = HasVLX ? X86::VMOVDQA64Z128mr : HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr; } else Opc = HasVLX ? X86::VMOVDQU64Z128mr : HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr; break; case MVT::v8f32: assert(HasAVX); if (Aligned) { if (IsNonTemporal) Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr; else Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr; } else Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr; break; case MVT::v4f64: assert(HasAVX); if (Aligned) { if (IsNonTemporal) Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr; else Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr; } else Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr; break; case MVT::v8i32: case MVT::v4i64: case MVT::v16i16: case MVT::v32i8: assert(HasAVX); if (Aligned) { if (IsNonTemporal) Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr; else Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr; } else Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr; break; case MVT::v16f32: assert(HasAVX512); if (Aligned) Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr; else Opc = X86::VMOVUPSZmr; break; case MVT::v8f64: assert(HasAVX512); if (Aligned) { Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr; } else Opc = X86::VMOVUPDZmr; break; case MVT::v8i64: case MVT::v16i32: case MVT::v32i16: case MVT::v64i8: assert(HasAVX512); // Note: There are a lot more choices based on type with AVX-512, but // there's really no advantage when the store isn't masked. if (Aligned) Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr; else Opc = X86::VMOVDQU64Zmr; break; } const MCInstrDesc &Desc = TII.get(Opc); // Some of the instructions in the previous switch use FR128 instead // of FR32 for ValReg. Make sure the register we feed the instruction // matches its register class constraints. // Note: This is fine to do a copy from FR32 to FR128, this is the // same registers behind the scene and actually why it did not trigger // any bugs before. 
ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc); addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill)); if (MMO) MIB->addMemOperand(*FuncInfo.MF, MMO); return true; } bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM, MachineMemOperand *MMO, bool Aligned) { // Handle 'null' like i32/i64 0. if (isa(Val)) Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext())); // If this is a store of a simple constant, fold the constant into the store. if (const ConstantInt *CI = dyn_cast(Val)) { unsigned Opc = 0; bool Signed = true; switch (VT.getSimpleVT().SimpleTy) { default: break; case MVT::i1: Signed = false; LLVM_FALLTHROUGH; // Handle as i8. case MVT::i8: Opc = X86::MOV8mi; break; case MVT::i16: Opc = X86::MOV16mi; break; case MVT::i32: Opc = X86::MOV32mi; break; case MVT::i64: // Must be a 32-bit sign extended value. if (isInt<32>(CI->getSExtValue())) Opc = X86::MOV64mi32; break; } if (Opc) { MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)); addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue() : CI->getZExtValue()); if (MMO) MIB->addMemOperand(*FuncInfo.MF, MMO); return true; } } unsigned ValReg = getRegForValue(Val); if (ValReg == 0) return false; bool ValKill = hasTrivialKill(Val); return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned); } /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g. /// ISD::SIGN_EXTEND). bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, unsigned &ResultReg) { unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src, /*TODO: Kill=*/false); if (RR == 0) return false; ResultReg = RR; return true; } bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) { // Handle constant address. if (const GlobalValue *GV = dyn_cast(V)) { // Can't handle alternate code models yet. if (TM.getCodeModel() != CodeModel::Small) return false; // Can't handle TLS yet. if (GV->isThreadLocal()) return false; // Can't handle !absolute_symbol references yet. if (GV->isAbsoluteSymbolRef()) return false; // RIP-relative addresses can't have additional register operands, so if // we've already folded stuff into the addressing mode, just force the // global value into its own register, which we can use as the basereg. if (!Subtarget->isPICStyleRIPRel() || (AM.Base.Reg == 0 && AM.IndexReg == 0)) { // Okay, we've committed to selecting this global. Set up the address. AM.GV = GV; // Allow the subtarget to classify the global. unsigned char GVFlags = Subtarget->classifyGlobalReference(GV); // If this reference is relative to the pic base, set it now. if (isGlobalRelativeToPICBase(GVFlags)) { // FIXME: How do we know Base.Reg is free?? AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); } // Unless the ABI requires an extra load, return a direct reference to // the global. if (!isGlobalStubReference(GVFlags)) { if (Subtarget->isPICStyleRIPRel()) { // Use rip-relative addressing if we can. Above we verified that the // base and index registers are unused. assert(AM.Base.Reg == 0 && AM.IndexReg == 0); AM.Base.Reg = X86::RIP; } AM.GVOpFlags = GVFlags; return true; } // Ok, we need to do a load from a stub. 
If we've already loaded from // this stub, reuse the loaded pointer, otherwise emit the load now. DenseMap::iterator I = LocalValueMap.find(V); unsigned LoadReg; if (I != LocalValueMap.end() && I->second != 0) { LoadReg = I->second; } else { // Issue load from stub. unsigned Opc = 0; const TargetRegisterClass *RC = nullptr; X86AddressMode StubAM; StubAM.Base.Reg = AM.Base.Reg; StubAM.GV = GV; StubAM.GVOpFlags = GVFlags; // Prepare for inserting code in the local-value area. SavePoint SaveInsertPt = enterLocalValueArea(); if (TLI.getPointerTy(DL) == MVT::i64) { Opc = X86::MOV64rm; RC = &X86::GR64RegClass; if (Subtarget->isPICStyleRIPRel()) StubAM.Base.Reg = X86::RIP; } else { Opc = X86::MOV32rm; RC = &X86::GR32RegClass; } LoadReg = createResultReg(RC); MachineInstrBuilder LoadMI = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg); addFullAddress(LoadMI, StubAM); // Ok, back to normal mode. leaveLocalValueArea(SaveInsertPt); // Prevent loading GV stub multiple times in same MBB. LocalValueMap[V] = LoadReg; } // Now construct the final address. Note that the Disp, Scale, // and Index values may already be set here. AM.Base.Reg = LoadReg; AM.GV = nullptr; return true; } } // If all else fails, try to materialize the value in a register. if (!AM.GV || !Subtarget->isPICStyleRIPRel()) { if (AM.Base.Reg == 0) { AM.Base.Reg = getRegForValue(V); return AM.Base.Reg != 0; } if (AM.IndexReg == 0) { assert(AM.Scale == 1 && "Scale with no index!"); AM.IndexReg = getRegForValue(V); return AM.IndexReg != 0; } } return false; } /// X86SelectAddress - Attempt to fill in an address from the given value. /// bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { SmallVector GEPs; redo_gep: const User *U = nullptr; unsigned Opcode = Instruction::UserOp1; if (const Instruction *I = dyn_cast(V)) { // Don't walk into other basic blocks; it's possible we haven't // visited them yet, so the instructions may not yet be assigned // virtual registers. if (FuncInfo.StaticAllocaMap.count(static_cast(V)) || FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { Opcode = I->getOpcode(); U = I; } } else if (const ConstantExpr *C = dyn_cast(V)) { Opcode = C->getOpcode(); U = C; } if (PointerType *Ty = dyn_cast(V->getType())) if (Ty->getAddressSpace() > 255) // Fast instruction selection doesn't support the special // address spaces. return false; switch (Opcode) { default: break; case Instruction::BitCast: // Look past bitcasts. return X86SelectAddress(U->getOperand(0), AM); case Instruction::IntToPtr: // Look past no-op inttoptrs. if (TLI.getValueType(DL, U->getOperand(0)->getType()) == TLI.getPointerTy(DL)) return X86SelectAddress(U->getOperand(0), AM); break; case Instruction::PtrToInt: // Look past no-op ptrtoints. if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return X86SelectAddress(U->getOperand(0), AM); break; case Instruction::Alloca: { // Do static allocas. const AllocaInst *A = cast(V); DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(A); if (SI != FuncInfo.StaticAllocaMap.end()) { AM.BaseType = X86AddressMode::FrameIndexBase; AM.Base.FrameIndex = SI->second; return true; } break; } case Instruction::Add: { // Adds of constants are common and easy enough. if (const ConstantInt *CI = dyn_cast(U->getOperand(1))) { uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue(); // They have to fit in the 32-bit signed displacement field though. 
if (isInt<32>(Disp)) { AM.Disp = (uint32_t)Disp; return X86SelectAddress(U->getOperand(0), AM); } } break; } case Instruction::GetElementPtr: { X86AddressMode SavedAM = AM; // Pattern-match simple GEPs. uint64_t Disp = (int32_t)AM.Disp; unsigned IndexReg = AM.IndexReg; unsigned Scale = AM.Scale; gep_type_iterator GTI = gep_type_begin(U); // Iterate through the indices, folding what we can. Constants can be // folded, and one dynamic index can be handled, if the scale is supported. for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i, ++GTI) { const Value *Op = *i; if (StructType *STy = GTI.getStructTypeOrNull()) { const StructLayout *SL = DL.getStructLayout(STy); Disp += SL->getElementOffset(cast(Op)->getZExtValue()); continue; } // A array/variable index is always of the form i*S where S is the // constant scale size. See if we can push the scale into immediates. uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); for (;;) { if (const ConstantInt *CI = dyn_cast(Op)) { // Constant-offset addressing. Disp += CI->getSExtValue() * S; break; } if (canFoldAddIntoGEP(U, Op)) { // A compatible add with a constant operand. Fold the constant. ConstantInt *CI = cast(cast(Op)->getOperand(1)); Disp += CI->getSExtValue() * S; // Iterate on the other operand. Op = cast(Op)->getOperand(0); continue; } if (IndexReg == 0 && (!AM.GV || !Subtarget->isPICStyleRIPRel()) && (S == 1 || S == 2 || S == 4 || S == 8)) { // Scaled-index addressing. Scale = S; IndexReg = getRegForGEPIndex(Op).first; if (IndexReg == 0) return false; break; } // Unsupported. goto unsupported_gep; } } // Check for displacement overflow. if (!isInt<32>(Disp)) break; AM.IndexReg = IndexReg; AM.Scale = Scale; AM.Disp = (uint32_t)Disp; GEPs.push_back(V); if (const GetElementPtrInst *GEP = dyn_cast(U->getOperand(0))) { // Ok, the GEP indices were covered by constant-offset and scaled-index // addressing. Update the address state and move on to examining the base. V = GEP; goto redo_gep; } else if (X86SelectAddress(U->getOperand(0), AM)) { return true; } // If we couldn't merge the gep value into this addr mode, revert back to // our address and just match the value instead of completely failing. AM = SavedAM; for (const Value *I : reverse(GEPs)) if (handleConstantAddresses(I, AM)) return true; return false; unsupported_gep: // Ok, the GEP indices weren't all covered. break; } } return handleConstantAddresses(V, AM); } /// X86SelectCallAddress - Attempt to fill in an address from the given value. /// bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { const User *U = nullptr; unsigned Opcode = Instruction::UserOp1; const Instruction *I = dyn_cast(V); // Record if the value is defined in the same basic block. // // This information is crucial to know whether or not folding an // operand is valid. // Indeed, FastISel generates or reuses a virtual register for all // operands of all instructions it selects. Obviously, the definition and // its uses must use the same virtual register otherwise the produced // code is incorrect. // Before instruction selection, FunctionLoweringInfo::set sets the virtual // registers for values that are alive across basic blocks. This ensures // that the values are consistently set between across basic block, even // if different instruction selection mechanisms are used (e.g., a mix of // SDISel and FastISel). 
// For values local to a basic block, the instruction selection process // generates these virtual registers with whatever method is appropriate // for its needs. In particular, FastISel and SDISel do not share the way // local virtual registers are set. // Therefore, this is impossible (or at least unsafe) to share values // between basic blocks unless they use the same instruction selection // method, which is not guarantee for X86. // Moreover, things like hasOneUse could not be used accurately, if we // allow to reference values across basic blocks whereas they are not // alive across basic blocks initially. bool InMBB = true; if (I) { Opcode = I->getOpcode(); U = I; InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock(); } else if (const ConstantExpr *C = dyn_cast(V)) { Opcode = C->getOpcode(); U = C; } switch (Opcode) { default: break; case Instruction::BitCast: // Look past bitcasts if its operand is in the same BB. if (InMBB) return X86SelectCallAddress(U->getOperand(0), AM); break; case Instruction::IntToPtr: // Look past no-op inttoptrs if its operand is in the same BB. if (InMBB && TLI.getValueType(DL, U->getOperand(0)->getType()) == TLI.getPointerTy(DL)) return X86SelectCallAddress(U->getOperand(0), AM); break; case Instruction::PtrToInt: // Look past no-op ptrtoints if its operand is in the same BB. if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return X86SelectCallAddress(U->getOperand(0), AM); break; } // Handle constant address. if (const GlobalValue *GV = dyn_cast(V)) { // Can't handle alternate code models yet. if (TM.getCodeModel() != CodeModel::Small) return false; // RIP-relative addresses can't have additional register operands. if (Subtarget->isPICStyleRIPRel() && (AM.Base.Reg != 0 || AM.IndexReg != 0)) return false; // Can't handle TLS. if (const GlobalVariable *GVar = dyn_cast(GV)) if (GVar->isThreadLocal()) return false; // Okay, we've committed to selecting this global. Set up the basic address. AM.GV = GV; // Return a direct reference to the global. Fastisel can handle calls to // functions that require loads, such as dllimport and nonlazybind // functions. if (Subtarget->isPICStyleRIPRel()) { // Use rip-relative addressing if we can. Above we verified that the // base and index registers are unused. assert(AM.Base.Reg == 0 && AM.IndexReg == 0); AM.Base.Reg = X86::RIP; } else { AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr); } return true; } // If all else fails, try to materialize the value in a register. if (!AM.GV || !Subtarget->isPICStyleRIPRel()) { if (AM.Base.Reg == 0) { AM.Base.Reg = getRegForValue(V); return AM.Base.Reg != 0; } if (AM.IndexReg == 0) { assert(AM.Scale == 1 && "Scale with no index!"); AM.IndexReg = getRegForValue(V); return AM.IndexReg != 0; } } return false; } /// X86SelectStore - Select and emit code to implement store instructions. bool X86FastISel::X86SelectStore(const Instruction *I) { // Atomic stores need special handling. const StoreInst *S = cast(I); if (S->isAtomic()) return false; const Value *PtrV = I->getOperand(1); if (TLI.supportSwiftError()) { // Swifterror values can come from either a function parameter with // swifterror attribute or an alloca with swifterror attribute. 
if (const Argument *Arg = dyn_cast(PtrV)) { if (Arg->hasSwiftErrorAttr()) return false; } if (const AllocaInst *Alloca = dyn_cast(PtrV)) { if (Alloca->isSwiftError()) return false; } } const Value *Val = S->getValueOperand(); const Value *Ptr = S->getPointerOperand(); MVT VT; if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true)) return false; unsigned Alignment = S->getAlignment(); unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType()); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = ABIAlignment; bool Aligned = Alignment >= ABIAlignment; X86AddressMode AM; if (!X86SelectAddress(Ptr, AM)) return false; return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned); } /// X86SelectRet - Select and emit code to implement ret instructions. bool X86FastISel::X86SelectRet(const Instruction *I) { const ReturnInst *Ret = cast(I); const Function &F = *I->getParent()->getParent(); const X86MachineFunctionInfo *X86MFInfo = FuncInfo.MF->getInfo(); if (!FuncInfo.CanLowerReturn) return false; if (TLI.supportSwiftError() && F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) return false; if (TLI.supportSplitCSR(FuncInfo.MF)) return false; CallingConv::ID CC = F.getCallingConv(); if (CC != CallingConv::C && CC != CallingConv::Fast && CC != CallingConv::X86_FastCall && CC != CallingConv::X86_StdCall && CC != CallingConv::X86_ThisCall && CC != CallingConv::X86_64_SysV && CC != CallingConv::Win64) return false; // Don't handle popping bytes if they don't fit the ret's immediate. if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn())) return false; // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. Fastisel doesn't know how to do that. if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) return false; // Let SDISel handle vararg functions. if (F.isVarArg()) return false; // Build a list of return value registers. SmallVector RetRegs; if (Ret->getNumOperands() > 0) { SmallVector Outs; GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. SmallVector ValLocs; CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_X86); const Value *RV = Ret->getOperand(0); unsigned Reg = getRegForValue(RV); if (Reg == 0) return false; // Only handle a single return value for now. if (ValLocs.size() != 1) return false; CCValAssign &VA = ValLocs[0]; // Don't bother handling odd stuff for now. if (VA.getLocInfo() != CCValAssign::Full) return false; // Only handle register returns for now. if (!VA.isRegLoc()) return false; // The calling-convention tables for x87 returns don't tell // the whole story. if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) return false; unsigned SrcReg = Reg + VA.getValNo(); EVT SrcVT = TLI.getValueType(DL, RV->getType()); EVT DstVT = VA.getValVT(); // Special handling for extended integers. if (SrcVT != DstVT) { if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16) return false; if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) return false; assert(DstVT == MVT::i32 && "X86 should always ext to i32"); if (SrcVT == MVT::i1) { if (Outs[0].Flags.isSExt()) return false; SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false); SrcVT = MVT::i8; } unsigned Op = Outs[0].Flags.isZExt() ? 
ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op, SrcReg, /*TODO: Kill=*/false); } // Make the copy. unsigned DstReg = VA.getLocReg(); const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); // Avoid a cross-class copy. This is very unlikely. if (!SrcRC->contains(DstReg)) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg); // Add register to return instruction. RetRegs.push_back(VA.getLocReg()); } // Swift calling convention does not require we copy the sret argument // into %rax/%eax for the return, and SRetReturnReg is not set for Swift. // All x86 ABIs require that for returning structs by value we copy // the sret argument into %rax/%eax (depending on ABI) for the return. // We saved the argument into a virtual register in the entry block, // so now we copy the value out and into %rax/%eax. if (F.hasStructRetAttr() && CC != CallingConv::Swift) { unsigned Reg = X86MFInfo->getSRetReturnReg(); assert(Reg && "SRetReturnReg should have been set in LowerFormalArguments()!"); unsigned RetReg = Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), RetReg).addReg(Reg); RetRegs.push_back(RetReg); } // Now emit the RET. MachineInstrBuilder MIB; if (X86MFInfo->getBytesToPopOnReturn()) { MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Subtarget->is64Bit() ? X86::RETIQ : X86::RETIL)) .addImm(X86MFInfo->getBytesToPopOnReturn()); } else { MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL)); } for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) MIB.addReg(RetRegs[i], RegState::Implicit); return true; } /// X86SelectLoad - Select and emit code to implement load instructions. /// bool X86FastISel::X86SelectLoad(const Instruction *I) { const LoadInst *LI = cast(I); // Atomic loads need special handling. if (LI->isAtomic()) return false; const Value *SV = I->getOperand(0); if (TLI.supportSwiftError()) { // Swifterror values can come from either a function parameter with // swifterror attribute or an alloca with swifterror attribute. if (const Argument *Arg = dyn_cast(SV)) { if (Arg->hasSwiftErrorAttr()) return false; } if (const AllocaInst *Alloca = dyn_cast(SV)) { if (Alloca->isSwiftError()) return false; } } MVT VT; if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true)) return false; const Value *Ptr = LI->getPointerOperand(); X86AddressMode AM; if (!X86SelectAddress(Ptr, AM)) return false; unsigned Alignment = LI->getAlignment(); unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType()); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = ABIAlignment; unsigned ResultReg = 0; if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg, Alignment)) return false; updateValueMap(I, ResultReg); return true; } static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) { bool HasAVX512 = Subtarget->hasAVX512(); bool HasAVX = Subtarget->hasAVX(); bool X86ScalarSSEf32 = Subtarget->hasSSE1(); bool X86ScalarSSEf64 = Subtarget->hasSSE2(); switch (VT.getSimpleVT().SimpleTy) { default: return 0; case MVT::i8: return X86::CMP8rr; case MVT::i16: return X86::CMP16rr; case MVT::i32: return X86::CMP32rr; case MVT::i64: return X86::CMP64rr; case MVT::f32: return X86ScalarSSEf32 ? (HasAVX512 ? X86::VUCOMISSZrr : HasAVX ? 
X86::VUCOMISSrr : X86::UCOMISSrr) : 0; case MVT::f64: return X86ScalarSSEf64 ? (HasAVX512 ? X86::VUCOMISDZrr : HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr) : 0; } } /// If we have a comparison with RHS as the RHS of the comparison, return an /// opcode that works for the compare (e.g. CMP32ri) otherwise return 0. static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) { int64_t Val = RHSC->getSExtValue(); switch (VT.getSimpleVT().SimpleTy) { // Otherwise, we can't fold the immediate into this comparison. default: return 0; case MVT::i8: return X86::CMP8ri; case MVT::i16: if (isInt<8>(Val)) return X86::CMP16ri8; return X86::CMP16ri; case MVT::i32: if (isInt<8>(Val)) return X86::CMP32ri8; return X86::CMP32ri; case MVT::i64: if (isInt<8>(Val)) return X86::CMP64ri8; // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext // field. if (isInt<32>(Val)) return X86::CMP64ri32; return 0; } } bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT, const DebugLoc &CurDbgLoc) { unsigned Op0Reg = getRegForValue(Op0); if (Op0Reg == 0) return false; // Handle 'null' like i32/i64 0. if (isa(Op1)) Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext())); // We have two options: compare with register or immediate. If the RHS of // the compare is an immediate that we can fold into this compare, use // CMPri, otherwise use CMPrr. if (const ConstantInt *Op1C = dyn_cast(Op1)) { if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc)) .addReg(Op0Reg) .addImm(Op1C->getSExtValue()); return true; } } unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget); if (CompareOpc == 0) return false; unsigned Op1Reg = getRegForValue(Op1); if (Op1Reg == 0) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc)) .addReg(Op0Reg) .addReg(Op1Reg); return true; } bool X86FastISel::X86SelectCmp(const Instruction *I) { const CmpInst *CI = cast(I); MVT VT; if (!isTypeLegal(I->getOperand(0)->getType(), VT)) return false; // Try to optimize or fold the cmp. CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); unsigned ResultReg = 0; switch (Predicate) { default: break; case CmpInst::FCMP_FALSE: { ResultReg = createResultReg(&X86::GR32RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0), ResultReg); ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true, X86::sub_8bit); if (!ResultReg) return false; break; } case CmpInst::FCMP_TRUE: { ResultReg = createResultReg(&X86::GR8RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri), ResultReg).addImm(1); break; } } if (ResultReg) { updateValueMap(I, ResultReg); return true; } const Value *LHS = CI->getOperand(0); const Value *RHS = CI->getOperand(1); // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0. // We don't have to materialize a zero constant for this case and can just use // %x again on the RHS. if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) { const auto *RHSC = dyn_cast(RHS); if (RHSC && RHSC->isNullValue()) RHS = LHS; } // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction. 
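  // After UCOMISS/UCOMISD, "ordered equal" is ZF==1 && PF==0 and "unordered not
  // equal" is ZF==0 || PF==1, so OEQ is materialized as SETE & SETNP and UNE as
  // SETNE | SETP using the table below.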
static const uint16_t SETFOpcTable[2][3] = { { X86::SETEr, X86::SETNPr, X86::AND8rr }, { X86::SETNEr, X86::SETPr, X86::OR8rr } }; const uint16_t *SETFOpc = nullptr; switch (Predicate) { default: break; case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break; case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break; } ResultReg = createResultReg(&X86::GR8RegClass); if (SETFOpc) { if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc())) return false; unsigned FlagReg1 = createResultReg(&X86::GR8RegClass); unsigned FlagReg2 = createResultReg(&X86::GR8RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]), FlagReg1); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]), FlagReg2); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]), ResultReg).addReg(FlagReg1).addReg(FlagReg2); updateValueMap(I, ResultReg); return true; } X86::CondCode CC; bool SwapArgs; std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate); assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); unsigned Opc = X86::getSETFromCond(CC); if (SwapArgs) std::swap(LHS, RHS); // Emit a compare of LHS/RHS. if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc())) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg); updateValueMap(I, ResultReg); return true; } bool X86FastISel::X86SelectZExt(const Instruction *I) { EVT DstVT = TLI.getValueType(DL, I->getType()); if (!TLI.isTypeLegal(DstVT)) return false; unsigned ResultReg = getRegForValue(I->getOperand(0)); if (ResultReg == 0) return false; // Handle zero-extension from i1 to i8, which is common. MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType()); if (SrcVT == MVT::i1) { // Set the high bits to zero. ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false); SrcVT = MVT::i8; if (ResultReg == 0) return false; } if (DstVT == MVT::i64) { // Handle extension to 64-bits via sub-register shenanigans. unsigned MovInst; switch (SrcVT.SimpleTy) { case MVT::i8: MovInst = X86::MOVZX32rr8; break; case MVT::i16: MovInst = X86::MOVZX32rr16; break; case MVT::i32: MovInst = X86::MOV32rr; break; default: llvm_unreachable("Unexpected zext to i64 source type"); } unsigned Result32 = createResultReg(&X86::GR32RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32) .addReg(ResultReg); ResultReg = createResultReg(&X86::GR64RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg) .addImm(0).addReg(Result32).addImm(X86::sub_32bit); } else if (DstVT == MVT::i16) { // i8->i16 doesn't exist in the autogenerated isel table. Need to zero // extend to 32-bits and then extract down to 16-bits. unsigned Result32 = createResultReg(&X86::GR32RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVZX32rr8), Result32).addReg(ResultReg); ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true, X86::sub_16bit); } else if (DstVT != MVT::i8) { ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND, ResultReg, /*Kill=*/true); if (ResultReg == 0) return false; } updateValueMap(I, ResultReg); return true; } bool X86FastISel::X86SelectSExt(const Instruction *I) { EVT DstVT = TLI.getValueType(DL, I->getType()); if (!TLI.isTypeLegal(DstVT)) return false; unsigned ResultReg = getRegForValue(I->getOperand(0)); if (ResultReg == 0) return false; // Handle sign-extension from i1 to i8. 
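  // The i1 is zero-extended first so the register holds 0 or 1; NEG8r then maps
  // 1 to 0xFF and leaves 0 unchanged, which is exactly an i8 sign extension.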
MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType()); if (SrcVT == MVT::i1) { // Set the high bits to zero. unsigned ZExtReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false); if (ZExtReg == 0) return false; // Negate the result to make an 8-bit sign extended value. ResultReg = createResultReg(&X86::GR8RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::NEG8r), ResultReg).addReg(ZExtReg); SrcVT = MVT::i8; } if (DstVT == MVT::i16) { // i8->i16 doesn't exist in the autogenerated isel table. Need to sign // extend to 32-bits and then extract down to 16-bits. unsigned Result32 = createResultReg(&X86::GR32RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOVSX32rr8), Result32).addReg(ResultReg); ResultReg = fastEmitInst_extractsubreg(MVT::i16, Result32, /*Kill=*/true, X86::sub_16bit); } else if (DstVT != MVT::i8) { ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::SIGN_EXTEND, ResultReg, /*Kill=*/true); if (ResultReg == 0) return false; } updateValueMap(I, ResultReg); return true; } bool X86FastISel::X86SelectBranch(const Instruction *I) { // Unconditional branches are selected by tablegen-generated code. // Handle a conditional branch. const BranchInst *BI = cast(I); MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; // Fold the common case of a conditional branch with a comparison // in the same block (values defined on other blocks may not have // initialized registers). X86::CondCode CC; if (const CmpInst *CI = dyn_cast(BI->getCondition())) { if (CI->hasOneUse() && CI->getParent() == I->getParent()) { EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType()); // Try to optimize or fold the cmp. CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); switch (Predicate) { default: break; case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true; case CmpInst::FCMP_TRUE: fastEmitBranch(TrueMBB, DbgLoc); return true; } const Value *CmpLHS = CI->getOperand(0); const Value *CmpRHS = CI->getOperand(1); // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, // 0.0. // We don't have to materialize a zero constant for this case and can just // use %x again on the RHS. if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) { const auto *CmpRHSC = dyn_cast(CmpRHS); if (CmpRHSC && CmpRHSC->isNullValue()) CmpRHS = CmpLHS; } // Try to take advantage of fallthrough opportunities. if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) { std::swap(TrueMBB, FalseMBB); Predicate = CmpInst::getInversePredicate(Predicate); } // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition // code check. Instead two branch instructions are required to check all // the flags. First we change the predicate to a supported condition code, // which will be the first branch. Later one we will emit the second // branch. bool NeedExtraBranch = false; switch (Predicate) { default: break; case CmpInst::FCMP_OEQ: std::swap(TrueMBB, FalseMBB); LLVM_FALLTHROUGH; case CmpInst::FCMP_UNE: NeedExtraBranch = true; Predicate = CmpInst::FCMP_ONE; break; } bool SwapArgs; unsigned BranchOpc; std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate); assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); BranchOpc = X86::GetCondBranchFromCond(CC); if (SwapArgs) std::swap(CmpLHS, CmpRHS); // Emit a compare of the LHS and RHS, setting the flags. 
if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc())) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc)) .addMBB(TrueMBB); // X86 requires a second branch to handle UNE (and OEQ, which is mapped // to UNE above). if (NeedExtraBranch) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_1)) .addMBB(TrueMBB); } finishCondBranch(BI->getParent(), TrueMBB, FalseMBB); return true; } } else if (TruncInst *TI = dyn_cast(BI->getCondition())) { // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which // typically happen for _Bool and C++ bools. MVT SourceVT; if (TI->hasOneUse() && TI->getParent() == I->getParent() && isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) { unsigned TestOpc = 0; switch (SourceVT.SimpleTy) { default: break; case MVT::i8: TestOpc = X86::TEST8ri; break; case MVT::i16: TestOpc = X86::TEST16ri; break; case MVT::i32: TestOpc = X86::TEST32ri; break; case MVT::i64: TestOpc = X86::TEST64ri32; break; } if (TestOpc) { unsigned OpReg = getRegForValue(TI->getOperand(0)); if (OpReg == 0) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc)) .addReg(OpReg).addImm(1); unsigned JmpOpc = X86::JNE_1; if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) { std::swap(TrueMBB, FalseMBB); JmpOpc = X86::JE_1; } BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc)) .addMBB(TrueMBB); finishCondBranch(BI->getParent(), TrueMBB, FalseMBB); return true; } } } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) { // Fake request the condition, otherwise the intrinsic might be completely // optimized away. unsigned TmpReg = getRegForValue(BI->getCondition()); if (TmpReg == 0) return false; unsigned BranchOpc = X86::GetCondBranchFromCond(CC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc)) .addMBB(TrueMBB); finishCondBranch(BI->getParent(), TrueMBB, FalseMBB); return true; } // Otherwise do a clumsy setcc and re-test it. // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used // in an explicit cast, so make sure to handle that correctly. 
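  // With AVX-512 the i1 condition may live in a mask (K) register, so it is
  // copied into a GPR below before being tested with TEST8ri.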
unsigned OpReg = getRegForValue(BI->getCondition()); if (OpReg == 0) return false; // In case OpReg is a K register, COPY to a GPR if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) { unsigned KOpReg = OpReg; OpReg = createResultReg(&X86::GR32RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), OpReg) .addReg(KOpReg); OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true, X86::sub_8bit); } BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) .addReg(OpReg) .addImm(1); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1)) .addMBB(TrueMBB); finishCondBranch(BI->getParent(), TrueMBB, FalseMBB); return true; } bool X86FastISel::X86SelectShift(const Instruction *I) { unsigned CReg = 0, OpReg = 0; const TargetRegisterClass *RC = nullptr; if (I->getType()->isIntegerTy(8)) { CReg = X86::CL; RC = &X86::GR8RegClass; switch (I->getOpcode()) { case Instruction::LShr: OpReg = X86::SHR8rCL; break; case Instruction::AShr: OpReg = X86::SAR8rCL; break; case Instruction::Shl: OpReg = X86::SHL8rCL; break; default: return false; } } else if (I->getType()->isIntegerTy(16)) { CReg = X86::CX; RC = &X86::GR16RegClass; switch (I->getOpcode()) { default: llvm_unreachable("Unexpected shift opcode"); case Instruction::LShr: OpReg = X86::SHR16rCL; break; case Instruction::AShr: OpReg = X86::SAR16rCL; break; case Instruction::Shl: OpReg = X86::SHL16rCL; break; } } else if (I->getType()->isIntegerTy(32)) { CReg = X86::ECX; RC = &X86::GR32RegClass; switch (I->getOpcode()) { default: llvm_unreachable("Unexpected shift opcode"); case Instruction::LShr: OpReg = X86::SHR32rCL; break; case Instruction::AShr: OpReg = X86::SAR32rCL; break; case Instruction::Shl: OpReg = X86::SHL32rCL; break; } } else if (I->getType()->isIntegerTy(64)) { CReg = X86::RCX; RC = &X86::GR64RegClass; switch (I->getOpcode()) { default: llvm_unreachable("Unexpected shift opcode"); case Instruction::LShr: OpReg = X86::SHR64rCL; break; case Instruction::AShr: OpReg = X86::SAR64rCL; break; case Instruction::Shl: OpReg = X86::SHL64rCL; break; } } else { return false; } MVT VT; if (!isTypeLegal(I->getType(), VT)) return false; unsigned Op0Reg = getRegForValue(I->getOperand(0)); if (Op0Reg == 0) return false; unsigned Op1Reg = getRegForValue(I->getOperand(1)); if (Op1Reg == 0) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), CReg).addReg(Op1Reg); // The shift instruction uses X86::CL. If we defined a super-register // of X86::CL, emit a subreg KILL to precisely describe what we're doing here. if (CReg != X86::CL) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::KILL), X86::CL) .addReg(CReg, RegState::Kill); unsigned ResultReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg) .addReg(Op0Reg); updateValueMap(I, ResultReg); return true; } bool X86FastISel::X86SelectDivRem(const Instruction *I) { const static unsigned NumTypes = 4; // i8, i16, i32, i64 const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem const static bool S = true; // IsSigned const static bool U = false; // !IsSigned const static unsigned Copy = TargetOpcode::COPY; // For the X86 DIV/IDIV instruction, in most cases the dividend // (numerator) must be in a specific register pair highreg:lowreg, // producing the quotient in lowreg and the remainder in highreg. 
// For most data types, to set up the instruction, the dividend is // copied into lowreg, and lowreg is sign-extended or zero-extended // into highreg. The exception is i8, where the dividend is defined // as a single register rather than a register pair, and we // therefore directly sign-extend or zero-extend the dividend into // lowreg, instead of copying, and ignore the highreg. const static struct DivRemEntry { // The following portion depends only on the data type. const TargetRegisterClass *RC; unsigned LowInReg; // low part of the register pair unsigned HighInReg; // high part of the register pair // The following portion depends on both the data type and the operation. struct DivRemResult { unsigned OpDivRem; // The specific DIV/IDIV opcode to use. unsigned OpSignExtend; // Opcode for sign-extending lowreg into // highreg, or copying a zero into highreg. unsigned OpCopy; // Opcode for copying dividend into lowreg, or // zero/sign-extending into lowreg for i8. unsigned DivRemResultReg; // Register containing the desired result. bool IsOpSigned; // Whether to use signed or unsigned form. } ResultTable[NumOps]; } OpTable[NumTypes] = { { &X86::GR8RegClass, X86::AX, 0, { { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem } }, // i8 { &X86::GR16RegClass, X86::AX, X86::DX, { { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem } }, // i16 { &X86::GR32RegClass, X86::EAX, X86::EDX, { { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem } }, // i32 { &X86::GR64RegClass, X86::RAX, X86::RDX, { { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem } }, // i64 }; MVT VT; if (!isTypeLegal(I->getType(), VT)) return false; unsigned TypeIndex, OpIndex; switch (VT.SimpleTy) { default: return false; case MVT::i8: TypeIndex = 0; break; case MVT::i16: TypeIndex = 1; break; case MVT::i32: TypeIndex = 2; break; case MVT::i64: TypeIndex = 3; if (!Subtarget->is64Bit()) return false; break; } switch (I->getOpcode()) { default: llvm_unreachable("Unexpected div/rem opcode"); case Instruction::SDiv: OpIndex = 0; break; case Instruction::SRem: OpIndex = 1; break; case Instruction::UDiv: OpIndex = 2; break; case Instruction::URem: OpIndex = 3; break; } const DivRemEntry &TypeEntry = OpTable[TypeIndex]; const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex]; unsigned Op0Reg = getRegForValue(I->getOperand(0)); if (Op0Reg == 0) return false; unsigned Op1Reg = getRegForValue(I->getOperand(1)); if (Op1Reg == 0) return false; // Move op0 into low-order input register. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg); // Zero-extend or sign-extend into high-order input register. 
  if (OpEntry.OpSignExtend) {
    if (OpEntry.IsOpSigned)
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(OpEntry.OpSignExtend));
    else {
      unsigned Zero32 = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(X86::MOV32r0), Zero32);

      // Copy the zero into the appropriate sub/super/identical physical
      // register. Unfortunately the operations needed are not uniform enough
      // to fit neatly into the table above.
      if (VT == MVT::i16) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(Copy), TypeEntry.HighInReg)
          .addReg(Zero32, 0, X86::sub_16bit);
      } else if (VT == MVT::i32) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(Copy), TypeEntry.HighInReg)
            .addReg(Zero32);
      } else if (VT == MVT::i64) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
            .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
      }
    }
  }
  // Generate the DIV/IDIV instruction.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
          TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
  // For i8 remainder, we can't reference ah directly, as we'll end
  // up with bogus copies like %r9b = COPY %ah. Reference ax
  // instead to prevent ah references in a rex instruction.
  //
  // The current assumption of the fast register allocator is that isel
  // won't generate explicit references to the GR8_NOREX registers. If
  // the allocator and/or the backend get enhanced to be more robust in
  // that regard, this can be, and should be, removed.
  unsigned ResultReg = 0;
  if ((I->getOpcode() == Instruction::SRem ||
       I->getOpcode() == Instruction::URem) &&
      OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
    unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
    unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(Copy), SourceSuperReg).addReg(X86::AX);

    // Shift AX right by 8 bits instead of using AH.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
            ResultSuperReg).addReg(SourceSuperReg).addImm(8);

    // Now reference the 8-bit subreg of the result.
    ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
                                           /*Kill=*/true, X86::sub_8bit);
  }
  // Copy the result out of the physreg if we haven't already.
  if (!ResultReg) {
    ResultReg = createResultReg(TypeEntry.RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
        .addReg(OpEntry.DivRemResultReg);
  }
  updateValueMap(I, ResultReg);

  return true;
}

/// Emit a conditional move instruction (if they are supported) to lower
/// the select.
bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
  // Check if the subtarget supports these instructions.
  if (!Subtarget->hasCMov())
    return false;

  // FIXME: Add support for i8.
  if (RetVT < MVT::i16 || RetVT > MVT::i64)
    return false;

  const Value *Cond = I->getOperand(0);
  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
  bool NeedTest = true;
  X86::CondCode CC = X86::COND_NE;

  // Optimize conditions coming from a compare if both instructions are in the
  // same basic block (values defined in other basic blocks may not have
  // initialized registers).
  const auto *CI = dyn_cast<CmpInst>(Cond);
  if (CI && (CI->getParent() == I->getParent())) {
    CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);

    // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
static const uint16_t SETFOpcTable[2][3] = { { X86::SETNPr, X86::SETEr , X86::TEST8rr }, { X86::SETPr, X86::SETNEr, X86::OR8rr } }; const uint16_t *SETFOpc = nullptr; switch (Predicate) { default: break; case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; Predicate = CmpInst::ICMP_NE; break; case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; Predicate = CmpInst::ICMP_NE; break; } bool NeedSwap; std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate); assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); const Value *CmpLHS = CI->getOperand(0); const Value *CmpRHS = CI->getOperand(1); if (NeedSwap) std::swap(CmpLHS, CmpRHS); EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType()); // Emit a compare of the LHS and RHS, setting the flags. if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc())) return false; if (SETFOpc) { unsigned FlagReg1 = createResultReg(&X86::GR8RegClass); unsigned FlagReg2 = createResultReg(&X86::GR8RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]), FlagReg1); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]), FlagReg2); auto const &II = TII.get(SETFOpc[2]); if (II.getNumDefs()) { unsigned TmpReg = createResultReg(&X86::GR8RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg) .addReg(FlagReg2).addReg(FlagReg1); } else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(FlagReg2).addReg(FlagReg1); } } NeedTest = false; } else if (foldX86XALUIntrinsic(CC, I, Cond)) { // Fake request the condition, otherwise the intrinsic might be completely // optimized away. unsigned TmpReg = getRegForValue(Cond); if (TmpReg == 0) return false; NeedTest = false; } if (NeedTest) { // Selects operate on i1, however, CondReg is 8 bits width and may contain // garbage. Indeed, only the less significant bit is supposed to be // accurate. If we read more than the lsb, we may see non-zero values // whereas lsb is zero. Therefore, we have to truncate Op0Reg to i1 for // the select. This is achieved by performing TEST against 1. unsigned CondReg = getRegForValue(Cond); if (CondReg == 0) return false; bool CondIsKill = hasTrivialKill(Cond); // In case OpReg is a K register, COPY to a GPR if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) { unsigned KCondReg = CondReg; CondReg = createResultReg(&X86::GR32RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), CondReg) .addReg(KCondReg, getKillRegState(CondIsKill)); CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true, X86::sub_8bit); } BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) .addReg(CondReg, getKillRegState(CondIsKill)) .addImm(1); } const Value *LHS = I->getOperand(1); const Value *RHS = I->getOperand(2); unsigned RHSReg = getRegForValue(RHS); bool RHSIsKill = hasTrivialKill(RHS); unsigned LHSReg = getRegForValue(LHS); bool LHSIsKill = hasTrivialKill(LHS); if (!LHSReg || !RHSReg) return false; const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo(); unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8); unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill); updateValueMap(I, ResultReg); return true; } /// Emit SSE or AVX instructions to lower the select. /// /// Try to use SSE1/SSE2 instructions to simulate a select without branches. /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary /// SSE instructions are available. If AVX is available, try to use a VBLENDV. 
bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
  // Optimize conditions coming from a compare if both instructions are in the
  // same basic block (values defined in other basic blocks may not have
  // initialized registers).
  const auto *CI = dyn_cast<CmpInst>(I->getOperand(0));
  if (!CI || (CI->getParent() != I->getParent()))
    return false;

  if (I->getType() != CI->getOperand(0)->getType() ||
      !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
        (Subtarget->hasSSE2() && RetVT == MVT::f64)))
    return false;

  const Value *CmpLHS = CI->getOperand(0);
  const Value *CmpRHS = CI->getOperand(1);
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);

  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
  // We don't have to materialize a zero constant for this case and can just
  // use %x again on the RHS.
  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
    const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
    if (CmpRHSC && CmpRHSC->isNullValue())
      CmpRHS = CmpLHS;
  }

  unsigned CC;
  bool NeedSwap;
  std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
  if (CC > 7 && !Subtarget->hasAVX())
    return false;

  if (NeedSwap)
    std::swap(CmpLHS, CmpRHS);

  // Choose the SSE instruction sequence based on data type (float or double).
  static const uint16_t OpcTable[2][4] = {
    { X86::CMPSSrr,  X86::ANDPSrr,  X86::ANDNPSrr,  X86::ORPSrr  },
    { X86::CMPSDrr,  X86::ANDPDrr,  X86::ANDNPDrr,  X86::ORPDrr  }
  };

  const uint16_t *Opc = nullptr;
  switch (RetVT.SimpleTy) {
  default: return false;
  case MVT::f32: Opc = &OpcTable[0][0]; break;
  case MVT::f64: Opc = &OpcTable[1][0]; break;
  }

  const Value *LHS = I->getOperand(1);
  const Value *RHS = I->getOperand(2);

  unsigned LHSReg = getRegForValue(LHS);
  bool LHSIsKill = hasTrivialKill(LHS);

  unsigned RHSReg = getRegForValue(RHS);
  bool RHSIsKill = hasTrivialKill(RHS);

  unsigned CmpLHSReg = getRegForValue(CmpLHS);
  bool CmpLHSIsKill = hasTrivialKill(CmpLHS);

  unsigned CmpRHSReg = getRegForValue(CmpRHS);
  bool CmpRHSIsKill = hasTrivialKill(CmpRHS);

  if (!LHSReg || !RHSReg || !CmpLHS || !CmpRHS)
    return false;

  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
  unsigned ResultReg;

  if (Subtarget->hasAVX512()) {
    // If we have AVX512 we can use a mask compare and masked movss/sd.
    const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
    const TargetRegisterClass *VK1 = &X86::VK1RegClass;

    unsigned CmpOpcode =
      (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
    unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
                                       CmpRHSReg, CmpRHSIsKill, CC);

    // Need an IMPLICIT_DEF for the input that is used to generate the upper
    // bits of the result register since it's not based on any of the inputs.
    unsigned ImplicitDefReg = createResultReg(VR128X);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);

    // Place RHSReg in the passthru of the masked movss/sd operation and put
    // LHS in the input. The mask input comes from the compare.
    unsigned MovOpcode =
      (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
    unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill,
                                        CmpReg, true, ImplicitDefReg, true,
                                        LHSReg, LHSIsKill);

    ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);

  } else if (Subtarget->hasAVX()) {
    const TargetRegisterClass *VR128 = &X86::VR128RegClass;

    // If we have AVX, create 1 blendv instead of 3 logic instructions.
// Blendv was introduced with SSE 4.1, but the 2 register form implicitly // uses XMM0 as the selection register. That may need just as many // instructions as the AND/ANDN/OR sequence due to register moves, so // don't bother. unsigned CmpOpcode = (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr; unsigned BlendOpcode = (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr; unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill, CmpRHSReg, CmpRHSIsKill, CC); unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CmpReg, true); ResultReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg); } else { const TargetRegisterClass *VR128 = &X86::VR128RegClass; unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill, CmpRHSReg, CmpRHSIsKill, CC); unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false, LHSReg, LHSIsKill); unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true, RHSReg, RHSIsKill); unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true, AndReg, /*IsKill=*/true); ResultReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg); } updateValueMap(I, ResultReg); return true; } bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) { // These are pseudo CMOV instructions and will be later expanded into control- // flow. unsigned Opc; switch (RetVT.SimpleTy) { default: return false; case MVT::i8: Opc = X86::CMOV_GR8; break; case MVT::i16: Opc = X86::CMOV_GR16; break; case MVT::i32: Opc = X86::CMOV_GR32; break; case MVT::f32: Opc = X86::CMOV_FR32; break; case MVT::f64: Opc = X86::CMOV_FR64; break; } const Value *Cond = I->getOperand(0); X86::CondCode CC = X86::COND_NE; // Optimize conditions coming from a compare if both instructions are in the // same basic block (values defined in other basic blocks may not have // initialized registers). 
  const auto *CI = dyn_cast<CmpInst>(Cond);
  if (CI && (CI->getParent() == I->getParent())) {
    bool NeedSwap;
    std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate());
    if (CC > X86::LAST_VALID_COND)
      return false;

    const Value *CmpLHS = CI->getOperand(0);
    const Value *CmpRHS = CI->getOperand(1);

    if (NeedSwap)
      std::swap(CmpLHS, CmpRHS);

    EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
    if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
      return false;
  } else {
    unsigned CondReg = getRegForValue(Cond);
    if (CondReg == 0)
      return false;
    bool CondIsKill = hasTrivialKill(Cond);

    // In case OpReg is a K register, COPY to a GPR
    if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
      unsigned KCondReg = CondReg;
      CondReg = createResultReg(&X86::GR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), CondReg)
          .addReg(KCondReg, getKillRegState(CondIsKill));
      CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
                                           X86::sub_8bit);
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
        .addReg(CondReg, getKillRegState(CondIsKill))
        .addImm(1);
  }

  const Value *LHS = I->getOperand(1);
  const Value *RHS = I->getOperand(2);

  unsigned LHSReg = getRegForValue(LHS);
  bool LHSIsKill = hasTrivialKill(LHS);

  unsigned RHSReg = getRegForValue(RHS);
  bool RHSIsKill = hasTrivialKill(RHS);

  if (!LHSReg || !RHSReg)
    return false;

  const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);

  unsigned ResultReg =
    fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectSelect(const Instruction *I) {
  MVT RetVT;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  // Check if we can fold the select.
  if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
    CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
    const Value *Opnd = nullptr;
    switch (Predicate) {
    default:                              break;
    case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
    case CmpInst::FCMP_TRUE:  Opnd = I->getOperand(1); break;
    }
    // No need for a select anymore - this is an unconditional move.
    if (Opnd) {
      unsigned OpReg = getRegForValue(Opnd);
      if (OpReg == 0)
        return false;
      bool OpIsKill = hasTrivialKill(Opnd);
      const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
      unsigned ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(OpReg, getKillRegState(OpIsKill));
      updateValueMap(I, ResultReg);
      return true;
    }
  }

  // First try to use real conditional move instructions.
  if (X86FastEmitCMoveSelect(RetVT, I))
    return true;

  // Try to use a sequence of SSE instructions to simulate a conditional move.
  if (X86FastEmitSSESelect(RetVT, I))
    return true;

  // Fall-back to pseudo conditional move instructions, which will be later
  // converted to control-flow.
  if (X86FastEmitPseudoSelect(RetVT, I))
    return true;

  return false;
}

// Common code for X86SelectSIToFP and X86SelectUIToFP.
bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
  // The target-independent selection algorithm in FastISel already knows how
  // to select a SINT_TO_FP if the target is SSE but not AVX.
  // Early exit if the subtarget doesn't have AVX.
  // Unsigned conversion requires avx512.
  bool HasAVX512 = Subtarget->hasAVX512();
  if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
    return false;

  // TODO: We could sign extend narrower types.
MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType()); if (SrcVT != MVT::i32 && SrcVT != MVT::i64) return false; // Select integer to float/double conversion. unsigned OpReg = getRegForValue(I->getOperand(0)); if (OpReg == 0) return false; unsigned Opcode; static const uint16_t SCvtOpc[2][2][2] = { { { X86::VCVTSI2SSrr, X86::VCVTSI642SSrr }, { X86::VCVTSI2SDrr, X86::VCVTSI642SDrr } }, { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr }, { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } }, }; static const uint16_t UCvtOpc[2][2] = { { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr }, { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr }, }; bool Is64Bit = SrcVT == MVT::i64; if (I->getType()->isDoubleTy()) { // s/uitofp int -> double Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit]; } else if (I->getType()->isFloatTy()) { // s/uitofp int -> float Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit]; } else return false; MVT DstVT = TLI.getValueType(DL, I->getType()).getSimpleVT(); const TargetRegisterClass *RC = TLI.getRegClassFor(DstVT); unsigned ImplicitDefReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg); unsigned ResultReg = fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false); updateValueMap(I, ResultReg); return true; } bool X86FastISel::X86SelectSIToFP(const Instruction *I) { return X86SelectIntToFP(I, /*IsSigned*/true); } bool X86FastISel::X86SelectUIToFP(const Instruction *I) { return X86SelectIntToFP(I, /*IsSigned*/false); } // Helper method used by X86SelectFPExt and X86SelectFPTrunc. bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned TargetOpc, const TargetRegisterClass *RC) { assert((I->getOpcode() == Instruction::FPExt || I->getOpcode() == Instruction::FPTrunc) && "Instruction must be an FPExt or FPTrunc!"); unsigned OpReg = getRegForValue(I->getOperand(0)); if (OpReg == 0) return false; unsigned ImplicitDefReg; if (Subtarget->hasAVX()) { ImplicitDefReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg); } unsigned ResultReg = createResultReg(RC); MachineInstrBuilder MIB; MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc), ResultReg); if (Subtarget->hasAVX()) MIB.addReg(ImplicitDefReg); MIB.addReg(OpReg); updateValueMap(I, ResultReg); return true; } bool X86FastISel::X86SelectFPExt(const Instruction *I) { if (X86ScalarSSEf64 && I->getType()->isDoubleTy() && I->getOperand(0)->getType()->isFloatTy()) { bool HasAVX512 = Subtarget->hasAVX512(); // fpext from float to double. unsigned Opc = HasAVX512 ? X86::VCVTSS2SDZrr : Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr; return X86SelectFPExtOrFPTrunc( I, Opc, HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass); } return false; } bool X86FastISel::X86SelectFPTrunc(const Instruction *I) { if (X86ScalarSSEf64 && I->getType()->isFloatTy() && I->getOperand(0)->getType()->isDoubleTy()) { bool HasAVX512 = Subtarget->hasAVX512(); // fptrunc from double to float. unsigned Opc = HasAVX512 ? X86::VCVTSD2SSZrr : Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr; return X86SelectFPExtOrFPTrunc( I, Opc, HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass); } return false; } bool X86FastISel::X86SelectTrunc(const Instruction *I) { EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(DL, I->getType()); // This code only handles truncation to byte. 
if (DstVT != MVT::i8 && DstVT != MVT::i1) return false; if (!TLI.isTypeLegal(SrcVT)) return false; unsigned InputReg = getRegForValue(I->getOperand(0)); if (!InputReg) // Unhandled operand. Halt "fast" selection and bail. return false; if (SrcVT == MVT::i8) { // Truncate from i8 to i1; no code needed. updateValueMap(I, InputReg); return true; } // Issue an extract_subreg. unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8, InputReg, false, X86::sub_8bit); if (!ResultReg) return false; updateValueMap(I, ResultReg); return true; } bool X86FastISel::IsMemcpySmall(uint64_t Len) { return Len <= (Subtarget->is64Bit() ? 32 : 16); } bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM, X86AddressMode SrcAM, uint64_t Len) { // Make sure we don't bloat code by inlining very large memcpy's. if (!IsMemcpySmall(Len)) return false; bool i64Legal = Subtarget->is64Bit(); // We don't care about alignment here since we just emit integer accesses. while (Len) { MVT VT; if (Len >= 8 && i64Legal) VT = MVT::i64; else if (Len >= 4) VT = MVT::i32; else if (Len >= 2) VT = MVT::i16; else VT = MVT::i8; unsigned Reg; bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg); RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM); assert(RV && "Failed to emit load or store??"); unsigned Size = VT.getSizeInBits()/8; Len -= Size; DestAM.Disp += Size; SrcAM.Disp += Size; } return true; } bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { // FIXME: Handle more intrinsics. switch (II->getIntrinsicID()) { default: return false; case Intrinsic::convert_from_fp16: case Intrinsic::convert_to_fp16: { if (Subtarget->useSoftFloat() || !Subtarget->hasF16C()) return false; const Value *Op = II->getArgOperand(0); unsigned InputReg = getRegForValue(Op); if (InputReg == 0) return false; // F16C only allows converting from float to half and from half to float. bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16; if (IsFloatToHalf) { if (!Op->getType()->isFloatTy()) return false; } else { if (!II->getType()->isFloatTy()) return false; } unsigned ResultReg = 0; const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16); if (IsFloatToHalf) { // 'InputReg' is implicitly promoted from register class FR32 to // register class VR128 by method 'constrainOperandRegClass' which is // directly called by 'fastEmitInst_ri'. // Instruction VCVTPS2PHrr takes an extra immediate operand which is // used to provide rounding control: use MXCSR.RC, encoded as 0b100. // It's consistent with the other FP instructions, which are usually // controlled by MXCSR. InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 4); // Move the lower 32-bits of ResultReg to another register of class GR32. ResultReg = createResultReg(&X86::GR32RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::VMOVPDI2DIrr), ResultReg) .addReg(InputReg, RegState::Kill); // The result value is in the lower 16-bits of ResultReg. unsigned RegIdx = X86::sub_16bit; ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx); } else { assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!"); // Explicitly sign-extend the input to 32-bit. InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg, /*Kill=*/false); // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr. 
      InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
                            InputReg, /*Kill=*/true);

      InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);

      // The result value is in the lower 32-bits of ResultReg.
      // Emit an explicit copy from register class VR128 to register class FR32.
      ResultReg = createResultReg(&X86::FR32RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(InputReg, RegState::Kill);
    }

    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::frameaddress: {
    MachineFunction *MF = FuncInfo.MF;
    if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
      return false;

    Type *RetTy = II->getCalledFunction()->getReturnType();

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    unsigned Opc;
    const TargetRegisterClass *RC = nullptr;

    switch (VT.SimpleTy) {
    default: llvm_unreachable("Invalid result type for frameaddress.");
    case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
    case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
    }

    // This needs to be set before we call getPtrSizedFrameRegister, otherwise
    // we get the wrong frame register.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    MFI.setFrameAddressIsTaken(true);

    const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
    unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
    assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
            (FrameReg == X86::EBP && VT == MVT::i32)) &&
           "Invalid Frame Register!");

    // Always make a copy of the frame register to a vreg first, so that we
    // never directly reference the frame register (the TwoAddressInstruction-
    // Pass doesn't like that).
    unsigned SrcReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);

    // Now recursively load from the frame address.
    // movq (%rbp), %rax
    // movq (%rax), %rax
    // movq (%rax), %rax
    // ...
    unsigned DestReg;
    unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
    while (Depth--) {
      DestReg = createResultReg(RC);
      addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                           TII.get(Opc), DestReg), SrcReg);
      SrcReg = DestReg;
    }

    updateValueMap(II, SrcReg);
    return true;
  }
  case Intrinsic::memcpy: {
    const MemCpyInst *MCI = cast<MemCpyInst>(II);
    // Don't handle volatile or variable length memcpys.
    if (MCI->isVolatile())
      return false;

    if (isa<ConstantInt>(MCI->getLength())) {
      // Small memcpy's are common enough that we want to do them
      // without a call if possible.
      uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
      if (IsMemcpySmall(Len)) {
        X86AddressMode DestAM, SrcAM;
        if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
            !X86SelectAddress(MCI->getRawSource(), SrcAM))
          return false;
        TryEmitSmallMemcpy(DestAM, SrcAM, Len);
        return true;
      }
    }

    unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
    if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
      return false;

    if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
      return false;

    return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1);
  }
  case Intrinsic::memset: {
    const MemSetInst *MSI = cast<MemSetInst>(II);

    if (MSI->isVolatile())
      return false;

    unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
    if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
      return false;

    if (MSI->getDestAddressSpace() > 255)
      return false;

    return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
  }
  case Intrinsic::stackprotector: {
    // Emit code to store the stack guard onto the stack.
    EVT PtrTy = TLI.getPointerTy(DL);

    const Value *Op1 = II->getArgOperand(0); // The guard's value.
    const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));

    MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);

    // Grab the frame index.
    X86AddressMode AM;
    if (!X86SelectAddress(Slot, AM)) return false;
    if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
    return true;
  }
  case Intrinsic::dbg_declare: {
    const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
    X86AddressMode AM;
    assert(DI->getAddress() && "Null address should be checked earlier!");
    if (!X86SelectAddress(DI->getAddress(), AM))
      return false;
    const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
    // FIXME may need to add RegState::Debug to any registers produced,
    // although ESP/EBP should be the only ones at the moment.
    assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
           "Expected inlined-at fields to agree");
    addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
        .addImm(0)
        .addMetadata(DI->getVariable())
        .addMetadata(DI->getExpression());
    return true;
  }
  case Intrinsic::trap: {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
    return true;
  }
  case Intrinsic::sqrt: {
    if (!Subtarget->hasSSE1())
      return false;

    Type *RetTy = II->getCalledFunction()->getReturnType();

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
    // is not generated by FastISel yet.
    // FIXME: Update this code once tablegen can handle it.
    static const uint16_t SqrtOpc[3][2] = {
      { X86::SQRTSSr,   X86::SQRTSDr },
      { X86::VSQRTSSr,  X86::VSQRTSDr },
      { X86::VSQRTSSZr, X86::VSQRTSDZr },
    };
    unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
                        Subtarget->hasAVX()    ? 1 :
                                                 0;
    unsigned Opc;
    switch (VT.SimpleTy) {
    default: return false;
    case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
    case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
    }

    const Value *SrcVal = II->getArgOperand(0);
    unsigned SrcReg = getRegForValue(SrcVal);

    if (SrcReg == 0)
      return false;

    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
    unsigned ImplicitDefReg = 0;
    if (AVXLevel > 0) {
      ImplicitDefReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
    }

    unsigned ResultReg = createResultReg(RC);
    MachineInstrBuilder MIB;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
                  ResultReg);

    if (ImplicitDefReg)
      MIB.addReg(ImplicitDefReg);

    MIB.addReg(SrcReg);

    updateValueMap(II, ResultReg);
    return true;
  }
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: {
    // This implements the basic lowering of the xalu with overflow intrinsics
    // into add/sub/mul followed by either seto or setb.
    const Function *Callee = II->getCalledFunction();
    auto *Ty = cast<StructType>(Callee->getReturnType());
    Type *RetTy = Ty->getTypeAtIndex(0U);
    assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
           Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
           "Overflow value expected to be an i1");

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    if (VT < MVT::i8 || VT > MVT::i64)
      return false;

    const Value *LHS = II->getArgOperand(0);
    const Value *RHS = II->getArgOperand(1);

    // Canonicalize immediate to the RHS.
    if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
        isCommutativeIntrinsic(II))
      std::swap(LHS, RHS);

    unsigned BaseOpc, CondOpc;
    switch (II->getIntrinsicID()) {
    default: llvm_unreachable("Unexpected intrinsic!");
    case Intrinsic::sadd_with_overflow:
      BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break;
    case Intrinsic::uadd_with_overflow:
      BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
    case Intrinsic::ssub_with_overflow:
      BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break;
    case Intrinsic::usub_with_overflow:
      BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
    case Intrinsic::smul_with_overflow:
      BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
    case Intrinsic::umul_with_overflow:
      BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
    }

    unsigned LHSReg = getRegForValue(LHS);
    if (LHSReg == 0)
      return false;
    bool LHSIsKill = hasTrivialKill(LHS);

    unsigned ResultReg = 0;
    // Check if we have an immediate version.
    if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
      static const uint16_t Opc[2][4] = {
        { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
        { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
      };

      if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
          CondOpc == X86::SETOr) {
        // We can use INC/DEC.
        ResultReg = createResultReg(TLI.getRegClassFor(VT));
        bool IsDec = BaseOpc == ISD::SUB;
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
          .addReg(LHSReg, getKillRegState(LHSIsKill));
      } else
        ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
                                CI->getZExtValue());
    }

    unsigned RHSReg;
    bool RHSIsKill;
    if (!ResultReg) {
      RHSReg = getRegForValue(RHS);
      if (RHSReg == 0)
        return false;
      RHSIsKill = hasTrivialKill(RHS);
      ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
                              RHSIsKill);
    }

    // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
    // it manually.
    if (BaseOpc == X86ISD::UMUL && !ResultReg) {
      static const uint16_t MULOpc[] =
        { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
      static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
      // First copy the first operand into RAX, which is an implicit input to
      // the X86::MUL*r instruction.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
        .addReg(LHSReg, getKillRegState(LHSIsKill));
      ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
                                 TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
    } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
      static const uint16_t MULOpc[] =
        { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
      if (VT == MVT::i8) {
        // Copy the first operand into AL, which is an implicit input to the
        // X86::IMUL8r instruction.
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(TargetOpcode::COPY), X86::AL)
          .addReg(LHSReg, getKillRegState(LHSIsKill));
        ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
                                   RHSIsKill);
      } else
        ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
                                    TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
                                    RHSReg, RHSIsKill);
    }

    if (!ResultReg)
      return false;

    // Assign to a GPR since the overflow return value is lowered to a SETcc.
unsigned ResultReg2 = createResultReg(&X86::GR8RegClass); assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers."); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc), ResultReg2); updateValueMap(II, ResultReg, 2); return true; } case Intrinsic::x86_sse_cvttss2si: case Intrinsic::x86_sse_cvttss2si64: case Intrinsic::x86_sse2_cvttsd2si: case Intrinsic::x86_sse2_cvttsd2si64: { bool IsInputDouble; switch (II->getIntrinsicID()) { default: llvm_unreachable("Unexpected intrinsic."); case Intrinsic::x86_sse_cvttss2si: case Intrinsic::x86_sse_cvttss2si64: if (!Subtarget->hasSSE1()) return false; IsInputDouble = false; break; case Intrinsic::x86_sse2_cvttsd2si: case Intrinsic::x86_sse2_cvttsd2si64: if (!Subtarget->hasSSE2()) return false; IsInputDouble = true; break; } Type *RetTy = II->getCalledFunction()->getReturnType(); MVT VT; if (!isTypeLegal(RetTy, VT)) return false; static const uint16_t CvtOpc[3][2][2] = { { { X86::CVTTSS2SIrr, X86::CVTTSS2SI64rr }, { X86::CVTTSD2SIrr, X86::CVTTSD2SI64rr } }, { { X86::VCVTTSS2SIrr, X86::VCVTTSS2SI64rr }, { X86::VCVTTSD2SIrr, X86::VCVTTSD2SI64rr } }, { { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr }, { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } }, }; unsigned AVXLevel = Subtarget->hasAVX512() ? 2 : Subtarget->hasAVX() ? 1 : 0; unsigned Opc; switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected result type."); case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break; case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break; } // Check if we can fold insertelement instructions into the convert. const Value *Op = II->getArgOperand(0); while (auto *IE = dyn_cast(Op)) { const Value *Index = IE->getOperand(2); if (!isa(Index)) break; unsigned Idx = cast(Index)->getZExtValue(); if (Idx == 0) { Op = IE->getOperand(1); break; } Op = IE->getOperand(0); } unsigned Reg = getRegForValue(Op); if (Reg == 0) return false; unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(Reg); updateValueMap(II, ResultReg); return true; } } } bool X86FastISel::fastLowerArguments() { if (!FuncInfo.CanLowerReturn) return false; const Function *F = FuncInfo.Fn; if (F->isVarArg()) return false; CallingConv::ID CC = F->getCallingConv(); if (CC != CallingConv::C) return false; if (Subtarget->isCallingConvWin64(CC)) return false; if (!Subtarget->is64Bit()) return false; if (Subtarget->useSoftFloat()) return false; // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments. 
unsigned GPRCnt = 0; unsigned FPRCnt = 0; for (auto const &Arg : F->args()) { if (Arg.hasAttribute(Attribute::ByVal) || Arg.hasAttribute(Attribute::InReg) || Arg.hasAttribute(Attribute::StructRet) || Arg.hasAttribute(Attribute::SwiftSelf) || Arg.hasAttribute(Attribute::SwiftError) || Arg.hasAttribute(Attribute::Nest)) return false; Type *ArgTy = Arg.getType(); if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) return false; EVT ArgVT = TLI.getValueType(DL, ArgTy); if (!ArgVT.isSimple()) return false; switch (ArgVT.getSimpleVT().SimpleTy) { default: return false; case MVT::i32: case MVT::i64: ++GPRCnt; break; case MVT::f32: case MVT::f64: if (!Subtarget->hasSSE1()) return false; ++FPRCnt; break; } if (GPRCnt > 6) return false; if (FPRCnt > 8) return false; } static const MCPhysReg GPR32ArgRegs[] = { X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D }; static const MCPhysReg GPR64ArgRegs[] = { X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9 }; static const MCPhysReg XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; unsigned GPRIdx = 0; unsigned FPRIdx = 0; for (auto const &Arg : F->args()) { MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); const TargetRegisterClass *RC = TLI.getRegClassFor(VT); unsigned SrcReg; switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type."); case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break; case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break; case MVT::f32: LLVM_FALLTHROUGH; case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break; } unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. // Without this, EmitLiveInCopies may eliminate the livein if its only // use is a bitcast (which isn't turned into an instruction). unsigned ResultReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) .addReg(DstReg, getKillRegState(true)); updateValueMap(&Arg, ResultReg); } return true; } static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget, CallingConv::ID CC, ImmutableCallSite *CS) { if (Subtarget->is64Bit()) return 0; if (Subtarget->getTargetTriple().isOSMSVCRT()) return 0; if (CC == CallingConv::Fast || CC == CallingConv::GHC || CC == CallingConv::HiPE) return 0; if (CS) if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) || CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU()) return 0; return 4; } bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { auto &OutVals = CLI.OutVals; auto &OutFlags = CLI.OutFlags; auto &OutRegs = CLI.OutRegs; auto &Ins = CLI.Ins; auto &InRegs = CLI.InRegs; CallingConv::ID CC = CLI.CallConv; bool &IsTailCall = CLI.IsTailCall; bool IsVarArg = CLI.IsVarArg; const Value *Callee = CLI.Callee; MCSymbol *Symbol = CLI.Symbol; bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isCallingConvWin64(CC); const CallInst *CI = CLI.CS ? dyn_cast(CLI.CS->getInstruction()) : nullptr; const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr; // Call / invoke instructions with NoCfCheck attribute require special // handling. const auto *II = CLI.CS ? dyn_cast(CLI.CS->getInstruction()) : nullptr; if ((CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck())) return false; // Functions with no_caller_saved_registers that need special handling. 
if ((CI && CI->hasFnAttr("no_caller_saved_registers")) || (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers"))) return false; // Functions using retpoline for indirect calls need to use SDISel. if (Subtarget->useRetpolineIndirectCalls()) return false; // Handle only C, fastcc, and webkit_js calling conventions for now. switch (CC) { default: return false; case CallingConv::C: case CallingConv::Fast: case CallingConv::WebKit_JS: case CallingConv::Swift: case CallingConv::X86_FastCall: case CallingConv::X86_StdCall: case CallingConv::X86_ThisCall: case CallingConv::Win64: case CallingConv::X86_64_SysV: break; } // Allow SelectionDAG isel to handle tail calls. if (IsTailCall) return false; // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. Fastisel doesn't know how to do that. if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) return false; // Don't know how to handle Win64 varargs yet. Nothing special needed for // x86-32. Special handling for x86-64 is implemented. if (IsVarArg && IsWin64) return false; // Don't know about inalloca yet. if (CLI.CS && CLI.CS->hasInAllocaArgument()) return false; for (auto Flag : CLI.OutFlags) if (Flag.isSwiftError()) return false; SmallVector OutVTs; SmallVector ArgRegs; // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra // instruction. This is safe because it is common to all FastISel supported // calling conventions on x86. for (int i = 0, e = OutVals.size(); i != e; ++i) { Value *&Val = OutVals[i]; ISD::ArgFlagsTy Flags = OutFlags[i]; if (auto *CI = dyn_cast(Val)) { if (CI->getBitWidth() < 32) { if (Flags.isSExt()) Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext())); else Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext())); } } // Passing bools around ends up doing a trunc to i1 and passing it. // Codegen this as an argument + "and 1". MVT VT; auto *TI = dyn_cast(Val); unsigned ResultReg; if (TI && TI->getType()->isIntegerTy(1) && CLI.CS && (TI->getParent() == CLI.CS->getInstruction()->getParent()) && TI->hasOneUse()) { Value *PrevVal = TI->getOperand(0); ResultReg = getRegForValue(PrevVal); if (!ResultReg) return false; if (!isTypeLegal(PrevVal->getType(), VT)) return false; ResultReg = fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1); } else { if (!isTypeLegal(Val->getType(), VT)) return false; ResultReg = getRegForValue(Val); } if (!ResultReg) return false; ArgRegs.push_back(ResultReg); OutVTs.push_back(VT); } // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext()); // Allocate shadow area for Win64 if (IsWin64) CCInfo.AllocateStack(32, 8); CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); // Issue CALLSEQ_START unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) .addImm(NumBytes).addImm(0).addImm(0); // Walk the register/memloc assignments, inserting copies/loads. 
const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign const &VA = ArgLocs[i]; const Value *ArgVal = OutVals[VA.getValNo()]; MVT ArgVT = OutVTs[VA.getValNo()]; if (ArgVT == MVT::x86mmx) return false; unsigned ArgReg = ArgRegs[VA.getValNo()]; // Promote the value if needed. switch (VA.getLocInfo()) { case CCValAssign::Full: break; case CCValAssign::SExt: { assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && "Unexpected extend"); if (ArgVT == MVT::i1) return false; bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg, ArgVT, ArgReg); assert(Emitted && "Failed to emit a sext!"); (void)Emitted; ArgVT = VA.getLocVT(); break; } case CCValAssign::ZExt: { assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && "Unexpected extend"); // Handle zero-extension from i1 to i8, which is common. if (ArgVT == MVT::i1) { // Set the high bits to zero. ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false); ArgVT = MVT::i8; if (ArgReg == 0) return false; } bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg, ArgVT, ArgReg); assert(Emitted && "Failed to emit a zext!"); (void)Emitted; ArgVT = VA.getLocVT(); break; } case CCValAssign::AExt: { assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && "Unexpected extend"); bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg, ArgVT, ArgReg); if (!Emitted) Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg, ArgVT, ArgReg); if (!Emitted) Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg, ArgVT, ArgReg); assert(Emitted && "Failed to emit a aext!"); (void)Emitted; ArgVT = VA.getLocVT(); break; } case CCValAssign::BCvt: { ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg, /*TODO: Kill=*/false); assert(ArgReg && "Failed to emit a bitcast!"); ArgVT = VA.getLocVT(); break; } case CCValAssign::VExt: // VExt has not been implemented, so this should be impossible to reach // for now. However, fallback to Selection DAG isel once implemented. return false; case CCValAssign::AExtUpper: case CCValAssign::SExtUpper: case CCValAssign::ZExtUpper: case CCValAssign::FPExt: llvm_unreachable("Unexpected loc info!"); case CCValAssign::Indirect: // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully // support this. return false; } if (VA.isRegLoc()) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); OutRegs.push_back(VA.getLocReg()); } else { assert(VA.isMemLoc()); // Don't emit stores for undef values. if (isa(ArgVal)) continue; unsigned LocMemOffset = VA.getLocMemOffset(); X86AddressMode AM; AM.Base.Reg = RegInfo->getStackRegister(); AM.Disp = LocMemOffset; ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()]; unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset), MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); if (Flags.isByVal()) { X86AddressMode SrcAM; SrcAM.Base.Reg = ArgReg; if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize())) return false; } else if (isa(ArgVal) || isa(ArgVal)) { // If this is a really simple value, emit this with the Value* version // of X86FastEmitStore. If it isn't simple, we don't want to do this, // as it can cause us to reevaluate the argument. 
if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO)) return false; } else { bool ValIsKill = hasTrivialKill(ArgVal); if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO)) return false; } } } // ELF / PIC requires GOT in the EBX register before function calls via PLT // GOT pointer. if (Subtarget->isPICStyleGOT()) { unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base); } if (Is64Bit && IsVarArg && !IsWin64) { // From AMD64 ABI document: // For calls that may call functions that use varargs or stdargs // (prototype-less calls or calls to functions containing ellipsis (...) in // the declaration) %al is used as hidden argument to specify the number // of SSE registers used. The contents of %al do not need to match exactly // the number of registers, but must be an ubound on the number of SSE // registers used and is in the range 0 - 8 inclusive. // Count the number of XMM registers allocated. static const MCPhysReg XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs); assert((Subtarget->hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri), X86::AL).addImm(NumXMMRegs); } // Materialize callee address in a register. FIXME: GV address can be // handled with a CALLpcrel32 instead. X86AddressMode CalleeAM; if (!X86SelectCallAddress(Callee, CalleeAM)) return false; unsigned CalleeOp = 0; const GlobalValue *GV = nullptr; if (CalleeAM.GV != nullptr) { GV = CalleeAM.GV; } else if (CalleeAM.Base.Reg != 0) { CalleeOp = CalleeAM.Base.Reg; } else return false; // Issue the call. MachineInstrBuilder MIB; if (CalleeOp) { // Register-indirect call. unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r; MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)) .addReg(CalleeOp); } else { // Direct call. assert(GV && "Not a direct call"); // See if we need any target-specific flags on the GV operand. unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV); // This will be a direct call, or an indirect call through memory for // NonLazyBind calls or dllimport calls. bool NeedLoad = OpFlags == X86II::MO_DLLIMPORT || OpFlags == X86II::MO_GOTPCREL; unsigned CallOpc = NeedLoad ? (Is64Bit ? X86::CALL64m : X86::CALL32m) : (Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32); MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)); if (NeedLoad) MIB.addReg(Is64Bit ? X86::RIP : 0).addImm(1).addReg(0); if (Symbol) MIB.addSym(Symbol, OpFlags); else MIB.addGlobalAddress(GV, 0, OpFlags); if (NeedLoad) MIB.addReg(0); } // Add a register mask operand representing the call-preserved registers. // Proper defs for return values will be added by setPhysRegsDeadExcept(). MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); // Add an implicit use GOT pointer in EBX. if (Subtarget->isPICStyleGOT()) MIB.addReg(X86::EBX, RegState::Implicit); if (Is64Bit && IsVarArg && !IsWin64) MIB.addReg(X86::AL, RegState::Implicit); // Add implicit physical register uses to the call. for (auto Reg : OutRegs) MIB.addReg(Reg, RegState::Implicit); // Issue CALLSEQ_END unsigned NumBytesForCalleeToPop = X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg, TM.Options.GuaranteedTailCallOpt) ? NumBytes // Callee pops everything. 
: computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CS); unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) .addImm(NumBytes).addImm(NumBytesForCalleeToPop); // Now handle call return values. SmallVector RVLocs; CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, CLI.RetTy->getContext()); CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86); // Copy all of the result registers out of their specified physreg. unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy); for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; EVT CopyVT = VA.getValVT(); unsigned CopyReg = ResultReg + i; unsigned SrcReg = VA.getLocReg(); // If this is x86-64, and we disabled SSE, we can't return FP values if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { report_fatal_error("SSE register return with SSE disabled"); } // If we prefer to use the value in xmm registers, copy it out as f80 and // use a truncate to move it from fp stack reg to xmm reg. if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) && isScalarFPTypeInSSEReg(VA.getValVT())) { CopyVT = MVT::f80; CopyReg = createResultReg(&X86::RFP80RegClass); } // Copy out the result. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg); InRegs.push_back(VA.getLocReg()); // Round the f80 to the right size, which also moves it to the appropriate // xmm register. This is accomplished by storing the f80 value in memory // and then loading it back. if (CopyVT != VA.getValVT()) { EVT ResVT = VA.getValVT(); unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64; unsigned MemSize = ResVT.getSizeInBits()/8; int FI = MFI.CreateStackObject(MemSize, MemSize, false); addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)), FI) .addReg(CopyReg); Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm; addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg + i), FI); } } CLI.ResultReg = ResultReg; CLI.NumResultRegs = RVLocs.size(); CLI.Call = MIB; return true; } bool X86FastISel::fastSelectInstruction(const Instruction *I) { switch (I->getOpcode()) { default: break; case Instruction::Load: return X86SelectLoad(I); case Instruction::Store: return X86SelectStore(I); case Instruction::Ret: return X86SelectRet(I); case Instruction::ICmp: case Instruction::FCmp: return X86SelectCmp(I); case Instruction::ZExt: return X86SelectZExt(I); case Instruction::SExt: return X86SelectSExt(I); case Instruction::Br: return X86SelectBranch(I); case Instruction::LShr: case Instruction::AShr: case Instruction::Shl: return X86SelectShift(I); case Instruction::SDiv: case Instruction::UDiv: case Instruction::SRem: case Instruction::URem: return X86SelectDivRem(I); case Instruction::Select: return X86SelectSelect(I); case Instruction::Trunc: return X86SelectTrunc(I); case Instruction::FPExt: return X86SelectFPExt(I); case Instruction::FPTrunc: return X86SelectFPTrunc(I); case Instruction::SIToFP: return X86SelectSIToFP(I); case Instruction::UIToFP: return X86SelectUIToFP(I); case Instruction::IntToPtr: // Deliberate fall-through. 
case Instruction::PtrToInt: { EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(DL, I->getType()); if (DstVT.bitsGT(SrcVT)) return X86SelectZExt(I); if (DstVT.bitsLT(SrcVT)) return X86SelectTrunc(I); unsigned Reg = getRegForValue(I->getOperand(0)); if (Reg == 0) return false; updateValueMap(I, Reg); return true; } case Instruction::BitCast: { // Select SSE2/AVX bitcasts between 128/256 bit vector types. if (!Subtarget->hasSSE2()) return false; EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(DL, I->getType()); if (!SrcVT.isSimple() || !DstVT.isSimple()) return false; MVT SVT = SrcVT.getSimpleVT(); MVT DVT = DstVT.getSimpleVT(); if (!SVT.is128BitVector() && !(Subtarget->hasAVX() && SVT.is256BitVector()) && !(Subtarget->hasAVX512() && SVT.is512BitVector() && (Subtarget->hasBWI() || (SVT.getScalarSizeInBits() >= 32 && DVT.getScalarSizeInBits() >= 32)))) return false; unsigned Reg = getRegForValue(I->getOperand(0)); if (Reg == 0) return false; // No instruction is needed for conversion. Reuse the register used by // the fist operand. updateValueMap(I, Reg); return true; } } return false; } unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) { if (VT > MVT::i64) return 0; uint64_t Imm = CI->getZExtValue(); if (Imm == 0) { unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass); switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type"); case MVT::i1: case MVT::i8: return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true, X86::sub_8bit); case MVT::i16: return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true, X86::sub_16bit); case MVT::i32: return SrcReg; case MVT::i64: { unsigned ResultReg = createResultReg(&X86::GR64RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg) .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit); return ResultReg; } } } unsigned Opc = 0; switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type"); case MVT::i1: VT = MVT::i8; LLVM_FALLTHROUGH; case MVT::i8: Opc = X86::MOV8ri; break; case MVT::i16: Opc = X86::MOV16ri; break; case MVT::i32: Opc = X86::MOV32ri; break; case MVT::i64: { if (isUInt<32>(Imm)) Opc = X86::MOV32ri64; else if (isInt<32>(Imm)) Opc = X86::MOV64ri32; else Opc = X86::MOV64ri; break; } } return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm); } unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) { if (CFP->isNullValue()) return fastMaterializeFloatZero(CFP); // Can't handle alternate code models yet. CodeModel::Model CM = TM.getCodeModel(); if (CM != CodeModel::Small && CM != CodeModel::Large) return 0; // Get opcode and regclass of the output for the given load instruction. unsigned Opc = 0; const TargetRegisterClass *RC = nullptr; switch (VT.SimpleTy) { default: return 0; case MVT::f32: if (X86ScalarSSEf32) { Opc = Subtarget->hasAVX512() ? X86::VMOVSSZrm : Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm; RC = Subtarget->hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass; } else { Opc = X86::LD_Fp32m; RC = &X86::RFP32RegClass; } break; case MVT::f64: if (X86ScalarSSEf64) { Opc = Subtarget->hasAVX512() ? X86::VMOVSDZrm : Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm; RC = Subtarget->hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass; } else { Opc = X86::LD_Fp64m; RC = &X86::RFP64RegClass; } break; case MVT::f80: // No f80 support yet. return 0; } // MachineConstantPool wants an explicit alignment. 
unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); if (Align == 0) { // Alignment of vector types. FIXME! Align = DL.getTypeAllocSize(CFP->getType()); } // x86-32 PIC requires a PIC base register for constant pools. unsigned PICBase = 0; unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr); if (OpFlag == X86II::MO_PIC_BASE_OFFSET) PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); else if (OpFlag == X86II::MO_GOTOFF) PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small) PICBase = X86::RIP; // Create the load from the constant pool. unsigned CPI = MCP.getConstantPoolIndex(CFP, Align); unsigned ResultReg = createResultReg(RC); if (CM == CodeModel::Large) { unsigned AddrReg = createResultReg(&X86::GR64RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri), AddrReg) .addConstantPoolIndex(CPI, 0, OpFlag); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg); addDirectMem(MIB, AddrReg); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( MachinePointerInfo::getConstantPool(*FuncInfo.MF), MachineMemOperand::MOLoad, DL.getPointerSize(), Align); MIB->addMemOperand(*FuncInfo.MF, MMO); return ResultReg; } addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg), CPI, PICBase, OpFlag); return ResultReg; } unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) { // Can't handle alternate code models yet. if (TM.getCodeModel() != CodeModel::Small) return 0; // Materialize addresses with LEA/MOV instructions. X86AddressMode AM; if (X86SelectAddress(GV, AM)) { // If the expression is just a basereg, then we're done, otherwise we need // to emit an LEA. if (AM.BaseType == X86AddressMode::RegBase && AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr) return AM.Base.Reg; unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); if (TM.getRelocationModel() == Reloc::Static && TLI.getPointerTy(DL) == MVT::i64) { // The displacement code could be more than 32 bits away so we need to use // an instruction with a 64 bit immediate BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri), ResultReg) .addGlobalAddress(GV); } else { unsigned Opc = TLI.getPointerTy(DL) == MVT::i32 ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r) : X86::LEA64r; addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg), AM); } return ResultReg; } return 0; } unsigned X86FastISel::fastMaterializeConstant(const Constant *C) { EVT CEVT = TLI.getValueType(DL, C->getType(), true); // Only handle simple types. if (!CEVT.isSimple()) return 0; MVT VT = CEVT.getSimpleVT(); if (const auto *CI = dyn_cast<ConstantInt>(C)) return X86MaterializeInt(CI, VT); else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) return X86MaterializeFP(CFP, VT); else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) return X86MaterializeGV(GV, VT); return 0; } unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) { // Fail on dynamic allocas. At this point, getRegForValue has already // checked its CSE maps, so if we're here trying to handle a dynamic // alloca, we're not going to succeed. X86SelectAddress has a // check for dynamic allocas, because it's called directly from // various places, but targetMaterializeAlloca also needs a check // in order to avoid recursion between getRegForValue, // X86SelectAddrss, and targetMaterializeAlloca.
if (!FuncInfo.StaticAllocaMap.count(C)) return 0; assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?"); X86AddressMode AM; if (!X86SelectAddress(C, AM)) return 0; unsigned Opc = TLI.getPointerTy(DL) == MVT::i32 ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r) : X86::LEA64r; const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); unsigned ResultReg = createResultReg(RC); addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg), AM); return ResultReg; } unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) { MVT VT; if (!isTypeLegal(CF->getType(), VT)) return 0; // Get opcode and regclass for the given zero. bool HasAVX512 = Subtarget->hasAVX512(); unsigned Opc = 0; const TargetRegisterClass *RC = nullptr; switch (VT.SimpleTy) { default: return 0; case MVT::f32: if (X86ScalarSSEf32) { Opc = HasAVX512 ? X86::AVX512_FsFLD0SS : X86::FsFLD0SS; RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass; } else { Opc = X86::LD_Fp032; RC = &X86::RFP32RegClass; } break; case MVT::f64: if (X86ScalarSSEf64) { Opc = HasAVX512 ? X86::AVX512_FsFLD0SD : X86::FsFLD0SD; RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass; } else { Opc = X86::LD_Fp064; RC = &X86::RFP64RegClass; } break; case MVT::f80: // No f80 support yet. return 0; } unsigned ResultReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg); return ResultReg; } bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, const LoadInst *LI) { const Value *Ptr = LI->getPointerOperand(); X86AddressMode AM; if (!X86SelectAddress(Ptr, AM)) return false; const X86InstrInfo &XII = (const X86InstrInfo &)TII; unsigned Size = DL.getTypeAllocSize(LI->getType()); unsigned Alignment = LI->getAlignment(); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = DL.getABITypeAlignment(LI->getType()); SmallVector AddrOps; AM.getFullAddress(AddrOps); MachineInstr *Result = XII.foldMemoryOperandImpl( *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment, /*AllowCommute=*/true); if (!Result) return false; // The index register could be in the wrong register class. Unfortunately, // foldMemoryOperandImpl could have commuted the instruction so its not enough // to just look at OpNo + the offset to the index reg. We actually need to // scan the instruction to find the index reg and see if its the correct reg // class. unsigned OperandNo = 0; for (MachineInstr::mop_iterator I = Result->operands_begin(), E = Result->operands_end(); I != E; ++I, ++OperandNo) { MachineOperand &MO = *I; if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg) continue; // Found the index reg, now try to rewrite it. 
unsigned IndexReg = constrainOperandRegClass(Result->getDesc(), MO.getReg(), OperandNo); if (IndexReg == MO.getReg()) continue; MO.setReg(IndexReg); } Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI)); MachineBasicBlock::iterator I(MI); removeDeadCode(I, std::next(I)); return true; } unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, unsigned Op2, bool Op2IsKill, unsigned Op3, bool Op3IsKill) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2); Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, getKillRegState(Op0IsKill)) .addReg(Op1, getKillRegState(Op1IsKill)) .addReg(Op2, getKillRegState(Op2IsKill)) .addReg(Op3, getKillRegState(Op3IsKill)); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addReg(Op0, getKillRegState(Op0IsKill)) .addReg(Op1, getKillRegState(Op1IsKill)) .addReg(Op2, getKillRegState(Op2IsKill)) .addReg(Op3, getKillRegState(Op3IsKill)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } namespace llvm { FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) { return new X86FastISel(funcInfo, libInfo); } } Index: vendor/llvm/dist-release_80/lib/Target/X86/X86TargetMachine.cpp =================================================================== --- vendor/llvm/dist-release_80/lib/Target/X86/X86TargetMachine.cpp (revision 348931) +++ vendor/llvm/dist-release_80/lib/Target/X86/X86TargetMachine.cpp (revision 348932) @@ -1,517 +1,521 @@ //===-- X86TargetMachine.cpp - Define TargetMachine for the X86 -----------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines the X86 specific subclass of TargetMachine. 
// //===----------------------------------------------------------------------===// #include "X86TargetMachine.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "X86.h" #include "X86CallLowering.h" #include "X86LegalizerInfo.h" #include "X86MacroFusion.h" #include "X86Subtarget.h" #include "X86TargetObjectFile.h" #include "X86TargetTransformInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/ExecutionDomainFix.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include <memory> #include <string> using namespace llvm; static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner", cl::desc("Enable the machine combiner pass"), cl::init(true), cl::Hidden); static cl::opt<bool> EnableCondBrFoldingPass("x86-condbr-folding", cl::desc("Enable the conditional branch " "folding pass"), cl::init(false), cl::Hidden); extern "C" void LLVMInitializeX86Target() { // Register the target. RegisterTargetMachine<X86TargetMachine> X(getTheX86_32Target()); RegisterTargetMachine<X86TargetMachine> Y(getTheX86_64Target()); PassRegistry &PR = *PassRegistry::getPassRegistry(); initializeGlobalISel(PR); initializeWinEHStatePassPass(PR); initializeFixupBWInstPassPass(PR); initializeEvexToVexInstPassPass(PR); initializeFixupLEAPassPass(PR); initializeShadowCallStackPass(PR); initializeX86CallFrameOptimizationPass(PR); initializeX86CmovConverterPassPass(PR); initializeX86ExecutionDomainFixPass(PR); initializeX86DomainReassignmentPass(PR); initializeX86AvoidSFBPassPass(PR); initializeX86SpeculativeLoadHardeningPassPass(PR); initializeX86FlagsCopyLoweringPassPass(PR); initializeX86CondBrFoldingPassPass(PR); } static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { if (TT.isOSBinFormatMachO()) { if (TT.getArch() == Triple::x86_64) return llvm::make_unique<X86_64MachoTargetObjectFile>(); return llvm::make_unique<TargetLoweringObjectFileMachO>(); } if (TT.isOSFreeBSD()) return llvm::make_unique<X86FreeBSDTargetObjectFile>(); if (TT.isOSLinux() || TT.isOSNaCl() || TT.isOSIAMCU()) return llvm::make_unique<X86LinuxNaClTargetObjectFile>(); if (TT.isOSSolaris()) return llvm::make_unique<X86SolarisTargetObjectFile>(); if (TT.isOSFuchsia()) return llvm::make_unique<X86FuchsiaTargetObjectFile>(); if (TT.isOSBinFormatELF()) return llvm::make_unique<X86ELFTargetObjectFile>(); if (TT.isOSBinFormatCOFF()) return llvm::make_unique<TargetLoweringObjectFileCOFF>(); llvm_unreachable("unknown subtarget type"); } static std::string computeDataLayout(const Triple &TT) { // X86 is little endian std::string Ret = "e"; Ret += DataLayout::getManglingComponent(TT); // X86 and x32 have 32 bit pointers. if ((TT.isArch64Bit() && (TT.getEnvironment() == Triple::GNUX32 || TT.isOSNaCl())) || !TT.isArch64Bit()) Ret += "-p:32:32"; // Some ABIs align 64 bit integers and doubles to 64 bits, others to 32.
if (TT.isArch64Bit() || TT.isOSWindows() || TT.isOSNaCl()) Ret += "-i64:64"; else if (TT.isOSIAMCU()) Ret += "-i64:32-f64:32"; else Ret += "-f64:32:64"; // Some ABIs align long double to 128 bits, others to 32. if (TT.isOSNaCl() || TT.isOSIAMCU()) ; // No f80 else if (TT.isArch64Bit() || TT.isOSDarwin()) Ret += "-f80:128"; else Ret += "-f80:32"; if (TT.isOSIAMCU()) Ret += "-f128:32"; // The registers can hold 8, 16, 32 or, in x86-64, 64 bits. if (TT.isArch64Bit()) Ret += "-n8:16:32:64"; else Ret += "-n8:16:32"; // The stack is aligned to 32 bits on some ABIs and 128 bits on others. if ((!TT.isArch64Bit() && TT.isOSWindows()) || TT.isOSIAMCU()) Ret += "-a:0:32-S32"; else Ret += "-S128"; return Ret; } static Reloc::Model getEffectiveRelocModel(const Triple &TT, bool JIT, Optional<Reloc::Model> RM) { bool is64Bit = TT.getArch() == Triple::x86_64; if (!RM.hasValue()) { // JIT codegen should use static relocations by default, since it's // typically executed in process and not relocatable. if (JIT) return Reloc::Static; // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode. // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we // use static relocation model by default. if (TT.isOSDarwin()) { if (is64Bit) return Reloc::PIC_; return Reloc::DynamicNoPIC; } if (TT.isOSWindows() && is64Bit) return Reloc::PIC_; return Reloc::Static; } // ELF and X86-64 don't have a distinct DynamicNoPIC model. DynamicNoPIC // is defined as a model for code which may be used in static or dynamic // executables but not necessarily a shared library. On X86-32 we just // compile in -static mode, in x86-64 we use PIC. if (*RM == Reloc::DynamicNoPIC) { if (is64Bit) return Reloc::PIC_; if (!TT.isOSDarwin()) return Reloc::Static; } // If we are on Darwin, disallow static relocation model in X86-64 mode, since // the Mach-O file format doesn't support it. if (*RM == Reloc::Static && TT.isOSDarwin() && is64Bit) return Reloc::PIC_; return *RM; } static CodeModel::Model getEffectiveX86CodeModel(Optional<CodeModel::Model> CM, bool JIT, bool Is64Bit) { if (CM) { if (*CM == CodeModel::Tiny) report_fatal_error("Target does not support the tiny CodeModel"); return *CM; } if (JIT) return Is64Bit ? CodeModel::Large : CodeModel::Small; return CodeModel::Small; } /// Create an X86 target. /// X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT) : LLVMTargetMachine( T, computeDataLayout(TT), TT, CPU, FS, Options, getEffectiveRelocModel(TT, JIT, RM), getEffectiveX86CodeModel(CM, JIT, TT.getArch() == Triple::x86_64), OL), TLOF(createTLOF(getTargetTriple())) { // Windows stack unwinder gets confused when execution flow "falls through" // after a call to 'noreturn' function. // To prevent that, we emit a trap for 'unreachable' IR instructions. // (which on X86, happens to be the 'ud2' instruction) // On PS4, the "return address" of a 'noreturn' call must still be within // the calling function, and TrapUnreachable is an easy way to get that. // The check here for 64-bit windows is a bit icky, but as we're unlikely // to ever want to mix 32 and 64-bit windows code in a single module // this should be fine. if ((TT.isOSWindows() && TT.getArch() == Triple::x86_64) || TT.isPS4() || TT.isOSBinFormatMachO()) { this->Options.TrapUnreachable = true; this->Options.NoTrapAfterNoreturn = TT.isOSBinFormatMachO(); } // Outlining is available for x86-64.
if (TT.getArch() == Triple::x86_64) setMachineOutliner(true); initAsmInfo(); } X86TargetMachine::~X86TargetMachine() = default; const X86Subtarget * X86TargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); Attribute FSAttr = F.getFnAttribute("target-features"); StringRef CPU = !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : (StringRef)TargetCPU; StringRef FS = !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : (StringRef)TargetFS; SmallString<512> Key; Key.reserve(CPU.size() + FS.size()); Key += CPU; Key += FS; // FIXME: This is related to the code below to reset the target options, // we need to know whether or not the soft float flag is set on the // function before we can generate a subtarget. We also need to use // it as a key for the subtarget since that can be the only difference // between two functions. bool SoftFloat = F.getFnAttribute("use-soft-float").getValueAsString() == "true"; // If the soft float attribute is set on the function turn on the soft float // subtarget feature. if (SoftFloat) Key += FS.empty() ? "+soft-float" : ",+soft-float"; // Keep track of the key width after all features are added so we can extract // the feature string out later. unsigned CPUFSWidth = Key.size(); // Extract prefer-vector-width attribute. unsigned PreferVectorWidthOverride = 0; if (F.hasFnAttribute("prefer-vector-width")) { StringRef Val = F.getFnAttribute("prefer-vector-width").getValueAsString(); unsigned Width; if (!Val.getAsInteger(0, Width)) { Key += ",prefer-vector-width="; Key += Val; PreferVectorWidthOverride = Width; } } // Extract min-legal-vector-width attribute. unsigned RequiredVectorWidth = UINT32_MAX; if (F.hasFnAttribute("min-legal-vector-width")) { StringRef Val = F.getFnAttribute("min-legal-vector-width").getValueAsString(); unsigned Width; if (!Val.getAsInteger(0, Width)) { Key += ",min-legal-vector-width="; Key += Val; RequiredVectorWidth = Width; } } // Extracted here so that we make sure there is backing for the StringRef. If // we assigned earlier, its possible the SmallString reallocated leaving a // dangling StringRef. FS = Key.slice(CPU.size(), CPUFSWidth); auto &I = SubtargetMap[Key]; if (!I) { // This needs to be done before we create a new subtarget since any // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); I = llvm::make_unique(TargetTriple, CPU, FS, *this, Options.StackAlignmentOverride, PreferVectorWidthOverride, RequiredVectorWidth); } return I.get(); } //===----------------------------------------------------------------------===// // Command line options for x86 //===----------------------------------------------------------------------===// static cl::opt UseVZeroUpper("x86-use-vzeroupper", cl::Hidden, cl::desc("Minimize AVX to SSE transition penalty"), cl::init(true)); //===----------------------------------------------------------------------===// // X86 TTI query. //===----------------------------------------------------------------------===// TargetTransformInfo X86TargetMachine::getTargetTransformInfo(const Function &F) { return TargetTransformInfo(X86TTIImpl(this, F)); } //===----------------------------------------------------------------------===// // Pass Pipeline Configuration //===----------------------------------------------------------------------===// namespace { /// X86 Code Generator Pass Configuration Options. 
class X86PassConfig : public TargetPassConfig { public: X86PassConfig(X86TargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} X86TargetMachine &getX86TargetMachine() const { return getTM(); } ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override { ScheduleDAGMILive *DAG = createGenericSchedLive(C); DAG->addMutation(createX86MacroFusionDAGMutation()); return DAG; } void addIRPasses() override; bool addInstSelector() override; bool addIRTranslator() override; bool addLegalizeMachineIR() override; bool addRegBankSelect() override; bool addGlobalInstructionSelect() override; bool addILPOpts() override; bool addPreISel() override; void addMachineSSAOptimization() override; void addPreRegAlloc() override; void addPostRegAlloc() override; void addPreEmitPass() override; void addPreEmitPass2() override; void addPreSched2() override; }; class X86ExecutionDomainFix : public ExecutionDomainFix { public: static char ID; X86ExecutionDomainFix() : ExecutionDomainFix(ID, X86::VR128XRegClass) {} StringRef getPassName() const override { return "X86 Execution Dependency Fix"; } }; char X86ExecutionDomainFix::ID; } // end anonymous namespace INITIALIZE_PASS_BEGIN(X86ExecutionDomainFix, "x86-execution-domain-fix", "X86 Execution Domain Fix", false, false) INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis) INITIALIZE_PASS_END(X86ExecutionDomainFix, "x86-execution-domain-fix", "X86 Execution Domain Fix", false, false) TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) { return new X86PassConfig(*this, PM); } void X86PassConfig::addIRPasses() { addPass(createAtomicExpandPass()); TargetPassConfig::addIRPasses(); if (TM->getOptLevel() != CodeGenOpt::None) addPass(createInterleavedAccessPass()); // Add passes that handle indirect branch removal and insertion of a retpoline // thunk. These will be a no-op unless a function subtarget has the retpoline // feature enabled. addPass(createIndirectBrExpandPass()); } bool X86PassConfig::addInstSelector() { // Install an instruction selector. addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel())); // For ELF, cleanup any local-dynamic TLS accesses. if (TM->getTargetTriple().isOSBinFormatELF() && getOptLevel() != CodeGenOpt::None) addPass(createCleanupLocalDynamicTLSPass()); addPass(createX86GlobalBaseRegPass()); return false; } bool X86PassConfig::addIRTranslator() { addPass(new IRTranslator()); return false; } bool X86PassConfig::addLegalizeMachineIR() { addPass(new Legalizer()); return false; } bool X86PassConfig::addRegBankSelect() { addPass(new RegBankSelect()); return false; } bool X86PassConfig::addGlobalInstructionSelect() { addPass(new InstructionSelect()); return false; } bool X86PassConfig::addILPOpts() { if (EnableCondBrFoldingPass) addPass(createX86CondBrFolding()); addPass(&EarlyIfConverterID); if (EnableMachineCombinerPass) addPass(&MachineCombinerID); addPass(createX86CmovConverterPass()); return true; } bool X86PassConfig::addPreISel() { // Only add this pass for 32-bit x86 Windows. 
const Triple &TT = TM->getTargetTriple(); if (TT.isOSWindows() && TT.getArch() == Triple::x86) addPass(createX86WinEHStatePass()); return true; } void X86PassConfig::addPreRegAlloc() { if (getOptLevel() != CodeGenOpt::None) { addPass(&LiveRangeShrinkID); addPass(createX86FixupSetCC()); addPass(createX86OptimizeLEAs()); addPass(createX86CallFrameOptimization()); addPass(createX86AvoidStoreForwardingBlocks()); } addPass(createX86SpeculativeLoadHardeningPass()); addPass(createX86FlagsCopyLoweringPass()); addPass(createX86WinAllocaExpander()); } void X86PassConfig::addMachineSSAOptimization() { addPass(createX86DomainReassignmentPass()); TargetPassConfig::addMachineSSAOptimization(); } void X86PassConfig::addPostRegAlloc() { addPass(createX86FloatingPointStackifierPass()); } void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); } void X86PassConfig::addPreEmitPass() { if (getOptLevel() != CodeGenOpt::None) { addPass(new X86ExecutionDomainFix()); addPass(createBreakFalseDeps()); } addPass(createShadowCallStackPass()); addPass(createX86IndirectBranchTrackingPass()); if (UseVZeroUpper) addPass(createX86IssueVZeroUpperPass()); if (getOptLevel() != CodeGenOpt::None) { addPass(createX86FixupBWInsts()); addPass(createX86PadShortFunctions()); addPass(createX86FixupLEAs()); addPass(createX86EvexToVexInsts()); } addPass(createX86DiscriminateMemOpsPass()); addPass(createX86InsertPrefetchPass()); } void X86PassConfig::addPreEmitPass2() { addPass(createX86RetpolineThunksPass()); // Verify basic block incoming and outgoing cfa offset and register values and // correct CFA calculation rule where needed by inserting appropriate CFI // instructions. const Triple &TT = TM->getTargetTriple(); - if (!TT.isOSDarwin() && !TT.isOSWindows()) + const MCAsmInfo *MAI = TM->getMCAsmInfo(); + if (!TT.isOSDarwin() && + (!TT.isOSWindows() || + MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI)) addPass(createCFIInstrInserter()); } Index: vendor/llvm/dist-release_80/test/CodeGen/AMDGPU/add.ll =================================================================== --- vendor/llvm/dist-release_80/test/CodeGen/AMDGPU/add.ll (revision 348931) +++ vendor/llvm/dist-release_80/test/CodeGen/AMDGPU/add.ll (revision 348932) @@ -1,223 +1,184 @@ ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}s_add_i32: -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - ; GCN: s_add_i32 s[[REG:[0-9]+]], {{s[0-9]+, s[0-9]+}} ; GCN: v_mov_b32_e32 v[[V_REG:[0-9]+]], s[[REG]] ; GCN: buffer_store_dword v[[V_REG]], define amdgpu_kernel void @s_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 %a = load i32, i32 addrspace(1)* %in %b = load i32, i32 addrspace(1)* %b_ptr %result = add i32 %a, %b store i32 %result, i32 addrspace(1)* %out ret void } ; FUNC-LABEL: {{^}}s_add_v2i32: -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - ; 
GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} ; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} define amdgpu_kernel void @s_add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 %a = load <2 x i32>, <2 x i32> addrspace(1)* %in %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr %result = add <2 x i32> %a, %b store <2 x i32> %result, <2 x i32> addrspace(1)* %out ret void } ; FUNC-LABEL: {{^}}s_add_v4i32: -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} - ; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} ; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} ; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} ; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}} define amdgpu_kernel void @s_add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 %a = load <4 x i32>, <4 x i32> addrspace(1)* %in %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr %result = add <4 x i32> %a, %b store <4 x i32> %result, <4 x i32> addrspace(1)* %out ret void } ; FUNC-LABEL: {{^}}s_add_v8i32: -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT - ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 define amdgpu_kernel void @s_add_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) { entry: %0 = add <8 x i32> %a, %b store <8 x i32> %0, <8 x i32> addrspace(1)* %out ret void } ; FUNC-LABEL: {{^}}s_add_v16i32: -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT -; EG: ADD_INT - ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 ; GCN: s_add_i32 define amdgpu_kernel void @s_add_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) { entry: %0 = add <16 x i32> %a, %b store <16 x i32> %0, <16 x i32> addrspace(1)* %out ret void } ; FUNC-LABEL: {{^}}v_add_i32: ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, [[A]], [[B]] ; GFX9: v_add_u32_e32 v{{[0-9]+}}, [[A]], [[B]] define amdgpu_kernel void @v_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1 %a = load volatile i32, i32 addrspace(1)* %gep %b = load volatile i32, i32 addrspace(1)* %b_ptr %result = add i32 %a, %b store i32 %result, i32 addrspace(1)* %out ret void } ; FUNC-LABEL: {{^}}v_add_imm_i32: ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] ; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, 0x7b, [[A]] ; GFX9: v_add_u32_e32 v{{[0-9]+}}, 0x7b, [[A]] define amdgpu_kernel void @v_add_imm_i32(i32 
addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1 %a = load volatile i32, i32 addrspace(1)* %gep %result = add i32 %a, 123 store i32 %result, i32 addrspace(1)* %out ret void } ; FUNC-LABEL: {{^}}add64: ; GCN: s_add_u32 ; GCN: s_addc_u32 - -; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]] -; EG-DAG: ADD_INT {{[* ]*}} -; EG-DAG: ADDC_UINT -; EG-DAG: ADD_INT -; EG-DAG: ADD_INT {{[* ]*}} -; EG-NOT: SUB define amdgpu_kernel void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) { entry: %add = add i64 %a, %b store i64 %add, i64 addrspace(1)* %out ret void } ; The v_addc_u32 and v_add_i32 instruction can't read SGPRs, because they ; use VCC. The test is designed so that %a will be stored in an SGPR and ; %0 will be stored in a VGPR, so the comiler will be forced to copy %a ; to a VGPR before doing the add. ; FUNC-LABEL: {{^}}add64_sgpr_vgpr: ; GCN-NOT: v_addc_u32_e32 s - -; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]] -; EG-DAG: ADD_INT {{[* ]*}} -; EG-DAG: ADDC_UINT -; EG-DAG: ADD_INT -; EG-DAG: ADD_INT {{[* ]*}} -; EG-NOT: SUB define amdgpu_kernel void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) { entry: %0 = load i64, i64 addrspace(1)* %in %1 = add i64 %a, %0 store i64 %1, i64 addrspace(1)* %out ret void } ; Test i64 add inside a branch. ; FUNC-LABEL: {{^}}add64_in_branch: ; GCN: s_add_u32 ; GCN: s_addc_u32 - -; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]] -; EG-DAG: ADD_INT {{[* ]*}} -; EG-DAG: ADDC_UINT -; EG-DAG: ADD_INT -; EG-DAG: ADD_INT {{[* ]*}} -; EG-NOT: SUB define amdgpu_kernel void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) { entry: %0 = icmp eq i64 %a, 0 br i1 %0, label %if, label %else if: %1 = load i64, i64 addrspace(1)* %in br label %endif else: %2 = add i64 %a, %b br label %endif endif: %3 = phi i64 [%1, %if], [%2, %else] store i64 %3, i64 addrspace(1)* %out ret void } -declare i32 @llvm.r600.read.tidig.x() #1 +; Make sure the VOP3 form of add is initially selected. 
Otherwise pair +; of opies from/to VCC would be necessary + +; GCN-LABEL: {{^}}add_select_vop3: +; SI: v_add_i32_e64 v0, s[0:1], s0, v0 +; VI: v_add_u32_e64 v0, s[0:1], s0, v0 +; GFX9: v_add_u32_e32 v0, s0, v0 + +; GCN: ; def vcc +; GCN: ds_write_b32 +; GCN: ; use vcc +define amdgpu_ps void @add_select_vop3(i32 inreg %s, i32 %v) { + %vcc = call i64 asm sideeffect "; def vcc", "={vcc}"() + %sub = add i32 %v, %s + store i32 %sub, i32 addrspace(3)* undef + call void asm sideeffect "; use vcc", "{vcc}"(i64 %vcc) + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #1 attributes #0 = { nounwind } attributes #1 = { nounwind readnone speculatable } Index: vendor/llvm/dist-release_80/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll =================================================================== --- vendor/llvm/dist-release_80/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll (revision 348931) +++ vendor/llvm/dist-release_80/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll (revision 348932) @@ -1,70 +1,72 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI --check-prefix=CHECK %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s ; RUN: llc -march=amdgcn -verify-machineinstrs -mattr=+load-store-opt,+unsafe-ds-offset-folding < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s declare i32 @llvm.amdgcn.workitem.id.x() #0 declare void @llvm.amdgcn.s.barrier() #1 ; Function Attrs: nounwind ; CHECK-LABEL: {{^}}signed_ds_offset_addressing_loop: +; SI: s_movk_i32 [[K_0X88:s[0-9]+]], 0x +; SI: s_movk_i32 [[K_0X100:s[0-9]+]], 0x100 ; CHECK: BB0_1: ; CHECK: v_add_i32_e32 [[VADDR:v[0-9]+]], ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] ; SI-DAG: v_add_i32_e32 [[VADDR8:v[0-9]+]], vcc, 8, [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR8]] ; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], vcc, 0x80, [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x80]] -; SI-DAG: v_add_i32_e32 [[VADDR0x88:v[0-9]+]], vcc, 0x88, [[VADDR]] +; SI-DAG: v_add_i32_e32 [[VADDR0x88:v[0-9]+]], vcc, [[K_0X88]], [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x88]] -; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], vcc, 0x100, [[VADDR]] +; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], vcc, [[K_0X100]], [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]] ; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:2 ; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:34 ; CI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] offset:256 ; CHECK: s_endpgm define amdgpu_kernel void @signed_ds_offset_addressing_loop(float addrspace(1)* noalias nocapture %out, float addrspace(3)* noalias nocapture readonly %lptr, i32 %n) #2 { entry: %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %mul = shl nsw i32 %x.i, 1 br label %for.body for.body: ; preds = %for.body, %entry %sum.03 = phi float [ 0.000000e+00, %entry ], [ %add13, %for.body ] %offset.02 = phi i32 [ %mul, %entry ], [ %add14, %for.body ] %k.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ] tail call void @llvm.amdgcn.s.barrier() #1 %arrayidx = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %offset.02 %tmp = load float, float addrspace(3)* %arrayidx, align 4 %add1 = add nsw i32 %offset.02, 2 %arrayidx2 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add1 %tmp1 = load float, float addrspace(3)* %arrayidx2, align 4 %add3 = add nsw i32 
%offset.02, 32 %arrayidx4 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add3 %tmp2 = load float, float addrspace(3)* %arrayidx4, align 4 %add5 = add nsw i32 %offset.02, 34 %arrayidx6 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add5 %tmp3 = load float, float addrspace(3)* %arrayidx6, align 4 %add7 = add nsw i32 %offset.02, 64 %arrayidx8 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add7 %tmp4 = load float, float addrspace(3)* %arrayidx8, align 4 %add9 = fadd float %tmp, %tmp1 %add10 = fadd float %add9, %tmp2 %add11 = fadd float %add10, %tmp3 %add12 = fadd float %add11, %tmp4 %add13 = fadd float %sum.03, %add12 %inc = add nsw i32 %k.01, 1 %add14 = add nsw i32 %offset.02, 97 %exitcond = icmp eq i32 %inc, 8 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body %tmp5 = sext i32 %x.i to i64 %arrayidx15 = getelementptr inbounds float, float addrspace(1)* %out, i64 %tmp5 store float %add13, float addrspace(1)* %arrayidx15, align 4 ret void } attributes #0 = { nounwind readnone } attributes #1 = { convergent nounwind } attributes #2 = { nounwind } Index: vendor/llvm/dist-release_80/test/CodeGen/AMDGPU/fence-barrier.ll =================================================================== --- vendor/llvm/dist-release_80/test/CodeGen/AMDGPU/fence-barrier.ll (revision 348931) +++ vendor/llvm/dist-release_80/test/CodeGen/AMDGPU/fence-barrier.ll (revision 348932) @@ -1,198 +1,199 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s ; RUN: llvm-as -data-layout=A5 < %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=GCN %s declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() declare i32 @llvm.amdgcn.workitem.id.x() declare i32 @llvm.amdgcn.workgroup.id.x() declare void @llvm.amdgcn.s.barrier() @test_local.temp = internal addrspace(3) global [1 x i32] undef, align 4 @test_global_local.temp = internal addrspace(3) global [1 x i32] undef, align 4 ; GCN-LABEL: {{^}}test_local ; GCN: v_mov_b32_e32 v[[VAL:[0-9]+]], 0x777 ; GCN: ds_write_b32 v{{[0-9]+}}, v[[VAL]] ; GCN: s_waitcnt lgkmcnt(0){{$}} ; GCN-NEXT: s_barrier ; GCN: flat_store_dword define amdgpu_kernel void @test_local(i32 addrspace(1)*) { %2 = alloca i32 addrspace(1)*, align 4, addrspace(5) store i32 addrspace(1)* %0, i32 addrspace(1)* addrspace(5)* %2, align 4 %3 = call i32 @llvm.amdgcn.workitem.id.x() %4 = zext i32 %3 to i64 %5 = icmp eq i64 %4, 0 br i1 %5, label %6, label %7 ;